Beispiel #1
0
    stockList = stockdf[['ts_code', 'companytype']].to_numpy()

    # stockList = [['SZ000002', 4]]

    # add the base info into DB
    for item in stockList:
        code = item[0]
        companyType = item[1]
        # need to process companyType 1-3
        if companyType != 4:
            continue

        FileLogger.info("running on code: %s" % code)
        try:
            path = "C:/project/stockdata/EastMoneyIncome/%s.json" % code
            jsonObjects = getJsonFromFile(path)

            # add the base info into DB
            for jsonObject in jsonObjects:
                parseIncomeBasicObject(jsonObject)

            # add other info into DB
            jsonSql = {}
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_OPERATE_INCOME', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'OPERATE_INCOME', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_OPERATE_COST', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'OPERATE_COST', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'RESEARCH_EXPENSE', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'OPERATE_TAX_ADD', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'SALE_EXPENSE', 'double')
            gatherColumnInfo(jsonSql, jsonObjects, 'MANAGE_EXPENSE', 'double')
Beispiel #2
0
                jsonContent = json.loads(r.content)
                announcements = jsonContent["announcements"]
                if announcements is not None and len(announcements) > 0:
                    records.extend(announcements)
                FileLogger.info("get records on pageNum: %d" % pageNum)

        FileLogger.info("get %d records on code: %s" % (len(records), code))

    if len(records) != 0:
        content = json.dumps(records)
        path = "C:/project/stockdata/StockNotices/%s.json" % code
        write2File(path, content)


if __name__ == "__main__":
    stockList = getJsonFromFile("C:/project/stockdata/StockNotices/stock.json")
    stockList = stockList["stockList"]

    # stockList = [{"orgId":"9900002701","category":"A股","code":"002127","pinyin":"njds","zwjc":"南极电商"}]

    for stock in stockList:
        FileLogger.info("running on stock: %s(%s)" %
                        (stock["zwjc"], stock["code"]))
        filePath = "C:/project/stockdata/StockNotices/%s.json" % stock['code']
        if (os.path.exists(filePath)):
            continue

        try:
            crawlStockNotices(stock["code"], stock["orgId"])
            time.sleep(1)
def retrieveAnualQuarterlyReport():
    """Collect periodic-report notices per stock and append them to MySQL.

    For every stock in the working list, the saved notice file
    ``C:/project/stockdata/StockNotices/<code>.json`` is read. Notices whose
    ``announcementType`` contains one of the periodic-report codes (annual,
    third-quarter, half-year, first-quarter) and whose attachment type is PDF
    are turned into rows, and the rows are appended to the ``reportbasic``
    table of the ``eastmoney`` database.

    A failure while processing one stock is logged, followed by a 3 second
    pause; the remaining stocks are still processed. Nothing is returned.
    """
    stockList = getJsonFromFile("C:/project/stockdata/StockNotices/stock.json")
    stockList = stockList["stockList"]

    # NOTE(review): this override discards the list loaded above and restricts
    # the run to a single stock. It looks like a debugging leftover; kept so
    # the function's current behavior does not change -- confirm and remove.
    stockList = [{"orgId":"9900002701","category":"A股","code":"002127","pinyin":"njds","zwjc":"南极电商"}]
    # stockList = [{"orgId":"gssz0000002","category":"A股","code":"000002","pinyin":"njds","zwjc":"万科A"}]

    # Announcement type code -> report label, checked in this order (the first
    # code found inside announcementType wins).
    # Codes: 01030101 = annual report (full text), 01030701 = third-quarter
    # report, 01030301 = half-year report, 01030501 = first-quarter report
    # (full text).
    noticeTypeByCode = (
        ("01030101", "年报"),
        ("01030701", "三季度报"),
        ("01030301", "半年报"),
        ("01030501", "一季度报"),
    )
    pdfFileTypes = ("PDF", "PDF ", "pdf")
    columns = ['code', 'name', 'announcementId', 'title', 'noticeDay',
               'fileType', 'url', 'Type', 'year']

    # Created lazily on first use and then shared by all stocks.
    engine = None

    for stock in stockList:
        FileLogger.info("running on stock: %s(%s)" % (stock["zwjc"], stock["code"]))

        try:
            filePath = "C:/project/stockdata/StockNotices/%s.json" % stock['code']
            jsonList = getJsonFromFile(filePath)

            rows = []
            for jsonObj in jsonList:
                announcementType = jsonObj['announcementType']
                fileType = jsonObj['adjunctType']

                # Work out which periodic report (if any) this notice is.
                noticeType = next(
                    (label for typeCode, label in noticeTypeByCode
                     if typeCode in announcementType),
                    None,
                )
                if noticeType is None or fileType not in pdfFileTypes:
                    continue

                FileLogger.info("downloading file: %s" % jsonObj["announcementTitle"])
                # Presumably adjunctUrl looks like "finalpage/YYYY-MM-DD/...",
                # so characters 10..19 are the notice date -- TODO confirm.
                noticeDay = jsonObj['adjunctUrl'][10:20]
                url = "http://www.cninfo.com.cn/new/announcement/download?bulletinId=%s&announceTime=%s" % (jsonObj['announcementId'], noticeDay)

                noticeYear = int(noticeDay[0:4])
                rows.append({
                    'code': jsonObj['secCode'],
                    'name': jsonObj['secName'],
                    'announcementId': jsonObj['announcementId'],
                    'title': jsonObj['announcementTitle'],
                    'noticeDay': noticeDay,
                    'fileType': fileType,
                    'url': url,
                    'Type': noticeType,
                    # An annual report is attributed to the year before the
                    # one it was published in.
                    'year': noticeYear - 1 if noticeType == "年报" else noticeYear,
                })

            # Zero-second sleep kept from the original code.
            time.sleep(0)

            if not rows:
                # Nothing matched: there is no frame to write, so skip the DB
                # step instead of failing on a missing DataFrame.
                FileLogger.info("no periodic report notices on code: %s" % stock["code"])
                continue

            # Build the frame once from all rows.
            annualDf = pd.DataFrame(rows, columns=columns)

            # save to DB
            if engine is None:
                from sqlalchemy import create_engine
                # NOTE(review): credentials are hard-coded in the connection
                # URL; consider loading them from configuration instead.
                engine = create_engine("mysql+pymysql://root:4401821211@localhost:3306/eastmoney?charset=utf8")
            annualDf.to_sql(name="reportbasic", con=engine, if_exists="append")

        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("retrieve error on code: %s" % stock["code"])
            time.sleep(3)