def _queryFromDB(sql):
    try:
        result = pd.read_sql_query(sql, con=ENGINE)
        return result
    except Exception as ex:
        FileLogger.error(ex)
        FileLogger.error("read from db error!")
        return None

def readFile(filePath, encoding='utf8'):
    try:
        # use a context manager so the file is closed even if reading fails
        with open(filePath, mode='r', encoding=encoding) as fp:
            return fp.read()
    except Exception as ex:
        FileLogger.error(ex)
        return False

def _refineColumns(datadf, columns=None):
    # avoid a mutable default argument; build the column list per call
    if columns is None:
        columns = []
    if datadf.index.name != 'REPORT_DATE':
        FileLogger.error("REPORT_DATE must be the index!")
    elif len(columns) == 0:
        for col in datadf.columns:
            if col not in TEXTCOLUMNS:
                columns.append(col)
    return columns

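# Illustrative usage sketch (assumes the DataFrame is indexed by REPORT_DATE and that
# TEXTCOLUMNS is the module-level list of non-numeric columns to skip; the SQL string
# below is an example, not taken from the original code):
#
#   datadf = _queryFromDB("select * from balance where SECURITY_CODE = '600176'")
#   datadf = datadf.set_index('REPORT_DATE', drop=False)
#   numericColumns = _refineColumns(datadf)   # every column not listed in TEXTCOLUMNS
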
def downloadFile(url, filePath):
    try:
        r = requests.get(url)
        with open(filePath, "wb") as fp:
            fp.write(r.content)
        return True
    except Exception as ex:
        FileLogger.error("downloadfile error on path: %s" % url)
        FileLogger.error(ex)
        return False

def readFile(filePath):
    try:
        with open(filePath, 'r') as fp:
            return fp.read()
    except Exception as ex:
        FileLogger.error("read file error on path: %s" % filePath)
        FileLogger.error(ex)
        return False

def write2File(filePath, content, mode="w+") -> bool:
    try:
        with open(filePath, mode) as fp:
            fp.write(content)
            fp.flush()
        return True
    except Exception as ex:
        FileLogger.error("write to file error on path: %s" % filePath)
        FileLogger.error(ex)
        return False

def queryFromDB(sql):
    try:
        engine = create_engine(
            "mysql+pymysql://root:4401821211@localhost:3306/stock?charset=utf8"
        )
        result = pd.read_sql_query(sql, con=engine)
        return result
    except Exception as ex:
        FileLogger.error(ex)
        FileLogger.error("read from db error!")
        return None

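# Sketch only: creating the engine once at module level (as _queryFromDB does with ENGINE)
# avoids rebuilding a connection pool on every query, and reading the credentials from an
# environment variable keeps the password out of source control. The variable name
# STOCK_DB_URL is an assumption, not part of the original code.
#
#   import os
#   from sqlalchemy import create_engine
#
#   ENGINE = create_engine(os.environ.get(
#       "STOCK_DB_URL",
#       "mysql+pymysql://root:password@localhost:3306/stock?charset=utf8"))
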
def crawlUSStocks():
    # Query statement: select ts_code from usstock.stocklist;
    stockList = pd.read_csv("C:/project/Tushare/usstock/code.csv").to_numpy()
    for code in stockList:
        FileLogger.info("running on code: " + code[0])
        try:
            crawlHistory(code[0])
            time.sleep(1)
        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("crawl error on code: %s" % code)
            time.sleep(5)

def keepOnlyQuarterData(datadf, quarter):
    if datadf.index.name != 'REPORT_DATE':
        return None

    if quarter == 1:
        return datadf[datadf['REPORT_DATE'].str.find('-03-31') != -1]
    elif quarter == 2:
        return datadf[datadf['REPORT_DATE'].str.find('-06-30') != -1]
    elif quarter == 3:
        return datadf[datadf['REPORT_DATE'].str.find('-09-30') != -1]
    elif quarter == 4:
        return datadf[datadf['REPORT_DATE'].str.find('-12-31') != -1]
    else:
        FileLogger.error("error quarter parameter!")
        return None

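# Illustrative usage sketch (assumes REPORT_DATE is kept both as the index and as a regular
# column, since the function checks the index name but filters on the column):
#
#   datadf = datadf.set_index('REPORT_DATE', drop=False)
#   annualOnly = keepOnlyQuarterData(datadf, 4)   # keep only the -12-31 (annual) reports
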
def gettodayStock():
    curDate = time.strftime("%Y%m%d", time.localtime())
    tryagain = True
    while tryagain:
        try:
            content = crawlLatestUsStocks()
            if content:
                path = "C:/project/stockdata/USDay/%s.txt" % curDate
                write2File(path, content, mode="w")
                FileLogger.info("crawl stock list successfully on date:" + curDate)
                tryagain = False
            else:
                time.sleep(60)
        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("crawl stock list error, retry in 60 seconds")
            time.sleep(60)

def crawlHistory(code) -> bool:
    link = "https://stock.xueqiu.com/v5/stock/chart/kline.json?symbol=%s&begin=1616585707592&period=day&type=before&count=-100000&indicator=kline,pe,pb,ps,pcf,market_capital,agt,ggt,balance" % code
    session = HTMLSession()
    r = session.get(link, headers=HEADERS, cookies=COOKIES)
    jsonObj = json.loads(r.content)
    if jsonObj['error_code'] != 0 or "column" not in jsonObj["data"] or "item" not in jsonObj["data"]:
        FileLogger.error("get content error from: %s" % code)
        return False

    columns = jsonObj["data"]["column"]
    items = jsonObj["data"]["item"]
    if len(items) > 0:
        path = "C:/project/stockdata/UShistory/%s.csv" % code
        save2csv(columns, items, path)
        FileLogger.info("get %d lines from code: %s" % (len(items), code))
    return True

def generateQuarterYRateOnData(code, dataList, columnName):
    sqlTemplate = "update `stock`.`incomerate` set `%s_yrate` = %f where `ts_code` = '%s' and `end_date` = '%s' and `report_type` = '1'"
    lastAcculatedValue = 1
    historyValues = {}
    rate = 0
    sql = ""
    try:
        for item in dataList:
            date = item[0]
            acculatedValue = item[1]
            if acculatedValue is None:
                continue

            # To get the quarterly ratio, first derive this quarter's own revenue:
            # the income table stores cumulative (year-to-date) quarterly figures.
            value = acculatedValue - lastAcculatedValue
            historyValues[date] = value

            endDate = datetime.datetime.strptime(date, "%Y%m%d")
            lastYearDate = datetime.datetime(year=endDate.year - 1, month=endDate.month, day=endDate.day)
            lastYear = lastYearDate.strftime("%Y%m%d")
            lastYearValue = historyValues[lastYear] if lastYear in historyValues else None
            if lastYearValue:
                rate = (value / lastYearValue - 1) * 100
                sql = sqlTemplate % (columnName, rate, code, date)
                engine.execute(sql)
                # print("%s %d %d %d %d %f" % (date, lastAcculatedValue, acculatedValue, lastYearValue, value, rate))

            # cumulative figures reset after the December (annual) report
            if endDate.month == 12:
                lastAcculatedValue = 0
            else:
                lastAcculatedValue = acculatedValue
    except Exception as ex:
        FileLogger.error(ex)
        FileLogger.error("write to DB for generateQuarterYRateOnData error on sql: %s" % sql)

def generateQuarterRateOnData(code, dataList, columnName):
    sqlTemplate = "update `stock`.`incomerate` set `%s_rate` = %f where `ts_code` = '%s' and `end_date` = '%s' and `report_type` = '1'"
    lastDate = datetime.datetime.strptime('19900101', "%Y%m%d")
    lastValue = 1
    lastAcculatedValue = 0
    rate = 0
    sql = ""
    try:
        for item in dataList:
            date = item[0]
            acculatedValue = item[1]
            if acculatedValue is None or math.isnan(acculatedValue):
                continue

            value = acculatedValue - lastAcculatedValue
            endDate = datetime.datetime.strptime(date, "%Y%m%d")
            delta = endDate - lastDate
            if delta > datetime.timedelta(days=135):
                # gap between reports is too large: do nothing, rate should stay none
                pass
            else:
                # To get the quarterly ratio, first derive this quarter's own revenue:
                # the income table stores cumulative (year-to-date) quarterly figures.
                rate = (value / lastValue - 1) * 100
                sql = sqlTemplate % (columnName, rate, code, date)
                engine.execute(sql)
                print("%d %d %d %d %f" % (lastAcculatedValue, acculatedValue, lastValue, value, rate))

            lastDate = endDate
            lastValue = value
            # cumulative figures reset after the December (annual) report
            if endDate.month == 12:
                lastAcculatedValue = 0
            else:
                lastAcculatedValue = acculatedValue
    except Exception as ex:
        FileLogger.error(ex)
        FileLogger.error("write to DB for generateQuarterRateOnData error on sql: %s" % sql)

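# Worked sketch of the cumulative-to-quarterly conversion used above (the numbers are
# invented for illustration): the income table stores year-to-date totals, so each
# quarter's own revenue is the difference from the previous report, and the running
# total resets after the Q4 report.
#
#   cumulative = {"20200331": 10, "20200630": 25, "20200930": 45, "20201231": 70}
#   # quarterly revenue: Q1 = 10, Q2 = 25 - 10 = 15, Q3 = 45 - 25 = 20, Q4 = 70 - 45 = 25
#   # after the December report, lastAcculatedValue is reset to 0 for the next year
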
def generateQuarterData(code, dataList, columnName):
    sqlTemplate = "update `stock`.`income` set `%s_qtr` = %f where `ts_code` = '%s' and `end_date` = '%s' and `report_type` = '1'"
    sql = ""
    try:
        for index, row in dataList.iterrows():
            endDate = datetime.datetime.strptime(row['end_date'], "%Y%m%d")
            lastEndDate = None
            lastValue = 0
            quarterValue = 0
            if endDate.month == 3 and endDate.day == 31:
                quarterValue = row[columnName]
            elif endDate.month == 6 and endDate.day == 30:
                lastEndDate = '%d0331' % endDate.year
            elif endDate.month == 9 and endDate.day == 30:
                lastEndDate = '%d0630' % endDate.year
            elif endDate.month == 12 and endDate.day == 31:
                lastEndDate = '%d0930' % endDate.year

            if lastEndDate:
                lastRow = dataList[dataList['end_date'] == lastEndDate].to_numpy()
                if len(lastRow) > 0:
                    lastValue = lastRow[0][1]
                quarterValue = row[columnName] - lastValue

            print("%s %d %d" % (row['end_date'], row[columnName], quarterValue))
            sql = sqlTemplate % (columnName, quarterValue, code, row['end_date'])
            engine.execute(sql)
    except Exception as ex:
        FileLogger.error(ex)
        FileLogger.error("write to DB for generateQuarterData error on sql: %s" % sql)

FileLogger.info("get %d records on code: %s" % (len(records), code)) if len(records) != 0: content = json.dumps(records) path = "C:/project/stockdata/StockNotices/%s.json" % code write2File(path, content) if __name__ == "__main__": stockList = getJsonFromFile("C:/project/stockdata/StockNotices/stock.json") stockList = stockList["stockList"] # stockList = [{"orgId":"9900002701","category":"A股","code":"002127","pinyin":"njds","zwjc":"南极电商"}] for stock in stockList: FileLogger.info("running on stock: %s(%s)" % (stock["zwjc"], stock["code"])) filePath = "C:/project/stockdata/StockNotices/%s.json" % stock['code'] if (os.path.exists(filePath)): continue try: crawlStockNotices(stock["code"], stock["orgId"]) time.sleep(1) except Exception as ex: FileLogger.error(ex) FileLogger.error("crawl balance error on code: %s" % stock["code"]) time.sleep(3)
if __name__ == "__main__": # http://www.cninfo.com.cn/new/announcement/bulletin_detail?announceId=13519195&flag=true&announceTime=2004-01-17 # retrieveAnualQuarterlyReport() stockdf = pd.read_csv("C:/project/stockdata/StockNoticesFile/annualreportlist.csv", dtype={'code': np.str, 'year': np.str}) # stockdf = stockdf[stockdf['code'] == '000002'] stockList = stockdf[['code', 'name', 'year', 'announcementId', 'url']].to_numpy() # stockList = stockList[1:3] try: for stock in stockList: fileName = "[%s]%s年报-%s" % (stock[1], stock[2], stock[3]) savePath = "C:/project/stockdata/StockNoticesFile/pdf/%s.pdf" % fileName # make sure it's a valid path, no \/:?*"<>| savePath = savePath.replace("*", "") unresolvedPath = "C:/project/stockdata/StockNoticesFile/unresolved/%s.pdf" % fileName url = stock[4] if os.path.exists(savePath) or os.path.exists(unresolvedPath): FileLogger.info("file %s exists, skip!" % fileName) else: FileLogger.info("downloading file: %s" % fileName) downloadFile(url, savePath) except Exception as ex: FileLogger.error(ex) FileLogger.error("download error on file: %s" % fileName) time.sleep(3)
def retrieveAnualQuarterlyReport():
    stockList = getJsonFromFile("C:/project/stockdata/StockNotices/stock.json")
    stockList = stockList["stockList"]
    stockList = [{"orgId":"9900002701","category":"A股","code":"002127","pinyin":"njds","zwjc":"南极电商"}]
    # stockList = [{"orgId":"gssz0000002","category":"A股","code":"000002","pinyin":"njds","zwjc":"万科A"}]

    for stock in stockList:
        FileLogger.info("running on stock: %s(%s)" % (stock["zwjc"], stock["code"]))
        try:
            filePath = "C:/project/stockdata/StockNotices/%s.json" % stock['code']
            jsonList = getJsonFromFile(filePath)

            annualDf = None
            for jsonObj in jsonList:
                announcementType = jsonObj['announcementType']
                fileType = jsonObj['adjunctType']
                # Determine the report type: Q1, half-year, Q3 or annual report.
                # announcementType codes: {'01030501': full Q1 report, '01030701': Q3 report,
                #                          '01030301': half-year report, '01030101': full annual report}
                noticeType = None
                if announcementType.find("01030101") != -1:
                    noticeType = "年报"
                elif announcementType.find("01030701") != -1:
                    noticeType = "三季度报"
                elif announcementType.find("01030301") != -1:
                    noticeType = "半年报"
                elif announcementType.find("01030501") != -1:
                    noticeType = "一季度报"

                # the original compared filePath to 'PDF ', which was a typo for fileType
                if noticeType is not None and fileType in ('PDF', 'PDF ', 'pdf'):
                    FileLogger.info("downloading file: %s" % jsonObj["announcementTitle"])
                    noticeDay = jsonObj['adjunctUrl'][10:20]
                    url = "http://www.cninfo.com.cn/new/announcement/download?bulletinId=%s&announceTime=%s" % (jsonObj['announcementId'], noticeDay)
                    annualData = {
                        'code': jsonObj['secCode'],
                        'name': jsonObj['secName'],
                        'announcementId': jsonObj['announcementId'],
                        'title': jsonObj['announcementTitle'],
                        'noticeDay': noticeDay,
                        'fileType': jsonObj['adjunctType'],
                        'url': url,
                        'Type': noticeType,
                        # annual reports cover the previous calendar year
                        'year': int(noticeDay[0:4]) - 1 if noticeType == "年报" else int(noticeDay[0:4])
                    }
                    if annualDf is None:
                        annualDf = pd.DataFrame(columns=annualData.keys())
                    annualDf = annualDf.append(annualData, ignore_index=True)
                time.sleep(0)

            # save to DB
            from sqlalchemy import create_engine
            ENGINE = create_engine("mysql+pymysql://root:4401821211@localhost:3306/eastmoney?charset=utf8")
            annualDf.to_sql(name="reportbasic", con=ENGINE, if_exists="append")
        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("retrieve error on code: %s" % stock["code"])
            time.sleep(3)

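# DataFrame.append was deprecated and later removed in pandas 2.x. A sketch of the same
# accumulation using a plain list of dicts (an alternative, not the original implementation):
#
#   rows = []
#   for jsonObj in jsonList:
#       ...
#       rows.append(annualData)
#   annualDf = pd.DataFrame(rows)
#   annualDf.to_sql(name="reportbasic", con=ENGINE, if_exists="append")
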
try:
    for item in incomeList:
        endDate = datetime.datetime.strptime(item[0], "%Y%m%d")
        delta = endDate - lastDate
        if delta > datetime.timedelta(days=135):
            # do nothing, rate should be none
            pass
        else:
            sql = sqlTemplate % (40, code, item[0])
            print(sql)
            engine.execute(sql)
        lastDate = endDate
except Exception as ex:
    FileLogger.error(ex)
    FileLogger.error("write to DB error on sql: %s" % sql)

# %%
income = DBLib.getIncomeFromDB('600176.SH')
sortedIncome = income.sort_values(by='end_date', ascending=True)
dataList = sortedIncome[['end_date', 'total_revenue']]

# %%
for index, row in dataList.iterrows():
    print(row['end_date'])

# %%
row = dataList[dataList['end_date'] == '20130630'].to_numpy()

        gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_CURRENT_LIAB', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_EQUITY', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_LIAB_EQUITY', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_LIABILITIES', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_NONCURRENT_ASSETS', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_NONCURRENT_LIAB', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_OTHER_PAYABLE', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_OTHER_RECE', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_PARENT_EQUITY', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'TRADE_FINASSET_NOTFVTPL', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'UNASSIGN_RPOFIT', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'USERIGHT_ASSET', 'double')

        executeSql(jsonSql)
        time.sleep(0.1)
    except Exception as ex:
        FileLogger.error(ex)
        FileLogger.error("parse balance error on code: %s" % code)
        time.sleep(3)

def write2File(filePath, content, mode="w+") -> bool:
    try:
        with open(filePath, mode) as fp:
            fp.write(content)
            fp.flush()
        return True
    except Exception as ex:
        FileLogger.error("write to file error on path: %s" % filePath)
        FileLogger.error(ex)
        return False


if __name__ == "__main__":
    # Query statement: select ts_code from usstock.stocklist;
    stockdf = pd.read_csv("C:/project/Tushare/usstock/code.csv")
    errordf = pd.read_csv("C:/project/Tushare/usstock/get_error_ts_code.csv")
    errorList = errordf['ts_code'].to_numpy()
    stockList = stockdf[~stockdf['ts_code'].isin(errorList)]
    stockList = stockList['ts_code'].to_numpy()

    for code in stockList:
        FileLogger.info("running on code: %s" % code)
        try:
            crawlCashflow(code)
            time.sleep(1)
        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("crawl cashflow error on code: %s" % code)
            time.sleep(3)

        gatherColumnInfo(jsonSql, jsonObjects, 'OPERATE_PROFIT', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'NONBUSINESS_INCOME', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'NONBUSINESS_EXPENSE', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_PROFIT', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'INCOME_TAX', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'NETPROFIT', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'CONTINUED_NETPROFIT', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'PARENT_NETPROFIT', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'MINORITY_INTEREST', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'DEDUCT_PARENT_NETPROFIT', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'BASIC_EPS', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'DILUTED_EPS', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'OTHER_COMPRE_INCOME', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'PARENT_OCI', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'MINORITY_OCI', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'TOTAL_COMPRE_INCOME', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'PARENT_TCI', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'MINORITY_TCI', 'double')

        executeSql(jsonSql)
        time.sleep(0.1)
    except Exception as ex:
        FileLogger.error(ex)
        FileLogger.error("parse income error on code: %s" % code)
        time.sleep(3)

def write2File(filePath, content, mode="w+") -> bool:
    try:
        with open(filePath, mode) as fp:
            fp.write(content)
            fp.flush()
        return True
    except Exception as ex:
        FileLogger.error("write to file error on path: %s" % filePath)
        FileLogger.error(ex)
        return False


if __name__ == "__main__":
    # Query statement: select ts_code from usstock.stocklist;
    stockdf = pd.read_csv("C:/project/Tushare/usstock/code.csv")
    errordf = pd.read_csv("C:/project/Tushare/usstock/get_error_ts_code.csv")
    errorList = errordf['ts_code'].to_numpy()
    stockList = stockdf[~stockdf['ts_code'].isin(errorList)]
    stockList = stockList['ts_code'].to_numpy()

    for code in stockList:
        FileLogger.info("running on code: %s" % code)
        try:
            crawlBalance(code)
            time.sleep(1)
        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("crawl balance error on code: %s" % code)
            time.sleep(3)

    try:
        with open(filePath, mode) as fp:
            fp.write(content)
            fp.flush()
        return True
    except Exception as ex:
        FileLogger.error("write to file error on path: %s" % filePath)
        FileLogger.error(ex)
        return False


if __name__ == "__main__":
    # Query statement: select ts_code from usstock.stocklist;
    stockdf = pd.read_csv("C:/project/Tushare/usstock/code.csv")
    errordf = pd.read_csv("C:/project/Tushare/usstock/get_error_ts_code.csv")
    errorList = errordf['ts_code'].to_numpy()
    stockList = stockdf[~stockdf['ts_code'].isin(errorList)]
    stockList = stockList['ts_code'].to_numpy()

    for code in stockList:
        FileLogger.info("running on code: %s" % code)
        try:
            crawlDivident(code)
            time.sleep(1)
        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("crawl dividend error on code: %s" % code)
            time.sleep(3)

    incomeBaseDF.to_sql(name='incomebase', con=usEngine, if_exists='append')


def readFile(filePath):
    try:
        with open(filePath, 'r') as fp:
            return fp.read()
    except Exception as ex:
        FileLogger.error("read file error on path: %s" % filePath)
        FileLogger.error(ex)
        return False


if __name__ == "__main__":
    stockdf = pd.read_csv("C:/project/stockdata/USIncome/code.csv")
    stockList = stockdf['code'].to_numpy()
    # parseIncomeBaseList(stockList)

    for code in stockList:
        FileLogger.info("running on code: %s" % code)
        try:
            parseUSIncome(code)
        except Exception as ex:
            FileLogger.error(ex)
            FileLogger.error("write data to Database error on code: %s" % code)
        time.sleep(1)

                         'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'FINANCE_EXPENSE', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'INVEST_LOSS', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'DEFER_TAX', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'DT_ASSET_REDUCE', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'DT_LIAB_ADD', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'INVENTORY_REDUCE', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'OPERATE_RECE_REDUCE', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'OPERATE_PAYABLE_ADD', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'OPERATE_NETCASH_OTHERNOTE', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'NETCASH_OPERATENOTE', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'END_CASH', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'BEGIN_CASH', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'CCE_ADDNOTE', 'double')
        gatherColumnInfo(jsonSql, jsonObjects, 'OPINION_TYPE', 'varchar(100)')

        executeSql(jsonSql)
        time.sleep(0.1)
    except Exception as ex:
        FileLogger.error(ex)
        FileLogger.error("parse cashflow error on code: %s" % code)
        time.sleep(3)