def test_get_balance_sheet_items():
    """Check the balance sheet items returned for code '600977' from 2016-06-30.

    At least one item must come back, and every item must have a positive
    ``totalBookValue`` and a ``reportEventDate`` greater than its
    ``reportPeriod``.
    """
    sheets = fundamental.get_balance_sheet_items('600977',
                                                 start_date='2016-06-30')

    # The query is expected to yield data.
    assert len(sheets) > 0

    for sheet in sheets:
        book_value = sheet['totalBookValue']
        assert book_value > 0

        # The event date must compare greater than the period it reports on.
        event_date, period = sheet['reportEventDate'], sheet['reportPeriod']
        assert event_date > period
def crawl_finance_data(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE):
    """Crawl finance report events and the three finance sheets per security.

    For every row of ``get_security_list(start_code, end_code)`` the report
    event spider is run first. Then, in order, the balance sheet, the income
    statement and the cash flow statement are crawled with
    ``StockFinanceSpider`` when any of the following holds:

    * the sheet's local file does not exist,
    * the stored sheet contains no items,
    * the ``reportPeriod`` of the last stored item differs from the value
      returned by ``get_report_period()``.

    Any exception raised while processing one security is logged and the
    loop continues with the next security.

    :param start_code: lower bound forwarded to ``get_security_list``
    :param end_code: upper bound forwarded to ``get_security_list``
    """
    # (report_type given to the spider, path getter, stored-items getter),
    # listed in crawl order.
    sheets = (
        ("balance_sheet", get_balance_sheet_path, get_balance_sheet_items),
        ("income_statement", get_income_statement_path, get_income_statement_items),
        ("cash_flow", get_cash_flow_statement_path, get_cash_flow_statement_items),
    )

    for _, security_item in get_security_list(start_code=start_code, end_code=end_code).iterrows():
        try:
            # Crawl the events first; some of the later crawls depend on them.
            process_crawl(StockFinanceReportEventSpider, {"security_item": security_item})

            current_report_period = get_report_period()

            for report_type, get_path, get_items in sheets:
                path = get_path(security_item)
                if not os.path.exists(path):
                    needs_crawl = True
                else:
                    current_items = get_items(security_item)
                    # An existing but empty sheet must be re-crawled. Indexing
                    # [-1] on it would raise IndexError, and the handler below
                    # would then skip the remaining sheets of this security.
                    needs_crawl = (
                        len(current_items) == 0
                        or current_report_period != current_items[-1]['reportPeriod']
                    )

                if needs_crawl:
                    # NOTE: no check is made that the report for the current
                    # period has actually been published; the crawl is simply
                    # attempted whenever the stored data is not up to date.
                    process_crawl(StockFinanceSpider,
                                  {"security_item": security_item, "report_type": report_type})
        except Exception as e:
            logger.exception(e)
def finance_sheet_to_es(sheet_type=None, start_code=None, end_code=None, force=False):
    """Send the finance sheet items of each security to ES via ``df_to_es``.

    For each requested sheet type the index mapping is set up with
    ``es_index_mapping``. Then, for every security returned by
    ``get_security_list``, the sheet items are loaded into a DataFrame,
    passed through ``index_df_with_time(index='reportPeriod')`` and handed
    to ``df_to_es``. A failure for one security is logged with its traceback
    and does not stop the remaining securities.

    :param sheet_type: ``'balance_sheet'``, ``'income_statement'`` or
        ``'cash_flow_statement'``; ``None`` processes all three in that order
    :param start_code: forwarded to ``get_security_list``
    :param end_code: forwarded to ``get_security_list``
    :param force: forwarded to ``df_to_es``
    :raises ValueError: if ``sheet_type`` is not one of the supported names
    """
    if sheet_type is None:
        sheet_types = ['balance_sheet', 'income_statement', 'cash_flow_statement']
    else:
        sheet_types = [sheet_type]

    for current_type in sheet_types:
        # Resolve the document class and the loader once per sheet type,
        # rather than once per security.
        if current_type == 'balance_sheet':
            doc_type, get_items = BalanceSheet, get_balance_sheet_items
        elif current_type == 'income_statement':
            doc_type, get_items = IncomeStatement, get_income_statement_items
        elif current_type == 'cash_flow_statement':
            doc_type, get_items = CashFlowStatement, get_cash_flow_statement_items
        else:
            # Fail with a clear error instead of an unbound ``doc_type``.
            raise ValueError("unknown sheet_type: {!r}".format(current_type))

        es_index_mapping(current_type, doc_type)

        for _, security_item in get_security_list(start_code=start_code,
                                                  end_code=end_code).iterrows():
            try:
                df = pd.DataFrame(get_items(security_item))
                df = index_df_with_time(df, index='reportPeriod')
                # 'timestamp_filed' is the keyword spelling used by this call;
                # it is kept as is (presumably it matches df_to_es's signature).
                df_to_es(df, doc_type=doc_type, timestamp_filed='reportPeriod',
                         security_item=security_item, force=force)
            except Exception:
                # logger.exception already attaches the active traceback.
                # Passing the exception as an extra positional argument to a
                # message without a placeholder breaks record formatting.
                logger.exception("index %s %s failed", security_item['code'], current_type)
def check_eps(security_item):
    """Recompute EPS for each income statement and print whether it matches.

    For every income statement of ``security_item`` the balance sheet for the
    statement's ``reportDate`` is fetched. EPS is computed as
    ``(netProfit - minorityInterestIncome) / totalShareCapital`` and compared
    with the statement's reported ``EPS``; a difference of 0.01 or more is
    printed as a failure, anything else as a pass. Statements with no
    balance sheet, or with a zero ``totalShareCapital``, are skipped.

    :param security_item: the security whose statements are checked
    """
    for statement in get_income_statement_items(security_item=security_item):
        # NOTE(review): the lookup key here is 'reportDate'; confirm it is the
        # field the income statement items actually carry.
        sheet = get_balance_sheet_items(security_item=security_item,
                                        report_period=statement['reportDate'])
        if not sheet:
            continue

        share_capital = sheet['totalShareCapital']
        if share_capital == 0:
            # Avoid dividing by zero.
            continue

        attributable_profit = statement["netProfit"] - statement["minorityInterestIncome"]
        eps = attributable_profit / share_capital
        reported_eps = statement["EPS"]

        if abs(eps - reported_eps) >= 0.01:
            message = "{} EPS calculating not pass,calculating result:{},report result:{}".format(
                statement['id'], eps, reported_eps)
        else:
            message = "{} EPS calculating pass".format(statement['id'])
        print(message)
def finance_sheet_to_es(sheet_type='balance_sheet', start_code=None, end_code=None, force=False):
    """Send one kind of finance sheet of each security to ES via ``df_to_es``.

    The index mapping is set up once with ``es_index_mapping``. Then, for
    every security returned by ``get_security_list``, the sheet items are
    loaded into a DataFrame, passed through
    ``index_df_with_time(index='reportPeriod')`` and handed to ``df_to_es``.

    :param sheet_type: ``'balance_sheet'``, ``'income_statement'`` or
        ``'cash_flow_statement'``
    :param start_code: forwarded to ``get_security_list``
    :param end_code: forwarded to ``get_security_list``
    :param force: forwarded to ``df_to_es``; when falsy, a term query on the
        security's id is passed along as ``query``, otherwise ``query`` is
        ``None``
    :raises ValueError: if ``sheet_type`` is not one of the supported names
    """
    # Resolve the document class and the loader once, before the loop.
    if sheet_type == 'balance_sheet':
        doc_type, get_items = BalanceSheet, get_balance_sheet_items
    elif sheet_type == 'income_statement':
        doc_type, get_items = IncomeStatement, get_income_statement_items
    elif sheet_type == 'cash_flow_statement':
        doc_type, get_items = CashFlowStatement, get_cash_flow_statement_items
    else:
        # Fail with a clear error instead of an unbound ``doc_type``.
        raise ValueError("unknown sheet_type: {!r}".format(sheet_type))

    es_index_mapping(sheet_type, doc_type)

    for _, security_item in get_security_list(start_code=start_code,
                                              end_code=end_code).iterrows():
        if force:
            query = None
        else:
            query = {"term": {"securityId": security_item["id"]}}

        df = pd.DataFrame(get_items(security_item))
        df = index_df_with_time(df, index='reportPeriod')
        # 'timestamp_filed' is the keyword spelling used by this call; it is
        # kept as is (presumably it matches df_to_es's signature).
        df_to_es(df, doc_type=doc_type, timestamp_filed='reportPeriod',
                 query=query, force=force)