Beispiel #1
0
def test_get_balance_sheet_items():
    """Check the balance sheets returned for '600977' from 2016-06-30 on.

    At least one sheet must come back, every sheet must carry a positive
    ``totalBookValue``, and its ``reportEventDate`` must compare greater
    than its ``reportPeriod``.
    """
    sheets = fundamental.get_balance_sheet_items('600977',
                                                 start_date='2016-06-30')
    assert len(sheets) > 0
    for sheet in sheets:
        book_value = sheet['totalBookValue']
        assert book_value > 0
        event_date, period = sheet['reportEventDate'], sheet['reportPeriod']
        assert event_date > period
def _crawl_sheet_if_stale(security_item, report_type, path, load_items,
                          current_report_period):
    """Crawl one finance sheet unless its stored data is already current.

    Args:
        security_item: the security row being processed.
        report_type: value passed to the spider as ``report_type``.
        path: location of the locally stored sheet for this security.
        load_items: callable returning the stored items for ``security_item``.
        current_report_period: period the newest stored item must match.
    """
    needs_crawl = True
    if os.path.exists(path):
        stored_items = load_items(security_item)
        # A file that exists but yields no items has nothing to compare
        # against; crawl it again instead of failing on stored_items[-1].
        needs_crawl = (
            len(stored_items) == 0 or
            current_report_period != stored_items[-1]['reportPeriod'])

    if needs_crawl:
        process_crawl(StockFinanceSpider, {"security_item": security_item,
                                           "report_type": report_type})


def crawl_finance_data(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE):
    """Crawl report events and the three finance sheets for each security.

    For every security returned by ``get_security_list`` the report events
    are crawled first, then the balance sheet, income statement and cash flow
    statement are crawled when their local file is missing, empty, or its
    last item's ``reportPeriod`` differs from ``get_report_period()``.

    Any exception raised while processing one security is logged and the
    loop continues with the next security.

    Args:
        start_code: forwarded to ``get_security_list`` as ``start_code``.
        end_code: forwarded to ``get_security_list`` as ``end_code``.
    """
    # (report_type sent to the spider, path getter, stored-items loader)
    sheets = (
        ("balance_sheet", get_balance_sheet_path, get_balance_sheet_items),
        ("income_statement", get_income_statement_path,
         get_income_statement_items),
        ("cash_flow", get_cash_flow_statement_path,
         get_cash_flow_statement_items),
    )

    for _, security_item in get_security_list(start_code=start_code, end_code=end_code).iterrows():
        try:
            # Crawl the events first; some of the later crawls depend on them.
            process_crawl(StockFinanceReportEventSpider, {"security_item": security_item})

            current_report_period = get_report_period()

            # NOTE: a stricter rule (re-crawl only once the report event for
            # the current period has been published) was sketched here
            # before but is not enabled.
            for report_type, get_path, load_items in sheets:
                _crawl_sheet_if_stale(security_item, report_type,
                                      get_path(security_item), load_items,
                                      current_report_period)
        except Exception as e:
            logger.exception(e)
Beispiel #3
0
def _finance_sheet_handlers(sheet_type):
    """Return the ``(doc_type, load_items)`` pair used for ``sheet_type``.

    Raises:
        ValueError: if ``sheet_type`` is not one of the supported names.
    """
    if sheet_type == 'balance_sheet':
        return BalanceSheet, get_balance_sheet_items
    if sheet_type == 'income_statement':
        return IncomeStatement, get_income_statement_items
    if sheet_type == 'cash_flow_statement':
        return CashFlowStatement, get_cash_flow_statement_items
    raise ValueError(
        "unknown sheet_type {!r}; expected 'balance_sheet', "
        "'income_statement' or 'cash_flow_statement'".format(sheet_type))


def finance_sheet_to_es(sheet_type=None,
                        start_code=None,
                        end_code=None,
                        force=False):
    """Send the stored finance sheets of each security to ``df_to_es``.

    For every requested sheet type the index mapping is set up through
    ``es_index_mapping``; then, per security, the stored items are loaded
    into a DataFrame, indexed by ``reportPeriod`` and handed to ``df_to_es``.
    A failure for one security is logged and does not stop the others.

    Args:
        sheet_type: ``'balance_sheet'``, ``'income_statement'`` or
            ``'cash_flow_statement'``; ``None`` processes all three.
        start_code: forwarded to ``get_security_list`` as ``start_code``.
        end_code: forwarded to ``get_security_list`` as ``end_code``.
        force: forwarded unchanged to ``df_to_es``.

    Raises:
        ValueError: if ``sheet_type`` is given but not supported.
    """
    if sheet_type is None:
        sheet_types = [
            'balance_sheet', 'income_statement', 'cash_flow_statement'
        ]
    else:
        sheet_types = [sheet_type]

    for current_type in sheet_types:
        # Resolve before touching the index so an unsupported name fails
        # fast instead of surfacing later as an unbound local.
        doc_type, load_items = _finance_sheet_handlers(current_type)

        es_index_mapping(current_type, doc_type)

        for _, security_item in get_security_list(
                start_code=start_code, end_code=end_code).iterrows():
            try:
                df = pd.DataFrame(load_items(security_item))

                df = index_df_with_time(df, index='reportPeriod')

                df_to_es(df,
                         doc_type=doc_type,
                         timestamp_filed='reportPeriod',
                         security_item=security_item,
                         force=force)
            except Exception:
                # logger.exception attaches the active traceback itself; the
                # values are passed as %-args so the record formats cleanly.
                logger.exception("index %s %s failed",
                                 security_item['code'], current_type)
def check_eps(security_item):
    """Print whether each reported EPS matches a recomputed value.

    For every income statement of ``security_item`` the balance sheet for
    the statement's ``reportDate`` is fetched. EPS is recomputed as
    ``(netProfit - minorityInterestIncome) / totalShareCapital`` and compared
    with the statement's ``EPS`` field; an absolute difference of 0.01 or
    more is reported as not passing. Statements without a balance sheet, or
    whose ``totalShareCapital`` is 0, are skipped.
    """
    for statement in get_income_statement_items(security_item=security_item):
        sheet = get_balance_sheet_items(
            security_item=security_item,
            report_period=statement['reportDate'])
        if not sheet:
            continue
        # Skip rather than divide by zero.
        if sheet['totalShareCapital'] == 0:
            continue

        profit_less_minority = (statement["netProfit"] -
                                statement["minorityInterestIncome"])
        eps = profit_less_minority / sheet['totalShareCapital']
        diff = eps - statement["EPS"]
        if abs(diff) >= 0.01:
            message = (
                "{} EPS calculating not pass,calculating result:{},report result:{}"
                .format(statement['id'], eps, statement["EPS"]))
        else:
            message = "{} EPS calculating pass".format(statement['id'])
        print(message)
Beispiel #5
0
def finance_sheet_to_es(sheet_type='balance_sheet',
                        start_code=None,
                        end_code=None,
                        force=False):
    """Send one kind of stored finance sheet to ``df_to_es`` per security.

    The index mapping is set up once through ``es_index_mapping``; then, for
    each security, the stored items are loaded into a DataFrame, indexed by
    ``reportPeriod`` and handed to ``df_to_es``.

    Args:
        sheet_type: ``'balance_sheet'``, ``'income_statement'`` or
            ``'cash_flow_statement'``.
        start_code: forwarded to ``get_security_list`` as ``start_code``.
        end_code: forwarded to ``get_security_list`` as ``end_code``.
        force: forwarded to ``df_to_es``. When false, a term query on the
            security's ``id`` is passed as well (presumably so ``df_to_es``
            can find what is already stored -- confirm against its
            implementation); when true, ``query`` is ``None``.

    Raises:
        ValueError: if ``sheet_type`` is not supported.
    """
    # Resolved once up front: the choice does not depend on the security, and
    # an unsupported name should fail before any index is touched.
    if sheet_type == 'balance_sheet':
        doc_type, load_items = BalanceSheet, get_balance_sheet_items
    elif sheet_type == 'income_statement':
        doc_type, load_items = IncomeStatement, get_income_statement_items
    elif sheet_type == 'cash_flow_statement':
        doc_type, load_items = CashFlowStatement, get_cash_flow_statement_items
    else:
        raise ValueError(
            "unknown sheet_type {!r}; expected 'balance_sheet', "
            "'income_statement' or 'cash_flow_statement'".format(sheet_type))

    es_index_mapping(sheet_type, doc_type)

    for _, security_item in get_security_list(start_code=start_code,
                                              end_code=end_code).iterrows():
        query = None
        if not force:
            query = {"term": {"securityId": security_item["id"]}}

        df = pd.DataFrame(load_items(security_item))

        df = index_df_with_time(df, index='reportPeriod')

        df_to_es(df,
                 doc_type=doc_type,
                 timestamp_filed='reportPeriod',
                 query=query,
                 force=force)