def income_statement_to_es(force=False):
    """
    Index the income statements of every listed security into Elasticsearch
    using one bulk request per security.

    Parameters
    ----------
    force : bool
        When False (default), the latest indexed ``reportDate`` of each
        security is looked up first and used as ``start_date`` when loading
        its statements; items on that same date are skipped.
        When True, no lookup is done and ``start_date`` stays None.

    Notes
    -----
    A failure while processing one security is logged as a warning and the
    loop continues with the next security.
    """
    es_index_mapping('income_statement', IncomeStatement)

    for _, security_item in get_security_list().iterrows():
        try:
            start_date = None
            if not force:
                query = {"term": {"securityId": security_item["id"]}}
                latest_record = es_get_latest_record(index='income_statement',
                                                     time_field='reportDate',
                                                     query=query)
                logger.info("latest_record:%s", latest_record)
                if latest_record:
                    start_date = latest_record['reportDate']

            actions = []
            for json_object in get_income_statement_items(security_item, start_date=start_date):
                # The record dated exactly at start_date is the one already
                # found in the index, so it is not sent again.
                if start_date and is_same_date(start_date, json_object['reportDate']):
                    continue

                income_statement = IncomeStatement(meta={'id': json_object['id']})
                fill_doc_type(income_statement, json_object)
                actions.append(income_statement.to_dict(include_meta=True))

            if actions:
                resp = elasticsearch.helpers.bulk(es, actions)
                logger.info(resp)
        except Exception as e:
            # logging interpolates with %-style arguments; the previous "{}"
            # placeholders made the record fail to format, hiding the error.
            logger.warning("wrong IncomeStatement:%s,error:%s", security_item, e)
def income_statement_to_es(force=False):
    """
    Index the income statements of every listed security into Elasticsearch,
    saving one document at a time.

    Parameters
    ----------
    force : bool
        When False (default), the latest indexed ``reportDate`` of each
        security is looked up first and used as ``start_date`` when loading
        its statements; items on that same date are skipped.
        When True, no lookup is done and ``start_date`` stays None.

    Notes
    -----
    A failure while processing one security is logged as a warning and the
    loop continues with the next security.
    """
    es_index_mapping('income_statement', IncomeStatement)

    for _, security_item in get_security_list().iterrows():
        try:
            start_date = None
            if not force:
                query = {"term": {"securityId": security_item["id"]}}
                latest_record = es_get_latest_record(index='income_statement',
                                                     time_field='reportDate',
                                                     query=query)
                logger.info("latest_record:%s", latest_record)
                if latest_record:
                    start_date = latest_record['reportDate']

            for json_object in get_income_statement_items(security_item, start_date=start_date):
                # The record dated exactly at start_date is the one already
                # found in the index, so it is not saved again.
                if start_date and is_same_date(start_date, json_object['reportDate']):
                    continue

                income_statement = IncomeStatement(meta={'id': json_object['id']})
                fill_doc_type(income_statement, json_object)
                income_statement.save()
        except Exception as e:
            # logging interpolates with %-style arguments; the previous "{}"
            # placeholders made the record fail to format, hiding the error.
            logger.warning("wrong IncomeStatement:%s,error:%s", security_item, e)
# (result key, 0-based row index in the statement file) for every numeric
# field, listed in the order the keys appear in each result item.
# Rows 1, 3 and 24 are never read -- presumably section-heading rows;
# confirm against a sample file.
_INCOME_STATEMENT_FIELD_ROWS = (
    # operating revenue
    ("operatingRevenue", 2),
    # total operating costs
    ("operatingTotalCosts", 4),
    # operating costs
    ("operatingCosts", 5),
    # business taxes and surcharges
    ("businessTaxesAndSurcharges", 6),
    # selling expenses
    ("sellingExpenses", 7),
    # administrative expenses
    ("ManagingCosts", 8),
    # financial expenses
    ("financingExpenses", 9),
    # asset impairment loss
    ("assetsDevaluation", 10),
    # gains from changes in fair value
    ("incomeFromChangesInFairValue", 11),
    # investment income
    ("investmentIncome", 12),
    # of which: investment income from associates and joint ventures
    ("investmentIncomeFromRelatedEnterpriseAndJointlyOperating", 13),
    # exchange gains
    ("exchangeGains", 14),
    # operating profit
    ("operatingProfit", 15),
    # plus: non-operating income
    ("nonOperatingIncome", 16),
    # less: non-operating expenditure
    ("nonOperatingExpenditure", 17),
    # of which: loss on disposal of non-current assets
    ("disposalLossOnNonCurrentLiability", 18),
    # total profit
    ("totalProfits", 19),
    # less: income tax expense
    ("incomeTaxExpense", 20),
    # net profit
    ("netProfit", 21),
    # net profit attributable to owners of the parent company
    ("netProfitAttributedToParentCompanyOwner", 22),
    # minority interest income
    ("minorityInterestIncome", 23),
    # basic earnings per share (yuan/share)
    ("EPS", 25),
    # diluted earnings per share (yuan/share)
    ("dilutedEPS", 26),
    # other comprehensive income
    ("otherComprehensiveIncome", 27),
    # total comprehensive income
    ("accumulatedOtherComprehensiveIncome", 28),
    # total comprehensive income attributable to owners of the parent company
    ("attributableToOwnersOfParentCompany", 29),
    # total comprehensive income attributable to minority shareholders
    ("attributableToMinorityShareholders", 30),
)


def get_income_statement_items(security_item, start_date=None, report_period=None,
                               report_event_date=None, return_type='json'):
    """
    get income statement items.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code
    start_date : TimeStamp str or TimeStamp
        start date; report dates earlier than it are skipped
    report_period : TimeStamp str or TimeStamp
        the finance report period,eg.'20170331'
    report_event_date : TimeStamp str or TimeStamp
        the finance report published date; reports whose event date is later
        than it are skipped so that no future data is returned
    return_type : str
        {'json','doc'},default: 'json'

    Returns
    -------
    list of IncomeStatement
    list of json
        Sorted by ``reportDate`` ascending. An empty list is returned when
        the statement file does not exist. When ``report_period`` is given,
        the single matching item itself (not a list) is returned, or an
        empty list if no report matches.

    """
    security_item = to_security_item(security_item)
    path = get_income_statement_path(security_item)
    if not os.path.exists(path):
        return []

    with open(path, encoding='GB2312') as fr:
        lines = fr.readlines()

    def row_values(row):
        # The first and last whitespace-separated tokens of every row are
        # discarded (presumably the row label and a trailing marker --
        # confirm against a sample file).
        return lines[row].split()[1:-1]

    # Row 0 carries one report date per column; each field row is aligned
    # with it by column position.
    report_dates = row_values(0)
    columns = [(field, row_values(row)) for field, row in _INCOME_STATEMENT_FIELD_ROWS]

    start_ts = pd.Timestamp(start_date) if start_date else None
    event_limit_ts = pd.Timestamp(report_event_date) if report_event_date else None

    result_list = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < start_ts:
            continue

        if report_period and not is_same_date(report_period, report_date):
            continue

        report_event = get_report_event_date(security_item, report_date=report_date)

        # use report_event_date to filter the reportEventDate before it for not getting future data
        if report_event_date and event_limit_ts < pd.Timestamp(report_event):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": report_event,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        # Every numeric field goes through to_float, including
        # investmentIncomeFromRelatedEnterpriseAndJointlyOperating, which was
        # previously emitted as the raw string token.
        for field, values in columns:
            the_json[field] = to_float(values[idx])

        the_data = the_json
        if return_type == 'doc':
            the_data = IncomeStatement(meta={'id': the_json['id']})
            fill_doc_type(the_data, the_json)

        # Reaching this point with report_period set means this column
        # matched it, so the single item is returned directly.
        if report_period:
            return the_data

        result_list.append(the_data)

    result_list.sort(key=lambda item: pd.Timestamp(item['reportDate']))
    return result_list