def yield_request(self, item=None, finance_type=None):
    """Yield one download ``Request`` per finance sheet selected for ``item``.

    ``finance_type`` may be ``'balance_sheet'``, ``'income_statement'`` or
    ``'cash_flow'`` to request only that sheet; any other value (including
    the default ``None``) requests all three, in that order.

    Every request carries the target file path and ``item`` in ``meta``, is
    sent with ``DEFAULT_BALANCE_SHEET_HEADER`` and is handled by
    ``self.download_finance_sheet``.
    """
    sheet_kinds = (
        ('balance_sheet', self.get_balance_sheet_url, get_balance_sheet_path),
        ('income_statement', self.get_income_statement_url, get_income_statement_path),
        ('cash_flow', self.get_cash_flow_statement_url, get_cash_flow_statement_path),
    )

    # A recognised type narrows the selection to one sheet; otherwise take all.
    selected = [kind for kind in sheet_kinds if kind[0] == finance_type]
    if not selected:
        selected = sheet_kinds

    # Resolve every (url, path) pair up front, before the first request is yielded.
    targets = [(build_url(item['code']), build_path(item))
               for _, build_url, build_path in selected]

    for target_url, target_path in targets:
        yield Request(url=target_url,
                      meta={'path': target_path, 'item': item},
                      headers=DEFAULT_BALANCE_SHEET_HEADER,
                      callback=self.download_finance_sheet)
예제 #2
0
 def yield_request(self, item=None, finance_type=None):
     """Generate the download requests for the finance sheets of ``item``.

     When ``finance_type`` is one of ``'balance_sheet'``,
     ``'income_statement'`` or ``'cash_flow'`` a single request for that
     sheet is produced; for any other value all three sheets are requested
     in that order.

     Each ``Request`` stores the destination path and ``item`` in ``meta``,
     uses ``DEFAULT_BALANCE_SHEET_HEADER`` as headers and is routed to
     ``self.download_finance_sheet``.
     """
     all_types = ('balance_sheet', 'income_statement', 'cash_flow')
     wanted_types = (finance_type,) if finance_type in all_types else all_types

     def locate(sheet_type):
         # Map a sheet type to its (download url, local path) pair.
         if sheet_type == 'balance_sheet':
             return self.get_balance_sheet_url(item['code']), get_balance_sheet_path(item)
         if sheet_type == 'income_statement':
             return self.get_income_statement_url(item['code']), get_income_statement_path(item)
         return self.get_cash_flow_statement_url(item['code']), get_cash_flow_statement_path(item)

     # All pairs are resolved before the first request is yielded.
     resolved = [locate(sheet_type) for sheet_type in wanted_types]

     for sheet_url, sheet_path in resolved:
         request_meta = {'path': sheet_path, 'item': item}
         yield Request(url=sheet_url,
                       meta=request_meta,
                       headers=DEFAULT_BALANCE_SHEET_HEADER,
                       callback=self.download_finance_sheet)
예제 #3
0
def crawl_finance_data(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE):
    """Crawl the finance statements of every security in a code range.

    For each security returned by ``get_security_list`` the finance report
    events are crawled first, then the balance sheet, income statement and
    cash flow statement are handled in that order:

    * if the statement file does not exist yet, it is crawled;
    * otherwise, if the first stored item is not for the current report date
      and the finance report events contain the current report date, it is
      crawled again.

    Parameters
    ----------
    start_code :
        forwarded to ``get_security_list`` as ``start``
    end_code :
        forwarded to ``get_security_list`` as ``end``

    Any exception raised while processing one security is logged with its
    traceback and the loop continues with the next security.
    """
    # (report_type handed to the spider, path getter, items getter) per statement.
    statements = (
        ("balance_sheet", get_balance_sheet_path, get_balance_sheet_items),
        ("income_statement", get_income_statement_path, get_income_statement_items),
        ("cash_flow", get_cash_flow_statement_path, get_cash_flow_statement_items),
    )

    for _, security_item in get_security_list(start=start_code, end=end_code).iterrows():
        try:
            # Crawl the report events first; some of the later crawls depend on them.
            process_crawl(StockFinanceReportEventSpider, {"security_item": security_item})

            current_report_date = get_report_date()

            for report_type, get_path, get_items in statements:
                crawl_args = {"security_item": security_item,
                              "report_type": report_type}

                # Nothing stored for this statement yet: crawl unconditionally.
                if not os.path.exists(get_path(security_item)):
                    process_crawl(StockFinanceSpider, crawl_args)
                    continue

                # Only the first stored item is inspected.
                # NOTE(review): this presumably expects the items newest-first - confirm.
                first_item = next(iter(get_items(security_item)), None)
                if first_item is None or first_item['reportDate'] == current_report_date:
                    continue

                # The current report period has not been crawled yet; crawl it
                # only once the report events show it has been published.
                df = event.get_finance_report_event(security_item, index='reportDate')
                if current_report_date in df.index:
                    process_crawl(StockFinanceSpider, crawl_args)
        except Exception as e:
            # Best effort per security: keep the traceback, then move on.
            logger.exception(e)
예제 #4
0
def crawl_finance_data(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE):
    """Crawl the finance statements of every security in a code range.

    For each security returned by ``get_security_list`` the finance report
    events are crawled first, then the balance sheet, income statement and
    cash flow statement are handled in that order:

    * if the statement file does not exist yet, it is crawled;
    * otherwise it is crawled again when no items are stored or when the
      last stored item's ``reportPeriod`` differs from the current report
      period. The report events are not consulted for this decision.

    Parameters
    ----------
    start_code :
        forwarded to ``get_security_list`` as ``start_code``
    end_code :
        forwarded to ``get_security_list`` as ``end_code``

    Any exception raised while processing one security is logged with its
    traceback and the loop continues with the next security.
    """
    # (report_type handed to the spider, path getter, items getter) per statement.
    statements = (
        ("balance_sheet", get_balance_sheet_path, get_balance_sheet_items),
        ("income_statement", get_income_statement_path, get_income_statement_items),
        ("cash_flow", get_cash_flow_statement_path, get_cash_flow_statement_items),
    )

    for _, security_item in get_security_list(start_code=start_code, end_code=end_code).iterrows():
        try:
            # Crawl the report events first; some of the later crawls depend on them.
            process_crawl(StockFinanceReportEventSpider, {"security_item": security_item})

            current_report_period = get_report_period()

            for report_type, get_path, get_items in statements:
                crawl_args = {"security_item": security_item,
                              "report_type": report_type}

                # Nothing stored for this statement yet: crawl unconditionally.
                if not os.path.exists(get_path(security_item)):
                    process_crawl(StockFinanceSpider, crawl_args)
                    continue

                current_items = get_items(security_item)
                # An existing file that yields no items counts as "not crawled":
                # indexing [-1] on it would raise and, via the handler below,
                # skip the remaining statements of this security.
                if not current_items or current_report_period != current_items[-1]['reportPeriod']:
                    # The current report period has not been crawled yet.
                    process_crawl(StockFinanceSpider, crawl_args)
        except Exception as e:
            # Best effort per security: keep the traceback, then move on.
            logger.exception(e)
예제 #5
0
def get_income_statement_items(security_item,
                               start_date=None,
                               report_period=None,
                               report_event_date=None):
    """Parse the downloaded income statement file of ``security_item``.

    The file is read as whitespace-separated rows: row 0 holds the report
    dates and each later row one line item. The first and last token of
    every row are dropped, so column ``i`` of a row belongs to report date
    ``i``. Every line-item value is converted with ``to_float``.

    Parameters
    ----------
    security_item :
        indexed with ``"id"`` and ``"code"``; also passed to
        ``get_income_statement_path`` and ``get_report_event_date``
    start_date :
        if given, reports dated before it are skipped
    report_period :
        if given, only the report whose date matches it (``is_same_date``)
        is considered, and that single report is returned
    report_event_date :
        if given, reports whose event date is later than it are skipped, so
        that no future data is returned

    Returns
    -------
    list of dict
        the selected reports sorted by ascending ``reportDate``; ``[]`` if
        the file does not exist or nothing matched
    dict
        the matching report, when ``report_period`` is given and found

    Raises
    ------
    IndexError
        if the file has fewer rows than are read here, or a row has fewer
        columns than the report date row
    """
    path = get_income_statement_path(security_item)
    if not os.path.exists(path):
        return []
    encoding = settings.DOWNLOAD_TXT_ENCODING or detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    # Only the raw lines are needed; release the file before further lookups.
    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    # (output key, row index in the file). Rows 1, 3 and 24 are not read.
    field_rows = (
        # operating revenue (under the total operating revenue section)
        ("operatingRevenue", 2),
        # total operating costs
        ("OperatingTotalCosts", 4),
        # operating costs
        ("OperatingCosts", 5),
        # business taxes and surcharges
        ("businessTaxesAndSurcharges", 6),
        # selling expenses
        ("sellingExpenses", 7),
        # administrative expenses
        ("ManagingCosts", 8),
        # financial expenses
        ("financingExpenses", 9),
        # asset impairment loss
        ("assetsDevaluation", 10),
        # gains from changes in fair value
        ("incomeFromChangesInFairValue", 11),
        # investment income
        ("investmentIncome", 12),
        # of which: investment income from associates and joint ventures
        ("investmentIncomeFromRelatedEnterpriseAndJointlyOperating", 13),
        # exchange gains
        ("exchangeGains", 14),
        # operating profit
        ("operatingProfit", 15),
        # plus: non-operating income
        ("nonOperatingIncome", 16),
        # less: non-operating expenses
        ("nonOperatingExpenditure", 17),
        # of which: loss on disposal of non-current assets
        ("disposalLossOnNonCurrentLiability", 18),
        # total profit
        ("totalProfits", 19),
        # less: income tax expense
        ("incomeTaxExpense", 20),
        # net profit
        ("netProfit", 21),
        # net profit attributable to owners of the parent company
        ("netProfitAttributedToParentCompanyOwner", 22),
        # minority interest income
        ("minorityInterestIncome", 23),
        # basic earnings per share (yuan/share)
        ("EPS", 25),
        # diluted earnings per share (yuan/share)
        ("dilutedEPS", 26),
        # other comprehensive income
        ("otherComprehensiveIncome", 27),
        # total comprehensive income
        ("accumulatedOtherComprehensiveIncome", 28),
        # total comprehensive income attributable to owners of the parent company
        ("attributableToOwnersOfParentCompany", 29),
        # total comprehensive income attributable to minority shareholders
        ("attributableToMinorityShareholders", 30),
    )

    report_dates = lines[0].split()[1:-1]
    columns = {key: lines[row].split()[1:-1] for key, row in field_rows}

    result_json = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue

        if report_period and not is_same_date(report_period, report_date):
            continue

        reportEventDate = get_report_event_date(security_item,
                                                report_date=report_date)

        # Skip reports published after report_event_date to avoid returning future data.
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(reportEventDate):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": reportEventDate,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        # Every line item goes through to_float, including the
        # associates/joint-ventures investment income field.
        for key, _ in field_rows:
            the_json[key] = to_float(columns[key][idx])

        # Reaching this point with report_period set means this report matched it.
        if report_period:
            return the_json

        result_json.append(the_json)

    return sorted(result_json, key=lambda x: pd.Timestamp(x['reportDate']))
예제 #6
0
def get_income_statement_items(security_item, start_date=None, report_period=None, report_event_date=None):
    """Read the stored income statement of ``security_item`` as report dicts.

    The file is split on whitespace row by row: row 0 carries the report
    dates and each later row one line item. The first and last token of
    each row are discarded, so column ``i`` of a row belongs to report date
    ``i``. All line-item values are converted with ``to_float``.

    Parameters
    ----------
    security_item :
        indexed with ``"id"`` and ``"code"``; also passed to
        ``get_income_statement_path`` and ``get_report_event_date``
    start_date :
        if given, reports dated before it are skipped
    report_period :
        if given, only the report whose date matches it (``is_same_date``)
        is considered, and that single report is returned
    report_event_date :
        if given, reports whose event date is later than it are skipped, so
        that no future data is returned

    Returns
    -------
    list of dict
        the selected reports sorted by ascending ``reportDate``; ``[]`` if
        the file does not exist or nothing matched
    dict
        the matching report, when ``report_period`` is given and found

    Raises
    ------
    IndexError
        if the file has fewer rows than are read here, or a row has fewer
        columns than the report date row
    """
    path = get_income_statement_path(security_item)
    if not os.path.exists(path):
        return []
    encoding = settings.DOWNLOAD_TXT_ENCODING or detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    # Only the raw lines are needed; release the file before further lookups.
    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    # (output key, row index in the file). Rows 1, 3 and 24 are not read.
    field_rows = (
        ("operatingRevenue", 2),  # operating revenue
        ("OperatingTotalCosts", 4),  # total operating costs
        ("OperatingCosts", 5),  # operating costs
        ("businessTaxesAndSurcharges", 6),  # business taxes and surcharges
        ("sellingExpenses", 7),  # selling expenses
        ("ManagingCosts", 8),  # administrative expenses
        ("financingExpenses", 9),  # financial expenses
        ("assetsDevaluation", 10),  # asset impairment loss
        ("incomeFromChangesInFairValue", 11),  # gains from changes in fair value
        ("investmentIncome", 12),  # investment income
        # of which: investment income from associates and joint ventures
        ("investmentIncomeFromRelatedEnterpriseAndJointlyOperating", 13),
        ("exchangeGains", 14),  # exchange gains
        ("operatingProfit", 15),  # operating profit
        ("nonOperatingIncome", 16),  # plus: non-operating income
        ("nonOperatingExpenditure", 17),  # less: non-operating expenses
        # of which: loss on disposal of non-current assets
        ("disposalLossOnNonCurrentLiability", 18),
        ("totalProfits", 19),  # total profit
        ("incomeTaxExpense", 20),  # less: income tax expense
        ("netProfit", 21),  # net profit
        # net profit attributable to owners of the parent company
        ("netProfitAttributedToParentCompanyOwner", 22),
        ("minorityInterestIncome", 23),  # minority interest income
        ("EPS", 25),  # basic earnings per share (yuan/share)
        ("dilutedEPS", 26),  # diluted earnings per share (yuan/share)
        ("otherComprehensiveIncome", 27),  # other comprehensive income
        ("accumulatedOtherComprehensiveIncome", 28),  # total comprehensive income
        # total comprehensive income attributable to owners of the parent company
        ("attributableToOwnersOfParentCompany", 29),
        # total comprehensive income attributable to minority shareholders
        ("attributableToMinorityShareholders", 30),
    )

    report_dates = lines[0].split()[1:-1]
    columns = {key: lines[row].split()[1:-1] for key, row in field_rows}

    result_json = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue

        if report_period and not is_same_date(report_period, report_date):
            continue

        reportEventDate = get_report_event_date(security_item, report_date=report_date)

        # Skip reports published after report_event_date to avoid returning future data.
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(reportEventDate):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": reportEventDate,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        # Every line item goes through to_float, including the
        # associates/joint-ventures investment income field.
        for key, _ in field_rows:
            the_json[key] = to_float(columns[key][idx])

        # Reaching this point with report_period set means this report matched it.
        if report_period:
            return the_json

        result_json.append(the_json)

    return sorted(result_json, key=lambda x: pd.Timestamp(x['reportDate']))
예제 #7
0
def get_income_statement_items(security_item,
                               start_date=None,
                               report_period=None,
                               report_event_date=None,
                               return_type='json'):
    """
    Get income statement items from the stored income statement file.

    The file is opened as GB2312 and split on whitespace row by row: row 0
    carries the report dates and each later row one line item. The first
    and last token of each row are discarded, so column ``i`` of a row
    belongs to report date ``i``. All line-item values are converted with
    ``to_float``.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code (normalised with ``to_security_item``)
    start_date : TimeStamp str or TimeStamp
        start date; reports dated before it are skipped
    report_period : TimeStamp str or TimeStamp
        the finance report period,eg.'20170331'; when given, only the
        matching report is considered and it is returned on its own
    report_event_date : TimeStamp str or TimeStamp
        the finance report published date; reports whose event date is
        later than it are skipped, so that no future data is returned
    return_type : str
        {'json','doc'},default: 'json'

    Returns
    -------
    list of IncomeStatement
    list of json
        sorted by ascending ``reportDate``; ``[]`` if the file does not
        exist or nothing matched
    IncomeStatement or json
        the single matching item, when ``report_period`` is given and found

    Raises
    ------
    IndexError
        if the file has fewer rows than are read here, or a row has fewer
        columns than the report date row

    """

    security_item = to_security_item(security_item)

    path = get_income_statement_path(security_item)
    if not os.path.exists(path):
        return []
    encoding = 'GB2312'

    # Only the raw lines are needed; release the file before further lookups.
    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    # (output key, row index in the file). Rows 1, 3 and 24 are not read.
    field_rows = (
        # operating revenue (under the total operating revenue section)
        ("operatingRevenue", 2),
        # total operating costs
        ("operatingTotalCosts", 4),
        # operating costs
        ("operatingCosts", 5),
        # business taxes and surcharges
        ("businessTaxesAndSurcharges", 6),
        # selling expenses
        ("sellingExpenses", 7),
        # administrative expenses
        ("ManagingCosts", 8),
        # financial expenses
        ("financingExpenses", 9),
        # asset impairment loss
        ("assetsDevaluation", 10),
        # gains from changes in fair value
        ("incomeFromChangesInFairValue", 11),
        # investment income
        ("investmentIncome", 12),
        # of which: investment income from associates and joint ventures
        ("investmentIncomeFromRelatedEnterpriseAndJointlyOperating", 13),
        # exchange gains
        ("exchangeGains", 14),
        # operating profit
        ("operatingProfit", 15),
        # plus: non-operating income
        ("nonOperatingIncome", 16),
        # less: non-operating expenses
        ("nonOperatingExpenditure", 17),
        # of which: loss on disposal of non-current assets
        ("disposalLossOnNonCurrentLiability", 18),
        # total profit
        ("totalProfits", 19),
        # less: income tax expense
        ("incomeTaxExpense", 20),
        # net profit
        ("netProfit", 21),
        # net profit attributable to owners of the parent company
        ("netProfitAttributedToParentCompanyOwner", 22),
        # minority interest income
        ("minorityInterestIncome", 23),
        # basic earnings per share (yuan/share)
        ("EPS", 25),
        # diluted earnings per share (yuan/share)
        ("dilutedEPS", 26),
        # other comprehensive income
        ("otherComprehensiveIncome", 27),
        # total comprehensive income
        ("accumulatedOtherComprehensiveIncome", 28),
        # total comprehensive income attributable to owners of the parent company
        ("attributableToOwnersOfParentCompany", 29),
        # total comprehensive income attributable to minority shareholders
        ("attributableToMinorityShareholders", 30),
    )

    report_dates = lines[0].split()[1:-1]
    columns = {key: lines[row].split()[1:-1] for key, row in field_rows}

    result_list = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue

        if report_period and not is_same_date(report_period, report_date):
            continue

        reportEventDate = get_report_event_date(security_item,
                                                report_date=report_date)

        # Skip reports published after report_event_date to avoid returning future data.
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(reportEventDate):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": reportEventDate,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        # Every line item goes through to_float, including the
        # associates/joint-ventures investment income field.
        for key, _ in field_rows:
            the_json[key] = to_float(columns[key][idx])

        the_data = the_json

        if return_type == 'doc':
            # Wrap the parsed values in an IncomeStatement keyed by the same id.
            the_data = IncomeStatement(meta={'id': the_json['id']})
            fill_doc_type(the_data, the_json)

        # Reaching this point with report_period set means this report matched it.
        if report_period:
            return the_data

        result_list.append(the_data)

    return sorted(result_list, key=lambda x: pd.Timestamp(x['reportDate']))