def get_all_financial_resource(self): dowllist = self.get_all_financial_list_utils() downroad = list(dowllist['filename']) datacrawler = HistoryFinancialCrawler() for road in downroad: if os.path.exists(road) != True: datacrawler.fetch_and_parse(reporthook=demo_reporthook, filename=road, path_to_download=road)
def get_single_financial_resource(self, current_filename): datacrawler = HistoryFinancialCrawler() if os.path.exists(current_filename) != True: currentdata = datacrawler.fetch_and_parse( reporthook=demo_reporthook, filename=current_filename, path_to_download=current_filename) reader = HistoryFinancialReader() database_data = reader.get_df(current_filename) database_data = renames.rename_list_utils( ).rename_current_finance_utils(database_data) return database_data
def financial(downdir='.'): from pytdx.crawler.base_crawler import demo_reporthook from pytdx.crawler.history_financial_crawler import HistoryFinancialCrawler from pytdx.crawler.history_financial_crawler import HistoryFinancialListCrawler crawler = HistoryFinancialListCrawler() datacrawler = HistoryFinancialCrawler() list_data = crawler.fetch_and_parse() for x in tqdm(list_data): downfile = os.path.join(downdir, x['filename']) result = datacrawler.fetch_and_parse(reporthook=demo_reporthook, filename=x['filename'], path_to_download=downfile)
def get_df(self, data_file): """ 读取历史财务数据文件,并返回pandas结果 , 类似gpcw20171231.zip格式,具体字段含义参考 https://github.com/rainx/pytdx/issues/133 :param data_file: 数据文件地址, 数据文件类型可以为 .zip 文件,也可以为解压后的 .dat :return: pandas DataFrame格式的历史财务数据 """ crawler = HistoryFinancialCrawler() with open(data_file, 'rb') as df: data = crawler.parse(download_file=df) return crawler.to_df(data)
def download_financialzip_fromtdx(): """ 会创建一个download/文件夹 """ result = get_filename() res = [] for item, md5 in result: if item in os.listdir(download_path) and \ md5 == QA_util_file_md5('{}{}{}'.format(download_path, os.sep, item)): print('FILE {} is already in {}'.format(item, download_path)) else: print('CURRENTLY GET/UPDATE {}'.format(item[0:12])) downloadpath = download_path + '/' + item datacrawler = HistoryFinancialCrawler() datacrawler.fetch_and_parse(reporthook=None, filename=item, path_to_download=downloadpath) res.append(item) return res
def fetch(downdir='.', filename=None, filelist=False, **kwargs): crawler = HistoryFinancialListCrawler() list_data = crawler.fetch_and_parse() if filelist: return list_data datacrawler = HistoryFinancialCrawler() if filename: downfile = os.path.join(downdir, filename) datacrawler.fetch_and_parse(reporthook=demo_reporthook, filename=filename, path_to_download=downfile) return list_data for x in list_data: downfile = os.path.join(downdir, x['filename']) result = datacrawler.fetch_and_parse(reporthook=demo_reporthook, filename=x['filename'], path_to_download=downfile) return list_data
def fetch(downdir='.', filename=None, filelist=False, **kwargs): crawler = HistoryFinancialListCrawler() content=downdir+'/content.txt' # list_data = crawler.fetch_and_parse() olist_data=[] if os.path.isfile(content): download_file = open(content, 'rb') olist_data = crawler.parse(download_file) download_file.close() nlist_data = crawler.fetch_and_parse(path_to_download=content) list_data = diff(nlist_data,olist_data) if filelist: return list_data datacrawler = HistoryFinancialCrawler() if filename: downfile = os.path.join(downdir, filename) datacrawler.fetch_and_parse(reporthook=demo_reporthook, filename=filename, path_to_download=downfile) return list_data for x in list_data: downfile = os.path.join(downdir, x['filename']) result = datacrawler.fetch_and_parse(reporthook=demo_reporthook, filename=x['filename'], path_to_download=downfile) return list_data
def import_stock_fin(): """ 通过pytdx接口下载财务数据 :return: """ table_name = 'pytdx_stock_fin' has_table = engine_md.has_table(table_name) report_date_latest = None if has_table: with with_db_session(engine_md) as session: sql_str = "select max(report_date) from {table_name}".format(table_name=table_name) report_date_latest = session.execute(sql_str).scalar() # 财务指标的名称含义,参考 issue #163 https://github.com/QUANTAXIS/QUANTAXIS/blob/master/QUANTAXIS/QAData/financial_mean.py financial_dict = { # 1.每股指标 '001基本每股收益': 'EPS', '002扣除非经常性损益每股收益': 'deductEPS', '003每股未分配利润': 'undistributedProfitPerShare', '004每股净资产': 'netAssetsPerShare', '005每股资本公积金': 'capitalReservePerShare', '006净资产收益率': 'ROE', '007每股经营现金流量': 'operatingCashFlowPerShare', # 2. 资产负债表 BALANCE SHEET # 2.1 资产 # 2.1.1 流动资产 '008货币资金': 'moneyFunds', '009交易性金融资产': 'tradingFinancialAssets', '010应收票据': 'billsReceivables', '011应收账款': 'accountsReceivables', '012预付款项': 'prepayments', '013其他应收款': 'otherReceivables', '014应收关联公司款': 'interCompanyReceivables', '015应收利息': 'interestReceivables', '016应收股利': 'dividendsReceivables', '017存货': 'inventory', '018其中:消耗性生物资产': 'expendableBiologicalAssets', '019一年内到期的非流动资产': 'noncurrentAssetsDueWithinOneYear', '020其他流动资产': 'otherLiquidAssets', '021流动资产合计': 'totalLiquidAssets', # 2.1.2 非流动资产 '022可供出售金融资产': 'availableForSaleSecurities', '023持有至到期投资': 'heldToMaturityInvestments', '024长期应收款': 'longTermReceivables', '025长期股权投资': 'longTermEquityInvestment', '026投资性房地产': 'investmentRealEstate', '027固定资产': 'fixedAssets', '028在建工程': 'constructionInProgress', '029工程物资': 'engineerMaterial', '030固定资产清理': 'fixedAssetsCleanUp', '031生产性生物资产': 'productiveBiologicalAssets', '032油气资产': 'oilAndGasAssets', '033无形资产': 'intangibleAssets', '034开发支出': 'developmentExpenditure', '035商誉': 'goodwill', '036长期待摊费用': 'longTermDeferredExpenses', '037递延所得税资产': 'deferredIncomeTaxAssets', '038其他非流动资产': 'otherNonCurrentAssets', '039非流动资产合计': 'totalNonCurrentAssets', '040资产总计': 'totalAssets', # 2.2 负债 # 2.2.1 流动负债 '041短期借款': 'shortTermLoan', '042交易性金融负债': 'tradingFinancialLiabilities', '043应付票据': 'billsPayable', '044应付账款': 'accountsPayable', '045预收款项': 'advancedReceivable', '046应付职工薪酬': 'employeesPayable', '047应交税费': 'taxPayable', '048应付利息': 'interestPayable', '049应付股利': 'dividendPayable', '050其他应付款': 'otherPayable', '051应付关联公司款': 'interCompanyPayable', '052一年内到期的非流动负债': 'noncurrentLiabilitiesDueWithinOneYear', '053其他流动负债': 'otherCurrentLiabilities', '054流动负债合计': 'totalCurrentLiabilities', # 2.2.2 非流动负债 '055长期借款': 'longTermLoans', '056应付债券': 'bondsPayable', '057长期应付款': 'longTermPayable', '058专项应付款': 'specialPayable', '059预计负债': 'estimatedLiabilities', '060递延所得税负债': 'defferredIncomeTaxLiabilities', '061其他非流动负债': 'otherNonCurrentLiabilities', '062非流动负债合计': 'totalNonCurrentLiabilities', '063负债合计': 'totalLiabilities', # 2.3 所有者权益 '064实收资本(或股本)': 'totalShare', '065资本公积': 'capitalReserve', '066盈余公积': 'surplusReserve', '067减:库存股': 'treasuryStock', '068未分配利润': 'undistributedProfits', '069少数股东权益': 'minorityEquity', '070外币报表折算价差': 'foreignCurrencyReportTranslationSpread', '071非正常经营项目收益调整': 'abnormalBusinessProjectEarningsAdjustment', '072所有者权益(或股东权益)合计': 'totalOwnersEquity', '073负债和所有者(或股东权益)合计': 'totalLiabilitiesAndOwnersEquity', # 3. 利润表 '074其中:营业收入': 'operatingRevenue', '075其中:营业成本': 'operatingCosts', '076营业税金及附加': 'taxAndSurcharges', '077销售费用': 'salesCosts', '078管理费用': 'managementCosts', '079堪探费用': 'explorationCosts', '080财务费用': 'financialCosts', '081资产减值损失': 'assestsDevaluation', '082加:公允价值变动净收益': 'profitAndLossFromFairValueChanges', '083投资收益': 'investmentIncome', '084其中:对联营企业和合营企业的投资收益': 'investmentIncomeFromAffiliatedBusinessAndCooperativeEnterprise', '085影响营业利润的其他科目': 'otherSubjectsAffectingOperatingProfit', '086三、营业利润': 'operatingProfit', '087加:补贴收入': 'subsidyIncome', '088营业外收入': 'nonOperatingIncome', '089减:营业外支出': 'nonOperatingExpenses', '090其中:非流动资产处置净损失': 'netLossFromDisposalOfNonCurrentAssets', '091加:影响利润总额的其他科目': 'otherSubjectsAffectTotalProfit', '092四、利润总额': 'totalProfit', '093减:所得税': 'incomeTax', '094加:影响净利润的其他科目': 'otherSubjectsAffectNetProfit', '095五、净利润': 'netProfit', '096归属于母公司所有者的净利润': 'netProfitsBelongToParentCompanyOwner', '097少数股东损益': 'minorityProfitAndLoss', # 4. 现金流量表 # 4.1 经营活动 Operating '098销售商品、提供劳务收到的现金': 'cashFromGoodsSalesorOrRenderingOfServices', '099收到的税费返还': 'refundOfTaxAndFeeReceived', '100收到其他与经营活动有关的现金': 'otherCashRelatedBusinessActivitiesReceived', '101经营活动现金流入小计': 'cashInflowsFromOperatingActivities', '102购买商品、接受劳务支付的现金': 'buyingGoodsReceivingCashPaidForLabor', '103支付给职工以及为职工支付的现金': 'paymentToEmployeesAndCashPaidForEmployees', '104支付的各项税费': 'paymentsOfVariousTaxes', '105支付其他与经营活动有关的现金': 'paymentOfOtherCashRelatedToBusinessActivities', '106经营活动现金流出小计': 'cashOutflowsFromOperatingActivities', '107经营活动产生的现金流量净额': 'netCashFlowsFromOperatingActivities', # 4.2 投资活动 Investment '108收回投资收到的现金': 'cashReceivedFromInvestmentReceived', '109取得投资收益收到的现金': 'cashReceivedFromInvestmentIncome', '110处置固定资产、无形资产和其他长期资产收回的现金净额': 'disposalOfNetCashForRecoveryOfFixedAssets', '111处置子公司及其他营业单位收到的现金净额': 'disposalOfNetCashReceivedFromSubsidiariesAndOtherBusinessUnits', '112收到其他与投资活动有关的现金': 'otherCashReceivedRelatingToInvestingActivities', '113投资活动现金流入小计': 'cashinFlowsFromInvestmentActivities', '114购建固定资产、无形资产和其他长期资产支付的现金': 'cashForThePurchaseConstructionPaymentOfFixedAssets', '115投资支付的现金': 'cashInvestment', '116取得子公司及其他营业单位支付的现金净额': 'acquisitionOfNetCashPaidBySubsidiariesAndOtherBusinessUnits', '117支付其他与投资活动有关的现金': 'otherCashPaidRelatingToInvestingActivities', '118投资活动现金流出小计': 'cashOutflowsFromInvestmentActivities', '119投资活动产生的现金流量净额': 'netCashFlowsFromInvestingActivities', # 4.3 筹资活动 Financing '120吸收投资收到的现金': 'cashReceivedFromInvestors', '121取得借款收到的现金': 'cashFromBorrowings', '122收到其他与筹资活动有关的现金': 'otherCashReceivedRelatingToFinancingActivities', '123筹资活动现金流入小计': 'cashInflowsFromFinancingActivities', '124偿还债务支付的现金': 'cashPaymentsOfAmountBorrowed', '125分配股利、利润或偿付利息支付的现金': 'cashPaymentsForDistrbutionOfDividendsOrProfits', '126支付其他与筹资活动有关的现金': 'otherCashPaymentRelatingToFinancingActivities', '127筹资活动现金流出小计': 'cashOutflowsFromFinancingActivities', '128筹资活动产生的现金流量净额': 'netCashFlowsFromFinancingActivities', # 4.4 汇率变动 '129四、汇率变动对现金的影响': 'effectOfForeignExchangRateChangesOnCash', '130四(2)、其他原因对现金的影响': 'effectOfOtherReasonOnCash', # 4.5 现金及现金等价物净增加 '131五、现金及现金等价物净增加额': 'netIncreaseInCashAndCashEquivalents', '132期初现金及现金等价物余额': 'initialCashAndCashEquivalentsBalance', # 4.6 期末现金及现金等价物余额 '133期末现金及现金等价物余额': 'theFinalCashAndCashEquivalentsBalance', # 4.x 补充项目 Supplementary Schedule: # 现金流量附表项目 Indirect Method # 4.x.1 将净利润调节为经营活动现金流量 Convert net profit to cash flow from operating activities '134净利润': 'netProfitFromOperatingActivities', '135资产减值准备': 'provisionForAssetsLosses', '136固定资产折旧、油气资产折耗、生产性生物资产折旧': 'depreciationForFixedAssets', '137无形资产摊销': 'amortizationOfIntangibleAssets', '138长期待摊费用摊销': 'amortizationOfLong_termDeferredExpenses', '139处置固定资产、无形资产和其他长期资产的损失': 'lossOfDisposingFixedAssetsIntangibleAssetsAndOtherLongTermAssets', '140固定资产报废损失': 'scrapLossOfFixedAssets', '141公允价值变动损失': 'lossFromFairValueChange', '142财务费用': 'financialExpenses', '143投资损失': 'investmentLosses', '144递延所得税资产减少': 'decreaseOfDeferredTaxAssets', '145递延所得税负债增加': 'increaseOfDeferredTaxLiabilities', '146存货的减少': 'decreaseOfInventory', '147经营性应收项目的减少': 'decreaseOfOperationReceivables', '148经营性应付项目的增加': 'increaseOfOperationPayables', '149其他': 'others', '150经营活动产生的现金流量净额2': 'netCashFromOperatingActivities2', # 4.x.2 不涉及现金收支的投资和筹资活动 Investing and financing activities not involved in cash '151债务转为资本': 'debtConvertedToCSapital', '152一年内到期的可转换公司债券': 'convertibleBondMaturityWithinOneYear', '153融资租入固定资产': 'leaseholdImprovements', # 4.x.3 现金及现金等价物净增加情况 Net increase of cash and cash equivalents '154现金的期末余额': 'cashEndingBal', '155现金的期初余额': 'cashBeginingBal', '156现金等价物的期末余额': 'cashEquivalentsEndingBal', '157现金等价物的期初余额': 'cashEquivalentsBeginningBal', '158现金及现金等价物净增加额': 'netIncreaseOfCashAndCashEquivalents', # 5. 偿债能力分析 '159流动比率': 'currentRatio', # 流动资产/流动负债 '160速动比率': 'acidTestRatio', # (流动资产-存货)/流动负债 '161现金比率(%)': 'cashRatio', # (货币资金+有价证券)÷流动负债 '162利息保障倍数': 'interestCoverageRatio', # (利润总额+财务费用(仅指利息费用部份))/利息费用 '163非流动负债比率(%)': 'noncurrentLiabilitiesRatio', '164流动负债比率(%)': 'currentLiabilitiesRatio', '165现金到期债务比率(%)': 'cashDebtRatio', # 企业经营现金净流入/(本期到期长期负债+本期应付票据) '166有形资产净值债务率(%)': 'debtToTangibleAssetsRatio', '167权益乘数(%)': 'equityMultiplier', # 资产总额/股东权益总额 '168股东的权益/负债合计(%)': 'equityDebtRatio', # 权益负债率 '169有形资产/负债合计(%)': 'tangibleAssetDebtRatio', # 有形资产负债率 '170经营活动产生的现金流量净额/负债合计(%)': 'netCashFlowsFromOperatingActivitiesDebtRatio', '171EBITDA/负债合计(%)': 'EBITDA_Liabilities', # 6. 经营效率分析 # 销售收入÷平均应收账款=销售收入\(0.5 x(应收账款期初+期末)) '172应收帐款周转率': 'turnoverRatioOfReceivable', '173存货周转率': 'turnoverRatioOfInventory', # (存货周转天数+应收帐款周转天数-应付帐款周转天数+预付帐款周转天数-预收帐款周转天数)/365 '174运营资金周转率': 'turnoverRatioOfOperatingAssets', '175总资产周转率': 'turnoverRatioOfTotalAssets', '176固定资产周转率': 'turnoverRatioOfFixedAssets', # 企业销售收入与固定资产净值的比率 '177应收帐款周转天数': 'daysSalesOutstanding', # 企业从取得应收账款的权利到收回款项、转换为现金所需要的时间 '178存货周转天数': 'daysSalesOfInventory', # 企业从取得存货开始,至消耗、销售为止所经历的天数 '179流动资产周转率': 'turnoverRatioOfCurrentAssets', # 流动资产周转率(次)=主营业务收入/平均流动资产总额 '180流动资产周转天数': 'daysSalesofCurrentAssets', '181总资产周转天数': 'daysSalesofTotalAssets', '182股东权益周转率': 'equityTurnover', # 销售收入/平均股东权益 # 7. 发展能力分析 '183营业收入增长率(%)': 'operatingIncomeGrowth', '184净利润增长率(%)': 'netProfitGrowthRate', # NPGR 利润总额-所得税 '185净资产增长率(%)': 'netAssetsGrowthRate', '186固定资产增长率(%)': 'fixedAssetsGrowthRate', '187总资产增长率(%)': 'totalAssetsGrowthRate', '188投资收益增长率(%)': 'investmentIncomeGrowthRate', '189营业利润增长率(%)': 'operatingProfitGrowthRate', '190暂无': 'None1', '191暂无': 'None2', '192暂无': 'None3', # 8. 获利能力分析 '193成本费用利润率(%)': 'rateOfReturnOnCost', '194营业利润率': 'rateOfReturnOnOperatingProfit', '195营业税金率': 'rateOfReturnOnBusinessTax', '196营业成本率': 'rateOfReturnOnOperatingCost', '197净资产收益率': 'rateOfReturnOnCommonStockholdersEquity', '198投资收益率': 'rateOfReturnOnInvestmentIncome', '199销售净利率(%)': 'rateOfReturnOnNetSalesProfit', '200总资产报酬率': 'rateOfReturnOnTotalAssets', '201净利润率': 'netProfitMargin', '202销售毛利率(%)': 'rateOfReturnOnGrossProfitFromSales', '203三费比重': 'threeFeeProportion', '204管理费用率': 'ratioOfChargingExpense', '205财务费用率': 'ratioOfFinancialExpense', '206扣除非经常性损益后的净利润': 'netProfitAfterExtraordinaryGainsAndLosses', '207息税前利润(EBIT)': 'EBIT', '208息税折旧摊销前利润(EBITDA)': 'EBITDA', '209EBITDA/营业总收入(%)': 'EBITDA_GrossRevenueRate', # 9. 资本结构分析 '210资产负债率(%)': 'assetsLiabilitiesRatio', '211流动资产比率': 'currentAssetsRatio', # 期末的流动资产除以所有者权益 '212货币资金比率': 'monetaryFundRatio', '213存货比率': 'inventoryRatio', '214固定资产比率': 'fixedAssetsRatio', '215负债结构比': 'liabilitiesStructureRatio', '216归属于母公司股东权益/全部投入资本(%)': 'shareholdersOwnershipOfAParentCompany_TotalCapital', '217股东的权益/带息债务(%)': 'shareholdersInterest_InterestRateDebtRatio', '218有形资产/净债务(%)': 'tangibleAssets_NetDebtRatio', # 10. 现金流量分析 '219每股经营性现金流(元)': 'operatingCashFlowPerShareY', '220营业收入现金含量(%)': 'cashOfOperatingIncome', '221经营活动产生的现金流量净额/经营活动净收益(%)': 'netOperatingCashFlow_netOperationProfit', '222销售商品提供劳务收到的现金/营业收入(%)': 'cashFromGoodsSales_OperatingRevenue', '223经营活动产生的现金流量净额/营业收入': 'netOperatingCashFlow_OperatingRevenue', '224资本支出/折旧和摊销': 'capitalExpenditure_DepreciationAndAmortization', '225每股现金流量净额(元)': 'netCashFlowPerShare', '226经营净现金比率(短期债务)': 'operatingCashFlow_ShortTermDebtRatio', '227经营净现金比率(全部债务)': 'operatingCashFlow_LongTermDebtRatio', '228经营活动现金净流量与净利润比率': 'cashFlowRateAndNetProfitRatioOfOperatingActivities', '229全部资产现金回收率': 'cashRecoveryForAllAssets', # 11. 单季度财务指标 '230营业收入': 'operatingRevenueSingle', '231营业利润': 'operatingProfitSingle', '232归属于母公司所有者的净利润': 'netProfitBelongingToTheOwnerOfTheParentCompanySingle', '233扣除非经常性损益后的净利润': 'netProfitAfterExtraordinaryGainsAndLossesSingle', '234经营活动产生的现金流量净额': 'netCashFlowsFromOperatingActivitiesSingle', '235投资活动产生的现金流量净额': 'netCashFlowsFromInvestingActivitiesSingle', '236筹资活动产生的现金流量净额': 'netCashFlowsFromFinancingActivitiesSingle', '237现金及现金等价物净增加额': 'netIncreaseInCashAndCashEquivalentsSingle', # 12.股本股东 '238总股本': 'totalCapital', '239已上市流通A股': 'listedAShares', '240已上市流通B股': 'listedBShares', '241已上市流通H股': 'listedHShares', '242股东人数(户)': 'numberOfShareholders', '243第一大股东的持股数量': 'theNumberOfFirstMajorityShareholder', '244十大流通股东持股数量合计(股)': 'totalNumberOfTopTenCirculationShareholders', '245十大股东持股数量合计(股)': 'totalNumberOfTopTenMajorShareholders', # 13.机构持股 '246机构总量(家)': 'institutionNumber', '247机构持股总量(股)': 'institutionShareholding', '248QFII机构数': 'QFIIInstitutionNumber', '249QFII持股量': 'QFIIShareholding', '250券商机构数': 'brokerNumber', '251券商持股量': 'brokerShareholding', '252保险机构数': 'securityNumber', '253保险持股量': 'securityShareholding', '254基金机构数': 'fundsNumber', '255基金持股量': 'fundsShareholding', '256社保机构数': 'socialSecurityNumber', '257社保持股量': 'socialSecurityShareholding', '258私募机构数': 'privateEquityNumber', '259私募持股量': 'privateEquityShareholding', '260财务公司机构数': 'financialCompanyNumber', '261财务公司持股量': 'financialCompanyShareholding', '262年金机构数': 'pensionInsuranceAgencyNumber', '263年金持股量': 'pensionInsuranceAgencyShareholfing', # 14.新增指标 # [注:季度报告中,若股东同时持有非流通A股性质的股份(如同时持有流通A股和流通B股),取的是包含同时持有非流通A股性质的流通股数] '264十大流通股东中持有A股合计(股)': 'totalNumberOfTopTenCirculationShareholdersForA', '265第一大流通股东持股量(股)': 'firstLargeCirculationShareholdersNumber', # [注:1.自由流通股=已流通A股-十大流通股东5%以上的A股;2.季度报告中,若股东同时持有非流通A股性质的股份(如同时持有流通A股和流通H股),5%以上的持股取的是不包含同时持有非流通A股性质的流通股数,结果可能偏大; 3.指标按报告期展示,新股在上市日的下个报告期才有数据] '266自由流通股(股)': 'freeCirculationStock', '267受限流通A股(股)': 'limitedCirculationAShares', '268一般风险准备(金融类)': 'generalRiskPreparation', '269其他综合收益(利润表)': 'otherComprehensiveIncome', '270综合收益总额(利润表)': 'totalComprehensiveIncome', '271归属于母公司股东权益(资产负债表)': 'shareholdersOwnershipOfAParentCompany', '272银行机构数(家)(机构持股)': 'bankInstutionNumber', '273银行持股量(股)(机构持股)': 'bankInstutionShareholding', '274一般法人机构数(家)(机构持股)': 'corporationNumber', '275一般法人持股量(股)(机构持股)': 'corporationShareholding', '276近一年净利润(元)': 'netProfitLastYear', '277信托机构数(家)(机构持股)': 'trustInstitutionNumber', '278信托持股量(股)(机构持股)': 'trustInstitutionShareholding', '279特殊法人机构数(家)(机构持股)': 'specialCorporationNumber', '280特殊法人持股量(股)(机构持股)': 'specialCorporationShareholding', '281加权净资产收益率(每股指标)': 'weightedROE', '282扣非每股收益(单季度财务指标)': 'nonEPSSingle', } # 整理字段 for key in financial_dict.keys(): val = financial_dict[key] val = val.strip().replace('/', '_').replace('-', '_')[:64] financial_dict[key] = val # 设置列名称对应关系 _pattern = re.compile(r'\d{3}') col_name_dic = { 'col%d' % int(_pattern.search(key).group()): val for key, val in financial_dict.items() if _pattern.search(key) is not None } # 设置 dtype dtype = {val: DOUBLE for key, val in financial_dict.items() if _pattern.search(key) is not None} dtype['ts_code'] = String(10) dtype['report_date'] = Date # 下载财务数据 crawler = HistoryFinancialListCrawler() list_data = crawler.fetch_and_parse() # print(pd.DataFrame(data=list_data)) list_count = len(list_data) logger.debug('%d 财务数据包可用', len(list_data)) datacrawler = HistoryFinancialCrawler() pd.set_option('display.max_columns', None) tot_data_count = 0 _pattern_file_date = re.compile(r'(?<=gpcw)\d{8}(?=.zip)') try: for num, file_info in enumerate(list_data, start=1): filename = file_info['filename'] # 检查当前文件的日期是否大于数据库中的最大记录日期 if report_date_latest is not None: m = _pattern_file_date.search(filename) if m is None: logger.error('filename:%s 格式匹配失败 %s', filename, _pattern_file_date) else: report_date_cur = str_2_date(m.group(), '%Y%m%d') if report_date_cur <= report_date_latest: continue logger.info('%d/%d) 开始下载 %s 数据', num, list_count, filename) # result = datacrawler.fetch_and_parse( # reporthook=demo_reporthook, filename=filename, path_to_download="/tmpfile.zip") result = fetch_and_parse(datacrawler, reporthook=demo_reporthook, filename=filename) if result is None: continue data_df = datacrawler.to_df(data=result) data_df.rename(columns=col_name_dic, inplace=True) data_df.reset_index(inplace=True) data_df['ts_code'] = data_df['code'].apply(lambda x: x + ".SH" if x[0] == '6' else x + '.SZ') data_df.drop(['code'], axis=1, inplace=True) data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype, myisam_if_create_table=True) tot_data_count += data_count finally: logging.info("更新 %s 结束 %d 条信息被更新", table_name, tot_data_count) if not has_table and engine_md.has_table(table_name): create_pk_str = """ALTER TABLE {table_name} CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST, CHANGE COLUMN `report_date` `report_date` DATE NOT NULL AFTER `ts_code`, ADD PRIMARY KEY (`ts_code`, `report_date`)""".format(table_name=table_name) execute_sql(create_pk_str, engine_md, commit=True) logger.info('%s 建立主键 [code, report_date]', table_name)
# Doanload all historical data from 163 and parse to csv path = 'C:/temp/finance/' files = os.listdir(path) crawler = HistoryFinancialListCrawler() list_data = crawler.fetch_and_parse() file_name = [] for i in list_data: file_name.append(i['filename']) for name in file_name[-2:]: if name in files: continue datacrawler = HistoryFinancialCrawler() pd.set_option('display.max_columns', None) download_target = path + name csv_name = download_target.replace('.zip', '.csv') result = datacrawler.fetch_and_parse(reporthook=demo_reporthook, filename=name, path_to_download=download_target) datacrawler.to_df(data=result).round(3).drop( ['col190', 'col191', 'col192', 'col283', 'col284', 'col285', 'col286'], axis=1).drop_duplicates(subset=None, keep='first', inplace=False).to_csv(csv_name) # for name in files: # target = path + name # csv_name = target.replace('.dat','.csv') # HistoryFinancialReader().get_df(target).round(3).drop(['col190','col191','col192','col283','col284','col285', 'col286'], axis=1).drop_duplicates(subset=None, keep='first', inplace=False).to_csv(csv_name)
list_data = crawler.fetch_and_parse() list_df = pd.DataFrame(list_data) try: old_list_df = pd.read_csv(r"..\all_cwsj\gpcw.txt") temp_list_df = list_df.merge(old_list_df, how='outer', on='filename') temp_list_df = temp_list_df[ temp_list_df['hash_x'] != temp_list_df['hash_y']] except: temp_list_df = list_df if temp_list_df.empty: logging.info('No update.') exit() else: list_df.to_csv(r"..\all_cwsj\gpcw.txt", encoding="gbk", quoting=csv.QUOTE_NONE, index=False) filename_list = temp_list_df['filename'] crawler = HistoryFinancialCrawler() for filename in filename_list: logging.info("download {}".format(filename)) result = crawler.fetch_and_parse( filename=filename, path_to_download=r"..\all_cwsj\{}".format(filename)) logging.info("end") #pd.set_option('display.max_columns', None) #result = datacrawler.fetch_and_parse(reporthook=demo_reporthook, filename='gpcw20000930.zip', path_to_download=".\\gpcw20000930.zip") #print(datacrawler.to_df(data=result))
import pandas as pd import numpy as np import re import csv import io import time import traceback import logging from pytdx.crawler.history_financial_crawler import HistoryFinancialCrawler, HistoryFinancialListCrawler logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", level=logging.DEBUG) if __name__ == '__main__': gpcw_df = pd.read_csv(r"..\all_cwsj\gpcw.txt") # type: pd.DataFrame datacrawler = HistoryFinancialCrawler() all_gpcw_df = None # type: pd.DataFrame # http://www.tdx.com.cn/products/helpfile/tdxw/chm/%E7%AC%AC%E4%B8%89%E7%AB%A0%20%20%20%E5%9F%BA%E7%A1%80%E5%8A%9F%E8%83%BD/3-3/3-3-2/3-3-2-15/3-3-2-15.html used_zhibiao_df = pd.DataFrame.from_records( [['report_date', 'report_date'], ['col40', '总资产'], ['col238', '总股本'], ['col271', '净资产'], ['col96', '净利润'], ['col74', '营业总收入'], ['col1', '每股收益'], ['col2', '每股扣非收益'], ['col5', '每股公积金'], ['col3', '每股未分配利润'], ['col4', '每股净资产'], ['col225', '每股现金流'], ['col183', '营业收入增长率'], ['col189', '营业利润增长率'], ['col184', '净利润增长率'], ['col185', '净资产增长率'], ['col202', '销售毛利率'], ['col199', '销售净利率'], ['col175', '总资产周转率'], ['col167', '权益乘数'], ['col6', 'ROE']], columns=['col', 'name']) used_zhibiao_col_list = used_zhibiao_df['col'].tolist() used_zhibiao_df = used_zhibiao_df.set_index('col')