def prepare_data(self, begin_date, end_date):
    """Load balance-sheet and income-statement inputs and cache them on ``self``.

    Sets two attributes:

    * ``self.bs`` -- ``ticker`` plus an ``IC`` (invested capital) column,
      with rows containing NaN removed.
    * ``self.inst`` -- the per-ticker ``ttmContinues`` results for the
      ``return`` column, concatenated and indexed by ``datetime``.

    Args:
        begin_date: Start of the requested window. Data is loaded from the
            earlier date returned by ``shift_date(begin_date, 500)``.
        end_date: Upper bound of the ``.loc`` slice applied to the data.

    Raises:
        ValueError: Raised by ``pd.concat`` when no ticker produced a TTM
            frame (the list of frames is empty).
    """
    # Original note: reach back far enough to cover about two years of data.
    shifted_begin_date = shift_date(begin_date, 500)

    # Invested Capital = total assets (121) - current liabilities (101)
    #                    + notes payable (68) + short-term borrowings (109)
    #                    + long-term liabilities due within one year (0)
    ic_items = [121, 101, 68, 109, 0]
    bs = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
        shifted_begin_date:end_date, ['ticker'] + ic_items]
    bs['IC'] = bs[121] - bs[101] + bs[68] + bs[109] + bs[0]
    bs = bs.drop(ic_items, axis=1)
    self.bs = bs.dropna()

    # EBT = net profit attributable to the parent (40) + financial expenses (56)
    inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 40, 56]]
    # Keep only rows whose financial expense lies outside [-1, 1]
    # (rows where item 56 is NaN fail both comparisons and are dropped too).
    inst = inst[(inst[56] > 1) | (inst[56] < -1)].copy()
    inst['return'] = inst[40] + inst[56]
    inst = inst.drop([40, 56], axis=1)
    inst.dropna(inplace=True)
    # Both date columns are filled from the index before calling the TTM helper.
    inst['release_date'] = inst.index
    inst['report_date'] = inst.index

    returnTTM_ls = []
    for ticker in inst['ticker'].unique():
        try:
            # Original note: fewer than 4 financial records raises an exception.
            return_df = ttmContinues(inst[inst['ticker'] == ticker], 'return')
            return_df['ticker'] = ticker
        except Exception:
            # Best effort: skip tickers whose TTM cannot be computed, but do
            # not swallow KeyboardInterrupt/SystemExit as a bare except would.
            continue
        returnTTM_ls.append(return_df)

    self.inst = pd.concat(returnTTM_ls)
    self.inst.set_index('datetime', inplace=True)
def prepare_data(self, begin_date, end_date):
    """Preprocess data: per-ticker net-profit TTM and market value.

    Sets two attributes:

    * ``self.profitTTM`` -- concatenated ``ttmContinues`` results for the
      ``motherNetProfit`` column, one frame per ticker.
    * ``self.mkt_value`` -- the ``market_value`` table without its ``price``
      and ``totals`` columns.

    Args:
        begin_date: Start of the requested window. Data is loaded from the
            earlier date returned by ``shift_date(begin_date, 500)``.
        end_date: Upper bound of the ``.loc`` slice applied to the data.

    Raises:
        ValueError: Raised by ``pd.concat`` when no ticker produced a TTM
            frame (the list of frames is empty).
    """
    # Original note: look back 500 trading days.
    shifted_begin_date = shift_date(begin_date, 500)

    # Income-statement item 40: net profit attributable to parent-company
    # shareholders (original note: item names and numbers are listed in FundDict).
    inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 40]]
    inst['motherNetProfit'] = inst[40]
    inst.drop(40, axis=1, inplace=True)

    # The TTM routine needs both the release date and the report date;
    # here both are filled from the index.
    inst['release_date'] = inst.index
    inst['report_date'] = inst.index

    # Net-profit TTM, computed ticker by ticker.
    profitTTM_ls = []
    for ticker in inst['ticker'].unique():
        try:
            # Original note: fewer than 4 financial records raises an exception.
            reven_df = ttmContinues(inst[inst['ticker'] == ticker], 'motherNetProfit')
            reven_df['ticker'] = ticker
        except Exception:
            # Best effort: skip this ticker, but let KeyboardInterrupt and
            # SystemExit propagate (a bare except would swallow them).
            continue
        profitTTM_ls.append(reven_df)
    self.profitTTM = pd.concat(profitTTM_ls)

    # Total market value from the "OtherData" file.
    # Original note: Tushare market-value data only covers June 2017 onward.
    df = market_value(self.data_source + '\\other\\otherdata.csv', self.tickers)
    self.mkt_value = df.drop(['price', 'totals'], axis=1)
def prepare_data(self, begin_date, end_date):
    """Preprocess data: per-ticker net-profit TTM and market value.

    Sets ``self.profitTTM`` (concatenated per-ticker ``ttmContinues`` output
    for ``motherNetProfit``) and ``self.mkt_value`` (the ``market_value``
    table with ``price`` and ``totals`` removed).

    Args:
        begin_date: Start of the requested window. Data is loaded from the
            earlier date returned by ``shift_date(begin_date, 500)``.
        end_date: Upper bound of the ``.loc`` slice applied to the data.

    Raises:
        ValueError: Raised by ``pd.concat`` when no ticker produced a TTM
            frame (the list of frames is empty).
    """
    shifted_begin_date = shift_date(begin_date, 500)

    inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 40]]
    # Expose income-statement item 40 under a readable column name.
    inst['motherNetProfit'] = inst[40]
    inst.drop(40, axis=1, inplace=True)
    # Both date columns are filled from the index before calling the TTM helper.
    inst['release_date'] = inst.index
    inst['report_date'] = inst.index

    profitTTM_ls = []
    for ticker in inst['ticker'].unique():
        try:
            # Original note: fewer than 4 financial records raises an exception.
            reven_df = ttmContinues(inst[inst['ticker'] == ticker], 'motherNetProfit')
            reven_df['ticker'] = ticker
        except Exception:
            # Best effort: skip this ticker, but let KeyboardInterrupt and
            # SystemExit propagate (a bare except would swallow them).
            continue
        profitTTM_ls.append(reven_df)

    # Net-profit TTM (left with the index produced by pd.concat).
    self.profitTTM = pd.concat(profitTTM_ls)

    # Total market value.
    # Original note: Tushare market-value data only covers 2017 onward.
    df = market_value(self.data_source + '\\other\\otherdata.csv', self.tickers)
    self.mkt_value = df.drop(['price', 'totals'], axis=1)
def prepare_data(self, begin_date, end_date):
    """Build per-ticker parent equity and parent net-profit TTM tables.

    Sets two attributes, both sorted by ``report_date`` then ``datetime`` in
    descending order, with ``report_date`` converted to ``YYYY-MM-DD`` strings:

    * ``self.equity_mean`` -- concatenated ``ttmDiscrete(..., 'motherEquity', 5)``
      results, one frame per ticker.
    * ``self.net_profit`` -- concatenated ``ttmContinues(..., 'motherNetProfit')``
      results, one frame per ticker.

    Args:
        begin_date: Start of the requested window. Data is loaded from the
            earlier date returned by ``shift_date(begin_date, 800)``.
        end_date: Upper bound of the ``.loc`` slice applied to the data.

    Raises:
        ValueError: Raised by ``pd.concat`` when either per-ticker list ends
            up empty.
    """
    shifted_begin_date = shift_date(begin_date, 800)

    # Balance-sheet item 86: equity attributable to the parent.
    bs = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
        shifted_begin_date:end_date, ['ticker', 86]]
    bs['release_date'] = bs.index
    bs['report_date'] = bs.index
    bs['motherEquity'] = bs[86]

    equity_mean = []
    for ticker in bs['ticker'].unique():
        try:
            tmp_equity = ttmDiscrete(bs[bs['ticker'] == ticker], 'motherEquity', 5)
            tmp_equity['ticker'] = ticker
        except Exception:
            # Best effort: skip tickers the helper cannot process, without
            # swallowing KeyboardInterrupt/SystemExit.
            continue
        equity_mean.append(tmp_equity)
    equity_mean = pd.concat(equity_mean)

    # Income-statement item 40: net profit attributable to the parent.
    inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 40]]
    inst['release_date'] = inst.index
    inst['report_date'] = inst.index
    inst['motherNetProfit'] = inst[40]

    net_profit = []
    for ticker in inst['ticker'].unique():
        try:
            tmp_profit = ttmContinues(inst[inst['ticker'] == ticker], 'motherNetProfit')
            tmp_profit['ticker'] = ticker
        except Exception:
            # Same best-effort policy as for the equity loop above.
            continue
        net_profit.append(tmp_profit)
    net_profit = pd.concat(net_profit)

    # Normalise report dates to strings, then order newest first.
    equity_mean['report_date'] = equity_mean['report_date'].apply(
        lambda x: x.strftime("%Y-%m-%d"))
    net_profit['report_date'] = net_profit['report_date'].apply(
        lambda x: x.strftime("%Y-%m-%d"))

    self.equity_mean = equity_mean.sort_values(
        by=['report_date', 'datetime'], ascending=[False, False])
    self.net_profit = net_profit.sort_values(
        by=['report_date', 'datetime'], ascending=[False, False])
def prepare_data(self, begin_date, end_date):
    """Preprocess data for the net-asset turnover ratio.

    Net-asset turnover = revenue_TTM / net_assets_TTM, where
    net assets = total assets (121) - total liabilities (117).

    Original note: revenue_TTM is the sum over the latest 4 quarterly
    reports and net_assets_TTM is the average over the latest 5 quarterly
    reports. The actual windows are decided inside ``ttmContinues`` and
    ``ttmDiscrete`` (called here with their defaults) -- TODO confirm.

    Sets ``self.revenueTTM`` and ``self.netAssetsTTM``, each the
    concatenation of the per-ticker helper results.

    Args:
        begin_date: Start of the requested window. Data is loaded from the
            earlier date returned by ``shift_date(begin_date, 500)``.
        end_date: Upper bound of the ``.loc`` slice applied to the data.

    Raises:
        ValueError: Raised by ``pd.concat`` when either per-ticker list ends
            up empty.
    """
    shifted_begin_date = shift_date(begin_date, 500)

    # Balance sheet: 117 = total liabilities, 121 = total assets.
    netAssets = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
        shifted_begin_date:end_date, ['ticker', 117, 121]]
    netAssets['netAssets'] = netAssets[121] - netAssets[117]
    netAssets.drop([117, 121], axis=1, inplace=True)
    # NOTE(review): the original read ``netAssets[netAssets['netAssets'] :0]``,
    # a slice whose start bound is a Series, which is not a valid row
    # selection. Keeping rows with strictly positive net assets is the
    # presumed intent -- confirm.
    netAssets = netAssets[netAssets['netAssets'] > 0].copy()
    netAssets['report_date'] = netAssets.index
    netAssets['release_date'] = netAssets.index

    netAssetsTTM_ls = []
    for ticker in netAssets['ticker'].unique():
        try:
            netAssets_df = ttmDiscrete(netAssets[netAssets['ticker'] == ticker], 'netAssets')
            netAssets_df['ticker'] = ticker
        except Exception:
            # Best effort: skip tickers the helper cannot process, without
            # swallowing KeyboardInterrupt/SystemExit.
            continue
        netAssetsTTM_ls.append(netAssets_df)

    # Income statement: 0 = operating revenue.
    revenue = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 0]]
    revenue['revenue'] = revenue[0]
    revenue.drop([0], axis=1, inplace=True)
    revenue['report_date'] = revenue.index
    revenue['release_date'] = revenue.index

    revenueTTM_ls = []
    for ticker in revenue['ticker'].unique():
        try:
            # Original note: fewer than 4 financial records raises an exception.
            reven_df = ttmContinues(revenue[revenue['ticker'] == ticker], 'revenue')
            reven_df['ticker'] = ticker
        except Exception:
            # Same best-effort policy as for the net-assets loop above.
            continue
        revenueTTM_ls.append(reven_df)

    self.revenueTTM = pd.concat(revenueTTM_ls)
    self.netAssetsTTM = pd.concat(netAssetsTTM_ls)
def prepare_data(self, begin_date, end_date):
    """Preprocess data for the current-asset turnover factor.

    Loads current assets (balance-sheet item 103) and revenue
    (income-statement item 0), then stores the per-ticker results of
    ``ttmDiscrete`` and ``ttmContinues`` as ``self.currentAssetsTTM`` and
    ``self.revenueTTM``.

    Tickers for which a helper raises are reported on stdout and skipped.

    Args:
        begin_date: Start of the requested window. Data is loaded from the
            earlier date returned by ``shift_date(begin_date, 500)``.
        end_date: Upper bound of the ``.loc`` slice applied to the data.

    Raises:
        ValueError: Raised by ``pd.concat`` when either per-ticker list ends
            up empty.
    """
    # Original note: CATurnover = currentAssets 103 / revenue 0
    shifted_begin_date = shift_date(begin_date, 500)

    bs = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
        shifted_begin_date:end_date, ['ticker', 103]]
    bs['release_date'] = bs.index
    bs['report_date'] = bs.index
    bs['currentAssets'] = bs[103]
    bs.drop(103, axis=1, inplace=True)

    inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 0]]
    inst['release_date'] = inst.index
    inst['report_date'] = inst.index
    inst['revenue'] = inst[0]
    inst.drop([0], axis=1, inplace=True)

    # Revenue: continuous TTM, ticker by ticker.
    revenueTTM_ls = []
    for ticker in inst['ticker'].unique():
        try:
            # Original note: fewer than 4 financial records raises an exception.
            reven_df = ttmContinues(inst[inst['ticker'] == ticker], 'revenue')
            reven_df['ticker'] = ticker
        except Exception:
            # Report and skip; KeyboardInterrupt/SystemExit still propagate.
            print(ticker + ': revenue error')
            continue
        revenueTTM_ls.append(reven_df)

    # Current assets: discrete TTM (original note: recent-period average).
    currentAssetsTTM_ls = []
    for ticker in bs['ticker'].unique():
        try:
            currentAssets_df = ttmDiscrete(bs[bs['ticker'] == ticker], 'currentAssets')
            currentAssets_df['ticker'] = ticker
        except Exception:
            print(ticker + ': current asset error')
            continue
        currentAssetsTTM_ls.append(currentAssets_df)

    self.revenueTTM = pd.concat(revenueTTM_ls)
    self.currentAssetsTTM = pd.concat(currentAssetsTTM_ls)
def prepare_data(self, begin_date, end_date):
    """Build per-ticker revenue TTM and total-assets TTM tables.

    Sets ``self.revenueTTM`` (``ttmContinues`` on ``revenue``) and
    ``self.totalAssetsTTM`` (``ttmDiscrete`` on ``totalAssets``), each the
    concatenation of the per-ticker results. Tickers for which a helper
    raises are reported on stdout and skipped.

    Args:
        begin_date: Start of the requested window. Data is loaded from the
            earlier date returned by ``shift_date(begin_date, 700)``.
        end_date: Upper bound of the ``.loc`` slice applied to the data.

    Raises:
        ValueError: Raised by ``pd.concat`` when either per-ticker list ends
            up empty.
    """
    shifted_begin_date = shift_date(begin_date, 700)

    # Balance sheet: 121 = total assets.
    bs = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
        shifted_begin_date:end_date, ['ticker', 121]]
    bs['release_date'] = bs.index
    bs['report_date'] = bs.index
    bs['totalAssets'] = bs[121]
    bs.drop(121, axis=1, inplace=True)

    # Income statement: 0 = revenue, 4 = cost.
    # NOTE(review): column 4 is loaded but never read in this method and
    # stays on the frame passed to ttmContinues -- confirm it is needed.
    inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 0, 4]]
    inst['release_date'] = inst.index
    inst['report_date'] = inst.index
    inst['revenue'] = inst[0]
    inst.drop(0, axis=1, inplace=True)

    revenueTTM_ls = []
    totalAssetsTTM_ls = []

    for ticker in inst['ticker'].unique():
        try:
            # Original note: fewer than 4 financial records raises an exception.
            reven_df = ttmContinues(inst[inst['ticker'] == ticker], 'revenue')
            reven_df['ticker'] = ticker
        except Exception:
            # Report and skip; KeyboardInterrupt/SystemExit still propagate.
            print(ticker + ': revenue error')
            continue
        revenueTTM_ls.append(reven_df)

    for ticker in bs['ticker'].unique():
        try:
            total_asset_df = ttmDiscrete(bs[bs['ticker'] == ticker], 'totalAssets')
            total_asset_df['ticker'] = ticker
        except Exception:
            print(ticker + ': total asset error')
            continue
        totalAssetsTTM_ls.append(total_asset_df)

    self.revenueTTM = pd.concat(revenueTTM_ls)
    self.totalAssetsTTM = pd.concat(totalAssetsTTM_ls)
def prepare_data(self, begin_date, end_date):
    """Build the per-ticker revenue-and-cost TTM table.

    Loads income-statement items 0 (stored as ``revenue``) and 4 (stored as
    ``cost``), runs ``ttmContinues`` per ticker with the column spec
    ``'revenue,cost'`` and stores the concatenated result as
    ``self.revenue_cost_TTM``. Tickers for which the helper raises are
    reported on stdout and skipped.

    Args:
        begin_date: Start of the requested window. Data is loaded from the
            earlier date returned by ``shift_date(begin_date, 700)``.
        end_date: Upper bound of the ``.loc`` slice applied to the data.

    Raises:
        ValueError: Raised by ``pd.concat`` when no ticker produced a TTM
            frame (the list of frames is empty).
    """
    shifted_begin_date = shift_date(begin_date, 700)

    inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 0, 4]]
    inst['release_date'] = inst.index
    inst['report_date'] = inst.index
    inst['revenue'] = inst[0]
    inst['cost'] = inst[4]
    inst.drop([0, 4], axis=1, inplace=True)

    revenueTTM_ls = []
    for ticker in inst['ticker'].unique():
        try:
            # Original note: fewer than 4 financial records raises an exception.
            reven_df = ttmContinues(inst[inst['ticker'] == ticker], 'revenue,cost')
            reven_df['ticker'] = ticker
        except Exception:
            # Report and skip; KeyboardInterrupt/SystemExit still propagate.
            print(ticker + ': revenue and cost error')
            continue
        revenueTTM_ls.append(reven_df)

    self.revenue_cost_TTM = pd.concat(revenueTTM_ls)
def prepare_data(self, begin_date, end_date):
    """Build the accrual TTM table and load market value.

    Sets three attributes:

    * ``self.accrual_df`` -- cash-flow item 133 merged with income-statement
      item 40 on ``ticker``/``release_date``/``report_date``, plus an
      ``accr`` column equal to item 40 minus item 133.
    * ``self.accrual_ttm`` -- concatenated per-ticker ``ttmContinues``
      results for ``accr``.
    * ``self.mkt_value`` -- the ``market_value`` table without its ``price``
      and ``totals`` columns.

    Args:
        begin_date: Start of the requested window. Data is loaded from the
            earlier date returned by ``shift_date(begin_date, 500)``.
        end_date: Upper bound of the ``.loc`` slice applied to the data.

    Raises:
        ValueError: Raised by ``pd.concat`` when no ticker produced a TTM
            frame (the list of frames is empty).
    """
    shifted_begin_date = shift_date(begin_date, 500)

    # Income statement: 40 = motherNetProfit (per the original comment).
    inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 40]]
    inst['release_date'] = inst.index
    inst['report_date'] = inst.index

    # Cash-flow statement: 133 = cash_flows_yield (per the original comment).
    cf = cp.concat_fund(self.data_source, self.tickers, 'CF').loc[
        shifted_begin_date:end_date, ['ticker', 133]]
    cf['release_date'] = cf.index
    cf['report_date'] = cf.index

    # Align both statements row by row, then take profit minus cash flow.
    self.accrual_df = cf.merge(
        inst, on=['ticker', 'release_date', 'report_date'])
    self.accrual_df['accr'] = self.accrual_df[40] - self.accrual_df[133]

    cash_flow_ls = []
    for ticker in self.accrual_df['ticker'].unique():
        try:
            # Original note: fewer than 4 financial records raises an exception.
            reven_df = ttmContinues(
                self.accrual_df[self.accrual_df['ticker'] == ticker], 'accr')
            reven_df['ticker'] = ticker
        except Exception:
            # Best effort: skip this ticker, but let KeyboardInterrupt and
            # SystemExit propagate (a bare except would swallow them).
            continue
        cash_flow_ls.append(reven_df)
    self.accrual_ttm = pd.concat(cash_flow_ls)

    # Total market value.
    # Original note: Tushare market-value data only covers 2017 onward.
    df = market_value(self.data_source + '\\other\\otherdata.csv', self.tickers)
    self.mkt_value = df.drop(['price', 'totals'], axis=1)