def prepare_data(self, begin_date, end_date):
    """Load parent-attributable equity and net profit and cache their TTM frames.

    Reads balance-sheet column 86 and income-statement column 40 for
    ``self.tickers`` from ``self.data_source``, runs ``ttmDiscrete`` /
    ``ttmContinues`` ticker by ticker, and stores the sorted results on
    ``self.equity_mean`` and ``self.net_profit``.

    Args:
        begin_date: Start of the requested window. Data is actually loaded
            from ``shift_date(begin_date, 800)``; presumably this moves the
            start earlier so the TTM windows have history -- confirm.
        end_date: End of the requested window (upper bound of the ``.loc``
            slice).

    Raises:
        ValueError: Raised by ``pd.concat`` when no ticker produced a frame.
    """
    shifted_begin_date = shift_date(begin_date, 800)

    # Balance sheet, column 86: equity attributable to the parent company.
    bs = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
        shifted_begin_date:end_date, ['ticker', 86]]
    bs['release_date'] = bs.index
    bs['report_date'] = bs.index
    bs['motherEquity'] = bs[86]

    equity_frames = []
    for ticker in bs['ticker'].unique():
        try:
            tmp_equity = ttmDiscrete(bs[bs['ticker'] == ticker], 'motherEquity', 5)
            tmp_equity['ticker'] = ticker
        except Exception:
            # Best effort: skip tickers the TTM helper cannot process, but
            # let KeyboardInterrupt / SystemExit propagate.
            continue
        equity_frames.append(tmp_equity)
    equity_mean = pd.concat(equity_frames)

    # Income statement, column 40: net profit attributable to the parent company.
    inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 40]]
    inst['release_date'] = inst.index
    inst['report_date'] = inst.index
    inst['motherNetProfit'] = inst[40]

    profit_frames = []
    for ticker in inst['ticker'].unique():
        try:
            tmp_profit = ttmContinues(inst[inst['ticker'] == ticker], 'motherNetProfit')
            tmp_profit['ticker'] = ticker
        except Exception:
            # Same best-effort policy as for the equity frames.
            continue
        profit_frames.append(tmp_profit)
    net_profit = pd.concat(profit_frames)

    # Convert report dates to "YYYY-MM-DD" strings, then sort newest first.
    # NOTE(review): the 'datetime' sort key is assumed to be a column
    # returned by the TTM helpers -- confirm.
    equity_mean['report_date'] = equity_mean['report_date'].apply(
        lambda x: x.strftime("%Y-%m-%d"))
    net_profit['report_date'] = net_profit['report_date'].apply(
        lambda x: x.strftime("%Y-%m-%d"))

    self.equity_mean = equity_mean.sort_values(
        by=['report_date', 'datetime'], ascending=[False, False])
    self.net_profit = net_profit.sort_values(
        by=['report_date', 'datetime'], ascending=[False, False])
def prepare_data(self, begin_date, end_date):
    """Pre-process the inputs of the net asset turnover factor.

    Net asset turnover = revenue_TTM / net_assets_TTM, where
    net assets = total assets - total liabilities.
    Per the original notes, revenue_TTM is the sum of revenue over the
    latest 4 reporting quarters and net_assets_TTM is an average over the
    latest 5 reporting quarters; the window actually used is whatever
    ``ttmDiscrete`` defaults to -- confirm.

    Stores the per-ticker results on ``self.revenueTTM`` and
    ``self.netAssetsTTM``.

    Args:
        begin_date: Start of the requested window. Data is loaded from
            ``shift_date(begin_date, 500)``; presumably this moves the start
            earlier so the TTM windows have history -- confirm.
        end_date: End of the requested window (upper bound of the ``.loc``
            slice).

    Raises:
        ValueError: Raised by ``pd.concat`` when no ticker produced a frame.
    """
    shifted_begin_date = shift_date(begin_date, 500)

    # Balance sheet: column 117 = liabilities, column 121 = assets.
    netAssets = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
        shifted_begin_date:end_date, ['ticker', 117, 121]]
    netAssets['netAssets'] = netAssets[121] - netAssets[117]
    netAssets = netAssets.drop([117, 121], axis=1)
    # Keep only rows with positive net assets. A boolean mask is required
    # here; copy so the column assignments below act on an independent frame.
    netAssets = netAssets[netAssets['netAssets'] > 0].copy()
    netAssets['report_date'] = netAssets.index
    netAssets['release_date'] = netAssets.index

    netAssetsTTM_ls = []
    for ticker in netAssets['ticker'].unique():
        try:
            netAssets_df = ttmDiscrete(netAssets[netAssets['ticker'] == ticker], 'netAssets')
            netAssets_df['ticker'] = ticker
        except Exception:
            # Best effort: skip tickers the TTM helper cannot process, but
            # let KeyboardInterrupt / SystemExit propagate.
            continue
        netAssetsTTM_ls.append(netAssets_df)

    # Income statement: column 0 = operating revenue.
    revenue = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 0]]
    revenue['revenue'] = revenue[0]
    revenue = revenue.drop([0], axis=1)
    revenue['report_date'] = revenue.index
    revenue['release_date'] = revenue.index

    revenueTTM_ls = []
    for ticker in revenue['ticker'].unique():
        try:
            # Raises when a ticker has fewer than 4 financial records.
            reven_df = ttmContinues(revenue[revenue['ticker'] == ticker], 'revenue')
            reven_df['ticker'] = ticker
        except Exception:
            continue
        revenueTTM_ls.append(reven_df)

    self.revenueTTM = pd.concat(revenueTTM_ls)
    self.netAssetsTTM = pd.concat(netAssetsTTM_ls)
def prepare_data(self, begin_date, end_date):
    """Pre-process the inputs of the current asset turnover factor.

    Loads current assets (balance-sheet column 103) and revenue
    (income-statement column 0) for ``self.tickers``, runs ``ttmContinues``
    on revenue and ``ttmDiscrete`` on current assets ticker by ticker, and
    stores the results on ``self.revenueTTM`` and ``self.currentAssetsTTM``.

    Args:
        begin_date: Start of the requested window. Data is loaded from
            ``shift_date(begin_date, 500)``; presumably this moves the start
            earlier so the TTM windows have history -- confirm.
        end_date: End of the requested window (upper bound of the ``.loc``
            slice).

    Raises:
        ValueError: Raised by ``pd.concat`` when no ticker produced a frame.
    """
    # Original note: CATurnover = currentAssets 103 / revenue 0.
    shifted_begin_date = shift_date(begin_date, 500)

    bs = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
        shifted_begin_date:end_date, ['ticker', 103]]
    bs['release_date'] = bs.index
    bs['report_date'] = bs.index
    bs['currentAssets'] = bs[103]
    bs = bs.drop(103, axis=1)

    inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 0]]
    inst['release_date'] = inst.index
    inst['report_date'] = inst.index
    inst['revenue'] = inst[0]
    inst = inst.drop([0], axis=1)

    # Revenue goes through ttmContinues.
    revenueTTM_ls = []
    for ticker in inst['ticker'].unique():
        try:
            # Raises when a ticker has fewer than 4 financial records.
            reven_df = ttmContinues(inst[inst['ticker'] == ticker], 'revenue')
            reven_df['ticker'] = ticker
        except Exception:
            # Best effort: report and skip this ticker, but let
            # KeyboardInterrupt / SystemExit propagate. str() keeps the
            # handler itself from failing on a non-string ticker.
            print(str(ticker) + ': revenue error')
            continue
        revenueTTM_ls.append(reven_df)

    # Current assets go through ttmDiscrete (recent average, per the
    # original comment).
    currentAssetsTTM_ls = []
    for ticker in bs['ticker'].unique():
        try:
            currentAssets_df = ttmDiscrete(bs[bs['ticker'] == ticker], 'currentAssets')
            currentAssets_df['ticker'] = ticker
        except Exception:
            print(str(ticker) + ': current asset error')
            continue
        currentAssetsTTM_ls.append(currentAssets_df)

    self.revenueTTM = pd.concat(revenueTTM_ls)
    self.currentAssetsTTM = pd.concat(currentAssetsTTM_ls)
def prepare_data(self, begin_date, end_date):
    """Pre-process revenue and total assets and cache their TTM frames.

    Loads total assets (balance-sheet column 121) and revenue
    (income-statement column 0) for ``self.tickers``, runs ``ttmContinues``
    on revenue and ``ttmDiscrete`` on total assets ticker by ticker, and
    stores the results on ``self.revenueTTM`` and ``self.totalAssetsTTM``.

    Args:
        begin_date: Start of the requested window. Data is loaded from
            ``shift_date(begin_date, 700)``; presumably this moves the start
            earlier so the TTM windows have history -- confirm.
        end_date: End of the requested window (upper bound of the ``.loc``
            slice).

    Raises:
        ValueError: Raised by ``pd.concat`` when no ticker produced a frame.
    """
    shifted_begin_date = shift_date(begin_date, 700)

    # Balance sheet: column 121 = total assets.
    bs = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
        shifted_begin_date:end_date, ['ticker', 121]]
    bs['release_date'] = bs.index
    bs['report_date'] = bs.index
    bs['totalAssets'] = bs[121]
    bs = bs.drop(121, axis=1)

    # Income statement: column 0 = revenue, column 4 = cost.
    # NOTE(review): column 4 is fetched but never read in this method; it is
    # kept so the frame handed to ttmContinues is unchanged -- confirm
    # whether it can be dropped.
    inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
        shifted_begin_date:end_date, ['ticker', 0, 4]]
    inst['release_date'] = inst.index
    inst['report_date'] = inst.index
    inst['revenue'] = inst[0]
    inst = inst.drop(0, axis=1)

    revenueTTM_ls = []
    for ticker in inst['ticker'].unique():
        try:
            # Raises when a ticker has fewer than 4 financial records.
            reven_df = ttmContinues(inst[inst['ticker'] == ticker], 'revenue')
            reven_df['ticker'] = ticker
        except Exception:
            # Best effort: report and skip this ticker, but let
            # KeyboardInterrupt / SystemExit propagate. str() keeps the
            # handler itself from failing on a non-string ticker.
            print(str(ticker) + ': revenue error')
            continue
        revenueTTM_ls.append(reven_df)

    totalAssetsTTM_ls = []
    for ticker in bs['ticker'].unique():
        try:
            total_asset_df = ttmDiscrete(bs[bs['ticker'] == ticker], 'totalAssets')
            total_asset_df['ticker'] = ticker
        except Exception:
            print(str(ticker) + ': total asset error')
            continue
        totalAssetsTTM_ls.append(total_asset_df)

    self.revenueTTM = pd.concat(revenueTTM_ls)
    self.totalAssetsTTM = pd.concat(totalAssetsTTM_ls)