def FactorTest_deal(codes, self_obj):
    """Build the indicator and forward-return frames for a batch of codes.

    The tester object arrives serialized (``self_obj``) and is restored with
    ``dill``; its ``start``, ``end``, ``gap``, ``freq``, ``ind_Model_Class``,
    ``main_field`` and ``neutralize`` attributes drive the computation.
    Imports are done locally; presumably this function is run as a worker in
    a separate process -- confirm against the caller.

    Args:
        codes: passed straight to ``smpl.get_data`` as the codes to load.
        self_obj: ``dill``-serialized tester object.

    Returns:
        A 2-tuple ``(indicator, forward_return)``. When
        ``neutralize['enable']`` is truthy the indicator is the frame returned
        by ``smpl.add_marketvalue_industry`` (indicator joined with close);
        otherwise it is the one-column indicator DataFrame.
    """
    import Analysis_Funs as af
    import tools.Sample_Tools as smpl
    import tools.Pretreat_Tools as pretreat
    import dill
    import pandas as pd

    # NOTE: dill.loads can execute arbitrary code while unpickling, so
    # self_obj must only ever come from a trusted producer.
    tester = dill.loads(self_obj)

    sample = smpl.get_data(codes,
                           start=tester.start,
                           end=tester.end,
                           gap=tester.gap)
    low_freq = smpl.resample_stockdata_low(sample.data, freq=tester.freq)
    forward_return = smpl.get_forward_return(low_freq, 'close')

    # To keep the information available to ind_Model_Class complete, the
    # indicator is generated from the data *before* resampling; it gets
    # resampled separately later on.
    model = tester.ind_Model_Class(sample.data)
    model.fit()
    indicator = pd.DataFrame(model.ind_df[tester.main_field])
    indicator.dropna(axis=0, inplace=True)

    if not tester.neutralize.get('enable', False):
        return (indicator, forward_return)

    # The close price is carried along because it is needed for price
    # adjustment.
    with_close = pd.concat([indicator, sample.close], axis=1)
    with_close.dropna(axis=0, inplace=True)
    enriched = smpl.add_marketvalue_industry(
        with_close, static_mv=tester.neutralize.get('static_mv', False))
    return (enriched, forward_return)
def process(self):
    """Run the factor test: rank IC statistics plus 10-box binning.

    Reads ``self.sample``, ``self.start``, ``self.end``, ``self.gap``,
    ``self.only_main``, ``self.freq``, ``self.ind_Model_Class``,
    ``self.main_field`` and ``self.neutralize``.

    Sets:
        self.rank_ic: result of ``af.get_rank_ic`` on the factor and the
            forward return.
        self.res: one-row DataFrame with columns ``rankIC``, ``rankIC_std``,
            ``rankIC_T``, ``rankIC_P``, ``ICIR`` and ``winning``.
        self.ind_ret_df: factor and forward return joined column-wise, rows
            containing NaN removed.
        self.ind_binned: ``self.ind_ret_df`` after ``pretreat.binning`` has
            been applied to each group of the first index level.
    """
    sample = smpl.get_sample_by_zs(
        name=self.sample,
        start=self.start,
        end=self.end,
        gap=self.gap,
        only_main=self.only_main,
    )
    low_freq = smpl.resample_stockdata_low(sample.data, freq=self.freq)
    # Later resampling relies on ret_forward; otherwise, across different
    # frequencies, resampling would produce inconsistent dates.
    ret_forward = smpl.get_forward_return(low_freq, 'close')

    # To keep the information available to ind_Model_Class complete, the
    # indicator is generated from the data *before* resampling; it gets
    # resampled separately later on.
    model = self.ind_Model_Class(sample.data)
    model.fit()
    ind = pd.DataFrame(model.ind_df[self.main_field])
    ind.dropna(axis=0, inplace=True)

    if self.neutralize.get('enable', False):
        # The close price is carried along because it is needed for price
        # adjustment.
        with_close = pd.concat([ind, sample.close], axis=1)
        with_close.dropna(axis=0, inplace=True)
        enriched = smpl.add_marketvalue_industry(
            with_close, static_mv=self.neutralize.get('static_mv', False))
        exposures = enriched[['totalCapital', 'industry']].sort_index()
        raw_factor = enriched.iloc[:, 0].sort_index()
        ind = pretreat.neutralize(raw_factor,
                                  exposures,
                                  categorical=['industry'],
                                  logarithmetics=['totalCapital'])
        # Factor standardization is deliberately not applied: the rank IC
        # computed after standardizing often disagreed with what the binning
        # test showed.
        factor_for_ic = ind
    else:
        # Per the original author's notes: neutralize ends up returning a
        # Series while the un-neutralized ind is a DataFrame, hence the
        # column selection here; get_rank_ic intersects the indexes
        # internally, so no resampling is needed at this point.
        factor_for_ic = ind[self.main_field]
    self.rank_ic = af.get_rank_ic(factor_for_ic, ret_forward)

    self.res = pd.DataFrame(
        [af.get_ic_desc(self.rank_ic)],
        columns=['rankIC', 'rankIC_std', 'rankIC_T', 'rankIC_P'])
    self.res['ICIR'] = round(af.get_ic_ir(self.rank_ic), 6)
    self.res['winning'] = round(af.get_winning_rate(self.rank_ic), 6)

    # Keep only the factor rows whose first-level index key also occurs in
    # the forward return, then join the two column-wise.
    factor_keys = ind.index.get_level_values(0).unique()
    return_keys = ret_forward.index.get_level_values(0).unique()
    shared_keys = factor_keys.intersection(return_keys)
    self.ind_ret_df = pd.concat([ind.loc[shared_keys], ret_forward], axis=1)
    self.ind_ret_df.dropna(axis=0, inplace=True)

    # Binning: split every first-level group into 10 boxes on the factor.
    def _bin_one_group(group):
        return pretreat.binning(group,
                                deal_column=self.main_field,
                                box_count=10,
                                inplace=True)

    self.ind_binned = self.ind_ret_df.groupby(
        level=0, group_keys=False).apply(_bin_one_group)
def binned_plot(self, only_binned=False):
    """Plot the returns of the factor bins, optionally against the benchmark.

    Uses ``self.ind_binned`` (set by ``process``), which must expose ``code``
    and ``date`` after ``reset_index()`` and contain the ``self.main_field``,
    ``group_label`` and ``ret_forward`` columns.

    Three figures are produced:

    1. A bar chart of the summed ``ret_forward`` per ``group_label``.
    2. One line per bin of the per-date summed return, with the benchmark
       forward return on a secondary y-axis.
    3. The same as (2) but cumulative, with the cumulative benchmark return.

    Args:
        only_binned: when true, only the bar chart (1) is drawn and the
            benchmark is never fetched.
    """

    def _plot_bins_with_benchmark(bin_returns, benchmark, title):
        # Draw one line per bin (index level 1) over the level-0 keys, put
        # the benchmark on a twin y-axis and merge everything in one legend.
        plt.figure(figsize=(1420 / 72, 320 / 72))
        handles = []
        # Plain loop instead of groupby.apply: plotting is a side effect and
        # should run exactly once per group.
        for _, per_bin in bin_returns.groupby(level=1):
            drawn = plt.plot(
                per_bin.index.get_level_values(0).unique().tolist(),
                per_bin.values.tolist(),
                label=per_bin.index.get_level_values(1)[0])
            handles.append(drawn[0])
        twin_axis = plt.gca().twinx()
        # Handles from both axes are collected so a single legend covers all.
        handles += twin_axis.plot(benchmark,
                                  linestyle=":",
                                  linewidth=2,
                                  color="black",
                                  label='bm')
        labels = [handle.get_label() for handle in handles]
        legend = plt.legend(handles,
                            labels,
                            loc='upper left',
                            fontsize='x-small',
                            title='反序\n注意\n10最小')
        legend.get_title().set_fontsize(fontsize=12)
        plt.grid(linestyle="dotted", color="lightgray")
        plt.title(title, **PLOT_TITLE)
        plt.show()

    # Drop the raw factor and the code; neither is needed for plotting.
    binned_flat = self.ind_binned.reset_index().drop(
        ['code', self.main_field], axis=1)

    # Figure 1: total forward return per bin.
    # NOTE(review): the title reads "average bin return" while the plotted
    # value is a sum -- confirm which one is intended.
    plt.figure(figsize=(1420 / 72, 320 / 72))
    bin_totals = binned_flat.drop(
        ['date'], axis=1).dropna().set_index('group_label').groupby(
            level=0).apply(lambda x: x['ret_forward'].sum())
    plt.bar(bin_totals.index, bin_totals)
    plt.title('分箱平均收益', **PLOT_TITLE)
    plt.show()
    if only_binned:
        return

    # Group by date, then within each date sum the returns per bin
    # (original author's note: the resulting order is reversed).
    # 'sum' is passed by name; handing the builtin to agg is deprecated.
    bin_ret_by_date = binned_flat.set_index(
        ['date', 'group_label']).groupby(level=0).apply(
            lambda x: x.groupby(level=1).agg('sum'))

    benchmark = smpl.get_benchmark(name=self.sample,
                                   start=self.start,
                                   end=self.end,
                                   gap=self.gap)
    benchmark_low_freq = smpl.resample_stockdata_low(benchmark.data,
                                                     freq=self.freq)
    benchmark_ret = smpl.get_forward_return(benchmark_low_freq, 'close')
    benchmark_ret.reset_index('code', drop=True, inplace=True)
    benchmark_cum = benchmark_ret.cumsum()

    # Figure 2: per-date bin returns against the benchmark return.
    _plot_bins_with_benchmark(bin_ret_by_date, benchmark_ret, '分箱收益变化')

    # Figure 3: cumulative bin returns against the cumulative benchmark.
    # GroupBy.cumsum keeps the original (date, group_label) index on every
    # pandas version, whereas apply(lambda x: x.cumsum()) may prepend the
    # group key and break the level=1 grouping done by the plot helper.
    bin_ret_cum = bin_ret_by_date.groupby(level=1).cumsum()
    _plot_bins_with_benchmark(bin_ret_cum, benchmark_cum, '累计收益率')