def prepare_backtest_models(self):
    """Train one alpha model per trade date and cache them on the instance.

    Lazily prepares the backtest data if needed, then trains serially
    in-process when no dask client is configured, or fans the per-date
    training jobs out to the dask cluster otherwise. Results land in
    ``self.alpha_models`` keyed by trade date.
    """
    if self.total_data is None:
        self.prepare_backtest_data()
    grouped = self.total_data.groupby('trade_date')

    if self.dask_client is None:
        # Serial path: train each date's model locally.
        trained = {}
        for trade_date, _ in grouped:
            model, _, _ = train_model(trade_date.strftime('%Y-%m-%d'),
                                      self.alpha_model,
                                      self.data_meta)
            trained[trade_date] = model
    else:
        # Distributed path: ship (date, model template, meta) tuples to workers.
        def worker(params):
            trade_date, template, meta = params
            model, _, _ = train_model(trade_date.strftime('%Y-%m-%d'),
                                      template, meta)
            return trade_date, model

        futures = self.dask_client.map(
            worker,
            [(trade_date, self.alpha_model, self.data_meta)
             for trade_date, _ in grouped])
        trained = dict(self.dask_client.gather(futures))

    self.alpha_models = trained
    alpha_logger.info("alpha models training finished ...")
def worker(parameters):
    """Train one model for a ``(ref_date, alpha_model, data_meta)`` tuple.

    Returns the ref date paired with the freshly trained model, so a
    collection of results can be turned directly into a dict.
    """
    ref_date, alpha_model, data_meta = parameters
    trained = train_model(ref_date.strftime('%Y-%m-%d'), alpha_model, data_meta)
    return ref_date, trained
def run(self, running_setting):
    """Run the backtest over ``self.total_data`` with the given settings.

    For each trade date a model (trained here, serially or via dask)
    predicts expected returns, a target position is optimized under the
    configured constraints and executed, and per-date returns, turnover
    and leverage are accumulated.

    Parameters
    ----------
    running_setting : object exposing executor, bounds, rebalance_method, ...

    Returns
    -------
    tuple(pd.DataFrame, pd.DataFrame)
        ``ret_df`` with returns / turn_over / leverage /
        benchmark_returns / excess_return indexed by trade date, and the
        concatenated executed ``positions``.
    """
    alpha_logger.info("starting backting ...")
    total_data_groups = self.total_data.groupby('trade_date')

    rets = []
    turn_overs = []
    leverages = []
    previous_pos = pd.DataFrame()
    executor = copy.deepcopy(running_setting.executor)
    # Collected and concatenated once at the end: DataFrame.append is
    # removed in pandas >= 2.0 and repeated appends are quadratic.
    executed_positions = []

    # Train one model per trade date, serially or on the dask cluster.
    if self.dask_client is None:
        models = {}
        for ref_date, _ in total_data_groups:
            models[ref_date] = train_model(ref_date.strftime('%Y-%m-%d'),
                                           self.alpha_model,
                                           self.data_meta)
    else:
        def worker(parameters):
            new_model = train_model(parameters[0].strftime('%Y-%m-%d'),
                                    parameters[1],
                                    parameters[2])
            return parameters[0], new_model

        futures = self.dask_client.map(
            worker,
            [(d[0], self.alpha_model, self.data_meta)
             for d in total_data_groups])
        models = dict(self.dask_client.gather(futures))

    for ref_date, this_data in total_data_groups:
        new_model = models[ref_date]
        codes = this_data.code.values.tolist()

        if previous_pos.empty:
            current_position = None
        else:
            # reindex (not .loc): codes absent from the previous position
            # yield NaN instead of raising KeyError in modern pandas, and
            # then count as zero weight.
            remained_pos = previous_pos.set_index('code').reindex(codes)
            remained_pos.fillna(0., inplace=True)
            current_position = remained_pos.weight.values

        # Security covariance is only needed for total-variance re-balancing.
        if running_setting.rebalance_method == 'tv':
            risk_cov = self.total_risk_cov[
                self.total_risk_cov.trade_date == ref_date]
            sec_cov = self._generate_sec_cov(this_data, risk_cov)
        else:
            sec_cov = None

        benchmark_w = this_data.weight.values
        constraints = LinearConstraints(running_setting.bounds,
                                        this_data,
                                        benchmark_w)
        lbound, ubound = self._create_lu_bounds(running_setting, codes,
                                                benchmark_w)

        # Process each factor separately on its non-NaN rows.
        features = new_model.features
        dfs = []
        for name in features:
            data_cleaned = this_data.dropna(subset=[name])
            raw_factors = data_cleaned[[name]].values
            new_factors = factor_processing(
                raw_factors,
                pre_process=self.data_meta.pre_process,
                risk_factors=data_cleaned[
                    self.data_meta.neutralized_risk].values.astype(float)
                if self.data_meta.neutralized_risk else None,
                post_process=self.data_meta.post_process)
            dfs.append(pd.DataFrame(new_factors,
                                    columns=[name],
                                    index=data_cleaned.code))

        new_factors = pd.concat(dfs, axis=1)
        # reindex: codes dropped above are missing from the index, so .loc
        # would raise; missing factor values get the cross-sectional median.
        new_factors = new_factors.reindex(codes).fillna(new_factors.median())
        er = new_model.predict(new_factors).astype(float)

        alpha_logger.info('{0} re-balance: {1} codes'.format(
            ref_date, len(er)))
        target_pos = self._calculate_pos(running_setting,
                                         er,
                                         this_data,
                                         constraints,
                                         benchmark_w,
                                         lbound,
                                         ubound,
                                         sec_cov=sec_cov,
                                         current_position=current_position)
        target_pos['code'] = codes
        target_pos['trade_date'] = ref_date

        turn_over, executed_pos = executor.execute(target_pos=target_pos)

        leverage = executed_pos.weight.abs().sum()
        # dx is assumed to be a log return — convert to simple return first.
        ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) - 1.)
        rets.append(np.log(1. + ret))
        executor.set_current(executed_pos)
        turn_overs.append(turn_over)
        leverages.append(leverage)
        executed_positions.append(executed_pos)
        previous_pos = executed_pos

    positions = pd.concat(executed_positions) \
        if executed_positions else pd.DataFrame()
    # NOTE(review): assumes executed positions cover exactly the rows of
    # total_data, in the same order — confirm against the executor contract.
    positions['benchmark_weight'] = self.total_data['weight'].values
    positions['dx'] = self.total_data.dx.values

    trade_dates = positions.trade_date.unique()
    ret_df = pd.DataFrame(
        {
            'returns': rets,
            'turn_over': turn_overs,
            'leverage': leverages
        },
        index=trade_dates)
    ret_df['benchmark_returns'] = self.index_return['dx']
    # Append a sentinel row one holding period ahead, then shift so each
    # date carries the return realized over the previous period.
    ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1],
                                     self.freq)] = 0.
    ret_df = ret_df.shift(1)
    ret_df.iloc[0] = 0.
    ret_df['excess_return'] = ret_df[
        'returns'] - ret_df['benchmark_returns'] * ret_df['leverage']
    return ret_df, positions
def run(self):
    """Load all backtest data, train per-date models and run the backtest.

    Fetches factor, industry, benchmark, risk-model and forward-return
    data from ``self.engine``, merges them into one panel, trains a model
    per trade date (serially or via dask) and simulates the re-balancing
    loop.

    Returns
    -------
    tuple(pd.DataFrame, pd.DataFrame)
        ``ret_df`` (returns / turn_over / leverage / benchmark_returns /
        excess_return indexed by trade date) and the executed positions.
    """
    alpha_logger.info("starting backting ...")

    total_factors = self.engine.fetch_factor_range(
        self.running_setting.universe,
        self.alpha_model.formulas,
        dates=self.running_setting.dates)
    alpha_logger.info("alpha factor data loading finished ...")

    total_industry = self.engine.fetch_industry_matrix_range(
        self.running_setting.universe,
        dates=self.running_setting.dates,
        category=self.running_setting.industry_cat,
        level=self.running_setting.industry_level)
    alpha_logger.info("industry data loading finished ...")

    total_benchmark = self.engine.fetch_benchmark_range(
        dates=self.running_setting.dates,
        benchmark=self.running_setting.benchmark)
    alpha_logger.info("benchmark data loading finished ...")

    total_risk_cov, total_risk_exposure = self.engine.fetch_risk_model_range(
        self.running_setting.universe,
        dates=self.running_setting.dates,
        risk_model=self.data_meta.risk_model)
    alpha_logger.info("risk_model data loading finished ...")

    total_returns = self.engine.fetch_dx_return_range(
        self.running_setting.universe,
        dates=self.running_setting.dates,
        horizon=self.running_setting.horizon,
        offset=1)
    alpha_logger.info("returns data loading finished ...")

    total_data = pd.merge(total_factors,
                          total_industry,
                          on=['trade_date', 'code'])
    # Benchmark membership is a left join: non-members keep weight 0.
    total_data = pd.merge(total_data,
                          total_benchmark,
                          on=['trade_date', 'code'],
                          how='left')
    total_data.fillna({'weight': 0.}, inplace=True)
    total_data = pd.merge(total_data,
                          total_returns,
                          on=['trade_date', 'code'])
    total_data = pd.merge(total_data,
                          total_risk_exposure,
                          on=['trade_date', 'code'])

    # Go through .values before reshape: Series.reshape was removed from
    # pandas, so reshaping the Series directly raises AttributeError.
    is_in_benchmark = (total_data.weight > 0.).astype(float).values.reshape(
        (-1, 1))
    total_data.loc[:, 'benchmark'] = is_in_benchmark
    total_data.loc[:, 'total'] = np.ones_like(is_in_benchmark)
    total_data.sort_values(['trade_date', 'code'], inplace=True)
    total_data_groups = total_data.groupby('trade_date')

    rets = []
    turn_overs = []
    leverages = []
    previous_pos = pd.DataFrame()
    executor = copy.deepcopy(self.running_setting.executor)
    # Collected and concatenated once at the end: DataFrame.append is
    # removed in pandas >= 2.0 and repeated appends are quadratic.
    executed_positions = []

    # Train one model per trade date, serially or on the dask cluster.
    if self.dask_client is None:
        models = {}
        for ref_date, _ in total_data_groups:
            models[ref_date] = train_model(ref_date.strftime('%Y-%m-%d'),
                                           self.alpha_model,
                                           self.data_meta)
    else:
        def worker(parameters):
            new_model = train_model(parameters[0].strftime('%Y-%m-%d'),
                                    parameters[1],
                                    parameters[2])
            return parameters[0], new_model

        futures = self.dask_client.map(
            worker,
            [(d[0], self.alpha_model, self.data_meta)
             for d in total_data_groups])
        models = dict(self.dask_client.gather(futures))

    for ref_date, this_data in total_data_groups:
        new_model = models[ref_date]
        # Fill missing feature values with the cross-sectional median.
        this_data = this_data.fillna(
            this_data[new_model.features].median())
        codes = this_data.code.values.tolist()

        # Security covariance is only needed for total-variance re-balancing.
        if self.running_setting.rebalance_method == 'tv':
            risk_cov = total_risk_cov[total_risk_cov.trade_date == ref_date]
            sec_cov = self._generate_sec_cov(this_data, risk_cov)
        else:
            sec_cov = None

        benchmark_w = this_data.weight.values
        constraints = LinearConstraints(self.running_setting.bounds,
                                        this_data,
                                        benchmark_w)

        # Long-only band around the benchmark weight.
        lbound = np.maximum(
            0., benchmark_w - self.running_setting.weights_bandwidth)
        ubound = self.running_setting.weights_bandwidth + benchmark_w

        if previous_pos.empty:
            current_position = None
        else:
            # reindex (not .loc): codes absent from the previous position
            # yield NaN instead of raising KeyError in modern pandas, and
            # then count as zero weight.
            remained_pos = previous_pos.set_index('code').reindex(codes)
            remained_pos.fillna(0., inplace=True)
            current_position = remained_pos.weight.values

        features = new_model.features
        raw_factors = this_data[features].values
        new_factors = factor_processing(
            raw_factors,
            pre_process=self.data_meta.pre_process,
            risk_factors=this_data[
                self.data_meta.neutralized_risk].values.astype(float)
            if self.data_meta.neutralized_risk else None,
            post_process=self.data_meta.post_process)

        er = new_model.predict(pd.DataFrame(
            new_factors, columns=features)).astype(float)

        alpha_logger.info('{0} re-balance: {1} codes'.format(
            ref_date, len(er)))
        target_pos = self._calculate_pos(er,
                                         this_data,
                                         constraints,
                                         benchmark_w,
                                         lbound,
                                         ubound,
                                         sec_cov=sec_cov,
                                         current_position=current_position,
                                         **self.running_setting.more_opts)
        target_pos['code'] = codes
        target_pos['trade_date'] = ref_date

        turn_over, executed_pos = executor.execute(target_pos=target_pos)

        leverage = executed_pos.weight.abs().sum()
        # dx is assumed to be a log return — convert to simple return first.
        ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) - 1.)
        rets.append(np.log(1. + ret))
        executor.set_current(executed_pos)
        turn_overs.append(turn_over)
        leverages.append(leverage)
        executed_positions.append(executed_pos)
        previous_pos = executed_pos

    positions = pd.concat(executed_positions) \
        if executed_positions else pd.DataFrame()
    # NOTE(review): assumes executed positions cover exactly the rows of
    # total_data, in the same order — confirm against the executor contract.
    positions['benchmark_weight'] = total_data['weight'].values
    positions['dx'] = total_data.dx.values

    trade_dates = positions.trade_date.unique()
    ret_df = pd.DataFrame(
        {
            'returns': rets,
            'turn_over': turn_overs,
            'leverage': leverages
        },
        index=trade_dates)

    index_return = self.engine.fetch_dx_return_index_range(
        self.running_setting.benchmark,
        dates=self.running_setting.dates,
        horizon=self.running_setting.horizon,
        offset=1).set_index('trade_date')
    ret_df['benchmark_returns'] = index_return['dx']
    # Append a sentinel row one holding period ahead, then shift so each
    # date carries the return realized over the previous period.
    ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1],
                                     self.running_setting.freq)] = 0.
    ret_df = ret_df.shift(1)
    ret_df.iloc[0] = 0.
    ret_df['excess_return'] = ret_df[
        'returns'] - ret_df['benchmark_returns'] * ret_df['leverage']
    return ret_df, positions