def prepare_backtest_models(self):
    """Train one alpha model per trade date and cache them on the instance.

    Uses the serial in-process path when no dask client is configured,
    otherwise fans the per-date training jobs out to the dask cluster.
    Results are stored in ``self.alpha_models`` keyed by trade date.
    """
    if self.total_data is None:
        self.prepare_backtest_data()
    grouped = self.total_data.groupby('trade_date')
    if self.dask_client is None:
        # Serial path: train each date's model in-process.
        trained = {}
        for trade_date, _ in grouped:
            model, _, _ = train_model(trade_date.strftime('%Y-%m-%d'),
                                      self.alpha_model,
                                      self.data_meta)
            trained[trade_date] = model
    else:
        # Parallel path: ship (date, model, meta) tuples to the cluster
        # and gather back (date, fitted model) pairs.
        def _train_one(args):
            fitted, _, _ = train_model(args[0].strftime('%Y-%m-%d'),
                                       args[1],
                                       args[2])
            return args[0], fitted

        futures = self.dask_client.map(
            _train_one,
            [(d, self.alpha_model, self.data_meta) for d, _ in grouped])
        trained = dict(self.dask_client.gather(futures))
    self.alpha_models = trained
    alpha_logger.info("alpha models training finished ...")
def cs_impl(ref_date, factor_data, factor_name, risk_exposure, constraint_risk, industry_matrix, dx_returns):
    """Single-date cross-sectional evaluation of one factor.

    Builds a position proportional to the risk-neutralized factor score,
    scaled to unit gross exposure, and reports the portfolio log-return,
    the information coefficient, and the OLS t-stat of realized excess
    return on the active weights.
    """
    merged = pd.merge(factor_data, risk_exposure, on='code')
    merged = pd.merge(merged, industry_matrix, on='code').dropna()
    # Neutralize the raw factor against the constraint risks
    # (no pre- or post-processing in this variant).
    scores = merged[factor_name].values.astype(float)
    scores = factor_processing(scores, [], merged[constraint_risk].values, []).flatten()
    target_pos = pd.DataFrame({'code': merged.code.tolist(),
                               'weight': scores,
                               'industry': merged.industry_name.values})
    # Scale so absolute weights sum to one.
    target_pos['weight'] = target_pos['weight'] / target_pos['weight'].abs().sum()
    target_pos = pd.merge(target_pos, dx_returns, on=['code'])
    target_pos = pd.merge(target_pos, merged[['code'] + constraint_risk], on=['code'])
    active_w = target_pos.weight.values
    realized = np.exp(target_pos.dx.values) - 1.
    port_ret = np.log(active_w @ realized + 1.)
    ic = np.corrcoef(realized, active_w)[0, 1]
    # t-stat of the slope in: realized excess return ~ const + active weight.
    ols_fit = sm.OLS(realized, sm.add_constant(active_w)).fit()
    t_stats = ols_fit.tvalues[1]
    alpha_logger.info(f"{ref_date} is finished with {len(target_pos)} stocks for {factor_name}")
    alpha_logger.info(f"{ref_date} risk_exposure: "
                      f"{np.sum(np.square(target_pos.weight.values @ target_pos[constraint_risk].values))}")
    return port_ret, ic, t_stats
def _merge_df(engine, names, factor_df, return_df, universe, dates, risk_model, neutralized_risk):
    """Merge factor, return and risk data and extract training arrays.

    Returns the merged return frame, the unique dates, per-row date labels,
    the risk-exposure matrix (or None), the x/y value arrays, the raw
    train_x/train_y frames, and the per-row codes.
    """
    risk_df = engine.fetch_risk_model_range(universe, dates=dates, risk_model=risk_model)[1]
    alpha_logger.info("risk data loading finished")
    # Keep only the risk factors that are not themselves model features.
    used_neutralized_risk = list(set(total_risk_factors).difference(names))
    risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
    # Returns are always intersected with the available risk data.
    return_df = pd.merge(return_df, risk_df, on=['trade_date', 'code'])
    if neutralized_risk:
        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
        risk_exp = train_x[neutralized_risk].values.astype(float)
    else:
        train_x = factor_df.copy()
        risk_exp = None
    train_y = return_df.copy()
    x_values = train_x[names].values.astype(float)
    y_values = train_y[['dx']].values
    codes = train_x['code'].values
    date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime()
    dates = np.unique(date_label)
    return return_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes
def check_holiday(this_date):
    """Return True when *this_date* is a SSE business day.

    Logs an informational message and returns False on holidays so the
    calling job can skip its work.
    """
    is_biz_day = isBizDay('china.sse', this_date)
    if not is_biz_day:
        alpha_logger.info(
            'Job will be omitted as {0} is a holiday'.format(this_date))
    return is_biz_day
def data_info_log(df, table):
    """Log how many rows of *df* will be written to *table*.

    Raises ValueError (after logging a warning) when there is nothing
    to insert, so callers fail loudly instead of silently writing nothing.
    """
    n_records = len(df)
    if not n_records:
        message = "No records will be inserted in {0}".format(table)
        alpha_logger.warning(message)
        raise ValueError(message)
    alpha_logger.info("{0} records will be inserted in {1}".format(
        n_records, table))
def prepare_backtest_data(self):
    """Load and merge all market data required by the backtest.

    Fetches alpha factors, the industry matrix, benchmark weights, risk
    model data and forward returns over ``self.dates``, inner/left-merges
    them on (trade_date, code), and stores the combined frame on
    ``self.total_data`` plus the risk covariance on ``self.total_risk_cov``
    and the benchmark index returns on ``self.index_return``.
    """
    total_factors = self.engine.fetch_factor_range(
        self.universe, self.alpha_model.formulas, dates=self.dates)
    alpha_logger.info("alpha factor data loading finished ...")
    total_industry = self.engine.fetch_industry_matrix_range(
        self.universe, dates=self.dates, category=self.industry_cat,
        level=self.industry_level)
    alpha_logger.info("industry data loading finished ...")
    total_benchmark = self.engine.fetch_benchmark_range(
        dates=self.dates, benchmark=self.benchmark)
    alpha_logger.info("benchmark data loading finished ...")
    total_risk_cov, total_risk_exposure = self.engine.fetch_risk_model_range(
        self.universe, dates=self.dates, risk_model=self.data_meta.risk_model)
    alpha_logger.info("risk_model data loading finished ...")
    total_returns = self.engine.fetch_dx_return_range(self.universe,
                                                      dates=self.dates,
                                                      horizon=self.horizon,
                                                      offset=1)
    alpha_logger.info("returns data loading finished ...")
    total_data = pd.merge(total_factors, total_industry, on=['trade_date', 'code'])
    # Left-merge the benchmark so stocks outside the benchmark are kept
    # with a zero weight (filled just below).
    total_data = pd.merge(total_data, total_benchmark,
                          on=['trade_date', 'code'], how='left')
    total_data.fillna({'weight': 0.}, inplace=True)
    total_data = pd.merge(total_data, total_returns, on=['trade_date', 'code'])
    total_data = pd.merge(total_data, total_risk_exposure, on=['trade_date', 'code'])
    # 1.0 where the stock carries benchmark weight, 0.0 elsewhere;
    # 'total' is an all-ones column of the same shape.
    is_in_benchmark = (total_data.weight > 0.).astype(float).values.reshape((-1, 1))
    total_data.loc[:, 'benchmark'] = is_in_benchmark
    total_data.loc[:, 'total'] = np.ones_like(is_in_benchmark)
    total_data.sort_values(['trade_date', 'code'], inplace=True)
    self.index_return = self.engine.fetch_dx_return_index_range(
        self.benchmark, dates=self.dates, horizon=self.horizon,
        offset=1).set_index('trade_date')
    self.total_data = total_data
    self.total_risk_cov = total_risk_cov
def cs_impl(ref_date, factor_data, factor_name, risk_exposure, constraint_risk, industry_matrix, dx_returns):
    """Single-date cross-sectional factor test with winsorize/standardize
    processing on both the factor scores and the realized returns.

    Returns (portfolio log-return, IC, OLS t-stat); returns NaNs and logs a
    warning when too few valid observations remain after cleaning.
    """
    total_data = pd.merge(factor_data, risk_exposure, on='code')
    total_data = pd.merge(total_data, industry_matrix, on='code')
    # Treat infinities as missing before dropping incomplete rows.
    total_data = total_data.replace([np.inf, -np.inf], np.nan).dropna()
    # Skip the run if cleaning removed more than two-thirds of the sample.
    if len(total_data) < 0.33 * len(factor_data):
        alpha_logger.warning(
            f"valid data point({len(total_data)}) "
            f"is less than 33% of the total sample ({len(factor_data)}). Omit this run"
        )
        return np.nan, np.nan, np.nan
    total_risk_exp = total_data[constraint_risk]
    # Winsorize + standardize the factor, neutralize against constraint
    # risks, then standardize again.
    er = total_data[[factor_name]].values.astype(float)
    er = factor_processing(er, [winsorize_normal, standardize],
                           total_risk_exp.values, [standardize]).flatten()
    industry = total_data.industry_name.values
    codes = total_data.code.tolist()
    target_pos = pd.DataFrame({
        'code': codes,
        'weight': er,
        'industry': industry
    })
    # Scale to unit gross exposure.
    target_pos['weight'] = target_pos['weight'] / target_pos['weight'].abs(
    ).sum()
    target_pos = pd.merge(target_pos, dx_returns, on=['code'])
    target_pos = pd.merge(target_pos,
                          total_data[['code'] + constraint_risk],
                          on=['code'])
    # Re-take the risk exposures aligned to the merged position frame.
    total_risk_exp = target_pos[constraint_risk]
    activate_weight = target_pos['weight'].values
    excess_return = np.exp(target_pos[['dx']].values) - 1.
    # The realized returns receive the same cleaning/neutralization
    # treatment as the factor scores.
    excess_return = factor_processing(
        excess_return, [winsorize_normal, standardize],
        total_risk_exp.values, [winsorize_normal, standardize]).flatten()
    port_ret = np.log(activate_weight @ excess_return + 1.)
    ic = np.corrcoef(excess_return, activate_weight)[0, 1]
    # t-stat of the slope in: excess return ~ const + active weight.
    x = sm.add_constant(activate_weight)
    results = sm.OLS(excess_return, x).fit()
    t_stats = results.tvalues[1]
    alpha_logger.info(
        f"{ref_date} is finished with {len(target_pos)} stocks for {factor_name}"
    )
    alpha_logger.info(
        f"{ref_date} risk_exposure: "
        f"{np.sum(np.square(target_pos.weight.values @ target_pos[constraint_risk].values))}"
    )
    return port_ret, ic, t_stats
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0):
    """Build the rebalance schedule and load merged factor/return data.

    Returns a tuple of (dates, return frame with 'dx', feature frame with
    weight/isOpen/industry columns plus the transformer's factor columns).
    """
    # Push the start date back by `warm_start` periods so models have
    # history to train on before the nominal start.
    if warm_start > 0:
        p = Period(frequency)
        p = Period(length=-warm_start * p.length(), units=p.units())
        start_date = advanceDateByCalendar('china.sse', start_date,
                                           p).strftime('%Y-%m-%d')
    dates = makeSchedule(start_date,
                         end_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Forward)
    dates = [d.strftime('%Y-%m-%d') for d in dates]
    horizon = map_freq(frequency)
    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)
    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates).sort_values(
        ['trade_date', 'code'])
    alpha_logger.info("factor data loading finished")
    return_df = engine.fetch_dx_return_range(universe,
                                             dates=dates,
                                             horizon=horizon)
    alpha_logger.info("return data loading finished")
    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")
    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")
    df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()
    # Left-merge the benchmark; missing benchmark weight means the stock
    # is outside the benchmark and gets weight 0 below.
    df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
    df = pd.merge(df, industry_df, on=['trade_date', 'code'])
    df['weight'] = df['weight'].fillna(0.)
    return dates, df[['trade_date', 'code', 'dx']], df[[
        'trade_date', 'code', 'weight', 'isOpen', 'industry_code', 'industry'
    ] + transformer.names]
def fetch_data_package(engine: SqlEngine,
                       alpha_factors: Iterable[object],
                       start_date: str,
                       end_date: str,
                       frequency: str,
                       universe: Universe,
                       benchmark: int,
                       warm_start: int = 0,
                       batch: int = 1,
                       neutralized_risk: Iterable[str] = None,
                       risk_model: str = 'short',
                       pre_process: Iterable[object] = None,
                       post_process: Iterable[object] = None) -> dict:
    """Fetch and batch all data needed for model training/prediction.

    Loads factor and return data via ``prepare_data``, merges in risk
    exposures via ``_merge_df``, then batches the x/y arrays with
    ``batch_processing``. Returns a dict with keys 'x_names',
    'settlement', 'train' and 'predict'.
    """
    alpha_logger.info("Starting data package fetching ...")
    transformer = Transformer(alpha_factors)
    dates, return_df, factor_df = prepare_data(engine, transformer,
                                               start_date, end_date,
                                               frequency, universe,
                                               benchmark, warm_start)
    # BUG FIX: _merge_df returns nine values (the last being the per-row
    # ``codes``); the previous eight-name unpacking raised ValueError at
    # runtime. ``codes`` is not needed by this variant and is discarded.
    return_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, _codes = \
        _merge_df(engine, transformer.names, factor_df, return_df, universe,
                  dates, risk_model, neutralized_risk)
    # Carry settlement-relevant columns over to the return frame.
    return_df['weight'] = train_x['weight']
    return_df['industry'] = train_x['industry']
    return_df['industry_code'] = train_x['industry_code']
    return_df['isOpen'] = train_x['isOpen']
    if neutralized_risk:
        # Expose each neutralized risk factor as its own column.
        for i, name in enumerate(neutralized_risk):
            return_df.loc[:, name] = risk_exp[:, i]
    alpha_logger.info("Loading data is finished")
    train_x_buckets, train_y_buckets, predict_x_buckets, predict_y_buckets = batch_processing(
        x_values, y_values, dates, date_label, batch, risk_exp, pre_process,
        post_process)
    alpha_logger.info("Data processing is finished")
    ret = dict()
    ret['x_names'] = transformer.names
    ret['settlement'] = return_df
    ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets}
    ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets}
    return ret
def update_uqer_index_components(ds, **kwargs):
    """Airflow-style task: refresh index component weights from uqer.

    For each index, fetches the day's close weights from the uqer API;
    when the API returns nothing, falls back to the previous business
    day's rows already stored in the database. The collected rows replace
    any existing rows for this date in the IndexComponent table.
    """
    ref_date, this_date = process_date(ds)
    flag = check_holiday(this_date)
    if not flag:
        return
    index_codes = [
        '000001', '000002', '000003', '000004', '000005', '000006', '000007',
        '000008', '000009', '000010', '000015', '000016', '000020', '000090',
        '000132', '000133', '000300', '000852', '000902', '000903', '000904',
        '000905', '000906', '000907', '000922', '399001', '399002', '399004',
        '399005', '399006', '399007', '399008', '399009', '399010', '399011',
        '399012', '399013', '399107', '399324', '399330', '399333', '399400',
        '399401', '399649'
    ]
    total_data = pd.DataFrame()
    for index in index_codes:
        df = api.IdxCloseWeightGet(ticker=index,
                                   beginDate=ref_date,
                                   endDate=ref_date)
        if df.empty:
            # API returned nothing: reuse the previous business day's
            # components stored in the database, relabeled to today.
            ref_previous_date = advanceDateByCalendar('china.sse', this_date,
                                                      '-1b')
            query = select([IndexComponent]).where(
                and_(IndexComponent.trade_date == ref_previous_date,
                     IndexComponent.indexCode == int(index)))
            df = pd.read_sql(query, engine)
            df['trade_date'] = this_date
            if df.empty:
                continue
            alpha_logger.info('{0} is finished with previous data {1}'.format(
                index, len(df)))
        else:
            ################################
            # 2017-10-09, patch for uqer bug
            # Keep only equity constituents: SZ codes starting with 0/3
            # on XSHE, SH codes starting with 6 on XSHG.
            def filter_out_eqy(code: str):
                if code[0] in ['0', '3'] and code[-4:] in ['XSHE']:
                    return True
                elif code[0] in ['6'] and code[-4:] in ['XSHG']:
                    return True
                else:
                    return False

            df = df[df.consID.apply(lambda x: filter_out_eqy(x))]
            ################################
            # Rename uqer's column names to the local schema.
            df.rename(columns={
                'ticker': 'indexCode',
                'secShortName': 'indexShortName',
                'consTickerSymbol': 'code',
                'consExchangeCD': 'exchangeCD',
                'consShortName': 'secShortName'
            }, inplace=True)
            df['indexCode'] = df.indexCode.astype(int)
            df['code'] = df.code.astype(int)
            df['trade_date'] = this_date
            del df['secID']
            del df['consID']
            alpha_logger.info('{0} is finished with new data {1}'.format(
                index, len(df)))
        # NOTE(review): DataFrame.append was removed in pandas 2.x — this
        # code assumes an older pandas; verify against the pinned version.
        total_data = total_data.append(df)
    # NOTE(review): if every index produced no rows, total_data has no
    # 'indexCode' column and this raises AttributeError before the empty
    # check below — confirm at least one index always yields data.
    index_codes = total_data.indexCode.unique()
    index_codes = [int(index) for index in index_codes]
    # Delete-then-insert: replace any existing rows for this date.
    query = delete(IndexComponent).where(
        and_(IndexComponent.trade_date == this_date,
             IndexComponent.indexCode.in_(index_codes)))
    engine.execute(query)
    if total_data.empty:
        return
    data_info_log(total_data, IndexComponent)
    format_data(total_data)
    total_data.to_sql(IndexComponent.__table__.name,
                      engine,
                      index=False,
                      if_exists='append')
def process_date(ds):
    """Parse an ISO date string ``YYYY-MM-DD`` into working forms.

    Returns a (compact 'YYYYMMDD' string, datetime object) pair.
    """
    alpha_logger.info("Loading data at {0}".format(ds))
    parsed = dt.datetime.strptime(ds, '%Y-%m-%d')
    return parsed.strftime('%Y%m%d'), parsed
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0,
                 fit_target: Union[Transformer, object] = None):
    """Build the rebalance schedule and load merged factor/target data.

    When ``fit_target`` is None the fit target is the forward dx return;
    otherwise it is the forward value of the given factor expression.
    Returns (dates, target frame with 'dx', feature frame).
    """
    # Push the start date back by `warm_start` periods so models have
    # history to train on before the nominal start.
    if warm_start > 0:
        p = Period(frequency)
        p = Period(length=-warm_start * p.length(), units=p.units())
        start_date = advanceDateByCalendar('china.sse', start_date,
                                           p).strftime('%Y-%m-%d')
    dates = makeSchedule(start_date,
                         end_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Forward)
    dates = [d.strftime('%Y-%m-%d') for d in dates]
    horizon = map_freq(frequency)
    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)
    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates).sort_values(
        ['trade_date', 'code'])
    alpha_logger.info("factor data loading finished")
    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe,
                                                 dates=dates,
                                                 horizon=horizon)
    else:
        # Fetch one period beyond the schedule so the forward-shifted
        # target is defined on the last in-sample date, then restrict
        # back to the schedule and forward-fill gaps per stock.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1],
                                              frequency)
        target_df = engine.fetch_factor_range_forward(universe,
                                                      factors=fit_target,
                                                      dates=dates +
                                                      [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        target_df = target_df.groupby('code').apply(
            lambda x: x.fillna(method='pad'))
    alpha_logger.info("fit target data loading finished")
    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")
    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")
    df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna()
    # Left-merge the benchmark; stocks outside it get weight 0 below.
    df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
    df = pd.merge(df, industry_df, on=['trade_date', 'code'])
    df['weight'] = df['weight'].fillna(0.)
    df.dropna(inplace=True)
    return dates, df[[
        'trade_date', 'code', 'dx'
    ]], df[['trade_date', 'code', 'weight', 'industry_code', 'industry'] +
           transformer.names]
def fetch_data_package(engine: SqlEngine,
                       alpha_factors: Iterable[object],
                       start_date: str,
                       end_date: str,
                       frequency: str,
                       universe: Universe,
                       benchmark: int,
                       warm_start: int = 0,
                       batch: int = 1,
                       neutralized_risk: Iterable[str] = None,
                       risk_model: str = 'short',
                       pre_process: Iterable[object] = None,
                       post_process: Iterable[object] = None,
                       fit_target: Union[Transformer, object] = None) -> dict:
    """Fetch, merge and batch all data needed for training and prediction.

    Warm-up data (before ``start_date``) is loaded for model fitting and
    then filtered out of every returned bucket and the settlement frame.
    Returns a dict with keys 'x_names', 'settlement', 'train', 'predict'.
    """
    alpha_logger.info("Starting data package fetching ...")
    transformer = Transformer(alpha_factors)
    names = transformer.names
    # Extend the warm start by `batch` so every training bucket is full.
    dates, target_df, factor_df = prepare_data(engine,
                                               transformer,
                                               start_date,
                                               end_date,
                                               frequency,
                                               universe,
                                               benchmark,
                                               warm_start + batch,
                                               fit_target=fit_target)
    target_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes = \
        _merge_df(engine, names, factor_df, target_df, universe, dates,
                  risk_model, neutralized_risk)
    alpha_logger.info("data merging finished")
    # Carry settlement-relevant columns over to the target frame.
    target_df['weight'] = train_x['weight']
    target_df['industry'] = train_x['industry']
    target_df['industry_code'] = train_x['industry_code']
    if neutralized_risk:
        # Expose each neutralized risk factor as its own column.
        for i, name in enumerate(neutralized_risk):
            target_df.loc[:, name] = risk_exp[:, i]
    alpha_logger.info("Loading data is finished")
    train_x_buckets, train_y_buckets, train_risk_buckets, predict_x_buckets, predict_y_buckets, predict_risk_buckets, predict_codes_bucket \
        = batch_processing(names, x_values, y_values, dates, date_label,
                           batch, risk_exp, pre_process, post_process, codes)
    alpha_logger.info("Data processing is finished")
    ret = dict()
    ret['x_names'] = names
    # Drop warm-up rows from the settlement frame and all buckets below.
    ret['settlement'] = target_df[target_df.trade_date >= start_date]
    train_x_buckets = {
        k: train_x_buckets[k]
        for k in train_x_buckets if k.strftime('%Y-%m-%d') >= start_date
    }
    train_y_buckets = {
        k: train_y_buckets[k]
        for k in train_y_buckets if k.strftime('%Y-%m-%d') >= start_date
    }
    train_risk_buckets = {
        k: train_risk_buckets[k]
        for k in train_risk_buckets if k.strftime('%Y-%m-%d') >= start_date
    }
    predict_x_buckets = {
        k: predict_x_buckets[k]
        for k in predict_x_buckets if k.strftime('%Y-%m-%d') >= start_date
    }
    predict_y_buckets = {
        k: predict_y_buckets[k]
        for k in predict_y_buckets if k.strftime('%Y-%m-%d') >= start_date
    }
    if neutralized_risk:
        predict_risk_buckets = {
            k: predict_risk_buckets[k]
            for k in predict_risk_buckets
            if k.strftime('%Y-%m-%d') >= start_date
        }
    else:
        predict_risk_buckets = None
    predict_codes_bucket = {
        k: predict_codes_bucket[k]
        for k in predict_codes_bucket if k.strftime('%Y-%m-%d') >= start_date
    }
    ret['train'] = {
        'x': train_x_buckets,
        'y': train_y_buckets,
        'risk': train_risk_buckets
    }
    ret['predict'] = {
        'x': predict_x_buckets,
        'y': predict_y_buckets,
        'risk': predict_risk_buckets,
        'code': predict_codes_bucket
    }
    return ret
def setUp(self):
    """Create the SQL engine and pick a random SSE business day
    between 2010-10-01 and 2018-04-27 as the test reference date."""
    self.engine = SqlEngine(DATA_ENGINE_URI)
    biz_dates = bizDatesList('china.sse', '2010-10-01', '2018-04-27')
    self.ref_date = random.choice(biz_dates).strftime('%Y-%m-%d')
    alpha_logger.info("Test date: {0}".format(self.ref_date))
def run(self):
    """Run the full backtest: load data, train per-date models, rebalance
    each trade date through the executor, and return performance.

    Returns (ret_df, positions): a per-date frame with returns, turnover,
    leverage, benchmark and excess returns, and the executed positions.
    """
    alpha_logger.info("starting backting ...")
    total_factors = self.engine.fetch_factor_range(
        self.running_setting.universe,
        self.alpha_model.formulas,
        dates=self.running_setting.dates)
    alpha_logger.info("alpha factor data loading finished ...")
    total_industry = self.engine.fetch_industry_matrix_range(
        self.running_setting.universe,
        dates=self.running_setting.dates,
        category=self.running_setting.industry_cat,
        level=self.running_setting.industry_level)
    alpha_logger.info("industry data loading finished ...")
    total_benchmark = self.engine.fetch_benchmark_range(
        dates=self.running_setting.dates,
        benchmark=self.running_setting.benchmark)
    alpha_logger.info("benchmark data loading finished ...")
    total_risk_cov, total_risk_exposure = self.engine.fetch_risk_model_range(
        self.running_setting.universe,
        dates=self.running_setting.dates,
        risk_model=self.data_meta.risk_model)
    alpha_logger.info("risk_model data loading finished ...")
    total_returns = self.engine.fetch_dx_return_range(
        self.running_setting.universe,
        dates=self.running_setting.dates,
        horizon=self.running_setting.horizon,
        offset=1)
    alpha_logger.info("returns data loading finished ...")
    total_data = pd.merge(total_factors, total_industry,
                          on=['trade_date', 'code'])
    # Left-merge the benchmark so non-benchmark stocks get weight 0.
    total_data = pd.merge(total_data, total_benchmark,
                          on=['trade_date', 'code'], how='left')
    total_data.fillna({'weight': 0.}, inplace=True)
    total_data = pd.merge(total_data, total_returns,
                          on=['trade_date', 'code'])
    total_data = pd.merge(total_data, total_risk_exposure,
                          on=['trade_date', 'code'])
    # BUG FIX: .reshape on a pandas Series was removed in modern pandas;
    # go through .values first (consistent with prepare_backtest_data).
    is_in_benchmark = (total_data.weight > 0.).astype(float).values.reshape((-1, 1))
    total_data.loc[:, 'benchmark'] = is_in_benchmark
    total_data.loc[:, 'total'] = np.ones_like(is_in_benchmark)
    total_data.sort_values(['trade_date', 'code'], inplace=True)
    total_data_groups = total_data.groupby('trade_date')
    rets = []
    turn_overs = []
    leverags = []
    previous_pos = pd.DataFrame()
    executor = copy.deepcopy(self.running_setting.executor)
    positions = pd.DataFrame()
    # Train one model per trade date, serially or via the dask cluster.
    if self.dask_client is None:
        models = {}
        for ref_date, _ in total_data_groups:
            models[ref_date] = train_model(ref_date.strftime('%Y-%m-%d'),
                                           self.alpha_model, self.data_meta)
    else:
        def worker(parameters):
            new_model = train_model(parameters[0].strftime('%Y-%m-%d'),
                                    parameters[1], parameters[2])
            return parameters[0], new_model

        l = self.dask_client.map(worker,
                                 [(d[0], self.alpha_model, self.data_meta)
                                  for d in total_data_groups])
        results = self.dask_client.gather(l)
        models = dict(results)
    for ref_date, this_data in total_data_groups:
        new_model = models[ref_date]
        # Impute missing feature values with per-date medians.
        this_data = this_data.fillna(this_data[new_model.features].median())
        codes = this_data.code.values.tolist()
        if self.running_setting.rebalance_method == 'tv':
            risk_cov = total_risk_cov[total_risk_cov.trade_date == ref_date]
            sec_cov = self._generate_sec_cov(this_data, risk_cov)
        else:
            sec_cov = None
        benchmark_w = this_data.weight.values
        constraints = LinearConstraints(self.running_setting.bounds,
                                        this_data,
                                        benchmark_w)
        lbound = np.maximum(
            0., benchmark_w - self.running_setting.weights_bandwidth)
        ubound = self.running_setting.weights_bandwidth + benchmark_w
        if previous_pos.empty:
            current_position = None
        else:
            previous_pos.set_index('code', inplace=True)
            # BUG FIX: .loc with missing labels raises KeyError in modern
            # pandas; .reindex keeps missing codes as NaN, which the
            # fillna(0.) below zeroes out as intended.
            remained_pos = previous_pos.reindex(codes)
            remained_pos.fillna(0., inplace=True)
            current_position = remained_pos.weight.values
        features = new_model.features
        raw_factors = this_data[features].values
        new_factors = factor_processing(
            raw_factors,
            pre_process=self.data_meta.pre_process,
            risk_factors=this_data[self.data_meta.neutralized_risk].values.astype(float)
            if self.data_meta.neutralized_risk else None,
            post_process=self.data_meta.post_process)
        er = new_model.predict(pd.DataFrame(
            new_factors, columns=features)).astype(float)
        alpha_logger.info('{0} re-balance: {1} codes'.format(
            ref_date, len(er)))
        target_pos = self._calculate_pos(er,
                                         this_data,
                                         constraints,
                                         benchmark_w,
                                         lbound,
                                         ubound,
                                         sec_cov=sec_cov,
                                         current_position=current_position,
                                         **self.running_setting.more_opts)
        target_pos['code'] = codes
        target_pos['trade_date'] = ref_date
        turn_over, executed_pos = executor.execute(target_pos=target_pos)
        leverage = executed_pos.weight.abs().sum()
        ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) - 1.)
        rets.append(np.log(1. + ret))
        executor.set_current(executed_pos)
        turn_overs.append(turn_over)
        leverags.append(leverage)
        positions = positions.append(executed_pos)
        previous_pos = executed_pos
    positions['benchmark_weight'] = total_data['weight'].values
    positions['dx'] = total_data.dx.values
    trade_dates = positions.trade_date.unique()
    ret_df = pd.DataFrame(
        {
            'returns': rets,
            'turn_over': turn_overs,
            'leverage': leverags
        },
        index=trade_dates)
    index_return = self.engine.fetch_dx_return_index_range(
        self.running_setting.benchmark,
        dates=self.running_setting.dates,
        horizon=self.running_setting.horizon,
        offset=1).set_index('trade_date')
    ret_df['benchmark_returns'] = index_return['dx']
    # Shift forward one period so each row reflects realized returns.
    ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1],
                                     self.running_setting.freq)] = 0.
    ret_df = ret_df.shift(1)
    ret_df.iloc[0] = 0.
    ret_df['excess_return'] = ret_df[
        'returns'] - ret_df['benchmark_returns'] * ret_df['leverage']
    return ret_df, positions
def run(self, running_setting):
    """Run the backtest loop using pre-trained per-date alpha/risk models.

    Iterates the cached ``self.total_data`` by trade date, predicts
    expected returns, optimizes target positions under the configured
    constraints, and tracks returns/turnover/leverage through the
    executor. Returns (ret_df, positions).
    """
    alpha_logger.info("starting backting ...")
    total_data_groups = self.total_data.groupby('trade_date')
    rets = []
    turn_overs = []
    leverags = []
    previous_pos = pd.DataFrame()
    executor = copy.deepcopy(running_setting.executor)
    positions = pd.DataFrame()
    # Lazily train the per-date models if not already prepared.
    if self.alpha_models is None:
        self.prepare_backtest_models()
    for ref_date, this_data in total_data_groups:
        risk_model = self.risk_models[ref_date]
        new_model = self.alpha_models[ref_date]
        codes = this_data.code.values.tolist()
        if previous_pos.empty:
            current_position = None
        else:
            # Align yesterday's position to today's universe; codes that
            # dropped out get weight 0 via the fillna below.
            previous_pos.set_index('code', inplace=True)
            remained_pos = previous_pos.reindex(codes)
            remained_pos.fillna(0., inplace=True)
            current_position = remained_pos.weight.values
        benchmark_w = this_data.weight.values
        constraints = LinearConstraints(running_setting.bounds,
                                        this_data,
                                        benchmark_w)
        lbound, ubound = self._create_lu_bounds(running_setting, codes,
                                                benchmark_w)
        this_data.fillna(0, inplace=True)
        # Neutralize/process the raw features before prediction.
        new_factors = factor_processing(
            this_data[new_model.features].values,
            pre_process=self.data_meta.pre_process,
            risk_factors=this_data[self.data_meta.neutralized_risk].values.
            astype(float) if self.data_meta.neutralized_risk else None,
            post_process=self.data_meta.post_process)
        new_factors = pd.DataFrame(new_factors,
                                   columns=new_model.features,
                                   index=codes)
        er = new_model.predict(new_factors).astype(float)
        alpha_logger.info('{0} re-balance: {1} codes'.format(
            ref_date, len(er)))
        target_pos = self._calculate_pos(
            running_setting,
            er,
            this_data,
            constraints,
            benchmark_w,
            lbound,
            ubound,
            risk_model=risk_model.get_risk_profile(codes),
            current_position=current_position)
        target_pos['code'] = codes
        target_pos['trade_date'] = ref_date
        turn_over, executed_pos = executor.execute(target_pos=target_pos)
        leverage = executed_pos.weight.abs().sum()
        # Realized log-return of the executed position over the period.
        ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) - 1.)
        rets.append(np.log(1. + ret))
        executor.set_current(executed_pos)
        turn_overs.append(turn_over)
        leverags.append(leverage)
        # NOTE(review): DataFrame.append was removed in pandas 2.x —
        # assumes an older pandas; verify against the pinned version.
        positions = positions.append(executed_pos)
        previous_pos = executed_pos
    positions['benchmark_weight'] = self.total_data['weight'].values
    positions['dx'] = self.total_data.dx.values
    trade_dates = positions.trade_date.unique()
    ret_df = pd.DataFrame(
        {
            'returns': rets,
            'turn_over': turn_overs,
            'leverage': leverags
        },
        index=trade_dates)
    ret_df['benchmark_returns'] = self.index_return['dx']
    # Shift forward one period so each row reflects realized returns.
    ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1],
                                     self.freq)] = 0.
    ret_df = ret_df.shift(1)
    ret_df.iloc[0] = 0.
    ret_df['excess_return'] = ret_df[
        'returns'] - ret_df['benchmark_returns'] * ret_df['leverage']
    return ret_df, positions
def update_materialized_views(ds, **kwargs):
    """Refresh, then physically re-cluster, the full_factor_view."""
    for action, statement in (
            ("refresh", "REFRESH MATERIALIZED VIEW CONCURRENTLY full_factor_view;"),
            ("cluster", "CLUSTER full_factor_view;")):
        alpha_logger.info("starting {0} full_factor_view ...".format(action))
        engine.execute(statement)
def initdb(args):
    """Create all ORM-declared tables on the database at ``args.url``."""
    alpha_logger.info('DB: ' + args.url)
    db_engine = create_engine(args.url)
    models.Base.metadata.create_all(db_engine)
    alpha_logger.info('DB: initialization finished.')
def run(self, running_setting):
    """Run the backtest loop, training models in-loop (serial or dask)
    and processing each factor column separately (per-factor dropna).

    Returns (ret_df, positions): a per-date performance frame and the
    executed positions.
    """
    alpha_logger.info("starting backting ...")
    total_data_groups = self.total_data.groupby('trade_date')
    rets = []
    turn_overs = []
    leverags = []
    previous_pos = pd.DataFrame()
    executor = copy.deepcopy(running_setting.executor)
    positions = pd.DataFrame()
    # Train one model per trade date, serially or via the dask cluster.
    if self.dask_client is None:
        models = {}
        for ref_date, _ in total_data_groups:
            models[ref_date] = train_model(ref_date.strftime('%Y-%m-%d'),
                                           self.alpha_model, self.data_meta)
    else:
        def worker(parameters):
            new_model = train_model(parameters[0].strftime('%Y-%m-%d'),
                                    parameters[1], parameters[2])
            return parameters[0], new_model

        l = self.dask_client.map(worker,
                                 [(d[0], self.alpha_model, self.data_meta)
                                  for d in total_data_groups])
        results = self.dask_client.gather(l)
        models = dict(results)
    for ref_date, this_data in total_data_groups:
        new_model = models[ref_date]
        codes = this_data.code.values.tolist()
        if previous_pos.empty:
            current_position = None
        else:
            previous_pos.set_index('code', inplace=True)
            # BUG FIX: .loc with missing labels raises KeyError in modern
            # pandas; .reindex keeps dropped codes as NaN, which the
            # fillna(0.) below zeroes out (consistent with the sibling
            # run() implementation).
            remained_pos = previous_pos.reindex(codes)
            remained_pos.fillna(0., inplace=True)
            current_position = remained_pos.weight.values
        if running_setting.rebalance_method == 'tv':
            risk_cov = self.total_risk_cov[self.total_risk_cov.trade_date ==
                                           ref_date]
            sec_cov = self._generate_sec_cov(this_data, risk_cov)
        else:
            sec_cov = None
        benchmark_w = this_data.weight.values
        constraints = LinearConstraints(running_setting.bounds, this_data,
                                        benchmark_w)
        lbound, ubound = self._create_lu_bounds(running_setting, codes,
                                                benchmark_w)
        features = new_model.features
        # Process each factor separately so a missing value in one factor
        # does not drop the stock from the others.
        dfs = []
        for name in features:
            data_cleaned = this_data.dropna(subset=[name])
            raw_factors = data_cleaned[[name]].values
            new_factors = factor_processing(
                raw_factors,
                pre_process=self.data_meta.pre_process,
                risk_factors=data_cleaned[
                    self.data_meta.neutralized_risk].values.astype(float)
                if self.data_meta.neutralized_risk else None,
                post_process=self.data_meta.post_process)
            df = pd.DataFrame(new_factors,
                              columns=[name],
                              index=data_cleaned.code)
            dfs.append(df)
        new_factors = pd.concat(dfs, axis=1)
        # NOTE(review): assumes every code survives at least one factor's
        # dropna; otherwise .loc raises on the missing label — confirm.
        new_factors = new_factors.loc[codes].fillna(new_factors.median())
        er = new_model.predict(new_factors).astype(float)
        alpha_logger.info('{0} re-balance: {1} codes'.format(
            ref_date, len(er)))
        target_pos = self._calculate_pos(running_setting,
                                         er,
                                         this_data,
                                         constraints,
                                         benchmark_w,
                                         lbound,
                                         ubound,
                                         sec_cov=sec_cov,
                                         current_position=current_position)
        target_pos['code'] = codes
        target_pos['trade_date'] = ref_date
        turn_over, executed_pos = executor.execute(target_pos=target_pos)
        leverage = executed_pos.weight.abs().sum()
        ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) - 1.)
        rets.append(np.log(1. + ret))
        executor.set_current(executed_pos)
        turn_overs.append(turn_over)
        leverags.append(leverage)
        positions = positions.append(executed_pos)
        previous_pos = executed_pos
    positions['benchmark_weight'] = self.total_data['weight'].values
    positions['dx'] = self.total_data.dx.values
    trade_dates = positions.trade_date.unique()
    ret_df = pd.DataFrame(
        {
            'returns': rets,
            'turn_over': turn_overs,
            'leverage': leverags
        },
        index=trade_dates)
    ret_df['benchmark_returns'] = self.index_return['dx']
    # Shift forward one period so each row reflects realized returns.
    ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1],
                                     self.freq)] = 0.
    ret_df = ret_df.shift(1)
    ret_df.iloc[0] = 0.
    ret_df['excess_return'] = ret_df[
        'returns'] - ret_df['benchmark_returns'] * ret_df['leverage']
    return ret_df, positions