Esempio n. 1
0
    def prepare_backtest_models(self):
        if self.total_data is None:
            self.prepare_backtest_data()
        total_data_groups = self.total_data.groupby('trade_date')
        if self.dask_client is None:
            models = {}
            for ref_date, _ in total_data_groups:
                models[ref_date], _, _ = train_model(
                    ref_date.strftime('%Y-%m-%d'), self.alpha_model,
                    self.data_meta)
        else:

            def worker(parameters):
                new_model, _, _ = train_model(
                    parameters[0].strftime('%Y-%m-%d'), parameters[1],
                    parameters[2])
                return parameters[0], new_model

            l = self.dask_client.map(worker,
                                     [(d[0], self.alpha_model, self.data_meta)
                                      for d in total_data_groups])
            results = self.dask_client.gather(l)
            models = dict(results)
        self.alpha_models = models
        alpha_logger.info("alpha models training finished ...")
Esempio n. 2
0
 def worker(parameters):
     new_model = train_model(parameters[0].strftime('%Y-%m-%d'),
                             parameters[1], parameters[2])
     return parameters[0], new_model
Esempio n. 3
0
    def run(self, running_setting):
        alpha_logger.info("starting backting ...")
        total_data_groups = self.total_data.groupby('trade_date')

        rets = []
        turn_overs = []
        leverags = []
        previous_pos = pd.DataFrame()
        executor = copy.deepcopy(running_setting.executor)
        positions = pd.DataFrame()

        if self.dask_client is None:
            models = {}
            for ref_date, _ in total_data_groups:
                models[ref_date] = train_model(ref_date.strftime('%Y-%m-%d'),
                                               self.alpha_model,
                                               self.data_meta)
        else:

            def worker(parameters):
                new_model = train_model(parameters[0].strftime('%Y-%m-%d'),
                                        parameters[1], parameters[2])
                return parameters[0], new_model

            l = self.dask_client.map(worker,
                                     [(d[0], self.alpha_model, self.data_meta)
                                      for d in total_data_groups])
            results = self.dask_client.gather(l)
            models = dict(results)

        for ref_date, this_data in total_data_groups:
            new_model = models[ref_date]
            codes = this_data.code.values.tolist()

            if previous_pos.empty:
                current_position = None
            else:
                previous_pos.set_index('code', inplace=True)
                remained_pos = previous_pos.loc[codes]

                remained_pos.fillna(0., inplace=True)
                current_position = remained_pos.weight.values

            if running_setting.rebalance_method == 'tv':
                risk_cov = self.total_risk_cov[self.total_risk_cov.trade_date
                                               == ref_date]
                sec_cov = self._generate_sec_cov(this_data, risk_cov)
            else:
                sec_cov = None

            benchmark_w = this_data.weight.values
            constraints = LinearConstraints(running_setting.bounds, this_data,
                                            benchmark_w)

            lbound, ubound = self._create_lu_bounds(running_setting, codes,
                                                    benchmark_w)

            features = new_model.features
            dfs = []
            for name in features:
                data_cleaned = this_data.dropna(subset=[name])
                raw_factors = data_cleaned[[name]].values
                new_factors = factor_processing(
                    raw_factors,
                    pre_process=self.data_meta.pre_process,
                    risk_factors=data_cleaned[
                        self.data_meta.neutralized_risk].values.astype(float)
                    if self.data_meta.neutralized_risk else None,
                    post_process=self.data_meta.post_process)
                df = pd.DataFrame(new_factors,
                                  columns=[name],
                                  index=data_cleaned.code)
                dfs.append(df)

            new_factors = pd.concat(dfs, axis=1)
            new_factors = new_factors.loc[codes].fillna(new_factors.median())
            er = new_model.predict(new_factors).astype(float)

            alpha_logger.info('{0} re-balance: {1} codes'.format(
                ref_date, len(er)))
            target_pos = self._calculate_pos(running_setting,
                                             er,
                                             this_data,
                                             constraints,
                                             benchmark_w,
                                             lbound,
                                             ubound,
                                             sec_cov=sec_cov,
                                             current_position=current_position)

            target_pos['code'] = codes
            target_pos['trade_date'] = ref_date

            turn_over, executed_pos = executor.execute(target_pos=target_pos)
            leverage = executed_pos.weight.abs().sum()

            ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) -
                                                1.)
            rets.append(np.log(1. + ret))
            executor.set_current(executed_pos)
            turn_overs.append(turn_over)
            leverags.append(leverage)
            positions = positions.append(executed_pos)
            previous_pos = executed_pos

        positions['benchmark_weight'] = self.total_data['weight'].values
        positions['dx'] = self.total_data.dx.values

        trade_dates = positions.trade_date.unique()
        ret_df = pd.DataFrame(
            {
                'returns': rets,
                'turn_over': turn_overs,
                'leverage': leverags
            },
            index=trade_dates)

        ret_df['benchmark_returns'] = self.index_return['dx']
        ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1],
                                         self.freq)] = 0.
        ret_df = ret_df.shift(1)
        ret_df.iloc[0] = 0.
        ret_df['excess_return'] = ret_df[
            'returns'] - ret_df['benchmark_returns'] * ret_df['leverage']

        return ret_df, positions
Esempio n. 4
0
    def run(self):
        alpha_logger.info("starting backting ...")

        total_factors = self.engine.fetch_factor_range(
            self.running_setting.universe,
            self.alpha_model.formulas,
            dates=self.running_setting.dates)
        alpha_logger.info("alpha factor data loading finished ...")

        total_industry = self.engine.fetch_industry_matrix_range(
            self.running_setting.universe,
            dates=self.running_setting.dates,
            category=self.running_setting.industry_cat,
            level=self.running_setting.industry_level)
        alpha_logger.info("industry data loading finished ...")

        total_benchmark = self.engine.fetch_benchmark_range(
            dates=self.running_setting.dates,
            benchmark=self.running_setting.benchmark)
        alpha_logger.info("benchmark data loading finished ...")

        total_risk_cov, total_risk_exposure = self.engine.fetch_risk_model_range(
            self.running_setting.universe,
            dates=self.running_setting.dates,
            risk_model=self.data_meta.risk_model)
        alpha_logger.info("risk_model data loading finished ...")

        total_returns = self.engine.fetch_dx_return_range(
            self.running_setting.universe,
            dates=self.running_setting.dates,
            horizon=self.running_setting.horizon,
            offset=1)
        alpha_logger.info("returns data loading finished ...")

        total_data = pd.merge(total_factors,
                              total_industry,
                              on=['trade_date', 'code'])
        total_data = pd.merge(total_data,
                              total_benchmark,
                              on=['trade_date', 'code'],
                              how='left')
        total_data.fillna({'weight': 0.}, inplace=True)
        total_data = pd.merge(total_data,
                              total_returns,
                              on=['trade_date', 'code'])
        total_data = pd.merge(total_data,
                              total_risk_exposure,
                              on=['trade_date', 'code'])

        is_in_benchmark = (total_data.weight > 0.).astype(float).reshape(
            (-1, 1))
        total_data.loc[:, 'benchmark'] = is_in_benchmark
        total_data.loc[:, 'total'] = np.ones_like(is_in_benchmark)
        total_data.sort_values(['trade_date', 'code'], inplace=True)
        total_data_groups = total_data.groupby('trade_date')

        rets = []
        turn_overs = []
        leverags = []
        previous_pos = pd.DataFrame()
        executor = copy.deepcopy(self.running_setting.executor)
        positions = pd.DataFrame()

        if self.dask_client is None:
            models = {}
            for ref_date, _ in total_data_groups:
                models[ref_date] = train_model(ref_date.strftime('%Y-%m-%d'),
                                               self.alpha_model,
                                               self.data_meta)
        else:

            def worker(parameters):
                new_model = train_model(parameters[0].strftime('%Y-%m-%d'),
                                        parameters[1], parameters[2])
                return parameters[0], new_model

            l = self.dask_client.map(worker,
                                     [(d[0], self.alpha_model, self.data_meta)
                                      for d in total_data_groups])
            results = self.dask_client.gather(l)
            models = dict(results)

        for ref_date, this_data in total_data_groups:
            new_model = models[ref_date]

            this_data = this_data.fillna(
                this_data[new_model.features].median())
            codes = this_data.code.values.tolist()

            if self.running_setting.rebalance_method == 'tv':
                risk_cov = total_risk_cov[total_risk_cov.trade_date ==
                                          ref_date]
                sec_cov = self._generate_sec_cov(this_data, risk_cov)
            else:
                sec_cov = None

            benchmark_w = this_data.weight.values
            constraints = LinearConstraints(self.running_setting.bounds,
                                            this_data, benchmark_w)

            lbound = np.maximum(
                0., benchmark_w - self.running_setting.weights_bandwidth)
            ubound = self.running_setting.weights_bandwidth + benchmark_w

            if previous_pos.empty:
                current_position = None
            else:
                previous_pos.set_index('code', inplace=True)
                remained_pos = previous_pos.loc[codes]

                remained_pos.fillna(0., inplace=True)
                current_position = remained_pos.weight.values

            features = new_model.features
            raw_factors = this_data[features].values
            new_factors = factor_processing(
                raw_factors,
                pre_process=self.data_meta.pre_process,
                risk_factors=this_data[self.data_meta.neutralized_risk].values.
                astype(float) if self.data_meta.neutralized_risk else None,
                post_process=self.data_meta.post_process)

            er = new_model.predict(pd.DataFrame(
                new_factors, columns=features)).astype(float)

            alpha_logger.info('{0} re-balance: {1} codes'.format(
                ref_date, len(er)))
            target_pos = self._calculate_pos(er,
                                             this_data,
                                             constraints,
                                             benchmark_w,
                                             lbound,
                                             ubound,
                                             sec_cov=sec_cov,
                                             current_position=current_position,
                                             **self.running_setting.more_opts)

            target_pos['code'] = codes
            target_pos['trade_date'] = ref_date

            turn_over, executed_pos = executor.execute(target_pos=target_pos)
            leverage = executed_pos.weight.abs().sum()

            ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) -
                                                1.)
            rets.append(np.log(1. + ret))
            executor.set_current(executed_pos)
            turn_overs.append(turn_over)
            leverags.append(leverage)
            positions = positions.append(executed_pos)
            previous_pos = executed_pos

        positions['benchmark_weight'] = total_data['weight'].values
        positions['dx'] = total_data.dx.values

        trade_dates = positions.trade_date.unique()
        ret_df = pd.DataFrame(
            {
                'returns': rets,
                'turn_over': turn_overs,
                'leverage': leverags
            },
            index=trade_dates)

        index_return = self.engine.fetch_dx_return_index_range(
            self.running_setting.benchmark,
            dates=self.running_setting.dates,
            horizon=self.running_setting.horizon,
            offset=1).set_index('trade_date')
        ret_df['benchmark_returns'] = index_return['dx']
        ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1],
                                         self.running_setting.freq)] = 0.
        ret_df = ret_df.shift(1)
        ret_df.iloc[0] = 0.
        ret_df['excess_return'] = ret_df[
            'returns'] - ret_df['benchmark_returns'] * ret_df['leverage']

        return ret_df, positions