Example #1
    def prepare_backtest_models(self):
        if self.total_data is None:
            self.prepare_backtest_data()
        total_data_groups = self.total_data.groupby('trade_date')
        if self.dask_client is None:
            models = {}
            for ref_date, _ in total_data_groups:
                models[ref_date], _, _ = train_model(
                    ref_date.strftime('%Y-%m-%d'), self.alpha_model,
                    self.data_meta)
        else:

            def worker(parameters):
                new_model, _, _ = train_model(
                    parameters[0].strftime('%Y-%m-%d'), parameters[1],
                    parameters[2])
                return parameters[0], new_model

            futures = self.dask_client.map(
                worker, [(d[0], self.alpha_model, self.data_meta)
                         for d in total_data_groups])
            results = self.dask_client.gather(futures)
            models = dict(results)
        self.alpha_models = models
        alpha_logger.info("alpha models training finished ...")
Example #2
def cs_impl(ref_date,
            factor_data,
            factor_name,
            risk_exposure,
            constraint_risk,
            industry_matrix,
            dx_returns):
    total_data = pd.merge(factor_data, risk_exposure, on='code')
    total_data = pd.merge(total_data, industry_matrix, on='code').dropna()
    total_risk_exp = total_data[constraint_risk]

    er = total_data[factor_name].values.astype(float)
    er = factor_processing(er, [], total_risk_exp.values, []).flatten()
    industry = total_data.industry_name.values

    codes = total_data.code.tolist()
    target_pos = pd.DataFrame({'code': codes,
                               'weight': er,
                               'industry': industry})
    target_pos['weight'] = target_pos['weight'] / target_pos['weight'].abs().sum()
    target_pos = pd.merge(target_pos, dx_returns, on=['code'])
    target_pos = pd.merge(target_pos, total_data[['code'] + constraint_risk], on=['code'])
    active_weight = target_pos.weight.values
    excess_return = np.exp(target_pos.dx.values) - 1.
    port_ret = np.log(active_weight @ excess_return + 1.)
    ic = np.corrcoef(excess_return, active_weight)[0, 1]
    x = sm.add_constant(active_weight)
    results = sm.OLS(excess_return, x).fit()
    t_stats = results.tvalues[1]

    alpha_logger.info(f"{ref_date} is finished with {len(target_pos)} stocks for {factor_name}")
    alpha_logger.info(f"{ref_date} risk_exposure: "
                      f"{np.sum(np.square(target_pos.weight.values @ target_pos[constraint_risk].values))}")
    return port_ret, ic, t_stats
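
The performance statistics at the end of cs_impl are standard: the IC is the correlation between active weights and realized excess returns, and the t-stat comes from a univariate OLS of returns on weights. A self-contained sketch on synthetic data (numpy and statsmodels only; all names here are illustrative):

import numpy as np
import statsmodels.api as sm

rng = np.random.RandomState(42)
active_weight = rng.randn(500) / 500.                 # toy active weights
excess_return = 0.5 * active_weight + rng.randn(500) * 1e-3

ic = np.corrcoef(excess_return, active_weight)[0, 1]  # information coefficient
x = sm.add_constant(active_weight)
t_stat = sm.OLS(excess_return, x).fit().tvalues[1]    # slope t-statistic
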
Example #3
def _merge_df(engine, names, factor_df, return_df, universe, dates, risk_model,
              neutralized_risk):
    risk_df = engine.fetch_risk_model_range(universe,
                                            dates=dates,
                                            risk_model=risk_model)[1]
    alpha_logger.info("risk data loading finished")
    used_neutralized_risk = list(set(total_risk_factors).difference(names))
    risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
    return_df = pd.merge(return_df, risk_df, on=['trade_date', 'code'])

    if neutralized_risk:
        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
        train_y = return_df.copy()

        risk_exp = train_x[neutralized_risk].values.astype(float)
        x_values = train_x[names].values.astype(float)
        y_values = train_y[['dx']].values
    else:
        risk_exp = None
        train_x = factor_df.copy()
        train_y = return_df.copy()
        x_values = train_x[names].values.astype(float)
        y_values = train_y[['dx']].values

    codes = train_x['code'].values
    date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime()
    dates = np.unique(date_label)
    return return_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes
Example #4
def check_holiday(this_date):
    flag = isBizDay('china.sse', this_date)

    if not flag:
        alpha_logger.info(
            'Job will be omitted as {0} is a holiday'.format(this_date))

    return flag
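
The calendar helpers used throughout these examples (isBizDay, advanceDateByCalendar, bizDatesList, makeSchedule) appear to come from the Finance-Python package. A minimal sketch, assuming PyFin is installed:

import datetime as dt
from PyFin.api import isBizDay

isBizDay('china.sse', dt.datetime(2018, 1, 1))  # False: New Year's Day holiday
isBizDay('china.sse', dt.datetime(2018, 1, 2))  # True: a regular trading day
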
Example #5
def data_info_log(df, table):
    data_len = len(df)

    if data_len > 0:
        alpha_logger.info("{0} records will be inserted in {1}".format(
            data_len, table))
    else:
        msg = "No records will be inserted in {0}".format(table)
        alpha_logger.warning(msg)
        raise ValueError(msg)
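
A quick usage sketch (the table name is illustrative): a non-empty frame only logs, while an empty one warns and raises, so upstream jobs fail fast instead of silently inserting nothing.

import pandas as pd

data_info_log(pd.DataFrame({'code': [1, 2]}), 'index_component')  # info: 2 records ...
data_info_log(pd.DataFrame(), 'index_component')                  # warning, then ValueError
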
Example #6
    def prepare_backtest_data(self):
        total_factors = self.engine.fetch_factor_range(
            self.universe, self.alpha_model.formulas, dates=self.dates)
        alpha_logger.info("alpha factor data loading finished ...")

        total_industry = self.engine.fetch_industry_matrix_range(
            self.universe,
            dates=self.dates,
            category=self.industry_cat,
            level=self.industry_level)
        alpha_logger.info("industry data loading finished ...")

        total_benchmark = self.engine.fetch_benchmark_range(
            dates=self.dates, benchmark=self.benchmark)
        alpha_logger.info("benchmark data loading finished ...")

        total_risk_cov, total_risk_exposure = self.engine.fetch_risk_model_range(
            self.universe,
            dates=self.dates,
            risk_model=self.data_meta.risk_model)
        alpha_logger.info("risk_model data loading finished ...")

        total_returns = self.engine.fetch_dx_return_range(self.universe,
                                                          dates=self.dates,
                                                          horizon=self.horizon,
                                                          offset=1)
        alpha_logger.info("returns data loading finished ...")

        total_data = pd.merge(total_factors,
                              total_industry,
                              on=['trade_date', 'code'])
        total_data = pd.merge(total_data,
                              total_benchmark,
                              on=['trade_date', 'code'],
                              how='left')
        total_data.fillna({'weight': 0.}, inplace=True)
        total_data = pd.merge(total_data,
                              total_returns,
                              on=['trade_date', 'code'])
        total_data = pd.merge(total_data,
                              total_risk_exposure,
                              on=['trade_date', 'code'])

        is_in_benchmark = (total_data.weight >
                           0.).astype(float).values.reshape((-1, 1))
        total_data.loc[:, 'benchmark'] = is_in_benchmark
        total_data.loc[:, 'total'] = np.ones_like(is_in_benchmark)
        total_data.sort_values(['trade_date', 'code'], inplace=True)
        self.index_return = self.engine.fetch_dx_return_index_range(
            self.benchmark, dates=self.dates, horizon=self.horizon,
            offset=1).set_index('trade_date')
        self.total_data = total_data
        self.total_risk_cov = total_risk_cov
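
The how='left' merge against the benchmark followed by fillna({'weight': 0.}) is what makes the benchmark indicator work: codes outside the benchmark keep their rows with weight 0 instead of being dropped. A toy sketch of just that step:

import pandas as pd

factors = pd.DataFrame({'trade_date': ['2018-01-02'] * 3,
                        'code': [1, 2, 3],
                        'f1': [0.1, 0.2, 0.3]})
bench = pd.DataFrame({'trade_date': ['2018-01-02'] * 2,
                      'code': [1, 3],
                      'weight': [0.6, 0.4]})

merged = pd.merge(factors, bench, on=['trade_date', 'code'], how='left')
merged.fillna({'weight': 0.}, inplace=True)
merged['benchmark'] = (merged.weight > 0.).astype(float)  # 1., 0., 1.
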
Example #7
def cs_impl(ref_date, factor_data, factor_name, risk_exposure, constraint_risk,
            industry_matrix, dx_returns):
    total_data = pd.merge(factor_data, risk_exposure, on='code')
    total_data = pd.merge(total_data, industry_matrix, on='code')
    total_data = total_data.replace([np.inf, -np.inf], np.nan).dropna()

    if len(total_data) < 0.33 * len(factor_data):
        alpha_logger.warning(
            f"valid data points ({len(total_data)}) "
            f"are fewer than 33% of the total sample ({len(factor_data)}); omitting this run"
        )
        return np.nan, np.nan, np.nan

    total_risk_exp = total_data[constraint_risk]

    er = total_data[[factor_name]].values.astype(float)
    er = factor_processing(er, [winsorize_normal, standardize],
                           total_risk_exp.values, [standardize]).flatten()
    industry = total_data.industry_name.values

    codes = total_data.code.tolist()
    target_pos = pd.DataFrame({
        'code': codes,
        'weight': er,
        'industry': industry
    })
    target_pos['weight'] = target_pos['weight'] / target_pos['weight'].abs().sum()
    target_pos = pd.merge(target_pos, dx_returns, on=['code'])
    target_pos = pd.merge(target_pos,
                          total_data[['code'] + constraint_risk],
                          on=['code'])
    total_risk_exp = target_pos[constraint_risk]
    active_weight = target_pos['weight'].values
    excess_return = np.exp(target_pos[['dx']].values) - 1.
    excess_return = factor_processing(
        excess_return, [winsorize_normal, standardize], total_risk_exp.values,
        [winsorize_normal, standardize]).flatten()
    port_ret = np.log(active_weight @ excess_return + 1.)
    ic = np.corrcoef(excess_return, active_weight)[0, 1]
    x = sm.add_constant(active_weight)
    results = sm.OLS(excess_return, x).fit()
    t_stats = results.tvalues[1]

    alpha_logger.info(
        f"{ref_date} is finished with {len(target_pos)} stocks for {factor_name}"
    )
    alpha_logger.info(
        f"{ref_date} risk_exposure: "
        f"{np.sum(np.square(target_pos.weight.values @ target_pos[constraint_risk].values))}"
    )
    return port_ret, ic, t_stats
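
Compared with Example #2, this version guards against sparse samples and runs both factor and returns through winsorize_normal/standardize with risk neutralization. Neutralizing against an exposure matrix amounts to taking OLS residuals; a rough stand-alone sketch (not alpha-mind's exact factor_processing implementation):

import numpy as np

def neutralize_sketch(factor, risk_exp):
    # residual of the factor after projecting out linear risk exposures
    beta, *_ = np.linalg.lstsq(risk_exp, factor, rcond=None)
    return factor - risk_exp @ beta
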
Example #8
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0):
    if warm_start > 0:
        p = Period(frequency)
        p = Period(length=-warm_start * p.length(), units=p.units())
        start_date = advanceDateByCalendar('china.sse', start_date,
                                           p).strftime('%Y-%m-%d')

    dates = makeSchedule(start_date,
                         end_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Forward)

    dates = [d.strftime('%Y-%m-%d') for d in dates]

    horizon = map_freq(frequency)

    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates).sort_values(
                                              ['trade_date', 'code'])
    alpha_logger.info("factor data loading finished")
    return_df = engine.fetch_dx_return_range(universe,
                                             dates=dates,
                                             horizon=horizon)
    alpha_logger.info("return data loading finished")
    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")
    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")

    df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()
    df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
    df = pd.merge(df, industry_df, on=['trade_date', 'code'])
    df['weight'] = df['weight'].fillna(0.)

    return dates, df[['trade_date', 'code', 'dx']], df[[
        'trade_date', 'code', 'weight', 'isOpen', 'industry_code', 'industry'
    ] + transformer.names]
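
A hypothetical call, assuming engine (SqlEngine) and universe (Universe) are already constructed elsewhere and that 'EPS' and 'ROE' are known factor names:

dates, return_df, factor_df = prepare_data(
    engine, ['EPS', 'ROE'], start_date='2017-01-01', end_date='2017-12-29',
    frequency='1w', universe=universe, benchmark=905)
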
Example #9
def fetch_data_package(engine: SqlEngine,
                       alpha_factors: Iterable[object],
                       start_date: str,
                       end_date: str,
                       frequency: str,
                       universe: Universe,
                       benchmark: int,
                       warm_start: int = 0,
                       batch: int = 1,
                       neutralized_risk: Iterable[str] = None,
                       risk_model: str = 'short',
                       pre_process: Iterable[object] = None,
                       post_process: Iterable[object] = None) -> dict:
    alpha_logger.info("Starting data package fetching ...")

    transformer = Transformer(alpha_factors)
    dates, return_df, factor_df = prepare_data(engine, transformer, start_date,
                                               end_date, frequency, universe,
                                               benchmark, warm_start)

    return_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes = \
        _merge_df(engine, transformer.names, factor_df, return_df, universe, dates, risk_model, neutralized_risk)

    return_df['weight'] = train_x['weight']
    return_df['industry'] = train_x['industry']
    return_df['industry_code'] = train_x['industry_code']
    return_df['isOpen'] = train_x['isOpen']

    if neutralized_risk:
        for i, name in enumerate(neutralized_risk):
            return_df.loc[:, name] = risk_exp[:, i]

    alpha_logger.info("Loading data is finished")

    train_x_buckets, train_y_buckets, predict_x_buckets, predict_y_buckets = batch_processing(
        x_values, y_values, dates, date_label, batch, risk_exp, pre_process,
        post_process)

    alpha_logger.info("Data processing is finished")

    ret = dict()
    ret['x_names'] = transformer.names
    ret['settlement'] = return_df
    ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets}
    ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets}
    return ret
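
Consuming the returned package looks roughly like this (the keys are grounded in the code above; engine and universe are assumed to exist, and 'SIZE' is an illustrative risk factor name):

package = fetch_data_package(engine, ['EPS'], '2017-01-01', '2017-12-29', '1w',
                             universe, benchmark=905, batch=4,
                             neutralized_risk=['SIZE'])
x_names = package['x_names']        # factor column names
settlement = package['settlement']  # per-date returns plus meta columns
train_x = package['train']['x']     # dict of factor matrices keyed by date
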
Example #10
def update_uqer_index_components(ds, **kwargs):
    ref_date, this_date = process_date(ds)
    flag = check_holiday(this_date)

    if not flag:
        return

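    # major SSE/CSI/SZSE index tickers (e.g. 000300 = CSI 300, 000905 = CSI 500)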
    index_codes = [
        '000001', '000002', '000003', '000004', '000005', '000006', '000007',
        '000008', '000009', '000010', '000015', '000016', '000020', '000090',
        '000132', '000133', '000300', '000852', '000902', '000903', '000904',
        '000905', '000906', '000907', '000922', '399001', '399002', '399004',
        '399005', '399006', '399007', '399008', '399009', '399010', '399011',
        '399012', '399013', '399107', '399324', '399330', '399333', '399400',
        '399401', '399649'
    ]

    frames = []

    for index in index_codes:
        df = api.IdxCloseWeightGet(ticker=index,
                                   beginDate=ref_date,
                                   endDate=ref_date)

        if df.empty:
            ref_previous_date = advanceDateByCalendar('china.sse', this_date,
                                                      '-1b')

            query = select([IndexComponent]).where(
                and_(IndexComponent.trade_date == ref_previous_date,
                     IndexComponent.indexCode == int(index)))
            df = pd.read_sql(query, engine)

            if df.empty:
                continue

            df['trade_date'] = this_date
            alpha_logger.info('{0} is finished with {1} records carried over '
                              'from the previous day'.format(index, len(df)))
        else:
            ################################
            # 2017-10-09, patch for uqer bug
            def is_listed_equity(code: str):
                # keep only exchange-listed A-share equities
                if code[0] in ['0', '3'] and code.endswith('XSHE'):
                    return True
                elif code[0] == '6' and code.endswith('XSHG'):
                    return True
                else:
                    return False

            df = df[df.consID.apply(is_listed_equity)]
            ################################
            df.rename(columns={
                'ticker': 'indexCode',
                'secShortName': 'indexShortName',
                'consTickerSymbol': 'code',
                'consExchangeCD': 'exchangeCD',
                'consShortName': 'secShortName'
            },
                      inplace=True)
            df['indexCode'] = df.indexCode.astype(int)
            df['code'] = df.code.astype(int)
            df['trade_date'] = this_date
            del df['secID']
            del df['consID']
            alpha_logger.info('{0} is finished with {1} new records'.format(
                index, len(df)))
        frames.append(df)

    total_data = pd.concat(frames) if frames else pd.DataFrame()

    if total_data.empty:
        return

    index_codes = [int(index) for index in total_data.indexCode.unique()]

    query = delete(IndexComponent).where(
        and_(IndexComponent.trade_date == this_date,
             IndexComponent.indexCode.in_(index_codes)))
    engine.execute(query)

    data_info_log(total_data, IndexComponent)
    format_data(total_data)
    total_data.to_sql(IndexComponent.__table__.name,
                      engine,
                      index=False,
                      if_exists='append')
Example #11
def process_date(ds):
    alpha_logger.info("Loading data at {0}".format(ds))
    this_date = dt.datetime.strptime(ds, '%Y-%m-%d')
    ref_date = this_date.strftime('%Y%m%d')
    return ref_date, this_date
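
For an Airflow-style date string this yields both formats used by the jobs above:

process_date('2018-04-27')
# -> ('20180427', datetime.datetime(2018, 4, 27, 0, 0))
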
Example #12
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0,
                 fit_target: Union[Transformer, object] = None):
    if warm_start > 0:
        p = Period(frequency)
        p = Period(length=-warm_start * p.length(), units=p.units())
        start_date = advanceDateByCalendar('china.sse', start_date,
                                           p).strftime('%Y-%m-%d')

    dates = makeSchedule(start_date,
                         end_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Forward)

    dates = [d.strftime('%Y-%m-%d') for d in dates]

    horizon = map_freq(frequency)

    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates).sort_values(
                                              ['trade_date', 'code'])
    alpha_logger.info("factor data loading finished")

    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe,
                                                 dates=dates,
                                                 horizon=horizon)
    else:
        one_more_date = advanceDateByCalendar('china.sse', dates[-1],
                                              frequency)
        target_df = engine.fetch_factor_range_forward(universe,
                                                      factors=fit_target,
                                                      dates=dates +
                                                      [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        target_df = target_df.groupby('code').apply(lambda x: x.ffill())
    alpha_logger.info("fit target data loading finished")

    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")
    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")

    df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna()
    df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
    df = pd.merge(df, industry_df, on=['trade_date', 'code'])
    df['weight'] = df['weight'].fillna(0.)
    df.dropna(inplace=True)

    return dates, df[[
        'trade_date', 'code', 'dx'
    ]], df[['trade_date', 'code', 'weight', 'industry_code', 'industry'] +
           transformer.names]
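
The fit_target branch forward-fills the target within each code so that dates without a fresh observation reuse the last known value. A toy illustration of that per-code fill:

import pandas as pd

df = pd.DataFrame({'code': [1, 1, 2, 2], 'dx': [0.1, None, None, 0.3]})
df.groupby('code').apply(lambda x: x.ffill())
# code 1's missing dx becomes 0.1; code 2's leading NaN stays NaN
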
Example #13
def fetch_data_package(engine: SqlEngine,
                       alpha_factors: Iterable[object],
                       start_date: str,
                       end_date: str,
                       frequency: str,
                       universe: Universe,
                       benchmark: int,
                       warm_start: int = 0,
                       batch: int = 1,
                       neutralized_risk: Iterable[str] = None,
                       risk_model: str = 'short',
                       pre_process: Iterable[object] = None,
                       post_process: Iterable[object] = None,
                       fit_target: Union[Transformer, object] = None) -> dict:
    alpha_logger.info("Starting data package fetching ...")
    transformer = Transformer(alpha_factors)
    names = transformer.names
    dates, target_df, factor_df = prepare_data(engine,
                                               transformer,
                                               start_date,
                                               end_date,
                                               frequency,
                                               universe,
                                               benchmark,
                                               warm_start + batch,
                                               fit_target=fit_target)

    target_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes = \
        _merge_df(engine, names, factor_df, target_df, universe, dates, risk_model,
                  neutralized_risk)

    alpha_logger.info("data merging finished")

    target_df['weight'] = train_x['weight']
    target_df['industry'] = train_x['industry']
    target_df['industry_code'] = train_x['industry_code']

    if neutralized_risk:
        for i, name in enumerate(neutralized_risk):
            target_df.loc[:, name] = risk_exp[:, i]

    alpha_logger.info("Loading data is finished")

    train_x_buckets, train_y_buckets, train_risk_buckets, predict_x_buckets, predict_y_buckets, predict_risk_buckets, predict_codes_bucket \
        = batch_processing(names,
                           x_values,
                           y_values,
                           dates,
                           date_label,
                           batch,
                           risk_exp,
                           pre_process,
                           post_process,
                           codes)

    alpha_logger.info("Data processing is finished")

    ret = dict()
    ret['x_names'] = names
    ret['settlement'] = target_df[target_df.trade_date >= start_date]

    def _since_start(buckets):
        return {
            k: v
            for k, v in buckets.items()
            if k.strftime('%Y-%m-%d') >= start_date
        }

    train_x_buckets = _since_start(train_x_buckets)
    train_y_buckets = _since_start(train_y_buckets)
    train_risk_buckets = _since_start(train_risk_buckets)

    predict_x_buckets = _since_start(predict_x_buckets)
    predict_y_buckets = _since_start(predict_y_buckets)
    if neutralized_risk:
        predict_risk_buckets = _since_start(predict_risk_buckets)
    else:
        predict_risk_buckets = None
    predict_codes_bucket = _since_start(predict_codes_bucket)

    ret['train'] = {
        'x': train_x_buckets,
        'y': train_y_buckets,
        'risk': train_risk_buckets
    }
    ret['predict'] = {
        'x': predict_x_buckets,
        'y': predict_y_buckets,
        'risk': predict_risk_buckets,
        'code': predict_codes_bucket
    }
    return ret
Example #14
    def setUp(self):
        self.engine = SqlEngine(DATA_ENGINE_URI)
        dates_list = bizDatesList('china.sse', '2010-10-01', '2018-04-27')
        self.ref_date = random.choice(dates_list).strftime('%Y-%m-%d')
        alpha_logger.info("Test date: {0}".format(self.ref_date))
Example #15
    def run(self):
        alpha_logger.info("starting backting ...")

        total_factors = self.engine.fetch_factor_range(
            self.running_setting.universe,
            self.alpha_model.formulas,
            dates=self.running_setting.dates)
        alpha_logger.info("alpha factor data loading finished ...")

        total_industry = self.engine.fetch_industry_matrix_range(
            self.running_setting.universe,
            dates=self.running_setting.dates,
            category=self.running_setting.industry_cat,
            level=self.running_setting.industry_level)
        alpha_logger.info("industry data loading finished ...")

        total_benchmark = self.engine.fetch_benchmark_range(
            dates=self.running_setting.dates,
            benchmark=self.running_setting.benchmark)
        alpha_logger.info("benchmark data loading finished ...")

        total_risk_cov, total_risk_exposure = self.engine.fetch_risk_model_range(
            self.running_setting.universe,
            dates=self.running_setting.dates,
            risk_model=self.data_meta.risk_model)
        alpha_logger.info("risk_model data loading finished ...")

        total_returns = self.engine.fetch_dx_return_range(
            self.running_setting.universe,
            dates=self.running_setting.dates,
            horizon=self.running_setting.horizon,
            offset=1)
        alpha_logger.info("returns data loading finished ...")

        total_data = pd.merge(total_factors,
                              total_industry,
                              on=['trade_date', 'code'])
        total_data = pd.merge(total_data,
                              total_benchmark,
                              on=['trade_date', 'code'],
                              how='left')
        total_data.fillna({'weight': 0.}, inplace=True)
        total_data = pd.merge(total_data,
                              total_returns,
                              on=['trade_date', 'code'])
        total_data = pd.merge(total_data,
                              total_risk_exposure,
                              on=['trade_date', 'code'])

        is_in_benchmark = (total_data.weight >
                           0.).astype(float).values.reshape((-1, 1))
        total_data.loc[:, 'benchmark'] = is_in_benchmark
        total_data.loc[:, 'total'] = np.ones_like(is_in_benchmark)
        total_data.sort_values(['trade_date', 'code'], inplace=True)
        total_data_groups = total_data.groupby('trade_date')

        rets = []
        turn_overs = []
        leverages = []
        previous_pos = pd.DataFrame()
        executor = copy.deepcopy(self.running_setting.executor)
        positions = pd.DataFrame()

        if self.dask_client is None:
            models = {}
            for ref_date, _ in total_data_groups:
                models[ref_date] = train_model(ref_date.strftime('%Y-%m-%d'),
                                               self.alpha_model,
                                               self.data_meta)
        else:

            def worker(parameters):
                new_model = train_model(parameters[0].strftime('%Y-%m-%d'),
                                        parameters[1], parameters[2])
                return parameters[0], new_model

            futures = self.dask_client.map(
                worker, [(d[0], self.alpha_model, self.data_meta)
                         for d in total_data_groups])
            results = self.dask_client.gather(futures)
            models = dict(results)

        for ref_date, this_data in total_data_groups:
            new_model = models[ref_date]

            this_data = this_data.fillna(
                this_data[new_model.features].median())
            codes = this_data.code.values.tolist()

            if self.running_setting.rebalance_method == 'tv':
                risk_cov = total_risk_cov[total_risk_cov.trade_date ==
                                          ref_date]
                sec_cov = self._generate_sec_cov(this_data, risk_cov)
            else:
                sec_cov = None

            benchmark_w = this_data.weight.values
            constraints = LinearConstraints(self.running_setting.bounds,
                                            this_data, benchmark_w)

            lbound = np.maximum(
                0., benchmark_w - self.running_setting.weights_bandwidth)
            ubound = self.running_setting.weights_bandwidth + benchmark_w

            if previous_pos.empty:
                current_position = None
            else:
                previous_pos.set_index('code', inplace=True)
                remained_pos = previous_pos.reindex(codes)

                remained_pos.fillna(0., inplace=True)
                current_position = remained_pos.weight.values

            features = new_model.features
            raw_factors = this_data[features].values
            new_factors = factor_processing(
                raw_factors,
                pre_process=self.data_meta.pre_process,
                risk_factors=this_data[self.data_meta.neutralized_risk].values.
                astype(float) if self.data_meta.neutralized_risk else None,
                post_process=self.data_meta.post_process)

            er = new_model.predict(pd.DataFrame(
                new_factors, columns=features)).astype(float)

            alpha_logger.info('{0} re-balance: {1} codes'.format(
                ref_date, len(er)))
            target_pos = self._calculate_pos(er,
                                             this_data,
                                             constraints,
                                             benchmark_w,
                                             lbound,
                                             ubound,
                                             sec_cov=sec_cov,
                                             current_position=current_position,
                                             **self.running_setting.more_opts)

            target_pos['code'] = codes
            target_pos['trade_date'] = ref_date

            turn_over, executed_pos = executor.execute(target_pos=target_pos)
            leverage = executed_pos.weight.abs().sum()

            ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) -
                                                1.)
            rets.append(np.log(1. + ret))
            executor.set_current(executed_pos)
            turn_overs.append(turn_over)
            leverages.append(leverage)
            positions = pd.concat([positions, executed_pos])
            previous_pos = executed_pos

        positions['benchmark_weight'] = total_data['weight'].values
        positions['dx'] = total_data.dx.values

        trade_dates = positions.trade_date.unique()
        ret_df = pd.DataFrame(
            {
                'returns': rets,
                'turn_over': turn_overs,
                'leverage': leverages
            },
            index=trade_dates)

        index_return = self.engine.fetch_dx_return_index_range(
            self.running_setting.benchmark,
            dates=self.running_setting.dates,
            horizon=self.running_setting.horizon,
            offset=1).set_index('trade_date')
        ret_df['benchmark_returns'] = index_return['dx']
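        # returns were fetched with offset=1, i.e. they realize over the
        # following period; append one extra period and shift forward so each
        # return is stamped on the date it is realized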
        ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1],
                                         self.running_setting.freq)] = 0.
        ret_df = ret_df.shift(1)
        ret_df.iloc[0] = 0.
        ret_df['excess_return'] = ret_df[
            'returns'] - ret_df['benchmark_returns'] * ret_df['leverage']

        return ret_df, positions
Example #16
    def run(self, running_setting):
        alpha_logger.info("starting backting ...")
        total_data_groups = self.total_data.groupby('trade_date')

        rets = []
        turn_overs = []
        leverages = []
        previous_pos = pd.DataFrame()
        executor = copy.deepcopy(running_setting.executor)
        positions = pd.DataFrame()

        if self.alpha_models is None:
            self.prepare_backtest_models()

        for ref_date, this_data in total_data_groups:
            risk_model = self.risk_models[ref_date]
            new_model = self.alpha_models[ref_date]
            codes = this_data.code.values.tolist()

            if previous_pos.empty:
                current_position = None
            else:
                previous_pos.set_index('code', inplace=True)
                remained_pos = previous_pos.reindex(codes)

                remained_pos.fillna(0., inplace=True)
                current_position = remained_pos.weight.values

            benchmark_w = this_data.weight.values
            constraints = LinearConstraints(running_setting.bounds, this_data,
                                            benchmark_w)

            lbound, ubound = self._create_lu_bounds(running_setting, codes,
                                                    benchmark_w)

            this_data.fillna(0, inplace=True)
            new_factors = factor_processing(
                this_data[new_model.features].values,
                pre_process=self.data_meta.pre_process,
                risk_factors=this_data[self.data_meta.neutralized_risk].values.
                astype(float) if self.data_meta.neutralized_risk else None,
                post_process=self.data_meta.post_process)
            new_factors = pd.DataFrame(new_factors,
                                       columns=new_model.features,
                                       index=codes)
            er = new_model.predict(new_factors).astype(float)

            alpha_logger.info('{0} re-balance: {1} codes'.format(
                ref_date, len(er)))
            target_pos = self._calculate_pos(
                running_setting,
                er,
                this_data,
                constraints,
                benchmark_w,
                lbound,
                ubound,
                risk_model=risk_model.get_risk_profile(codes),
                current_position=current_position)

            target_pos['code'] = codes
            target_pos['trade_date'] = ref_date

            turn_over, executed_pos = executor.execute(target_pos=target_pos)
            leverage = executed_pos.weight.abs().sum()

            ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) -
                                                1.)
            rets.append(np.log(1. + ret))
            executor.set_current(executed_pos)
            turn_overs.append(turn_over)
            leverages.append(leverage)
            positions = pd.concat([positions, executed_pos])
            previous_pos = executed_pos

        positions['benchmark_weight'] = self.total_data['weight'].values
        positions['dx'] = self.total_data.dx.values

        trade_dates = positions.trade_date.unique()
        ret_df = pd.DataFrame(
            {
                'returns': rets,
                'turn_over': turn_overs,
                'leverage': leverages
            },
            index=trade_dates)

        ret_df['benchmark_returns'] = self.index_return['dx']
        ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1],
                                         self.freq)] = 0.
        ret_df = ret_df.shift(1)
        ret_df.iloc[0] = 0.
        ret_df['excess_return'] = ret_df[
            'returns'] - ret_df['benchmark_returns'] * ret_df['leverage']
        return ret_df, positions
Example #17
def update_materialized_views(ds, **kwargs):
    alpha_logger.info("starting refresh full_factor_view ...")
    engine.execute("REFRESH MATERIALIZED VIEW CONCURRENTLY full_factor_view;")
    alpha_logger.info("starting cluster full_factor_view ...")
    engine.execute("CLUSTER full_factor_view;")
Example #18
def initdb(args):
    alpha_logger.info('DB: ' + args.url)
    engine = create_engine(args.url)
    models.Base.metadata.create_all(engine)
    alpha_logger.info('DB: initialization finished.')
Example #19
    def run(self, running_setting):
        alpha_logger.info("starting backting ...")
        total_data_groups = self.total_data.groupby('trade_date')

        rets = []
        turn_overs = []
        leverages = []
        previous_pos = pd.DataFrame()
        executor = copy.deepcopy(running_setting.executor)
        positions = pd.DataFrame()

        if self.dask_client is None:
            models = {}
            for ref_date, _ in total_data_groups:
                models[ref_date] = train_model(ref_date.strftime('%Y-%m-%d'),
                                               self.alpha_model,
                                               self.data_meta)
        else:

            def worker(parameters):
                new_model = train_model(parameters[0].strftime('%Y-%m-%d'),
                                        parameters[1], parameters[2])
                return parameters[0], new_model

            futures = self.dask_client.map(
                worker, [(d[0], self.alpha_model, self.data_meta)
                         for d in total_data_groups])
            results = self.dask_client.gather(futures)
            models = dict(results)

        for ref_date, this_data in total_data_groups:
            new_model = models[ref_date]
            codes = this_data.code.values.tolist()

            if previous_pos.empty:
                current_position = None
            else:
                previous_pos.set_index('code', inplace=True)
                remained_pos = previous_pos.reindex(codes)

                remained_pos.fillna(0., inplace=True)
                current_position = remained_pos.weight.values

            if running_setting.rebalance_method == 'tv':
                risk_cov = self.total_risk_cov[self.total_risk_cov.trade_date
                                               == ref_date]
                sec_cov = self._generate_sec_cov(this_data, risk_cov)
            else:
                sec_cov = None

            benchmark_w = this_data.weight.values
            constraints = LinearConstraints(running_setting.bounds, this_data,
                                            benchmark_w)

            lbound, ubound = self._create_lu_bounds(running_setting, codes,
                                                    benchmark_w)

            features = new_model.features
            dfs = []
            for name in features:
                data_cleaned = this_data.dropna(subset=[name])
                raw_factors = data_cleaned[[name]].values
                new_factors = factor_processing(
                    raw_factors,
                    pre_process=self.data_meta.pre_process,
                    risk_factors=data_cleaned[
                        self.data_meta.neutralized_risk].values.astype(float)
                    if self.data_meta.neutralized_risk else None,
                    post_process=self.data_meta.post_process)
                df = pd.DataFrame(new_factors,
                                  columns=[name],
                                  index=data_cleaned.code)
                dfs.append(df)

            new_factors = pd.concat(dfs, axis=1)
            new_factors = new_factors.reindex(codes).fillna(new_factors.median())
            er = new_model.predict(new_factors).astype(float)

            alpha_logger.info('{0} re-balance: {1} codes'.format(
                ref_date, len(er)))
            target_pos = self._calculate_pos(running_setting,
                                             er,
                                             this_data,
                                             constraints,
                                             benchmark_w,
                                             lbound,
                                             ubound,
                                             sec_cov=sec_cov,
                                             current_position=current_position)

            target_pos['code'] = codes
            target_pos['trade_date'] = ref_date

            turn_over, executed_pos = executor.execute(target_pos=target_pos)
            leverage = executed_pos.weight.abs().sum()

            ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) -
                                                1.)
            rets.append(np.log(1. + ret))
            executor.set_current(executed_pos)
            turn_overs.append(turn_over)
            leverages.append(leverage)
            positions = pd.concat([positions, executed_pos])
            previous_pos = executed_pos

        positions['benchmark_weight'] = self.total_data['weight'].values
        positions['dx'] = self.total_data.dx.values

        trade_dates = positions.trade_date.unique()
        ret_df = pd.DataFrame(
            {
                'returns': rets,
                'turn_over': turn_overs,
                'leverage': leverages
            },
            index=trade_dates)

        ret_df['benchmark_returns'] = self.index_return['dx']
        ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1],
                                         self.freq)] = 0.
        ret_df = ret_df.shift(1)
        ret_df.iloc[0] = 0.
        ret_df['excess_return'] = ret_df[
            'returns'] - ret_df['benchmark_returns'] * ret_df['leverage']

        return ret_df, positions