Example #1
0
    def testWeeksDaysAlgebra(self):
        """Check Period division, in-place addition and normalization for
        week and day units.

        Each assertion carries a failure message that reports the operands
        and the expected result.
        """
        twoWeeks = Period(length=2, units=TimeUnits.Weeks)
        oneWeek = Period(length=1, units=TimeUnits.Weeks)
        threeDays = Period(length=3, units=TimeUnits.Days)
        oneDay = Period(length=1, units=TimeUnits.Days)

        n = 2
        flag = twoWeeks / n == oneWeek
        self.assertTrue(
            flag, "division error: {0} / {1:d}"
            " not equal to {2}".format(twoWeeks, n, oneWeek))

        n = 7
        # Divide by ``n`` rather than a literal so the failure message always
        # reports the divisor that was actually used.
        flag = oneWeek / n == oneDay
        self.assertTrue(
            flag, "division error: {0} / {1:d}"
            " not equal to {2}".format(oneWeek, n, oneDay))

        # Named ``total`` so the builtin ``sum`` is not shadowed.
        total = threeDays
        total += oneDay
        flag = total == Period(length=4, units=TimeUnits.Days)
        self.assertTrue(
            flag, "sum error: {0}"
            " + {1}"
            " != {2}".format(threeDays, oneDay,
                             Period(length=4, units=TimeUnits.Days)))

        # Adding one week to four days is expected to compare equal to
        # eleven days.
        total += oneWeek
        flag = total == Period(length=11, units=TimeUnits.Days)
        self.assertTrue(
            flag, "sum error: {0}"
            " + {1}"
            " + {2}"
            " != {3}".format(threeDays, oneDay, oneWeek,
                             Period(length=11, units=TimeUnits.Days)))

        # A freshly built seven-day period keeps its length and units until
        # normalize() is called.
        sevenDays = Period(length=7, units=TimeUnits.Days)
        flag = sevenDays.length() == 7
        self.assertTrue(
            flag, "normalization error: sevenDays.length"
            " is {0:d}"
            " instead of 7".format(sevenDays.length()))
        flag = sevenDays.units() == TimeUnits.Days
        self.assertTrue(
            flag, "normalization error: sevenDays.units"
            " is {0:d}"
            " instead of {1:d}".format(sevenDays.units(), TimeUnits.Days))

        # After normalization, seven days are expected to become one week.
        normalizedSevenDays = sevenDays.normalize()
        flag = normalizedSevenDays.length() == 1
        self.assertTrue(
            flag, "normalization error: normalizedSevenDays.length"
            " is {0:d}"
            " instead of 1".format(normalizedSevenDays.length()))
        flag = normalizedSevenDays.units() == TimeUnits.Weeks
        self.assertTrue(
            flag, "normalization error: normalizedSevenDays.units"
            " is {0:d}"
            " instead of {1:d}".format(normalizedSevenDays.units(),
                                       TimeUnits.Weeks))
Example #2
0
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0):
    """Load factor, return, industry and benchmark data on a date schedule.

    :param engine: data source; its ``fetch_*_range`` methods are called
    :param factors: a ``Transformer`` or the expressions to wrap in one
    :param start_date: schedule start, ``'%Y-%m-%d'`` string
    :param end_date: schedule end
    :param frequency: period string used for the schedule and the horizon
    :param universe: universe forwarded to the engine
    :param benchmark: benchmark identifier forwarded to the engine
    :param warm_start: number of ``frequency`` periods by which the start
        date is moved back before the schedule is built
    :return: ``(dates, return_frame, feature_frame)`` where ``dates`` is the
        list of schedule dates formatted as ``'%Y-%m-%d'`` strings
    """
    calendar_name = 'china.sse'
    date_format = '%Y-%m-%d'
    join_keys = ['trade_date', 'code']

    if warm_start > 0:
        # Move the start back by ``warm_start`` periods of ``frequency``.
        step = Period(frequency)
        look_back = Period(length=-warm_start * step.length(),
                           units=step.units())
        shifted_start = advanceDateByCalendar(calendar_name, start_date,
                                              look_back)
        start_date = shifted_start.strftime(date_format)

    schedule = makeSchedule(start_date,
                            end_date,
                            frequency,
                            calendar=calendar_name,
                            dateRule=BizDayConventions.Following,
                            dateGenerationRule=DateGeneration.Forward)
    dates = [day.strftime(date_format) for day in schedule]

    horizon = map_freq(frequency)

    transformer = factors if isinstance(factors, Transformer) \
        else Transformer(factors)

    raw_factors = engine.fetch_factor_range(universe,
                                            factors=transformer,
                                            dates=dates)
    factor_df = raw_factors.sort_values(join_keys)
    alpha_logger.info("factor data loading finished")

    return_df = engine.fetch_dx_return_range(universe,
                                             dates=dates,
                                             horizon=horizon)
    alpha_logger.info("return data loading finished")

    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")

    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")

    # Factors and returns are inner-joined and incomplete rows dropped; the
    # benchmark is left-joined, so rows missing from it get a zero weight.
    merged = pd.merge(factor_df, return_df, on=join_keys).dropna()
    merged = pd.merge(merged, benchmark_df, on=join_keys, how='left')
    merged = pd.merge(merged, industry_df, on=join_keys)
    merged['weight'] = merged['weight'].fillna(0.)

    return_columns = ['trade_date', 'code', 'dx']
    feature_columns = [
        'trade_date', 'code', 'weight', 'isOpen', 'industry_code', 'industry'
    ] + transformer.names

    return dates, merged[return_columns], merged[feature_columns]
Example #3
0
def _map_horizon(frequency: str) -> int:
    """Translate a frequency string into an integer horizon.

    The frequency is parsed with ``Period``. Day and business-day units map
    to ``length - 1``, weeks to ``5 * length - 1`` and months to
    ``22 * length - 1`` (presumably 5 and 22 trading days per week/month --
    confirm against the caller).

    :raises ValueError: if the parsed unit is none of the above
    """
    period = Period(frequency)
    unit = period.units()
    count = period.length()

    is_daily = unit == TimeUnits.BDays or unit == TimeUnits.Days
    if is_daily:
        return count - 1
    if unit == TimeUnits.Weeks:
        return 5 * count - 1
    if unit == TimeUnits.Months:
        return 22 * count - 1

    message = '{0} is an unrecognized frequency rule'.format(frequency)
    raise ValueError(message)
Example #4
0
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0,
                 fit_target: Union[Transformer, object] = None):
    """Load factor, target, industry and benchmark data on a date schedule.

    :param engine: data source; its ``fetch_*`` methods are called
    :param factors: a ``Transformer`` or the expressions to wrap in one
    :param start_date: schedule start, ``'%Y-%m-%d'`` string
    :param end_date: schedule end
    :param frequency: period string used for the schedule and the horizon
    :param universe: universe forwarded to the engine
    :param benchmark: benchmark identifier forwarded to the engine
    :param warm_start: number of ``frequency`` periods by which the start
        date is moved back before the schedule is built
    :param fit_target: when ``None`` the target is fetched with
        ``fetch_dx_return_range``; otherwise it is fetched with
        ``fetch_factor_range_forward`` using this value as the factor
    :return: ``(dates, target_frame, feature_frame)`` where ``dates`` is the
        list of schedule dates formatted as ``'%Y-%m-%d'`` strings
    """
    if warm_start > 0:
        # Move the start back by ``warm_start`` periods of ``frequency``.
        p = Period(frequency)
        p = Period(length=-warm_start * p.length(), units=p.units())
        start_date = advanceDateByCalendar('china.sse', start_date,
                                           p).strftime('%Y-%m-%d')

    dates = makeSchedule(start_date,
                         end_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Forward)

    dates = [d.strftime('%Y-%m-%d') for d in dates]

    horizon = map_freq(frequency)

    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates).sort_values(
                                              ['trade_date', 'code'])
    alpha_logger.info("factor data loading finished")

    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe,
                                                 dates=dates,
                                                 horizon=horizon)
    else:
        # Request one extra period past the schedule, then keep only the
        # scheduled dates.
        # NOTE(review): ``dates`` holds strings here while ``one_more_date``
        # is whatever advanceDateByCalendar returns -- confirm the engine
        # accepts the mixed list.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1],
                                              frequency)
        target_df = engine.fetch_factor_range_forward(universe,
                                                      factors=fit_target,
                                                      dates=dates +
                                                      [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        # Forward-fill gaps within each code. ``ffill()`` is the equivalent
        # of the deprecated ``fillna(method='pad')``.
        target_df = target_df.groupby('code').apply(lambda x: x.ffill())
    alpha_logger.info("fit target data loading finished")

    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")
    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")

    # Factors and target are inner-joined and incomplete rows dropped; the
    # benchmark is left-joined, so rows missing from it get a zero weight.
    df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna()
    df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
    df = pd.merge(df, industry_df, on=['trade_date', 'code'])
    df['weight'] = df['weight'].fillna(0.)
    df.dropna(inplace=True)

    return dates, df[[
        'trade_date', 'code', 'dx'
    ]], df[['trade_date', 'code', 'weight', 'industry_code', 'industry'] +
           transformer.names]
Example #5
0
def fetch_predict_phase(engine,
                        alpha_factors: Union[Transformer, Iterable[object]],
                        ref_date,
                        frequency,
                        universe,
                        batch=1,
                        neutralized_risk: Iterable[str] = None,
                        risk_model: str = 'short',
                        pre_process: Iterable[object] = None,
                        post_process: Iterable[object] = None,
                        warm_start: int = 0,
                        fillna: str = None,
                        fit_target: Union[Transformer, object] = None):
    """Build the processed prediction inputs for ``ref_date``.

    :param engine: data source; its ``fetch_*`` methods are called
    :param alpha_factors: a ``Transformer`` or the expressions to wrap in one
    :param ref_date: reference date, parsed with ``'%Y-%m-%d'``
    :param frequency: period string used for the schedule and the horizon
    :param universe: universe forwarded to the engine
    :param batch: number of most recent dates processed together
    :param neutralized_risk: risk columns passed to ``factor_processing`` as
        ``risk_factors``; falsy means none
    :param risk_model: forwarded to ``engine.fetch_risk_model_range``
    :param pre_process: forwarded to ``factor_processing``
    :param post_process: forwarded to ``factor_processing``
    :param warm_start: extra periods added to the look-back window
    :param fillna: when truthy, missing factor values are filled with the
        per-``trade_date`` median before remaining incomplete rows are
        dropped; otherwise incomplete rows are simply dropped
    :param fit_target: when ``None`` the target is fetched with
        ``fetch_dx_return_range``; otherwise with
        ``fetch_factor_range_forward`` using this value as the factor
    :return: dict with ``'x_names'`` and ``'predict'``; the latter holds
        ``'x'`` (DataFrame), ``'code'`` and ``'y'``. ``'code'`` and ``'y'``
        are ``None`` (and ``'x'`` is empty) when the latest available date
        is not ``ref_date``.
    """
    if isinstance(alpha_factors, Transformer):
        transformer = alpha_factors
    else:
        transformer = Transformer(alpha_factors)

    # Look back far enough to cover the warm-up and the batch window.
    p = Period(frequency)
    p = Period(length=-(warm_start + batch - 1) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    horizon = map_freq(frequency)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates)

    if fillna:
        factor_df = factor_df.groupby('trade_date').apply(
            lambda x: x.fillna(x.median())).reset_index(drop=True).dropna()
    else:
        factor_df = factor_df.dropna()

    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe,
                                                 dates=dates,
                                                 horizon=horizon)
    else:
        # Request one extra period past the schedule, then keep only the
        # scheduled dates.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1],
                                              frequency)
        target_df = engine.fetch_factor_range_forward(universe,
                                                      factors=fit_target,
                                                      dates=dates +
                                                      [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        # Forward-fill gaps within each code. ``ffill()`` is the equivalent
        # of the deprecated ``fillna(method='pad')``.
        target_df = target_df.groupby('code').apply(lambda x: x.ffill())

    names = transformer.names

    if neutralized_risk:
        risk_df = engine.fetch_risk_model_range(universe,
                                                dates=dates,
                                                risk_model=risk_model)[1]
        # Risk columns that are already factor columns are not re-selected,
        # so the merge does not produce duplicated column names.
        used_neutralized_risk = list(set(neutralized_risk).difference(names))
        risk_df = risk_df[['trade_date', 'code'] +
                          used_neutralized_risk].dropna()
        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
        train_x = pd.merge(train_x,
                           target_df,
                           on=['trade_date', 'code'],
                           how='left')
    else:
        train_x = pd.merge(factor_df,
                           target_df,
                           on=['trade_date', 'code'],
                           how='left')

    # Drop rows with a missing value in any column except the last one
    # (presumably the target, which may be unknown at prediction time --
    # confirm against the engine's column order).
    train_x.dropna(inplace=True, subset=train_x.columns[:-1])
    x_values = train_x[names].values.astype(float)
    y_values = train_x[['dx']].values.astype(float)

    # Extract the risk exposures only after the dropna above so that their
    # rows stay aligned with x_values, y_values and date_label.
    if neutralized_risk:
        risk_exp = train_x[neutralized_risk].values.astype(float)
    else:
        risk_exp = None

    # NOTE(review): the bisect calls below assume train_x is ordered by
    # trade_date -- confirm the engine returns rows in date order.
    date_label = pd.DatetimeIndex(train_x.trade_date).to_pydatetime()
    dates = np.unique(date_label)

    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        end = dates[-1]
        start = dates[-batch] if batch <= len(dates) else dates[0]

        left_index = bisect.bisect_left(date_label, start)
        right_index = bisect.bisect_right(date_label, end)
        this_raw_x = x_values[left_index:right_index]
        this_raw_y = y_values[left_index:right_index]
        sub_dates = date_label[left_index:right_index]

        if risk_exp is not None:
            this_risk_exp = risk_exp[left_index:right_index]
        else:
            this_risk_exp = None

        # Processing runs over the whole batch window; only the rows of the
        # final date are kept afterwards.
        ne_x = factor_processing(this_raw_x,
                                 pre_process=pre_process,
                                 risk_factors=this_risk_exp,
                                 post_process=post_process)

        ne_y = factor_processing(this_raw_y,
                                 pre_process=pre_process,
                                 risk_factors=this_risk_exp,
                                 post_process=post_process)

        inner_left_index = bisect.bisect_left(sub_dates, end)
        inner_right_index = bisect.bisect_right(sub_dates, end)

        ne_x = ne_x[inner_left_index:inner_right_index]
        ne_y = ne_y[inner_left_index:inner_right_index]

        left_index = bisect.bisect_left(date_label, end)
        right_index = bisect.bisect_right(date_label, end)

        codes = train_x.code.values[left_index:right_index]
    else:
        ne_x = None
        ne_y = None
        codes = None

    ret = dict()
    ret['x_names'] = transformer.names
    ret['predict'] = {
        'x': pd.DataFrame(ne_x, columns=transformer.names),
        'code': codes,
        # ne_y is None when no data is available for ref_date; guard the
        # flatten so that case does not raise AttributeError.
        'y': ne_y.flatten() if ne_y is not None else None
    }

    return ret
Example #6
0
def fetch_train_phase(engine,
                      alpha_factors: Union[Transformer, Iterable[object]],
                      ref_date,
                      frequency,
                      universe,
                      batch=1,
                      neutralized_risk: Iterable[str] = None,
                      risk_model: str = 'short',
                      pre_process: Iterable[object] = None,
                      post_process: Iterable[object] = None,
                      warm_start: int = 0,
                      fit_target: Union[Transformer, object] = None) -> dict:
    """Build the processed training set available as of ``ref_date``.

    :param engine: data source; its ``fetch_*`` methods are called
    :param alpha_factors: a ``Transformer`` or the expressions to wrap in one
    :param ref_date: reference date, parsed with ``'%Y-%m-%d'``
    :param frequency: period string used for the schedule and the horizon
    :param universe: universe forwarded to the engine
    :param batch: number of dates included in the training window
    :param neutralized_risk: forwarded to ``_merge_df``
    :param risk_model: forwarded to ``_merge_df``
    :param pre_process: forwarded to ``factor_processing``
    :param post_process: forwarded to ``factor_processing``
    :param warm_start: extra periods added to the look-back window
    :param fit_target: when ``None`` the target is fetched with
        ``fetch_dx_return_range``; otherwise with
        ``fetch_factor_range_forward`` using this value as the factor
    :return: dict with ``'x_names'`` and ``'train'``; the latter holds
        ``'x'`` (DataFrame), ``'y'`` and ``'code'``
    :raises ValueError: via ``pyFinAssert`` when the latest date equals
        ``ref_date`` and no earlier date exists to train on
    """
    if isinstance(alpha_factors, Transformer):
        transformer = alpha_factors
    else:
        transformer = Transformer(alpha_factors)

    # Look back far enough to cover the warm-up and the batch window.
    p = Period(frequency)
    p = Period(length=-(warm_start + batch) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    horizon = map_freq(frequency)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates)
    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe,
                                                 dates=dates,
                                                 horizon=horizon)
    else:
        # Request one extra period past the schedule, then keep only the
        # scheduled dates.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1],
                                              frequency)
        target_df = engine.fetch_factor_range_forward(universe,
                                                      factors=fit_target,
                                                      dates=dates +
                                                      [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        # Forward-fill gaps within each code. ``ffill()`` is the equivalent
        # of the deprecated ``fillna(method='pad')``.
        target_df = target_df.groupby('code').apply(lambda x: x.ffill())

    # Keep only rows where both factors and target are fully populated, then
    # split the frame back into its target and factor parts.
    df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna()

    target_df, factor_df = df[['trade_date', 'code',
                               'dx']], df[['trade_date', 'code'] +
                                          transformer.names]

    target_df, dates, date_label, risk_exp, x_values, y_values, _, _, codes = \
        _merge_df(engine, transformer.names, factor_df, target_df, universe, dates, risk_model,
                  neutralized_risk)

    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        # The ref_date row itself is excluded from training, so the window
        # ends on the date before it.
        pyFinAssert(
            len(dates) >= 2, ValueError,
            "No previous data for training for the date {0}".format(ref_date))
        end = dates[-2]
        start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0]
    else:
        end = dates[-1]
        start = dates[-batch] if batch <= len(dates) else dates[0]

    index = (date_label >= start) & (date_label <= end)
    this_raw_x = x_values[index]
    this_raw_y = y_values[index]
    this_code = codes[index]
    if risk_exp is not None:
        this_risk_exp = risk_exp[index]
    else:
        this_risk_exp = None

    ne_x = factor_processing(this_raw_x,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ne_y = factor_processing(this_raw_y,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ret = dict()
    ret['x_names'] = transformer.names
    ret['train'] = {
        'x': pd.DataFrame(ne_x, columns=transformer.names),
        'y': ne_y,
        'code': this_code
    }

    return ret
Example #7
0
def fetch_predict_phase(engine,
                        alpha_factors: Iterable[object],
                        ref_date,
                        frequency,
                        universe,
                        batch,
                        neutralized_risk: Iterable[str] = None,
                        risk_model: str = 'short',
                        pre_process: Iterable[object] = None,
                        post_process: Iterable[object] = None,
                        warm_start: int = 0):
    """Build the processed prediction factors for ``ref_date``.

    :param engine: data source; its ``fetch_*`` methods are called
    :param alpha_factors: expressions wrapped in a ``Transformer``
    :param ref_date: reference date, parsed with ``'%Y-%m-%d'``
    :param frequency: period string used for the schedule
    :param universe: universe forwarded to the engine
    :param batch: number of most recent dates processed together
    :param neutralized_risk: risk columns passed to ``factor_processing`` as
        ``risk_factors``; falsy means none
    :param risk_model: forwarded to ``engine.fetch_risk_model_range``
    :param pre_process: forwarded to ``factor_processing``
    :param post_process: forwarded to ``factor_processing``
    :param warm_start: extra periods added to the look-back window
    :return: dict with ``'x_names'`` and ``'predict'``; the latter holds
        ``'x'`` and ``'code'``, both ``None`` when the latest available date
        is not ``ref_date``
    """
    transformer = Transformer(alpha_factors)

    # Look back far enough to cover the warm-up and the batch window.
    p = Period(frequency)
    p = Period(length=-(warm_start + batch) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates).dropna()

    names = transformer.names

    if neutralized_risk:
        risk_df = engine.fetch_risk_model_range(universe,
                                                dates=dates,
                                                risk_model=risk_model)[1]
        # Risk columns that are already factor columns are not re-selected,
        # so the merge does not produce duplicated column names.
        used_neutralized_risk = list(set(neutralized_risk).difference(names))
        risk_df = risk_df[['trade_date', 'code'] +
                          used_neutralized_risk].dropna()
        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
        risk_exp = train_x[neutralized_risk].values.astype(float)
    else:
        train_x = factor_df.copy()
        risk_exp = None

    # Factor values are needed in both branches; previously they were only
    # assigned when risk neutralization was requested.
    x_values = train_x[names].values.astype(float)

    # Labels come from train_x (not factor_df) so that positions found below
    # index the same rows as x_values, risk_exp and the codes.
    # NOTE(review): the bisect calls assume train_x is ordered by
    # trade_date -- confirm the engine returns rows in date order.
    date_label = pd.DatetimeIndex(train_x.trade_date).to_pydatetime()
    dates = np.unique(date_label)

    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        end = dates[-1]
        # Fall back to the earliest date when fewer than ``batch`` dates
        # are available instead of raising IndexError.
        start = dates[-batch] if batch <= len(dates) else dates[0]

        left_index = bisect.bisect_left(date_label, start)
        right_index = bisect.bisect_right(date_label, end)
        this_raw_x = x_values[left_index:right_index]
        sub_dates = date_label[left_index:right_index]

        if risk_exp is not None:
            this_risk_exp = risk_exp[left_index:right_index]
        else:
            this_risk_exp = None

        # Processing runs over the whole batch window; only the rows of the
        # final date are kept afterwards.
        ne_x = factor_processing(this_raw_x,
                                 pre_process=pre_process,
                                 risk_factors=this_risk_exp,
                                 post_process=post_process)

        inner_left_index = bisect.bisect_left(sub_dates, end)
        inner_right_index = bisect.bisect_right(sub_dates, end)

        ne_x = ne_x[inner_left_index:inner_right_index]

        left_index = bisect.bisect_left(date_label, end)
        right_index = bisect.bisect_right(date_label, end)

        codes = train_x.code.values[left_index:right_index]
    else:
        ne_x = None
        codes = None

    ret = dict()
    ret['x_names'] = transformer.names
    ret['predict'] = {'x': ne_x, 'code': codes}

    return ret
Example #8
0
def fetch_train_phase(engine,
                      alpha_factors: Iterable[object],
                      ref_date,
                      frequency,
                      universe,
                      batch,
                      neutralized_risk: Iterable[str] = None,
                      risk_model: str = 'short',
                      pre_process: Iterable[object] = None,
                      post_process: Iterable[object] = None,
                      warm_start: int = 0) -> dict:
    """Build the processed training set available as of ``ref_date``.

    :param engine: data source; its ``fetch_*`` methods are called
    :param alpha_factors: expressions wrapped in a ``Transformer``
    :param ref_date: reference date, parsed with ``'%Y-%m-%d'``
    :param frequency: period string used for the schedule and the horizon
    :param universe: universe forwarded to the engine
    :param batch: number of dates included in the training window
    :param neutralized_risk: forwarded to ``_merge_df``
    :param risk_model: forwarded to ``_merge_df``
    :param pre_process: forwarded to ``factor_processing``
    :param post_process: forwarded to ``factor_processing``
    :param warm_start: extra periods added to the look-back window
    :return: dict with ``'x_names'`` and ``'train'``; the latter holds the
        processed ``'x'`` and ``'y'`` arrays
    :raises ValueError: when the latest date equals ``ref_date`` and no
        earlier date exists to train on
    """
    transformer = Transformer(alpha_factors)

    # Look back far enough to cover the warm-up and the batch window.
    p = Period(frequency)
    p = Period(length=-(warm_start + batch + 1) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    horizon = _map_horizon(frequency)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates)
    return_df = engine.fetch_dx_return_range(universe,
                                             dates=dates,
                                             horizon=horizon)

    # Keep only rows where both factors and returns are fully populated,
    # then split the frame back into its return and factor parts.
    df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()

    return_df, factor_df = df[['trade_date', 'code',
                               'dx']], df[['trade_date', 'code', 'isOpen'] +
                                          transformer.names]

    return_df, dates, date_label, risk_exp, x_values, y_values, _, _ = \
        _merge_df(engine, transformer.names, factor_df, return_df, universe, dates, risk_model, neutralized_risk)

    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        # The ref_date row itself is excluded from training, so the window
        # ends on the date before it; fail clearly when there is none.
        if len(dates) < 2:
            raise ValueError(
                "No previous data for training for the date {0}".format(
                    ref_date))
        end = dates[-2]
        # Fall back to the earliest date when fewer than ``batch`` earlier
        # dates are available instead of raising IndexError.
        start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0]
    else:
        end = dates[-1]
        start = dates[-batch] if batch <= len(dates) else dates[0]

    index = (date_label >= start) & (date_label <= end)
    this_raw_x = x_values[index]
    this_raw_y = y_values[index]
    if risk_exp is not None:
        this_risk_exp = risk_exp[index]
    else:
        this_risk_exp = None

    ne_x = factor_processing(this_raw_x,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ne_y = factor_processing(this_raw_y,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ret = dict()
    ret['x_names'] = transformer.names
    ret['train'] = {'x': ne_x, 'y': ne_y}

    return ret
Example #9
0
    def testYearsMonthsAlgebra(self):
        """Check Period division, in-place addition and normalization for
        year and month units, plus normalization of day and business-day
        periods.

        Each assertion carries a failure message that reports the operands
        and the expected result.
        """
        oneYear = Period(length=1, units=TimeUnits.Years)
        sixMonths = Period(length=6, units=TimeUnits.Months)
        threeMonths = Period(length=3, units=TimeUnits.Months)

        n = 4
        flag = oneYear / n == threeMonths
        self.assertTrue(
            flag, "division error: {0} / {1:d}"
            " not equal to {2}".format(oneYear, n, threeMonths))

        n = 2
        flag = oneYear / n == sixMonths
        self.assertTrue(
            flag, "division error: {0} / {1:d}"
            " not equal to {2}".format(oneYear, n, sixMonths))

        # Named ``total`` so the builtin ``sum`` is not shadowed.
        total = threeMonths
        total += sixMonths
        flag = total == Period(length=9, units=TimeUnits.Months)
        self.assertTrue(
            flag, "sum error: {0}"
            " + {1}"
            " != {2}".format(threeMonths, sixMonths,
                             Period(length=9, units=TimeUnits.Months)))

        # Adding one year to nine months is expected to compare equal to
        # twenty-one months.
        total += oneYear
        flag = total == Period(length=21, units=TimeUnits.Months)
        self.assertTrue(
            flag, "sum error: {0}"
            " + {1}"
            " + {2}"
            " != {3}".format(threeMonths, sixMonths, oneYear,
                             Period(length=21, units=TimeUnits.Months)))

        # A freshly built twelve-month period keeps its length and units
        # until normalize() is called.
        twelveMonths = Period(length=12, units=TimeUnits.Months)
        flag = twelveMonths.length() == 12
        self.assertTrue(
            flag, "normalization error: TwelveMonths.length"
            " is {0:d}"
            " instead of 12".format(twelveMonths.length()))
        flag = twelveMonths.units() == TimeUnits.Months
        self.assertTrue(
            flag, "normalization error: TwelveMonths.units"
            " is {0:d}"
            " instead of {1:d}".format(twelveMonths.units(), TimeUnits.Months))

        # After normalization, twelve months are expected to become one year.
        # The failure messages report the normalized period's own values.
        normalizedTwelveMonths = Period(length=12, units=TimeUnits.Months)
        normalizedTwelveMonths = normalizedTwelveMonths.normalize()
        flag = normalizedTwelveMonths.length() == 1
        self.assertTrue(
            flag, "normalization error: normalizedTwelveMonths.length"
            " is {0:d}"
            " instead of 1".format(normalizedTwelveMonths.length()))
        flag = normalizedTwelveMonths.units() == TimeUnits.Years
        self.assertTrue(
            flag, "normalization error: normalizedTwelveMonths.units"
            " is {0:d}"
            " instead of {1:d}".format(normalizedTwelveMonths.units(),
                                       TimeUnits.Years))

        # Thirty days are expected to stay in day units after normalization.
        thirtyDays = Period(length=30, units=TimeUnits.Days)
        normalizedThirtyDays = thirtyDays.normalize()
        flag = normalizedThirtyDays.units() == TimeUnits.Days
        self.assertTrue(
            flag, "normalization error: ThirtyDays.units"
            " is {0:d}"
            " instead of {1:d}".format(normalizedThirtyDays.units(),
                                       TimeUnits.Days))

        # Thirty business days are expected to stay in business-day units.
        thirtyBDays = Period(length=30, units=TimeUnits.BDays)
        normalizedThirtyBDays = thirtyBDays.normalize()
        flag = normalizedThirtyBDays.units() == TimeUnits.BDays
        self.assertTrue(
            flag, "normalization error: ThirtyBDays.units"
            " is {0:d}"
            " instead of {1:d}".format(normalizedThirtyBDays.units(),
                                       TimeUnits.BDays))