예제 #1
0
    def _filter_sec_on_tiaocang_date(self, tiaocang_date, sec_id):
        """Flag securities that should be excluded on a rebalance date.

        Prices are fetched for the rebalance date and the two preceding
        China.SSE business days. A security is flagged when at least one of
        these rules fires:

        * its price divided by the previous business day's price exceeds
          ``1 + self._filter_return_on_tiaocang_date``;
        * the product of the three prices is NaN;
        * the three prices are all equal.

        :param tiaocang_date: rebalance date; also used as a column label of
            the transposed price table
        :param sec_id: security ids forwarded to ``get_sec_price``
        :return: the ``'filters'`` column of the price table, computed as
            ``1 - product of (1 - rule)`` over the three rules
        """
        calendar = Calendar('China.SSE')
        ref_day = Date.strptime(str(tiaocang_date)[:10])
        prev_day = calendar.advanceDate(ref_day, Period('-1b')).toDateTime()
        prev_day2 = calendar.advanceDate(ref_day, Period('-2b')).toDateTime()

        prices = get_sec_price(start_date=prev_day2,
                               end_date=tiaocang_date,
                               sec_ids=sec_id,
                               data_source=self._data_source,
                               csv_path=self._csv_path).transpose()
        prices.index.name = 'secID'

        today = prices[tiaocang_date]
        yesterday = prices[prev_day]
        two_days_ago = prices[prev_day2]

        # Exclude names that rose so much they probably cannot be bought.
        prices['returnFilter'] = (
            today / yesterday > 1 + self._filter_return_on_tiaocang_date)
        # Exclude names with NaN prices, e.g. newly listed stocks.
        prices['ipoFilter'] = pd.isnull(today * yesterday * two_days_ago)
        # Exclude suspended names; the criterion used here is an identical
        # closing price on three consecutive days.
        prices['tingpaiFilter'] = ((today == yesterday) &
                                   (yesterday == two_days_ago))

        # A security passes only if every individual rule is False.
        passed_all = ((1 - prices['returnFilter']) *
                      (1 - prices['ipoFilter']) *
                      (1 - prices['tingpaiFilter']))
        prices['filters'] = 1 - passed_all

        return prices['filters']
    def testDailySchedule(self):
        """Daily schedules must not repeat dates around non-business days."""
        # China.SSE, one week starting 2012-01-01:
        #   * Jan 2 and Jan 3 are skipped as New Year holiday
        #   * Jan 7 is skipped as weekend
        #   * Jan 8 ends up as Jan 9 in the expected dates
        first_day = Date(2012, 1, 1)
        sse_schedule = Schedule(first_day, first_day + 7,
                                Period(length=1, units=TimeUnits.Days),
                                Calendar("China.SSE"),
                                BizDayConventions.Preceding)

        sse_expected = [Date(2011, 12, 30)]
        sse_expected += [Date(2012, 1, day) for day in (4, 5, 6, 9)]
        self.checkDates(sse_schedule, sse_expected)

        # Target, one week starting 2012-01-17: Saturday 21st and Sunday
        # 22nd must be skipped. Previously they were adjusted to Friday
        # 20th, which produced three copies of the same date.
        first_day = Date(2012, 1, 17)
        target_schedule = Schedule(first_day, first_day + 7,
                                   Period(length=1, units=TimeUnits.Days),
                                   Calendar("Target"),
                                   BizDayConventions.Preceding)

        target_expected = [
            Date(2012, 1, day) for day in (17, 18, 19, 20, 23, 24)
        ]
        self.checkDates(target_schedule, target_expected)
예제 #3
0
 def forward_date(date, tenor, date_format='%Y-%m-%d'):
     """Step ``tenor`` back from ``date`` and return the next business day.

     The date is parsed with ``date_format``, moved back by ``tenor`` on the
     China.SSE calendar (with ``endOfMonth=True``) and then advanced by one
     business day.

     :param date: str, the reference date
     :param tenor: str, period text without sign, e.g. ``'1m'``; a leading
         ``'-'`` is prepended here
     :param date_format: str, optional, format used to parse ``date``
     :return: str form of the resulting date, or None when a ``NameError``
         is raised (presumably when the date library is not imported --
         TODO confirm)
     """
     try:
         # Date math is delegated to pyfin for accuracy and flexibility.
         anchor = Date.strptime(date, date_format)
         calendar = Calendar('China.SSE')
         previous_period_end = calendar.advanceDate(
             anchor, Period('-' + tenor), endOfMonth=True)
         # The step above yields the end of the previous period; shift one
         # business day forward so consecutive intervals do not overlap.
         next_biz_day = calendar.advanceDate(previous_period_end,
                                             Period('1b'))
         return str(next_biz_day)
     except NameError:
         return None
예제 #4
0
def _map_horizon(frequency: str) -> int:
    """Translate a frequency string into a horizon measured in days.

    The frequency is parsed as a ``Period``. Each unit is converted with a
    fixed factor (1 per day or business day, 5 per week, 22 per month) and
    one is subtracted from the product.

    :param frequency: period text understood by ``Period``
    :return: ``factor * length - 1``
    :raises ValueError: if the period unit is none of days, business days,
        weeks or months
    """
    period = Period(frequency)
    units = period.units()
    count = period.length()

    if units in (TimeUnits.BDays, TimeUnits.Days):
        days_per_unit = 1
    elif units == TimeUnits.Weeks:
        days_per_unit = 5
    elif units == TimeUnits.Months:
        days_per_unit = 22
    else:
        raise ValueError(
            '{0} is an unrecognized frequency rule'.format(frequency))

    return days_per_unit * count - 1
예제 #5
0
    def testWeeksDaysAlgebra(self):
        """Check division, addition and normalization of week/day periods."""
        twoWeeks = Period(2, TimeUnits.Weeks)
        oneWeek = Period(1, TimeUnits.Weeks)
        threeDays = Period(3, TimeUnits.Days)
        oneDay = Period(1, TimeUnits.Days)

        n = 2
        flag = twoWeeks / n == oneWeek
        self.assertTrue(flag, "division error: {0} / {1:d}"
                              " not equal to {2}".format(twoWeeks, n, oneWeek))

        n = 7
        # Divide by `n` so the operation matches the divisor reported in the
        # failure message.
        flag = oneWeek / n == oneDay
        self.assertTrue(flag, "division error: {0} / {1:d}"
                              " not equal to {2}".format(oneWeek, n, oneDay))

        # Named `total` rather than `sum` to avoid shadowing the builtin.
        total = threeDays
        total += oneDay
        flag = total == Period(4, TimeUnits.Days)
        self.assertTrue(flag, "sum error: {0}"
                              " + {1}"
                              " != {2}".format(threeDays, oneDay, Period(4, TimeUnits.Days)))

        total += oneWeek
        flag = total == Period(11, TimeUnits.Days)
        self.assertTrue(flag, "sum error: {0}"
                              " + {1}"
                              " + {2}"
                              " != {3}".format(threeDays, oneDay, oneWeek, Period(11, TimeUnits.Days)))

        # Before normalize() a 7-day period keeps its original length/units.
        sevenDays = Period(7, TimeUnits.Days)
        flag = sevenDays.length == 7
        self.assertTrue(flag, "normalization error: sevenDays.length"
                              " is {0:d}"
                              " instead of 7".format(sevenDays.length))
        flag = sevenDays.units == TimeUnits.Days
        self.assertTrue(flag, "normalization error: sevenDays.units"
                              " is {0:d}"
                              " instead of {1:d}".format(sevenDays.units, TimeUnits.Days))

        # After normalize() the same period is expected to read as one week.
        normalizedSevenDays = sevenDays.normalize()
        flag = normalizedSevenDays.length == 1
        self.assertTrue(flag, "normalization error: normalizedSevenDays.length"
                              " is {0:d}"
                              " instead of 1".format(normalizedSevenDays.length))
        flag = normalizedSevenDays.units == TimeUnits.Weeks
        self.assertTrue(flag, "normalization error: normalizedSevenDays.units"
                              " is {0:d}"
                              " instead of {1:d}".format(normalizedSevenDays.units, TimeUnits.Weeks))
예제 #6
0
def makeSchedule(firstDate, endDate, tenor):
    """Build a schedule on the null calendar and return it as datetimes.

    :param firstDate: start of the schedule, converted via ``check_date``
    :param endDate: end of the schedule, converted via ``check_date``
    :param tenor: step between schedule dates, passed to ``Period``
    :return: list with ``toDateTime()`` of every schedule date
    """
    null_calendar = Calendar('NullCalendar')
    start = check_date(firstDate)
    end = check_date(endDate)
    step = Period(tenor)
    dates = Schedule(start, end, step, null_calendar)
    return [date.toDateTime() for date in dates]
예제 #7
0
def advanceDateByCalendar(holidayCenter,
                          referenceDate,
                          period,
                          convention=BizDayConventions.Following):
    """Advance a date by a period on the named holiday calendar.

    :param holidayCenter: calendar name passed to ``Calendar``
    :param referenceDate: date to start from, converted via ``check_date``
    :param period: amount to advance by, passed to ``Period``
    :param convention: business day convention forwarded to
        ``Calendar.advanceDate``
    :return: the advanced date converted with ``toDateTime()``
    """
    calendar = Calendar(holidayCenter)
    start = check_date(referenceDate)
    step = Period(period)
    advanced = calendar.advanceDate(start, step, convention)
    return advanced.toDateTime()
    def testScheduleDeepCopy(self):
        """A deep-copied schedule must compare equal to its source."""
        original = Schedule(Date(2013, 3, 31),
                            Date(2013, 6, 30),
                            Period('1m'),
                            Calendar('NullCalendar'))
        clone = copy.deepcopy(original)

        self.assertEqual(original, clone)
 def testScheduleInitializeWithYearly(self):
     """A yearly schedule starting on Feb 29 yields the expected dates."""
     startDate = Date(2012, 2, 29)
     endDate = Date(2013, 3, 1)
     tenor = Period('1y')
     cal = Calendar('NullCalendar')
     sch = Schedule(startDate, endDate, tenor, cal)
     expected = [Date(2012, 2, 29), Date(2013, 2, 28), Date(2013, 3, 1)]
     # Compare sizes first: looping over sch.size() alone would let a
     # schedule that is missing trailing dates pass unnoticed.
     self.assertEqual(len(expected), sch.size())
     for i, expected_date in enumerate(expected):
         self.assertEqual(expected_date, sch[i])
예제 #10
0
def get_pos_adj_date(start_date,
                     end_date,
                     formats="%Y-%m-%d",
                     calendar='China.SSE',
                     freq='m',
                     return_biz_day=False):
    """
    Compute the position adjustment dates between two dates.

    A schedule with one ``freq`` unit per step is generated first; each
    schedule date except the last is then mapped to a date derived from its
    period (weekly: ``Date.nextWeekday(date, Weekdays.Friday)``; monthly:
    ``cal.endOfMonth(date)``; yearly: December 31 of that year). Other
    frequencies keep the schedule dates as they are. Dates later than
    ``end_date`` are dropped from the result.

    :param start_date: str/datetime.datetime, start date of strategy
    :param end_date: str/datetime.datetime, end date of strategy
    :param formats: optional, formats of the string date
    :param calendar: str, optional, name of the calendar to use in dates math
    :param freq: str, optional, the frequency of data (a key of ``_freqDict``)
    :param return_biz_day: bool, optional, if the return dates are biz days
    :return: list of datetime.datetime, pos adjust dates
    :raises TypeError: if either date is neither a str nor a
        datetime.datetime
    """
    # Convert each bound on its own so that a str/datetime mix works and an
    # unsupported type fails with a clear error instead of leaving the
    # converted dates unbound.
    converted = []
    for raw_date in (start_date, end_date):
        if isinstance(raw_date, str):
            converted.append(Date.strptime(raw_date, formats))
        elif isinstance(raw_date, datetime.datetime):
            converted.append(Date.fromDateTime(raw_date))
        else:
            raise TypeError(
                "start_date and end_date must be str or datetime.datetime, "
                "got {0}".format(type(raw_date).__name__))
    d_start_date, d_end_date = converted

    unit = _freqDict[freq]
    cal = Calendar(calendar)
    schedule = Schedule(d_start_date, d_end_date,
                        Period(length=1, units=unit), cal,
                        BizDayConventions.Unadjusted)
    # it fails if setting dStartDate to be first adjustment date, then use Schedule to compute the others
    # so i first compute dates list in each period, then compute the last date of each period
    # last day of that period(month) is the pos adjustment date
    if unit == TimeUnits.Weeks:
        pos_adjust_date = [
            Date.nextWeekday(date, Weekdays.Friday) for date in schedule[:-1]
        ]
    elif unit == TimeUnits.Months:
        pos_adjust_date = [cal.endOfMonth(date) for date in schedule[:-1]]
    elif unit == TimeUnits.Years:
        pos_adjust_date = [
            Date(date.year(), 12, 31) for date in schedule[:-1]
        ]
    else:
        pos_adjust_date = list(schedule)

    if return_biz_day:
        pos_adjust_date = [
            cal.adjustDate(date, BizDayConventions.Preceding)
            for date in pos_adjust_date
        ]

    # Period ends may fall after end_date; keep only those within range.
    last_allowed = d_end_date.toDateTime()
    as_datetimes = [date.toDateTime() for date in pos_adjust_date]
    return [stamp for stamp in as_datetimes if stamp <= last_allowed]
예제 #11
0
    def testPeriodPickle(self):
        """A Period survives a pickle round trip through a file on disk."""
        p1 = Period('36m')

        # delete=False keeps the file on disk after it is closed so that it
        # can be reopened by name for reading.
        f = tempfile.NamedTemporaryFile('w+b', delete=False)
        try:
            with f:
                pickle.dump(p1, f)

            with open(f.name, 'rb') as f2:
                pickled_period = pickle.load(f2)
                self.assertEqual(p1, pickled_period)
        finally:
            # Remove the file even when dumping, loading or the assertion
            # fails, so failed runs do not leave temp files behind.
            os.unlink(f.name)
예제 #12
0
def makeSchedule(firstDate,
                 endDate,
                 tenor,
                 calendar='NullCalendar',
                 dateRule=BizDayConventions.Following):
    """Build a schedule on a named calendar and return it as datetimes.

    :param firstDate: start of the schedule, converted via ``check_date``
    :param endDate: end of the schedule, converted via ``check_date``
    :param tenor: step between schedule dates, passed to ``Period``
    :param calendar: str, optional, calendar name passed to ``Calendar``
    :param dateRule: optional, forwarded to ``Schedule`` as ``convention``
    :return: list with ``toDateTime()`` of every schedule date
    """
    holiday_calendar = Calendar(calendar)
    start = check_date(firstDate)
    end = check_date(endDate)
    step = Period(tenor)
    dates = Schedule(start, end, step, holiday_calendar, convention=dateRule)
    return [date.toDateTime() for date in dates]
 def testScheduleInitialize(self):
     """A monthly schedule between two month ends yields the expected dates."""
     startDate = Date(2013, 3, 31)
     endDate = Date(2013, 6, 30)
     tenor = Period('1m')
     cal = Calendar('NullCalendar')
     sch = Schedule(startDate, endDate, tenor, cal)
     expected = [
         Date(2013, 3, 31),
         Date(2013, 4, 30),
         Date(2013, 5, 31),
         Date(2013, 6, 30)
     ]
     # Compare sizes first: looping over sch.size() alone would let a
     # schedule that is missing trailing dates pass unnoticed.
     self.assertEqual(len(expected), sch.size())
     for i, expected_date in enumerate(expected):
         self.assertEqual(expected_date, sch[i])
    def testSchedulePickle(self):
        """A Schedule survives a pickle round trip through a file on disk."""
        startDate = Date(2013, 3, 31)
        endDate = Date(2013, 6, 30)
        tenor = Period('1m')
        cal = Calendar('NullCalendar')
        sch = Schedule(startDate, endDate, tenor, cal)

        # delete=False keeps the file on disk after it is closed so that it
        # can be reopened by name for reading.
        f = tempfile.NamedTemporaryFile('w+b', delete=False)
        try:
            with f:
                pickle.dump(sch, f)

            with open(f.name, 'rb') as f2:
                pickled_sch = pickle.load(f2)
                self.assertEqual(sch, pickled_sch)
        finally:
            # Remove the file even when dumping, loading or the assertion
            # fails, so failed runs do not leave temp files behind.
            os.unlink(f.name)
예제 #15
0
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0):
    """Load factor, return, industry and benchmark data on a date schedule.

    :param engine: data source offering the ``fetch_*_range`` methods used
        below
    :param factors: a ``Transformer`` or something ``Transformer`` accepts
    :param start_date: first schedule date; moved back by ``warm_start``
        periods of ``frequency`` when ``warm_start > 0``
    :param end_date: last schedule date
    :param frequency: period text describing the step between dates
    :param universe: universe forwarded to every fetch call
    :param benchmark: benchmark forwarded to ``fetch_benchmark_range``
    :param warm_start: number of extra periods to load before ``start_date``
    :return: tuple of (schedule dates as ``'%Y-%m-%d'`` strings, frame with
        ``trade_date``/``code``/``dx``, frame with ``trade_date``/``code``/
        ``weight``/``isOpen``/``industry_code``/``industry`` plus the
        transformer's factor names)
    """
    if warm_start > 0:
        base_period = Period(frequency)
        lookback = Period(length=-warm_start * base_period.length(),
                          units=base_period.units())
        warm_date = advanceDateByCalendar('china.sse', start_date, lookback)
        start_date = warm_date.strftime('%Y-%m-%d')

    schedule = makeSchedule(start_date,
                            end_date,
                            frequency,
                            calendar='china.sse',
                            dateRule=BizDayConventions.Following,
                            dateGenerationRule=DateGeneration.Forward)
    dates = [stamp.strftime('%Y-%m-%d') for stamp in schedule]

    horizon = map_freq(frequency)

    transformer = (factors
                   if isinstance(factors, Transformer) else Transformer(factors))

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates)
    factor_df = factor_df.sort_values(['trade_date', 'code'])
    alpha_logger.info("factor data loading finished")

    return_df = engine.fetch_dx_return_range(universe,
                                             dates=dates,
                                             horizon=horizon)
    alpha_logger.info("return data loading finished")

    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")

    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")

    # Rows with missing values are dropped right after the first join; the
    # benchmark join is a left join, so names without a benchmark row are
    # kept and their missing weight is set to zero below.
    merged = pd.merge(factor_df, return_df, on=['trade_date', 'code'])
    merged = merged.dropna()
    merged = pd.merge(merged,
                      benchmark_df,
                      on=['trade_date', 'code'],
                      how='left')
    merged = pd.merge(merged, industry_df, on=['trade_date', 'code'])
    merged['weight'] = merged['weight'].fillna(0.)

    target_part = merged[['trade_date', 'code', 'dx']]
    feature_columns = [
        'trade_date', 'code', 'weight', 'isOpen', 'industry_code', 'industry'
    ] + transformer.names
    feature_part = merged[feature_columns]

    return dates, target_part, feature_part
예제 #16
0
    def testAdvanceDate(self):
        """Advance 2014-01-31 by several periods on the SSE and IB calendars."""
        base = Date(2014, 1, 31)
        sse = Calendar('China.SSE')
        ib = Calendar('China.IB')

        # (calendar, period text, expected date), all with the Following
        # convention and checked in this order.
        following_cases = [
            # null period
            (sse, '0b', Date(2014, 2, 7)),
            # negative period
            (sse, '-5b', Date(2014, 1, 24)),
            # The SSE/IB difference below is caused by Feb 8 being an SSE
            # holiday but a working day for the IB market.
            (sse, '2b', Date(2014, 2, 10)),
            (sse, '2d', Date(2014, 2, 7)),
            (ib, '2b', Date(2014, 2, 8)),
            (ib, '2d', Date(2014, 2, 7)),
        ]
        for calendar, tenor, expected in following_cases:
            advanced = calendar.advanceDate(base, Period(tenor),
                                            BizDayConventions.Following)
            self.assertEqual(advanced, expected)

        # May 31, 2014 is a holiday
        month_end = sse.advanceDate(base, Period('4m'),
                                    BizDayConventions.ModifiedFollowing, True)
        self.assertEqual(month_end, Date(2014, 5, 30))
예제 #17
0
def fetch_train_phase(engine,
                      alpha_factors: Union[Transformer, Iterable[object]],
                      ref_date,
                      frequency,
                      universe,
                      batch=1,
                      neutralized_risk: Iterable[str] = None,
                      risk_model: str = 'short',
                      pre_process: Iterable[object] = None,
                      post_process: Iterable[object] = None,
                      warm_start: int = 0,
                      fit_target: Union[Transformer, object] = None) -> dict:
    """Assemble the training sample that precedes ``ref_date``.

    Data is loaded on a backward-generated schedule that starts
    ``warm_start + batch`` periods before ``ref_date``. The training window
    ends on the last available date before ``ref_date`` (or on the last
    available date when ``ref_date`` itself is not among the dates) and
    spans ``batch`` dates, clamped to the first available date.

    :param engine: data source offering the fetch methods used below
    :param alpha_factors: a ``Transformer`` or something ``Transformer``
        accepts
    :param ref_date: reference date as a ``'%Y-%m-%d'`` string
    :param frequency: period text describing the step between dates
    :param universe: universe forwarded to the fetch calls
    :param batch: number of schedule dates in the training window
    :param neutralized_risk: forwarded to ``_merge_df``
    :param risk_model: forwarded to ``_merge_df``
    :param pre_process: forwarded to ``factor_processing``
    :param post_process: forwarded to ``factor_processing``
    :param warm_start: extra periods loaded before the training window
    :param fit_target: optional target definition; when None the target is
        fetched with ``fetch_dx_return_range``
    :return: dict with ``'x_names'`` and ``'train'`` (keys ``'x'``, ``'y'``,
        ``'code'``)
    :raises ValueError: via ``pyFinAssert`` when ``ref_date`` is the only
        available date
    """
    transformer = (alpha_factors if isinstance(alpha_factors, Transformer)
                   else Transformer(alpha_factors))

    step = Period(frequency)
    lookback = Period(length=-(warm_start + batch) * step.length(),
                      units=step.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, lookback,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    horizon = map_freq(frequency)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates)
    if fit_target is not None:
        # One extra date past the schedule is requested for the forward
        # fetch, then rows outside the schedule are discarded again.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1],
                                              frequency)
        target_df = engine.fetch_factor_range_forward(
            universe, factors=fit_target, dates=dates + [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        target_df = target_df.groupby('code').apply(
            lambda x: x.fillna(method='pad'))
    else:
        target_df = engine.fetch_dx_return_range(universe,
                                                 dates=dates,
                                                 horizon=horizon)

    merged = pd.merge(factor_df, target_df, on=['trade_date', 'code'])
    merged = merged.dropna()

    target_df = merged[['trade_date', 'code', 'dx']]
    factor_df = merged[['trade_date', 'code'] + transformer.names]

    (target_df, dates, date_label, risk_exp, x_values, y_values, _, _,
     codes) = _merge_df(engine, transformer.names, factor_df, target_df,
                        universe, dates, risk_model, neutralized_risk)

    available = len(dates)
    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        # ref_date itself is left out of the training window.
        pyFinAssert(
            available >= 2, ValueError,
            "No previous data for training for the date {0}".format(ref_date))
        end = dates[-2]
        start = dates[0] if batch > available - 1 else dates[-batch - 1]
    else:
        end = dates[-1]
        start = dates[0] if batch > available else dates[-batch]

    index = (date_label >= start) & (date_label <= end)
    this_raw_x = x_values[index]
    this_raw_y = y_values[index]
    this_code = codes[index]
    this_risk_exp = risk_exp[index] if risk_exp is not None else None

    ne_x = factor_processing(this_raw_x,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ne_y = factor_processing(this_raw_y,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    return {
        'x_names': transformer.names,
        'train': {
            'x': pd.DataFrame(ne_x, columns=transformer.names),
            'y': ne_y,
            'code': this_code
        }
    }
예제 #18
0
    def testPeriodDeepCopy(self):
        """A deep-copied Period must compare equal to its source."""
        original = Period('36m')
        clone = copy.deepcopy(original)

        self.assertEqual(original, clone)
예제 #19
0
def fetch_train_phase(engine,
                      alpha_factors: Iterable[object],
                      ref_date,
                      frequency,
                      universe,
                      batch,
                      neutralized_risk: Iterable[str] = None,
                      risk_model: str = 'short',
                      pre_process: Iterable[object] = None,
                      post_process: Iterable[object] = None,
                      warm_start: int = 0) -> dict:
    """Assemble the training sample that precedes ``ref_date``.

    Data is loaded on a backward-generated schedule that starts
    ``warm_start + batch + 1`` periods before ``ref_date``. The training
    window ends on the last available date before ``ref_date`` (or on the
    last available date when ``ref_date`` itself is not among the dates) and
    spans ``batch`` dates. When fewer dates than ``batch`` are available the
    window starts at the first available date instead of failing.

    :param engine: data source offering the fetch methods used below
    :param alpha_factors: factor definitions passed to ``Transformer``
    :param ref_date: reference date as a ``'%Y-%m-%d'`` string
    :param frequency: period text describing the step between dates
    :param universe: universe forwarded to the fetch calls
    :param batch: number of schedule dates in the training window
    :param neutralized_risk: forwarded to ``_merge_df``
    :param risk_model: forwarded to ``_merge_df``
    :param pre_process: forwarded to ``factor_processing``
    :param post_process: forwarded to ``factor_processing``
    :param warm_start: extra periods loaded before the training window
    :return: dict with ``'x_names'`` and ``'train'`` (keys ``'x'``, ``'y'``)
    """
    transformer = Transformer(alpha_factors)

    p = Period(frequency)
    p = Period(length=-(warm_start + batch + 1) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    horizon = _map_horizon(frequency)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates)
    return_df = engine.fetch_dx_return_range(universe,
                                             dates=dates,
                                             horizon=horizon)

    df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()

    return_df, factor_df = df[['trade_date', 'code',
                               'dx']], df[['trade_date', 'code', 'isOpen'] +
                                          transformer.names]

    return_df, dates, date_label, risk_exp, x_values, y_values, _, _ = \
        _merge_df(engine, transformer.names, factor_df, return_df, universe, dates, risk_model, neutralized_risk)

    # Clamp the window start to the first available date: a negative index
    # beyond the number of dates would otherwise raise IndexError whenever
    # less history than `batch` periods survives the merge.
    available = len(dates)
    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        # ref_date itself is left out of the training window.
        end = dates[-2]
        start = dates[-batch - 1] if batch <= available - 1 else dates[0]
    else:
        end = dates[-1]
        start = dates[-batch] if batch <= available else dates[0]

    index = (date_label >= start) & (date_label <= end)
    this_raw_x = x_values[index]
    this_raw_y = y_values[index]
    if risk_exp is not None:
        this_risk_exp = risk_exp[index]
    else:
        this_risk_exp = None

    ne_x = factor_processing(this_raw_x,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ne_y = factor_processing(this_raw_y,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ret = dict()
    ret['x_names'] = transformer.names
    ret['train'] = {'x': ne_x, 'y': ne_y}

    return ret
예제 #20
0
    def testComparingOperators(self):
        """Exercise ``<``, ``!=`` and ``>`` between periods of various units."""

        def period(length, units):
            return Period(length=length, units=units)

        # less-than between comparable units
        self.assertTrue(period(0, TimeUnits.Days) < period(1, TimeUnits.Days))

        self.assertFalse(
            period(13, TimeUnits.Months) < period(1, TimeUnits.Years))
        self.assertTrue(
            period(1, TimeUnits.Years) < period(13, TimeUnits.Months))

        self.assertTrue(
            period(13, TimeUnits.Days) < period(2, TimeUnits.Weeks))
        self.assertFalse(
            period(2, TimeUnits.Weeks) < period(13, TimeUnits.Days))

        self.assertTrue(
            period(1, TimeUnits.Years) < period(56, TimeUnits.Weeks))
        self.assertFalse(
            period(56, TimeUnits.Weeks) < period(1, TimeUnits.Years))

        # comparisons that are expected to be rejected with ValueError
        five_months = period(5, TimeUnits.Months)
        rejected = (period(21, TimeUnits.Weeks), period(21, TimeUnits.BDays))
        for candidate in rejected:
            with self.assertRaises(ValueError):
                _ = candidate < five_months

        # test not equal operator
        one_day = period(1, TimeUnits.Days)
        self.assertFalse(one_day != period(1, TimeUnits.Days))
        self.assertTrue(one_day != period(1, TimeUnits.Years))

        # test greater than operator
        shorter = period(1, TimeUnits.Days)
        longer = period(2, TimeUnits.Days)
        self.assertEqual(shorter < longer, not shorter > longer)
예제 #21
0
def dcam_strat_main(factor_loader_params, analyzer_params, selector_params,
                    portfolio_params, update_params):
    """Run the DCAM pipeline: factors, scores, selection, portfolio.

    Each stage is either recomputed or loaded from a previously saved
    result, depending on the flags in ``update_params``.

    :param factor_loader_params: dict; requires ``'start_date'``,
        ``'end_date'`` and ``'factor_norm_dict'``, optional ``'na_handler'``
    :param analyzer_params: dict of optional DCAM analyzer settings
    :param selector_params: dict of optional security selector settings
    :param portfolio_params: dict of optional portfolio settings
    :param update_params: dict of optional booleans ``'update_factor'``,
        ``'update_sec_score'`` and ``'update_sec_select'`` (all default to
        False)
    :return: None; the portfolio evaluation is triggered as a side effect
    """
    # FactorLoader params
    start_date = factor_loader_params['start_date']
    end_date = factor_loader_params['end_date']
    factor_norm_dict = factor_loader_params['factor_norm_dict']
    na_handler = factor_loader_params.get('na_handler', FactorNAHandler.Ignore)

    # dcam analyzer params
    factor_weight_type = analyzer_params.get('factor_weight_type',
                                             FactorWeightType.ICWeight)
    tiaocang_date_window_size = analyzer_params.get(
        'tiaocang_date_window_size', 12)
    save_sec_score = analyzer_params.get('save_sec_score', True)

    # selector params
    save_sec_selected = selector_params.get('save_sec_selected', True)
    nb_sec_selected_per_industry_min = selector_params.get(
        'nb_sec_selected_per_industry_min', 5)
    use_industry_name = selector_params.get('use_industry_name', True)
    nb_sec_selected_total = selector_params.get('nb_sec_selected_total', 100)
    ignore_zero_weight = selector_params.get('ignore_zero_weight', False)

    # portfolio params
    benchmark_sec_id = portfolio_params.get('benchmark_sec_id', '000905.SH')
    re_balance_freq = portfolio_params.get('re_balance_freq', FreqType.EOM)
    initial_capital = portfolio_params.get('initial_capital', 1000000000.0)
    filter_return_on_tiaocang_date = portfolio_params.get(
        'filter_return_on_tiaocang_date', 0.09)
    data_source = portfolio_params.get('data_source', DataSource.WIND)
    save_perf_file = portfolio_params.get('save_perf_file', False)
    risk_free = portfolio_params.get('risk_free', 0.0)

    # update flags: recompute a stage when True, otherwise load saved output
    update_factor = update_params.get('update_factor', False)
    update_sec_score = update_params.get('update_sec_score', False)
    update_sec_select = update_params.get('update_sec_select', False)

    # The loader is always constructed because the score stage also asks it
    # for the rebalance dates.
    factor = FactorLoader(start_date=start_date,
                          end_date=end_date,
                          factor_norm_dict=factor_norm_dict,
                          na_handler=na_handler)
    if update_factor:
        factor_data = factor.get_norm_factor_data()
        pickle_dump_data(factor_data, _factor_pkl_path)
    else:
        factor_data = pickle_load_data(_factor_pkl_path)

    if update_sec_score:
        # Element [1] of each factor_norm_dict entry is compared against
        # DCAMFactorType to split factors into layer and alpha factors.
        layer_factor = [
            factor_data[name] for name in factor_norm_dict.keys()
            if factor_norm_dict[name][1] == DCAMFactorType.layerFactor
        ]
        alpha_factor = [
            factor_data[name] for name in factor_norm_dict.keys()
            if factor_norm_dict[name][1] == DCAMFactorType.alphaFactor
        ]
        # NOTE(review): the sign is read from factor_data[name][2], while the
        # factor type above is read from factor_norm_dict[name][1] -- confirm
        # this is not meant to be factor_norm_dict[name][2].
        alpha_factor_sign = [
            factor_data[name][2] for name in factor_norm_dict.keys()
            if factor_norm_dict[name][1] == DCAMFactorType.alphaFactor
        ]
        analyzer = DCAMAnalyzer(
            layer_factor=layer_factor,
            alpha_factor=alpha_factor,
            sec_return=factor_data['RETURN'],
            tiaocang_date=factor.get_tiaocang_date(),
            tiaocang_date_window_size=tiaocang_date_window_size,
            save_sec_score=save_sec_score,
            factor_weight_type=factor_weight_type,
            alpha_factor_sign=alpha_factor_sign)

        sec_score = analyzer.calc_sec_score()
    else:
        sec_score = load_sec_score(_sec_score_path)

    if update_sec_select:
        index_comp = IndexComp(industry_weight=factor_data['IND_WGT'])
        selector = Selector(
            sec_score=sec_score,
            industry=factor_data['INDUSTRY'],
            nb_sec_selected_per_industry_min=nb_sec_selected_per_industry_min,
            index_comp=index_comp,
            save_sec_selected=save_sec_selected,
            use_industry_name=use_industry_name,
            nb_sec_selected_total=nb_sec_selected_total,
            ignore_zero_weight=ignore_zero_weight)
        selector.industry_neutral = True
        selector.sec_selection()
        sec_selected = selector.sec_selected_full_info
        pprint(selector.sec_selected_full_info)
    else:
        sec_selected = load_sec_selected(_sec_selected_path)

    # construct strategy ptf
    # Price data is needed up to the month end following the last rebalance
    # date, so the end date is advanced by one month on the SSE calendar.
    sse_cal = Calendar('China.SSE')
    end_date_for_price_data = str(
        sse_cal.advanceDate(Date.strptime(end_date), Period('1m')))
    strategy = Portfolio(
        sec_selected=sec_selected,
        end_date=end_date_for_price_data,
        initial_capital=initial_capital,
        filter_return_on_tiaocang_date=filter_return_on_tiaocang_date,
        data_source=data_source,
        benchmark_sec_id=benchmark_sec_id,
        re_balance_freq=re_balance_freq,
        save_perf_file=save_perf_file,
        risk_free=risk_free)
    strategy.evaluate_ptf_return()
예제 #22
0
    def testYearsMonthsAlgebra(self):
        """Check division, addition and normalization of year/month periods."""
        oneYear = Period(1, TimeUnits.Years)
        sixMonths = Period(6, TimeUnits.Months)
        threeMonths = Period(3, TimeUnits.Months)

        n = 4
        flag = oneYear / n == threeMonths
        self.assertTrue(flag, "division error: {0} / {1:d}"
                              " not equal to {2}".format(oneYear, n, threeMonths))

        n = 2
        flag = oneYear / n == sixMonths
        self.assertTrue(flag, "division error: {0} / {1:d}"
                              " not equal to {2}".format(oneYear, n, sixMonths))

        # Named `total` rather than `sum` to avoid shadowing the builtin.
        total = threeMonths
        total += sixMonths
        flag = total == Period(9, TimeUnits.Months)
        self.assertTrue(flag, "sum error: {0}"
                              " + {1}"
                              " != {2}".format(threeMonths, sixMonths, Period(9, TimeUnits.Months)))

        total += oneYear
        flag = total == Period(21, TimeUnits.Months)
        self.assertTrue(flag, "sum error: {0}"
                              " + {1}"
                              " + {2}"
                              " != {3}".format(threeMonths, sixMonths, oneYear, Period(21, TimeUnits.Months)))

        # Before normalize() a 12-month period keeps its original
        # length/units.
        twelveMonths = Period(12, TimeUnits.Months)
        flag = twelveMonths.length == 12
        self.assertTrue(flag, "normalization error: TwelveMonths.length"
                              " is {0:d}"
                              " instead of 12".format(twelveMonths.length))
        flag = twelveMonths.units == TimeUnits.Months
        self.assertTrue(flag, "normalization error: TwelveMonths.units"
                              " is {0:d}"
                              " instead of {1:d}".format(twelveMonths.units, TimeUnits.Months))

        # After normalize() the period is expected to read as one year. The
        # failure messages report the normalized period's values (they used
        # to print the un-normalized twelveMonths ones).
        normalizedTwelveMonths = Period(12, TimeUnits.Months)
        normalizedTwelveMonths = normalizedTwelveMonths.normalize()
        flag = normalizedTwelveMonths.length == 1
        self.assertTrue(flag, "normalization error: normalizedTwelveMonths.length"
                              " is {0:d}"
                              " instead of 1".format(normalizedTwelveMonths.length))
        flag = normalizedTwelveMonths.units == TimeUnits.Years
        self.assertTrue(flag, "normalization error: normalizedTwelveMonths.units"
                              " is {0:d}"
                              " instead of {1:d}".format(normalizedTwelveMonths.units, TimeUnits.Years))

        # Day-based periods are expected to keep their units when normalized.
        thirtyDays = Period(30, TimeUnits.Days)
        normalizedThirtyDays = thirtyDays.normalize()
        flag = normalizedThirtyDays.units == TimeUnits.Days
        self.assertTrue(flag, "normalization error: ThirtyDays.units"
                              " is {0:d}"
                              " instead of {1:d}".format(normalizedThirtyDays.units, TimeUnits.Days))

        thirtyBDays = Period(30, TimeUnits.BDays)
        normalizedThirtyBDays = thirtyBDays.normalize()
        flag = normalizedThirtyBDays.units == TimeUnits.BDays
        self.assertTrue(flag, "normalization error: ThirtyBDays.units"
                              " is {0:d}"
                              " instead of {1:d}".format(normalizedThirtyBDays.units, TimeUnits.BDays))
예제 #23
0
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0,
                 fit_target: Union[Transformer, object] = None):
    """Fetch factor, target, benchmark and industry data and join them.

    Args:
        engine: data source used for every ``fetch_*`` call below.
        factors: a ready ``Transformer`` or the factor definitions to wrap
            in one.
        start_date: first schedule date; moved back by ``warm_start``
            periods of ``frequency`` when ``warm_start > 0``.
        end_date: last schedule date.
        frequency: period string used for the schedule and, through
            ``map_freq``, for the return horizon.
        universe: universe passed through to the engine.
        benchmark: benchmark identifier passed to
            ``engine.fetch_benchmark_range``.
        warm_start: number of extra ``frequency`` periods to load before
            ``start_date``.
        fit_target: when ``None`` the target comes from
            ``engine.fetch_dx_return_range``; otherwise it is fetched with
            ``engine.fetch_factor_range_forward`` using this definition.

    Returns:
        A 3-tuple ``(dates, target, features)``: the schedule dates as
        ``'%Y-%m-%d'`` strings, the ``trade_date``/``code``/``dx`` columns
        of the merged frame, and the ``trade_date``/``code``/``weight``/
        ``industry_code``/``industry`` columns plus the transformer's
        factor columns.
    """
    if warm_start > 0:
        # Negative length: step the start date backwards on the calendar.
        p = Period(frequency)
        p = Period(length=-warm_start * p.length(), units=p.units())
        start_date = advanceDateByCalendar('china.sse', start_date,
                                           p).strftime('%Y-%m-%d')

    dates = makeSchedule(start_date,
                         end_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Forward)

    dates = [d.strftime('%Y-%m-%d') for d in dates]

    horizon = map_freq(frequency)

    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates).sort_values(
                                              ['trade_date', 'code'])
    alpha_logger.info("factor data loading finished")

    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe,
                                                 dates=dates,
                                                 horizon=horizon)
    else:
        # Request one extra period past the schedule, then keep only the
        # schedule dates.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1],
                                              frequency)
        target_df = engine.fetch_factor_range_forward(universe,
                                                      factors=fit_target,
                                                      dates=dates +
                                                      [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        # Forward-fill gaps per security. ``ffill()`` is the supported
        # spelling of the deprecated ``fillna(method='pad')``.
        target_df = target_df.groupby('code').apply(lambda x: x.ffill())
    alpha_logger.info("fit target data loading finished")

    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")
    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")

    df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna()
    # Left join: rows without a benchmark entry are kept and get weight 0.
    df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
    df = pd.merge(df, industry_df, on=['trade_date', 'code'])
    df['weight'] = df['weight'].fillna(0.)
    df.dropna(inplace=True)

    return dates, df[[
        'trade_date', 'code', 'dx'
    ]], df[['trade_date', 'code', 'weight', 'industry_code', 'industry'] +
           transformer.names]
예제 #24
0
    def testBasicArithmic(self):
        """Exercise Period.normalize failure, addition rules, negation,
        ordering, subtraction and the string form."""

        def assert_sum(left, right, expected):
            # ``left + right`` must compare equal to ``expected``.
            calculated = left + right
            self.assertEqual(
                expected, calculated,
                "added value {0} should be equal to {1}".format(
                    calculated, expected))

        def assert_sum_rejected(left, units):
            # Every unit listed cannot be added to ``left``; afterwards the
            # last operand gets an unknown unit code, which must fail too.
            right = None
            for unit in units:
                right = Period(length=2, units=unit)
                with self.assertRaises(ValueError):
                    _ = left + right
            right._units = 10
            with self.assertRaises(ValueError):
                _ = left + right

        # test bad normalize
        broken = Period(length=1, units=TimeUnits.Years)
        broken._units = 10
        with self.assertRaises(TypeError):
            broken.normalize()

        # test plus method
        tenMonths = Period(length=10, units=TimeUnits.Months)
        assert_sum(Period(length=0, units=TimeUnits.Days), tenMonths,
                   tenMonths)

        base = Period(length=2, units=TimeUnits.Years)
        assert_sum(base, Period(length=13, units=TimeUnits.Months),
                   Period(length=37, units=TimeUnits.Months))
        assert_sum_rejected(
            base, [TimeUnits.Weeks, TimeUnits.BDays, TimeUnits.Days])

        base = Period(length=13, units=TimeUnits.Months)
        assert_sum(base, Period(length=2, units=TimeUnits.Years),
                   Period(length=37, units=TimeUnits.Months))
        assert_sum_rejected(
            base, [TimeUnits.Weeks, TimeUnits.BDays, TimeUnits.Days])

        base = Period(length=2, units=TimeUnits.Weeks)
        assert_sum(base, Period(length=7, units=TimeUnits.Days),
                   Period(length=21, units=TimeUnits.Days))
        assert_sum_rejected(
            base, [TimeUnits.Months, TimeUnits.BDays, TimeUnits.Years])

        base = Period(length=7, units=TimeUnits.Days)
        assert_sum(base, Period(length=2, units=TimeUnits.Weeks),
                   Period(length=21, units=TimeUnits.Days))
        assert_sum_rejected(
            base, [TimeUnits.Months, TimeUnits.BDays, TimeUnits.Years])

        base = Period(length=7, units=TimeUnits.BDays)
        assert_sum_rejected(base, [
            TimeUnits.Months, TimeUnits.Days, TimeUnits.Weeks,
            TimeUnits.Years
        ])
        self.assertEqual(base + Period(length=2, units=TimeUnits.BDays),
                         Period('9B'))

        # test negative operator
        negated = -Period(length=-13, units=TimeUnits.Weeks)
        self.assertEqual(negated, Period(length=13, units=TimeUnits.Weeks))

        # test less operator
        zeroDays = Period(length=0, units=TimeUnits.Days)
        minusThreeBDays = Period(length=-3, units=TimeUnits.BDays)
        self.assertTrue(minusThreeBDays < zeroDays)

        # test sub operator
        zeroDays = Period(length=0, units=TimeUnits.Days)
        minusThreeBDays = Period(length=-3, units=TimeUnits.BDays)
        self.assertEqual(zeroDays - minusThreeBDays, Period('3b'))

        # test string representation
        twelveMonths = Period(length=12, units=TimeUnits.Months)
        self.assertEqual("1Y", twelveMonths.__str__())
예제 #25
0
    def testWeeksDaysAlgebra(self):
        """Check division, in-place addition and normalization of periods
        expressed in weeks and days."""
        twoWeeks = Period(length=2, units=TimeUnits.Weeks)
        oneWeek = Period(length=1, units=TimeUnits.Weeks)
        threeDays = Period(length=3, units=TimeUnits.Days)
        oneDay = Period(length=1, units=TimeUnits.Days)

        n = 2
        flag = twoWeeks / n == oneWeek
        self.assertTrue(
            flag, "division error: {0} / {1:d}"
            " not equal to {2}".format(twoWeeks, n, oneWeek))

        # Divide by ``n`` so the divisor and the reported value cannot drift.
        n = 7
        flag = oneWeek / n == oneDay
        self.assertTrue(
            flag, "division error: {0} / {1:d}"
            " not equal to {2}".format(oneWeek, n, oneDay))

        # Named ``total`` rather than ``sum`` to avoid shadowing the builtin.
        total = threeDays
        total += oneDay
        flag = total == Period(length=4, units=TimeUnits.Days)
        self.assertTrue(
            flag, "sum error: {0}"
            " + {1}"
            " != {2}".format(threeDays, oneDay,
                             Period(length=4, units=TimeUnits.Days)))

        total += oneWeek
        flag = total == Period(length=11, units=TimeUnits.Days)
        self.assertTrue(
            flag, "sum error: {0}"
            " + {1}"
            " + {2}"
            " != {3}".format(threeDays, oneDay, oneWeek,
                             Period(length=11, units=TimeUnits.Days)))

        # Construction alone must not normalize: 7 days stays 7 days.
        sevenDays = Period(length=7, units=TimeUnits.Days)
        flag = sevenDays.length() == 7
        self.assertTrue(
            flag, "normalization error: sevenDays.length"
            " is {0:d}"
            " instead of 7".format(sevenDays.length()))
        flag = sevenDays.units() == TimeUnits.Days
        self.assertTrue(
            flag, "normalization error: sevenDays.units"
            " is {0:d}"
            " instead of {1:d}".format(sevenDays.units(), TimeUnits.Days))

        # Explicit normalization turns 7 days into 1 week.
        normalizedSevenDays = sevenDays.normalize()
        flag = normalizedSevenDays.length() == 1
        self.assertTrue(
            flag, "normalization error: normalizedSevenDays.length"
            " is {0:d}"
            " instead of 1".format(normalizedSevenDays.length()))
        flag = normalizedSevenDays.units() == TimeUnits.Weeks
        self.assertTrue(
            flag, "normalization error: normalizedSevenDays.units"
            " is {0:d}"
            " instead of {1:d}".format(normalizedSevenDays.units(),
                                       TimeUnits.Weeks))
예제 #26
0
    def testComparingOperators(self):
        """Verify ``<``, ``!=`` and ``>`` on periods, including pairs whose
        units cannot be compared."""
        days = TimeUnits.Days
        weeks = TimeUnits.Weeks
        months = TimeUnits.Months
        years = TimeUnits.Years

        # (left, right, whether ``left < right`` is expected to hold)
        ordering_cases = [
            (Period(0, days), Period(1, days), True),
            (Period(13, months), Period(1, years), False),
            (Period(1, years), Period(13, months), True),
            (Period(13, days), Period(2, weeks), True),
            (Period(2, weeks), Period(13, days), False),
            (Period(1, years), Period(56, weeks), True),
            (Period(56, weeks), Period(1, years), False),
        ]
        for left, right, is_less in ordering_cases:
            if is_less:
                self.assertTrue(left < right)
            else:
                self.assertTrue(not left < right)

        # Weeks or business days against months must raise on comparison.
        five_months = Period(5, months)
        for incomparable in (Period(21, weeks), Period(21, TimeUnits.BDays)):
            with self.assertRaises(ValueError):
                _ = incomparable < five_months

        # test not equal operator
        one_day = Period(1, days)
        self.assertTrue(not one_day != Period(1, days))
        self.assertTrue(one_day != Period(1, years))

        # test greater than operator
        shorter = Period(1, days)
        longer = Period(2, days)
        self.assertEqual(shorter < longer, not shorter > longer)
예제 #27
0
    def testBasicFunctions(self):
        """Check Date accessors, conversions, comparisons, static helpers
        and period arithmetic for 2015-07-24."""
        year = 2015
        month = 7
        day = 24
        strRepr = "{0}-{1:02d}-{2:02d}".format(year, month, day)
        innerRepr = "Date({0}, {1}, {2})".format(year, month, day)

        testDate = Date(year, month, day)
        self.assertEqual(
            str(testDate), strRepr, "date string:\n"
            "expected:   {0:s}\n"
            "calculated: {1:s}".format(strRepr, str(testDate)))

        self.assertEqual(
            repr(testDate), innerRepr, "date representation:\n"
            "expected:   {0:s}\n"
            "calculated: {1:s}".format(innerRepr, repr(testDate)))

        self.assertEqual(
            testDate.year(), year, "date year:\n"
            "expected:   {0:d}\n"
            "calculated: {1:d}".format(year, testDate.year()))

        self.assertEqual(
            testDate.month(), month, "date month:\n"
            "expected:   {0:d}\n"
            "calculated: {1:d}".format(month, testDate.month()))

        self.assertEqual(
            testDate.dayOfMonth(), day, "date day:\n"
            "expected:   {0:d}\n"
            "calculated: {1:d}".format(day, testDate.dayOfMonth()))

        expectedDayOfYear = testDate - Date(2015, 1, 1) + 1
        self.assertEqual(
            testDate.dayOfYear(), expectedDayOfYear,
            "date day of year:\n"
            "expected:   {0:d}\n"
            "calculated: {1:d}".format(expectedDayOfYear,
                                       testDate.dayOfYear()))

        # One value feeds both the assertion and its message so the two
        # cannot disagree.
        expectedWeekday = 6
        self.assertEqual(
            testDate.weekday(), expectedWeekday, "date weekday:\n"
            "expected:   {0:d}\n"
            "calculated: {1:d}".format(expectedWeekday, testDate.weekday()))

        self.assertEqual(
            testDate.toDateTime(), dt.datetime(year, month, day),
            "date datetime representation\n"
            "expected:   {0}\n"
            "calculated: {1}".format(dt.datetime(year, month, day),
                                     testDate.toDateTime()))

        # Round trip through the serial number.
        serialNumber = testDate.serialNumber
        serialDate = Date(serialNumber=serialNumber)

        self.assertEqual(
            serialDate, testDate, "date excel serial number representation\n"
            "expected:   {0:d}\n"
            "calculated: {1:d}".format(testDate.serialNumber,
                                       serialDate.serialNumber))

        # test comparisons
        previousDate = testDate - 1
        self.assertTrue(
            previousDate < testDate,
            "{0} is not earlier than {1}".format(previousDate, testDate))
        self.assertFalse(
            previousDate >= testDate,
            "{0} should not be later than or equal to {1}".format(
                previousDate, testDate))
        self.assertTrue((previousDate + 1) == testDate,
                        "{0} plus one day should be equal to {1}".format(
                            previousDate, testDate))

        # check static members
        self.assertEqual(Date.minDate(), Date(1901, 1, 1), "min date is wrong")
        self.assertEqual(Date.maxDate(), Date(2199, 12, 31),
                         "max date is wrong")
        monthEnd = Date(year, month, 31)
        self.assertEqual(Date.endOfMonth(testDate), monthEnd,
                         "end of month is wrong")
        self.assertTrue(Date.isEndOfMonth(monthEnd),
                        "{0} should be the end of month".format(monthEnd))
        self.assertEqual(
            Date.nextWeekday(testDate, testDate.weekday()), testDate,
            "{0}'s next same week day should be {1}".format(
                testDate, testDate))
        expectedDate = dt.date.today()
        expectedDate = dt.datetime(expectedDate.year, expectedDate.month,
                                   expectedDate.day)
        self.assertEqual(
            Date.todaysDate().toDateTime(), expectedDate, "today's date\n"
            "expected:   {0}\n"
            "calculated: {1}".format(expectedDate, Date.todaysDate()))

        # nth-week day
        with self.assertRaises(ValueError):
            _ = Date.nthWeekday(0, Weekdays.Friday, 1, 2015)

        with self.assertRaises(ValueError):
            _ = Date.nthWeekday(6, Weekdays.Friday, 1, 2015)

        self.assertEqual(Date.nthWeekday(3, Weekdays.Wednesday, 8, 2015),
                         Date(2015, 8, 19))

        # check plus/sub

        threeWeeksAfter = testDate + '3W'
        expectedDate = testDate + 21
        self.assertEqual(
            threeWeeksAfter, expectedDate, "date + 3w period\n"
            "expected:   {0}\n"
            "calculated: {1}".format(expectedDate, threeWeeksAfter))

        threeMonthsBefore = testDate - "3M"
        expectedDate = Date(year, month - 3, day)
        self.assertEqual(
            threeMonthsBefore, expectedDate, "date - 3m period\n"
            "expected:   {0}\n"
            "calculated: {1}".format(expectedDate, threeMonthsBefore))

        threeMonthsBefore = testDate - Period("3M")
        expectedDate = Date(year, month - 3, day)
        self.assertEqual(
            threeMonthsBefore, expectedDate, "date - 3m period\n"
            "expected:   {0}\n"
            "calculated: {1}".format(expectedDate, threeMonthsBefore))

        threeMonthsAfter = testDate + "3m"
        expectedDate = Date(year, month + 3, day)
        self.assertEqual(
            threeMonthsAfter, expectedDate, "date + 3m period\n"
            "expected:   {0}\n"
            "calculated: {1}".format(expectedDate, threeMonthsAfter))

        # The 14-month messages report the value actually under test.
        oneYearAndTwoMonthsBefore = testDate - "14m"
        expectedDate = Date(year - 1, month - 2, day)
        self.assertEqual(
            oneYearAndTwoMonthsBefore, expectedDate, "date - 14m period\n"
            "expected:   {0}\n"
            "calculated: {1}".format(expectedDate,
                                     oneYearAndTwoMonthsBefore))

        oneYearAndTwoMonthsAfter = testDate + "14m"
        expectedDate = Date(year + 1, month + 2, day)
        self.assertEqual(
            oneYearAndTwoMonthsAfter, expectedDate, "date + 14m period\n"
            "expected:   {0}\n"
            "calculated: {1}".format(expectedDate, oneYearAndTwoMonthsAfter))

        fiveMonthsAfter = testDate + "5m"
        expectedDate = Date(year, month + 5, day)
        self.assertEqual(
            fiveMonthsAfter, expectedDate, "date + 5m period\n"
            "expected:   {0}\n"
            "calculated: {1}".format(expectedDate, fiveMonthsAfter))
예제 #28
0
def fetch_predict_phase(engine,
                        alpha_factors: Iterable[object],
                        ref_date,
                        frequency,
                        universe,
                        batch,
                        neutralized_risk: Iterable[str] = None,
                        risk_model: str = 'short',
                        pre_process: Iterable[object] = None,
                        post_process: Iterable[object] = None,
                        warm_start: int = 0):
    """Build the processed factor matrix used to predict on ``ref_date``.

    Factor data is loaded for a schedule that ends at ``ref_date`` and
    starts ``warm_start + batch`` periods of ``frequency`` earlier. The
    rows of the last ``batch`` distinct dates go through
    ``factor_processing``, and only the rows dated ``ref_date`` are kept.

    Args:
        engine: data source providing ``fetch_factor_range`` and
            ``fetch_risk_model_range``.
        alpha_factors: factor definitions wrapped in a ``Transformer``.
        ref_date: prediction date as a ``'%Y-%m-%d'`` string.
        frequency: period string for the schedule.
        universe: universe passed through to the engine.
        batch: number of trailing distinct dates processed together.
        neutralized_risk: risk factor columns passed to
            ``factor_processing`` as ``risk_factors``; falsy disables it.
        risk_model: risk model name passed to the engine.
        pre_process: forwarded to ``factor_processing``.
        post_process: forwarded to ``factor_processing``.
        warm_start: extra periods loaded before the batch window.

    Returns:
        ``{'x_names': <factor names>, 'predict': {'x': ..., 'code': ...}}``.
        Both ``x`` and ``code`` are ``None`` when no loaded row is dated
        ``ref_date``.
    """
    transformer = Transformer(alpha_factors)

    # Negative length: step back from ref_date to find the schedule start.
    p = Period(frequency)
    p = Period(length=-(warm_start + batch) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates).dropna()

    names = transformer.names

    if neutralized_risk:
        risk_df = engine.fetch_risk_model_range(universe,
                                                dates=dates,
                                                risk_model=risk_model)[1]
        # Risk columns already present as factors are not fetched twice.
        used_neutralized_risk = list(set(neutralized_risk).difference(names))
        risk_df = risk_df[['trade_date', 'code'] +
                          used_neutralized_risk].dropna()
        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
        risk_exp = train_x[neutralized_risk].values.astype(float)
    else:
        train_x = factor_df.copy()
        risk_exp = None

    # Built for both branches; it was previously only assigned on the
    # neutralized path, so the default path hit a NameError below.
    x_values = train_x[names].values.astype(float)

    # Labels come from train_x (not factor_df): the inner merge above can
    # drop rows, and the positional slices below index train_x-derived arrays.
    # NOTE(review): bisect assumes rows are ordered by trade_date - confirm
    # the engine returns them sorted.
    date_label = pd.DatetimeIndex(train_x.trade_date).to_pydatetime()
    dates = np.unique(date_label)

    # An empty frame has no last date; treat it like "no data on ref_date".
    if len(dates) > 0 and dates[-1] == dt.datetime.strptime(
            ref_date, '%Y-%m-%d'):
        end = dates[-1]
        start = dates[-batch]

        # index = (date_label >= start) & (date_label <= end)
        left_index = bisect.bisect_left(date_label, start)
        right_index = bisect.bisect_right(date_label, end)
        this_raw_x = x_values[left_index:right_index]
        sub_dates = date_label[left_index:right_index]

        if risk_exp is not None:
            this_risk_exp = risk_exp[left_index:right_index]
        else:
            this_risk_exp = None

        ne_x = factor_processing(this_raw_x,
                                 pre_process=pre_process,
                                 risk_factors=this_risk_exp,
                                 post_process=post_process)

        # Keep only the processed rows that belong to ref_date.
        inner_left_index = bisect.bisect_left(sub_dates, end)
        inner_right_index = bisect.bisect_right(sub_dates, end)

        ne_x = ne_x[inner_left_index:inner_right_index]

        left_index = bisect.bisect_left(date_label, end)
        right_index = bisect.bisect_right(date_label, end)

        codes = train_x.code.values[left_index:right_index]
    else:
        ne_x = None
        codes = None

    ret = dict()
    ret['x_names'] = transformer.names
    ret['predict'] = {'x': ne_x, 'code': codes}

    return ret
예제 #29
0
    def testBasicArithmic(self):
        """Check Period.normalize failure, which unit pairs can be added,
        and the negation, ordering, subtraction and string operators."""
        years = TimeUnits.Years
        months = TimeUnits.Months
        weeks = TimeUnits.Weeks
        days = TimeUnits.Days
        bdays = TimeUnits.BDays

        def expect_rejected(base, clashing_units):
            # Adding a period in any clashing unit must raise; the final
            # operand is then given an unknown unit code and must raise too.
            other = None
            for unit in clashing_units:
                other = Period(2, unit)
                with self.assertRaises(ValueError):
                    _ = base + other
            other._units = 10
            with self.assertRaises(ValueError):
                _ = base + other

        # test bad normalize
        corrupted = Period(1, years)
        corrupted._units = 10
        with self.assertRaises(TypeError):
            corrupted.normalize()

        # test plus method
        tenMonths = Period(10, months)
        outcome = Period(0, days) + tenMonths
        self.assertEqual(tenMonths, outcome, "added value {0} should be equal to {1}".format(outcome, tenMonths))

        # (left operand, right operand, expected sum, units that clash with left)
        scenarios = [
            (Period(2, years), Period(13, months), Period(37, months), (weeks, bdays, days)),
            (Period(13, months), Period(2, years), Period(37, months), (weeks, bdays, days)),
            (Period(2, weeks), Period(7, days), Period(21, days), (months, bdays, years)),
            (Period(7, days), Period(2, weeks), Period(21, days), (months, bdays, years)),
        ]
        for base, addend, wanted, clashing_units in scenarios:
            outcome = base + addend
            self.assertEqual(wanted, outcome, "added value {0} should be equal to {1}".format(outcome, wanted))
            expect_rejected(base, clashing_units)

        # Business days only combine with business days.
        sevenBDays = Period(7, bdays)
        expect_rejected(sevenBDays, (months, days, weeks, years))
        self.assertEqual(sevenBDays + Period(2, bdays), Period('9B'))

        # test negative operator
        flipped = -Period(-13, weeks)
        self.assertEqual(flipped, Period(13, weeks))

        # test less operator
        zeroDays = Period(0, days)
        minusThreeBDays = Period(-3, bdays)
        self.assertTrue(minusThreeBDays < zeroDays)

        # test sub operator
        zeroDays = Period(0, days)
        minusThreeBDays = Period(-3, bdays)
        self.assertEqual(zeroDays - minusThreeBDays, Period('3b'))

        # test string representation
        twelveMonths = Period(12, months)
        self.assertEqual("1Y", twelveMonths.__str__())
예제 #30
0
def fetch_predict_phase(engine,
                        alpha_factors: Union[Transformer, Iterable[object]],
                        ref_date,
                        frequency,
                        universe,
                        batch=1,
                        neutralized_risk: Iterable[str] = None,
                        risk_model: str = 'short',
                        pre_process: Iterable[object] = None,
                        post_process: Iterable[object] = None,
                        warm_start: int = 0,
                        fillna: str = None,
                        fit_target: Union[Transformer, object] = None):
    """Build the processed factor data used to predict on ``ref_date``.

    Factors (and targets) are fetched for a schedule of dates ending at
    ``ref_date``, run through ``factor_processing`` over the last ``batch``
    dates present in the data, and only the rows dated ``ref_date`` are
    returned.

    :param engine: data source exposing ``fetch_factor_range``,
        ``fetch_dx_return_range``, ``fetch_factor_range_forward`` and
        ``fetch_risk_model_range``.
    :param alpha_factors: a ``Transformer``, or anything accepted by the
        ``Transformer`` constructor.
    :param ref_date: prediction date as a ``'%Y-%m-%d'`` string.
    :param frequency: period specification accepted by ``Period``,
        ``makeSchedule`` and ``map_freq``.
    :param universe: passed through unchanged to every ``engine`` fetch.
    :param batch: number of trailing dates processed together.
    :param neutralized_risk: names of risk columns handed to
        ``factor_processing`` as ``risk_factors``; skipped when falsy.
    :param risk_model: forwarded to ``engine.fetch_risk_model_range``.
    :param pre_process: forwarded to ``factor_processing``.
    :param post_process: forwarded to ``factor_processing``.
    :param warm_start: extra periods added to the look-back window.
    :param fillna: when truthy, missing factor values are replaced by the
        per-``trade_date`` median before remaining NaN rows are dropped.
    :param fit_target: when ``None`` the target comes from
        ``engine.fetch_dx_return_range``; otherwise it is fetched with
        ``engine.fetch_factor_range_forward``.
    :return: ``{'x_names': ..., 'predict': {'x': DataFrame, 'code': ...,
        'y': ...}}``. When no usable row is dated ``ref_date``, ``'x'`` is an
        empty frame and ``'code'`` / ``'y'`` are ``None``.
    """
    if isinstance(alpha_factors, Transformer):
        transformer = alpha_factors
    else:
        transformer = Transformer(alpha_factors)

    # Step back (warm_start + batch - 1) periods of `frequency` from ref_date.
    p = Period(frequency)
    p = Period(length=-(warm_start + batch - 1) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    horizon = map_freq(frequency)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates)

    if fillna:
        factor_df = factor_df.groupby('trade_date').apply(
            lambda x: x.fillna(x.median())).reset_index(drop=True).dropna()
    else:
        factor_df = factor_df.dropna()

    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe,
                                                 dates=dates,
                                                 horizon=horizon)
    else:
        # Fetch one extra period, then keep only the scheduled dates.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1],
                                              frequency)
        target_df = engine.fetch_factor_range_forward(universe,
                                                      factors=fit_target,
                                                      dates=dates +
                                                      [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        # `ffill()` is the non-deprecated spelling of fillna(method='pad').
        target_df = target_df.groupby('code').apply(lambda x: x.ffill())

    names = transformer.names

    if neutralized_risk:
        risk_df = engine.fetch_risk_model_range(universe,
                                                dates=dates,
                                                risk_model=risk_model)[1]
        # Risk columns that are also factor names already live in factor_df.
        used_neutralized_risk = list(set(neutralized_risk).difference(names))
        risk_df = risk_df[['trade_date', 'code'] +
                          used_neutralized_risk].dropna()
        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
        train_x = pd.merge(train_x,
                           target_df,
                           on=['trade_date', 'code'],
                           how='left')
        risk_exp = train_x[neutralized_risk].values.astype(float)
    else:
        train_x = pd.merge(factor_df,
                           target_df,
                           on=['trade_date', 'code'],
                           how='left')
        risk_exp = None

    # The final column (from target_df, merged last) is excluded from the NaN
    # check, so rows that have no target yet are kept.
    train_x.dropna(inplace=True, subset=train_x.columns[:-1])
    x_values = train_x[names].values.astype(float)
    y_values = train_x[['dx']].values.astype(float)

    date_label = pd.DatetimeIndex(train_x.trade_date).to_pydatetime()
    dates = np.unique(date_label)

    ref_datetime = dt.datetime.strptime(ref_date, '%Y-%m-%d')

    # Guard against an empty frame: indexing dates[-1] would raise IndexError.
    if len(dates) > 0 and dates[-1] == ref_datetime:
        end = dates[-1]
        start = dates[-batch] if batch <= len(dates) else dates[0]

        # NOTE(review): bisect requires date_label to be sorted ascending;
        # presumably the engine returns rows ordered by trade_date - confirm.
        left_index = bisect.bisect_left(date_label, start)
        right_index = bisect.bisect_right(date_label, end)
        this_raw_x = x_values[left_index:right_index]
        this_raw_y = y_values[left_index:right_index]
        sub_dates = date_label[left_index:right_index]

        if risk_exp is not None:
            this_risk_exp = risk_exp[left_index:right_index]
        else:
            this_risk_exp = None

        ne_x = factor_processing(this_raw_x,
                                 pre_process=pre_process,
                                 risk_factors=this_risk_exp,
                                 post_process=post_process)

        ne_y = factor_processing(this_raw_y,
                                 pre_process=pre_process,
                                 risk_factors=this_risk_exp,
                                 post_process=post_process)

        # Processing ran over the whole batch window; keep only `end` rows.
        inner_left_index = bisect.bisect_left(sub_dates, end)
        inner_right_index = bisect.bisect_right(sub_dates, end)

        ne_x = ne_x[inner_left_index:inner_right_index]
        ne_y = ne_y[inner_left_index:inner_right_index]

        left_index = bisect.bisect_left(date_label, end)
        right_index = bisect.bisect_right(date_label, end)

        codes = train_x.code.values[left_index:right_index]
    else:
        ne_x = None
        ne_y = None
        codes = None

    ret = dict()
    ret['x_names'] = transformer.names
    ret['predict'] = {
        'x': pd.DataFrame(ne_x, columns=transformer.names),
        'code': codes,
        # ne_y is None when no row is dated ref_date; calling .flatten() on
        # it unconditionally raised AttributeError.
        'y': ne_y.flatten() if ne_y is not None else None
    }

    return ret
예제 #31
0
    def testYearsMonthsAlgebra(self):
        """Check division, in-place addition and normalization of Periods."""
        oneYear = Period(length=1, units=TimeUnits.Years)
        sixMonths = Period(length=6, units=TimeUnits.Months)
        threeMonths = Period(length=3, units=TimeUnits.Months)

        # Dividing one year by n must give the matching number of months.
        for n, expected in ((4, threeMonths), (2, sixMonths)):
            self.assertEqual(
                oneYear / n, expected,
                "division error: {0} / {1:d}"
                " not equal to {2}".format(oneYear, n, expected))

        # Named `total` rather than `sum` so the builtin is not shadowed.
        nineMonths = Period(length=9, units=TimeUnits.Months)
        total = threeMonths
        total += sixMonths
        self.assertEqual(
            total, nineMonths,
            "sum error: {0}"
            " + {1}"
            " != {2}".format(threeMonths, sixMonths, nineMonths))

        # Adding a year to a month-based period keeps the result in months.
        twentyOneMonths = Period(length=21, units=TimeUnits.Months)
        total += oneYear
        self.assertEqual(
            total, twentyOneMonths,
            "sum error: {0}"
            " + {1}"
            " + {2}"
            " != {3}".format(threeMonths, sixMonths, oneYear,
                             twentyOneMonths))

        # Without normalize() a 12-month period keeps its length and units.
        twelveMonths = Period(length=12, units=TimeUnits.Months)
        self.assertEqual(
            twelveMonths.length(), 12,
            "normalization error: TwelveMonths.length"
            " is {0:d}"
            " instead of 12".format(twelveMonths.length()))
        self.assertEqual(
            twelveMonths.units(), TimeUnits.Months,
            "normalization error: TwelveMonths.units"
            " is {0:d}"
            " instead of {1:d}".format(twelveMonths.units(),
                                       TimeUnits.Months))

        # normalize() turns 12 months into 1 year. The messages report the
        # normalized period's values (the original formatted the
        # un-normalized `twelveMonths`, which made failures misleading).
        normalizedTwelveMonths = Period(length=12, units=TimeUnits.Months)
        normalizedTwelveMonths = normalizedTwelveMonths.normalize()
        self.assertEqual(
            normalizedTwelveMonths.length(), 1,
            "normalization error: TwelveMonths.length"
            " is {0:d}"
            " instead of 1".format(normalizedTwelveMonths.length()))
        self.assertEqual(
            normalizedTwelveMonths.units(), TimeUnits.Years,
            "normalization error: TwelveMonths.units"
            " is {0:d}"
            " instead of {1:d}".format(normalizedTwelveMonths.units(),
                                       TimeUnits.Years))

        # Day-based periods keep their units under normalize().
        thirtyDays = Period(length=30, units=TimeUnits.Days)
        normalizedThirtyDays = thirtyDays.normalize()
        self.assertEqual(
            normalizedThirtyDays.units(), TimeUnits.Days,
            "normalization error: ThirtyDays.units"
            " is {0:d}"
            " instead of {1:d}".format(normalizedThirtyDays.units(),
                                       TimeUnits.Days))

        # Business-day periods keep their units under normalize() as well.
        thirtyBDays = Period(length=30, units=TimeUnits.BDays)
        normalizedThirtyBDays = thirtyBDays.normalize()
        self.assertEqual(
            normalizedThirtyBDays.units(), TimeUnits.BDays,
            "normalization error: ThirtyBDays.units"
            " is {0:d}"
            " instead of {1:d}".format(normalizedThirtyBDays.units(),
                                       TimeUnits.BDays))