Ejemplo n.º 1
0
def GetEndDateList(data, freq, trim_end=False):
    """Return the sorted, de-duplicated period-end dates covering ``data``'s
    DatetimeIndex.

    ``trim_end`` is mainly used when resampling daily data to a lower
    frequency: it drops the last (possibly incomplete) period end.

    :param data: object with a DatetimeIndex (e.g. DataFrame/Series)
    :param freq: 'M' (month end), 'W' (week end, Saturday) or 'Y' (year end)
    :param trim_end: drop the final period end when True
    :return: sorted list of unique pd.Timestamp period-end dates
    :raises ValueError: if ``freq`` is not one of 'M', 'W', 'Y'
    """
    idx = data.index
    if freq == 'M':
        # Keep dates already on a month end, push the rest forward.
        date_list = idx.where(idx == ((idx + MonthEnd()) - MonthEnd()),
                              idx + MonthEnd())
    elif freq == 'W':
        week_day = 5  # 0-6 map to Monday..Sunday, so 5 anchors on Saturday
        date_list = idx.where(idx == ((idx + Week(weekday=week_day)) - Week()),
                              idx + Week(weekday=week_day))
    elif freq == 'Y':
        date_list = idx.where(idx == ((idx + YearEnd()) - YearEnd()),
                              idx + YearEnd())
    else:
        # Bug fix: an unrecognized freq previously fell through and raised
        # NameError on the undefined ``date_list``.
        raise ValueError("freq must be one of 'M', 'W', 'Y', got %r" % freq)
    if trim_end:
        return sorted(set(date_list))[:-1]
    return sorted(set(date_list))
Ejemplo n.º 2
0
def GetEndDateList(data, freq, trim_end=False):
    """Map ``data``'s DatetimeIndex onto unique, sorted period-end dates.

    ``trim_end`` is mainly used with daily data: when True, the last
    (possibly incomplete) period end is dropped from the result.

    :param data: object with a DatetimeIndex
    :param freq: 'M' (month end), 'W' (week end, Saturday) or 'Y' (year end)
    :param trim_end: drop the final period end when True
    :return: sorted list of unique pd.Timestamp period-end dates
    :raises ValueError: if ``freq`` is not one of 'M', 'W', 'Y'
    """
    idx = data.index
    if freq == 'M':
        date_list = idx.where(idx == ((idx + MonthEnd()) - MonthEnd()),
                              idx + MonthEnd())
    elif freq == 'W':
        week_day = 5  # 0-6 map to Monday..Sunday, so 5 anchors on Saturday
        date_list = idx.where(idx == ((idx + Week(weekday=week_day)) - Week()),
                              idx + Week(weekday=week_day))
    elif freq == 'Y':
        date_list = idx.where(idx == ((idx + YearEnd()) - YearEnd()),
                              idx + YearEnd())
    else:
        # Bug fix: an unrecognized freq previously fell through and raised
        # NameError on the undefined ``date_list``. (Dead commented-out
        # strftime-based conversions were also removed.)
        raise ValueError("freq must be one of 'M', 'W', 'Y', got %r" % freq)
    if trim_end:
        return sorted(set(date_list))[:-1]
    return sorted(set(date_list))
Ejemplo n.º 3
0
def resample_index(dat, to_freq):
    """Snap a DatetimeIndex onto natural calendar period-end dates.

    e.g. 20180808 -> 20180831, 20180809 -> 20180831.

    Notes:
        1. A copy of ``dat`` is modified and returned, so the caller's
           object keeps its original index.
        2. True trading dates are masked: every date is replaced by its
           natural calendar period end.

    :param dat: object with a DatetimeIndex (e.g. DataFrame/Series)
    :param to_freq: 'M', 'W' or 'Y'; any other value returns the copy
        with its index unchanged
    :return: copy of ``dat`` with a period-end DatetimeIndex
    """
    result = dat.copy()
    saturday = 5  # Week(weekday=...): 0=Mon .. 6=Sun
    if to_freq == 'M':
        forward, backward = MonthEnd(), MonthEnd()
    elif to_freq == 'W':
        forward, backward = Week(weekday=saturday), Week()
    elif to_freq == 'Y':
        forward, backward = YearEnd(), YearEnd()
    else:
        return result
    idx = result.index
    # Dates already on a period end survive the forward/backward round trip.
    already_on_end = idx == ((idx + forward) - backward)
    result.index = idx.where(already_on_end, idx + forward)
    return result
Ejemplo n.º 4
0
def time_for_next_update(last_time, period='D', hour=9):
    """Compute the next update time after ``last_time``.

    Period meanings:
        'D': move to the next business day
        'W': move to next Monday
        'M': move to the first day of next month
        'Q': move to the first day of the next quarter
    The hour is then set to ``hour`` (minutes/seconds zeroed by normalize).

    :param last_time: last update timestamp; NaT/None yields MARKET_START
    :param period: 'D', 'W', 'M' or 'Q' (case-insensitive)
    :param hour: hour of day to schedule the update at
    :raises TypeError: on an unrecognized period
    """
    if pd.isnull(last_time):
        return MARKET_START
    period = period.upper()
    # Bug fix: ``offset.apply(ts)`` was deprecated in pandas 1.0 and removed
    # in pandas 2.0 — use ``ts + offset`` instead (consistent with the
    # sibling next_update_time helpers in this file).
    if period == 'D':
        return (last_time + BDay(normalize=True)).replace(hour=hour)
    elif period == 'W':
        return (last_time + Week(normalize=True, weekday=0)).replace(hour=hour)
    elif period == 'M':
        return (last_time + MonthBegin(normalize=True)).replace(hour=hour)
    elif period == 'Q':
        return (last_time + QuarterBegin(normalize=True)).replace(hour=hour)
    else:
        raise TypeError('不能识别的周期类型,仅接受{}'.format(('D', 'W', 'M', 'Q')))
Ejemplo n.º 5
0
    def test_get_offset_name(self):
        # Each offset must report its canonical frequency alias.
        assert BDay().freqstr == "B"
        assert BDay(2).freqstr == "2B"
        assert BMonthEnd().freqstr == "BM"

        for day_num, day_name in enumerate(["MON", "TUE", "WED", "THU", "FRI"]):
            assert Week(weekday=day_num).freqstr == f"W-{day_name}"

        assert LastWeekOfMonth(weekday=WeekDay.SUN).freqstr == "LWOM-SUN"
def process_setup(config,
                  process_config,
                  item,
                  hist_end_date,
                  group=P_BASELINE_PARAMS):
    '''
    Complete the build of the configuration object and load the parameters
    for one item run.

    Derives the history window (hist_start_date .. hist_end_date, whole
    weeks), the publish window, and the forecast end date, and decides
    whether enough history exists to run the baseline processes.

    2016-06-03 -- Added logic to respect the hist_start_date on the item.

    :param config: mutable config object (dict-style assignment plus
        attribute reads) — schema defined elsewhere; TODO confirm
    :param process_config: provides ``forecast_weeks``
    :param item: item whose parameters are loaded via ``get_item_params``
    :param hist_end_date: last date of history for this run
    :param group: parameter group selector, wrapped in ``In(...)``
    :return: (config, params) tuple
    '''
    # Get baseline parameters and update config with all item parameters
    # and attributes.
    config, params = get_item_params(config,
                                     item,
                                     as_of=hist_end_date,
                                     group=In(group))

    # Establish the window of hist data we will be processing in this run.
    hist_lookback = get_lookback(config)
    # We can have a hist_start_date controlled by the planner to exclude
    # some days.

    # We want whole weeks, so our hist start date may be up to 6 days after
    # the planner-set hist_start_date.
    max_hist_weeks = (hist_end_date - config.hist_start_date).days // 7
    if hist_lookback:
        max_hist_weeks = min(max_hist_weeks, hist_lookback)

    # First day of the earliest whole week kept (+ Day() steps past the
    # week boundary itself).
    hist_start_date = hist_end_date - Week(max_hist_weeks) + Day()

    config[
        'hist_start_date'] = hist_start_date  # NOTE(review): this overwrites the original value from the item table — confirm intended
    config['hist_end_date'] = hist_end_date
    config['hist_lookback'] = max_hist_weeks

    config['publish_start_date'] = hist_end_date - Week(
        config.publish_window) + Day()
    config['forecast_end_date'] = hist_end_date + Week(
        process_config.forecast_weeks)

    if config.publish_start_date < config.hist_start_date:
        # We do not construct a baseline as we do not have enough history;
        # everything is going to be satisfied by a proxy.

        # We trigger the correct result by setting the hist_start_date to
        # the day after the last hist date: this yields an empty hist_df,
        # no geogs_with_hist and no arrays to smooth.

        config['hist_start_date'] = config.hist_end_date + Day()

        config['run_baseline_processes'] = False
    else:
        config['run_baseline_processes'] = True

    return config, params
Ejemplo n.º 7
0
def next_update_time(last_updated, freq='D', hour=18, minute=0):
    """Compute the next update time after ``last_updated``.

    Frequency meanings (case-insensitive):
        'D': next business day
        'W': next Monday
        'M': first day of next month
        'Q': first day of next quarter
    The clock is then set to ``hour``:``minute``.
    """
    if pd.isnull(last_updated):
        return MARKET_START
    freq = freq.upper()
    steps = {
        'D': BDay(n=1, normalize=True),
        'W': Week(normalize=True, weekday=0),
        'M': MonthBegin(n=1, normalize=True),
        'Q': QuarterBegin(normalize=True, startingMonth=1),
    }
    if freq not in steps:
        raise TypeError('不能识别的周期类型,仅接受{}'.format(('D', 'W', 'M', 'Q')))
    res = last_updated + steps[freq]
    return res.replace(hour=hour, minute=minute)
Ejemplo n.º 8
0
 def _monthly_bd_gen(start_date, end_date):
     """Comma-joined 'YYYY/MM/DD' strings: for every month end within
     [start_date, end_date], the second Wednesday that follows it."""
     month_ends = pd.date_range(start_date, end_date, freq='M').to_series()
     # Week(2, weekday=2) rolls each month-end date forward two Wednesdays.
     second_wednesdays = month_ends.apply(lambda d: d + Week(2, weekday=2))
     return ','.join(second_wednesdays.apply(lambda d: d.strftime('%Y/%m/%d')))
Ejemplo n.º 9
0
def prep_FF_weekly(path):
    """Load and tidy the weekly Fama-French factor files under ``path``.

    Returns a tuple ``(FF, RF)``: ``FF`` holds the merged factor returns
    (Mkt-RF, SMB, HML plus momentum, RF column dropped) and ``RF`` the
    risk-free rate, both keyed by ``DATE``.
    """
    # Three-factor file: dates are YYYYMMDD strings; align each week onto
    # its Friday via the Week(weekday=4) offset.
    FF3 = pd.read_csv(path + '\\Data\\F-F_Research_Data_Factors_weekly.csv')
    FF3['DATE'] = pd.to_datetime(FF3['DATE'].astype(str),
                                 format='%Y%m%d') + Week(weekday=4)

    # Rescale the percent columns to decimal returns.
    factor_cols = ['Mkt-RF', 'SMB', 'HML', 'RF']
    FF3[factor_cols] = FF3[factor_cols].div(100)

    # Momentum file: dates are already m/d/Y formatted.
    FF_mom = pd.read_csv(path + '\\Data\\F-F_Momentum_Factor_weekly.csv')
    FF_mom['DATE'] = pd.to_datetime(FF_mom['DATE'].astype(str),
                                    format='%m/%d/%Y')

    # Inner-join the two factor tables on DATE.
    FF = FF3.merge(FF_mom, on='DATE')

    # Keep the risk-free series separately, then drop it from FF.
    RF = FF[['DATE', 'RF']]
    FF.drop('RF', axis=1, inplace=True)

    return FF, RF
Ejemplo n.º 10
0
def test_week_add_invalid():
    # Adding an incompatible offset to an anchored Week must raise a
    # TypeError — and specifically not leak an AttributeError.
    anchored_week = Week(weekday=1)
    incompatible = Day()
    with pytest.raises(TypeError, match="Cannot add"):
        anchored_week + incompatible
Ejemplo n.º 11
0
def vx_expiry(year, month):
    """Return the VX (VIX futures) expiration date for the given contract
    year/month: 30 days before the third Friday of the *following* month.

    :param year: contract year
    :param month: contract month (1-12)
    :return: expiration timestamp (a Wednesday under the CBOE rule)
    """
    # First day of the following month, computed with the stdlib instead
    # of dateutil.relativedelta (December rolls into January next year).
    nxt_year, nxt_month = (year + 1, 1) if month == 12 else (year, month + 1)
    t = datetime(nxt_year, nxt_month, 1)
    friday = Week(weekday=4)
    # Week(weekday=4) rolls forward to the next Friday; when t already is
    # a Friday the first step lands a week out, hence 2 vs 3 multiples to
    # reach the third Friday of the month.
    if t.weekday() != 4:
        third_friday = t + 3 * friday
    else:
        third_friday = t + 2 * friday
    return third_friday - timedelta(days=30)
Ejemplo n.º 12
0
def get_visits(config):
    """Load Chrome browsing history from the sqlite database under
    ``config['data_directory']/chrome_history`` and return it as a
    DataFrame indexed by visit id.

    Adds derived columns:
      - ``visit_week``: the preceding Sunday (midnight) of each visit
      - ``weeks_observed``: number of distinct weeks each URL was visited
    Drops low-signal transition types (reloads, form submits, subframes).
    """
    # Join each visit to its URL and (via from_visit) to the referring
    # visit's URL/title.
    q = """select
               v.id as id
               ,u.url
               ,u.title
               ,v.visit_time
               ,v.transition
               ,v.visit_duration
               ,v.from_visit
               ,uf.url as from_url
               ,uf.title as from_title
             from visits v
             join urls u on u.id = v.url
             left join visits vf on vf.id=v.from_visit
             left join urls uf on uf.id=vf.url
             order by v.visit_time desc
    """

    visits = pd.read_sql(sql=q,
                         con=sqlite3.connect(
                             os.path.join(config['data_directory'],
                                          'chrome_history')),
                         index_col='id')

    # Chrome stores times as Windows-epoch values; convert to datetimes.
    visits['visit_time'] = windows_epoch_to_datetime(visits['visit_time'])
    # rollback(weekday=6) snaps each visit to the preceding Sunday
    # (normalize=True zeroes the time-of-day).
    visits['visit_week'] = visits['visit_time'].apply(
        lambda x: Week(weekday=6, normalize=True).rollback(x))

    # https://developer.chrome.com/extensions/history#type-TransitionType
    transition_types = {
        0: 'link',
        1:
        'typed',  # Typed into search bar (also auto-suggest from search bar)
        2: 'auto_bookmark',
        3: 'auto_subframe',  # Automatic navigation within a subframe
        4: 'manual_subframe',  # Navigation within a subframe
        5:
        'generated',  # Did a search from the search bar, and chose an option)
        6: 'auto_toplevel',
        7: 'form_submit',
        8: 'reload',
        9: 'keyword',
        10: 'keyword_generated'
    }
    # The low byte of the transition field holds the core type; the high
    # bits are qualifier flags, masked off here.
    visits['transition'] = visits['transition'].apply(
        lambda x: transition_types[x & 0x000000FF])
    # Drop navigation noise that doesn't reflect deliberate browsing.
    idx = visits.transition.isin(
        ['reload', 'form_submit', 'auto_subframe', 'manual_subframe'])
    visits.drop(visits.index[idx], inplace=True)

    # Add a count of unique weeks visited by URL (for some reason to_numeric is needed to convert from a datetime)
    visits['weeks_observed'] = pd.to_numeric(
        visits.groupby('url')['visit_week'].transform('nunique'))

    return visits
Ejemplo n.º 13
0
def resample_index(dat, to_freq):
    """Replace a DatetimeIndex with period-end dates at ``to_freq``.

    Works on an internal copy, so the caller's ``dat`` keeps its index.

    :param dat: object with a DatetimeIndex (e.g. DataFrame/Series)
    :param to_freq: 'M', 'W' (Saturday-anchored) or 'Y'; anything else
        returns the copy unchanged
    :return: copy of ``dat`` with its index snapped to period ends
    """
    out = dat.copy()
    idx = out.index
    if to_freq == 'M':
        on_end = idx == ((idx + MonthEnd()) - MonthEnd())
        out.index = idx.where(on_end, idx + MonthEnd())
    elif to_freq == 'W':
        # weekday=5 anchors on Saturday (0=Mon .. 6=Sun).
        on_end = idx == ((idx + Week(weekday=5)) - Week())
        out.index = idx.where(on_end, idx + Week(weekday=5))
    elif to_freq == 'Y':
        on_end = idx == ((idx + YearEnd()) - YearEnd())
        out.index = idx.where(on_end, idx + YearEnd())
    return out
def digest_stocks_features(stock_lst, start_index, finish_index):
    """Build per-signal feature frames for each stock in
    ``stock_lst[start_index:finish_index + 1]``.

    For every signal of a stock, the price history up to the signal date is
    resampled at monthly ('BM'), weekly ('W') and business-daily ('B')
    frequency, truncated to the last 100 periods, and passed to
    ``do_features``; results are stored in the module-level
    ``timestamps_features_*`` dicts and ``all_stocks_signals_features``.

    NOTE(review): relies on module-level state — ``stocks_data_dict``,
    ``dict_of_stocks_fp``, ``do_features``, ``timestamps_features_month/
    week/day``, ``lock`` and ``all_stocks_signals_features`` — presumably
    shared with worker threads; confirm thread-safety of the unlocked dict
    writes.
    """
    for stock in stock_lst[start_index:finish_index + 1]:
        print("working on {}".format(stock))

        stock_data_df = stocks_data_dict[stock]
        all_signals = dict_of_stocks_fp[dict_of_stocks_fp.symbol == stock]
        # TODO add counter and progress bar or percentage ( curr / len of signals) # TODO per stock per all stocks
        curr_stock_signals_features = pd.DataFrame()
        for signal_index, row in enumerate(all_signals.iterrows()):
            curr_signal_start_date = pd.to_datetime(row[1].start_date)
            signal_label = row[1].false_positive

            # Monthly view: last 100 business-month ends before the signal.
            curr_signal_month_data = stock_data_df["adj_close"][stock_data_df.index <= curr_signal_start_date].resample(
                "BM").last().interpolate()
            curr_signal_month_data = curr_signal_month_data[
                curr_signal_month_data.index >= curr_signal_start_date - BMonthEnd(100)]

            # Weekly view: last 100 weeks before the signal.
            curr_signal_week_data = stock_data_df["adj_close"][stock_data_df.index <= curr_signal_start_date].resample(
                "W").last().interpolate()
            curr_signal_week_data = curr_signal_week_data[
                curr_signal_week_data.index >= curr_signal_start_date - Week(100)]

            # Daily view: last 100 business days before the signal.
            curr_signal_day_data = stock_data_df["adj_close"][stock_data_df.index <= curr_signal_start_date].resample(
                "B").last().interpolate()
            curr_signal_day_data = curr_signal_day_data[
                curr_signal_day_data.index >= curr_signal_start_date - BDay(100)]

            # do_features returns (per-signal frame, feature set); only the
            # last curr_signal_only (daily) is kept for concatenation.
            curr_signal_only, curr_signal_features_month = do_features(curr_signal_month_data, curr_signal_start_date,
                                                                       all_signals, stock, signal_index, signal_label
                                                                       )
            curr_signal_only, curr_signal_features_week = do_features(curr_signal_week_data, curr_signal_start_date,
                                                                      all_signals, stock, signal_index, signal_label
                                                                      )
            curr_signal_only, curr_signal_features_day = do_features(curr_signal_day_data, curr_signal_start_date,
                                                                     all_signals, stock, signal_index, signal_label
                                                                     )

            timestamps_features_month[str(stock) + "_" + str(signal_index)] = curr_signal_features_month
            timestamps_features_week[str(stock) + "_" + str(signal_index)] = curr_signal_features_week
            timestamps_features_day[str(stock) + "_" + str(signal_index)] = curr_signal_features_day

            # adding to df of current stock
            curr_stock_signals_features = pd.concat([curr_stock_signals_features, curr_signal_only])

        # disregard the date index
        # still saving it for train/test separations (e.g. learn until 2010 and test afterwards)
        curr_stock_signals_features = curr_stock_signals_features.reset_index()

        with lock:
            # add to the list of all signals of all stocks
            print("finished {}".format(stock))
            all_stocks_signals_features[stock] = curr_stock_signals_features

    print("finished stocks features")
Ejemplo n.º 15
0
    def __init__(
        self,
        num_timeseries: int = 10,
        num_steps: int = 30,
        freq: str = "1H",
        start: str = "2000-01-01 00:00:00",
        # Generates constant dataset of 0s with explicit NaN missing values
        is_nan: bool = False,
        # Inserts random constant value for each time series
        is_random_constant: bool = False,
        # Generates constants on various scales
        is_different_scales: bool = False,
        # Determines whether the time series in the test
        # and train set should have different constant values
        is_piecewise: bool = False,
        # Determines whether to add Gaussian noise to the constant dataset
        is_noise: bool = False,
        # Determines whether some time series will have very long lengths
        is_long: bool = False,
        # Determines whether some time series will have very short lengths
        is_short: bool = False,
        # Determines whether to add linear trends
        is_trend: bool = False,
        # Number of missing values in the middle of the time series
        num_missing_middle: int = 0,
        # Determines whether to add promotions to the target time series
        # and to store in metadata
        is_promotions: bool = False,
        # Determines whether to add holidays to the target time series
        # and to store in metadata
        holidays: Optional[List[pd.Timestamp]] = None,
    ) -> None:
        """Configure a synthetic constant-dataset generator.

        The boolean flags (documented inline above) each toggle one
        perturbation of the base constant series.
        """
        super().__init__(freq)
        self.num_timeseries = num_timeseries
        self.num_steps = num_steps
        # 80/20 train/prediction split via integer arithmetic.
        self.num_training_steps = self.num_steps // 10 * 8
        self.prediction_length = self.num_steps - self.num_training_steps
        self.is_nan = is_nan
        self.is_random_constant = is_random_constant
        self.is_different_scales = is_different_scales
        self.is_piecewise = is_piecewise
        self.is_noise = is_noise
        self.is_long = is_long
        self.is_short = is_short
        self.is_trend = is_trend
        self.num_missing_middle = num_missing_middle
        self.is_promotions = is_promotions
        self.holidays = holidays

        # self.freq is set by the base-class __init__ above; when it is a
        # Week offset, re-anchor it on the start date's weekday so the
        # generated periods line up with ``start``.
        if isinstance(self.freq, Week):
            self.freq = Week(
                self.freq.n, weekday=pd.Timestamp(start).weekday()
            )
        self.start = cast(pd.Period, pd.Period(start, self.freq))
Ejemplo n.º 16
0
def info_process(dic, contract_type):
    """Get options according to the inputs.

    Collects option chains for the first 3 Fridays (on or after the
    requested target date) that actually have options available.

    :param dic: mapping with keys "Target Date" ('%Y-%m-%d' string) and
        "Ticker"
    :param contract_type: passed through to ``get_options``
    :return: concatenated DataFrame with columns Strike, Price,
        Volatility, Expiration Date
    """
    target_date = datetime.strptime(
        dic["Target Date"], "%Y-%m-%d")  # Target date to datetime object
    if target_date.isoweekday(
    ) != 5:  # The target date is moved to next Friday if it's not.
        target_date = target_date + Week(weekday=4)
    ls = []
    # Look into 3 available target dates in the future.
    # NOTE(review): if get_options keeps failing, this loops forever —
    # same as the original; consider a max-attempts cap.
    while len(ls) < 3:
        try:
            # If options are available on this target date, append them.
            ls.append(get_options(dic["Ticker"], target_date, contract_type))
        except Exception:
            # Bug fix: was a bare ``except:``, which also swallowed
            # KeyboardInterrupt/SystemExit. No options here — skip.
            pass
        # Either way, advance to the next Friday.
        target_date = target_date + Week(weekday=4)
    df_options = pd.concat(ls)  # Concatenate the dataframes into one.
    df_options.columns = ["Strike", "Price", "Volatility",
                          "Expiration Date"]  # Change column names
    return df_options
 def to_offset(self) -> DateOffset:
     """Translate this frequency's string value into a pandas offset."""
     factories = {
         "H": lambda: Hour(1),
         "D": lambda: Day(1),
         "W-MON": lambda: Week(1, weekday=0),
         "MS": lambda: MonthBegin(1),
         "QS-DEC": lambda: QuarterBegin(startingMonth=10),
         "AS": lambda: YearBegin(1),
     }
     if self.value not in factories:
         raise NotImplementedError(self.value)
     return factories[self.value]()
Ejemplo n.º 18
0
def test_get_offset():
    # Unknown or malformed aliases must raise with the standard message.
    for bad_alias in ("gibberish", "QS-JAN-B"):
        with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG):
            _get_offset(bad_alias)

    # Valid aliases (in any casing) must resolve to the expected offsets.
    expected_by_alias = {
        "B": BDay(),
        "b": BDay(),
        "bm": BMonthEnd(),
        "Bm": BMonthEnd(),
        "W-MON": Week(weekday=0),
        "W-TUE": Week(weekday=1),
        "W-WED": Week(weekday=2),
        "W-THU": Week(weekday=3),
        "W-FRI": Week(weekday=4),
    }
    for alias, expected in expected_by_alias.items():
        actual = _get_offset(alias)
        assert actual == expected, (
            f"Expected {repr(alias)} to yield {repr(expected)} "
            f"(actual: {repr(actual)})")
Ejemplo n.º 19
0
def next_update_time(last_updated, freq='D', hour=18, minute=0, second=0):
    """Compute the next update time after ``last_updated``.

    Frequency meanings (case-sensitive — 'm' is minute, 'M' is month):
        'S': next second        'm': next minute        'H': next hour
        'D': next business day  'W': next Monday
        'M': first day of next month
        'Q': first day of next quarter
    For 'D'/'W'/'M'/'Q' the clock is set to ``hour``:``minute``:``second``;
    the sub-day frequencies keep the remaining clock fields as-is.
    """
    if pd.isnull(last_updated):
        return MARKET_START
    # Sub-day frequencies: shift one unit, no clock adjustment.
    sub_day = {'S': Second(), 'm': Minute(), 'H': Hour()}
    if freq in sub_day:
        return last_updated + sub_day[freq]
    anchored = {
        'D': BDay(n=1, normalize=True),
        'W': Week(normalize=True, weekday=0),
        'M': MonthBegin(n=1, normalize=True),
        'Q': QuarterBegin(normalize=True, startingMonth=1),
    }
    if freq not in anchored:
        raise TypeError('不能识别的周期类型,仅接受{}'.format(
            ('S', 'm', 'H', 'D', 'W', 'M', 'Q')))
    res = last_updated + anchored[freq]
    return res.replace(hour=hour, minute=minute, second=second)
Ejemplo n.º 20
0
 def __init__(self,
              n=1,
              normalize=False,
              weekmask='Mon Tue Wed Thu Fri',
              holidays=None,
              calendar=None,
              **kwds):
     """Build a custom weekly offset backed by a CustomBusinessDay
     calendar, anchored on Friday via ``w_offset``.

     NOTE(review): ``self.kwds`` is read before ever being assigned here —
     assumes the base offset class provides a mutable ``kwds`` dict;
     confirm against the superclass.
     """
     self.n = n
     # NOTE(review): stored as 'normalized' while the parameter is
     # 'normalize' — pandas offsets use the attribute name 'normalize';
     # confirm this spelling is intentional.
     self.normalized = normalize
     self.kwds.update(kwds)
     self.offset = kwds.get('offset', timedelta(0))
     # One-business-day stepper honoring the weekmask/holidays/calendar.
     self.cbday = CustomBusinessDay(n=1,
                                    normalize=normalize,
                                    weekmask=weekmask,
                                    holidays=holidays,
                                    calendar=calendar,
                                    offset=self.offset)
     self.calendar = self.cbday.calendar
     self.holidays = holidays
     # Friday anchor (weekday=4, 0=Mon .. 6=Sun).
     self.w_offset = Week(weekday=4)
Ejemplo n.º 21
0
def test_weeks_onoffset():
    # GH#18510: a Week with weekday=None and normalize=False must report
    # is_on_offset True for any timestamp; cross-check the fast path
    # against the round-trip (ts + offset) - offset == ts.
    offset = Week(n=2, weekday=None)
    cases = [
        ("1862-01-13 09:03:34.873477378+0210", "Africa/Lusaka"),
        ("1856-10-24 16:18:36.556360110-0717", "Pacific/Easter"),
    ]
    for stamp, zone in cases:
        ts = Timestamp(stamp, tz=zone)
        fast = offset.is_on_offset(ts)
        slow = (ts + offset) - offset == ts
        assert fast == slow
Ejemplo n.º 22
0
 def __init__(self,
              n=1,
              normalize=False,
              weekmask='Mon Tue Wed Thu Fri',
              holidays=None,
              calendar=None,
              **kwds):
     """Build a custom weekly offset backed by a CustomBusinessDay
     calendar, anchored on Friday via ``w_offset``.

     Attributes are set with ``object.__setattr__`` — presumably to
     bypass the immutability that pandas DateOffset subclasses enforce;
     confirm against the base class.

     NOTE(review): ``self.kwds`` is read before ever being assigned here —
     assumes the base offset class provides a mutable ``kwds`` dict.
     """
     self.n = n
     # NOTE(review): stored as 'normalized' while pandas offsets use
     # 'normalize' — confirm this spelling is intentional.
     object.__setattr__(self, "normalized", normalize)
     self.kwds.update(kwds)
     object.__setattr__(self, "offset", kwds.get('offset', timedelta(0)))
     # One-business-day stepper honoring the weekmask/holidays/calendar.
     object.__setattr__(
         self, "cbday",
         CustomBusinessDay(n=1,
                           normalize=normalize,
                           weekmask=weekmask,
                           holidays=holidays,
                           calendar=calendar,
                           offset=self.offset))
     object.__setattr__(self, "calendar", self.cbday.calendar)
     object.__setattr__(self, "holidays", holidays)
     # Friday anchor (weekday=4, 0=Mon .. 6=Sun).
     object.__setattr__(self, "w_offset", Week(weekday=4))
Ejemplo n.º 23
0
    'BQS-JAN' : BQuarterBegin(startingMonth=1),
    'BQS'     : BQuarterBegin(startingMonth=1),
    'BQS-FEB' : BQuarterBegin(startingMonth=2),
    'BQS-MAR' : BQuarterBegin(startingMonth=3),
    'BQS-APR' : BQuarterBegin(startingMonth=4),
    'BQS-MAY' : BQuarterBegin(startingMonth=5),
    'BQS-JUN' : BQuarterBegin(startingMonth=6),
    'BQS-JUL' : BQuarterBegin(startingMonth=7),
    'BQS-AUG' : BQuarterBegin(startingMonth=8),
    'BQS-SEP' : BQuarterBegin(startingMonth=9),
    'BQS-OCT' : BQuarterBegin(startingMonth=10),
    'BQS-NOV' : BQuarterBegin(startingMonth=11),
    'BQS-DEC' : BQuarterBegin(startingMonth=12),

    # Weekly
    'W-MON' : Week(weekday=0),
    'W-TUE' : Week(weekday=1),
    'W-WED' : Week(weekday=2),
    'W-THU' : Week(weekday=3),
    'W-FRI' : Week(weekday=4),
    'W-SAT' : Week(weekday=5),
    'W-SUN' : Week(weekday=6),

}

_offset_to_period_map = {
    'WEEKDAY' : 'D',
    'EOM' : 'M',
    'B' : 'D',
    'BM' : 'M',
    'BQS' : 'Q',
Ejemplo n.º 24
0
def create_data():
    """Create the nested fixture data used for pickle/msgpack round-trip
    compatibility tests.

    Returns a dict keyed by container kind (series, frame, index, scalars,
    mi, sp_series, sp_frame, cat, timestamp, offsets), each holding
    representative pandas objects. ``_loose_version`` gates features by
    the pandas version the fixtures are generated under.
    """

    # Raw column data reused by several frames below; 'E' mixes dtypes.
    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    # One index of each major flavor.
    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10),
                 float=Index(np.arange(10, dtype=np.float64)),
                 uint=Index(np.arange(10, dtype=np.uint64)),
                 timedelta=timedelta_range('00:00:00', freq='30T', periods=10))

    # RangeIndex exists from pandas 0.18, interval_range from 0.21.
    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)

    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])),
                                          names=['first', 'second']))

    # Series fixtures: plain dtypes, multi-index, duplicate labels,
    # categorical, datetimes (naive and tz-aware) and periods.
    series = dict(float=Series(data['A']),
                  int=Series(data['B']),
                  mixed=Series(data['E']),
                  ts=Series(np.arange(10).astype(np.int64),
                            index=date_range('20130101', periods=10)),
                  mi=Series(np.arange(5).astype(np.float64),
                            index=MultiIndex.from_tuples(tuple(
                                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                                         names=['one',
                                                                'two'])),
                  dup=Series(np.arange(5).astype(np.float64),
                             index=['A', 'B', 'C', 'D', 'A']),
                  cat=Series(Categorical(['foo', 'bar', 'baz'])),
                  dt=Series(date_range('20130101', periods=5)),
                  dt_tz=Series(
                      date_range('20130101', periods=5, tz='US/Eastern')),
                  period=Series([Period('2000Q1')] * 5))

    # DataFrame fixtures, including duplicate column labels ('ABCDA').
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(float=DataFrame({
        'A': series['float'],
        'B': series['float'] + 1
    }),
                 int=DataFrame({
                     'A': series['int'],
                     'B': series['int'] + 1
                 }),
                 mixed=DataFrame({k: data[k]
                                  for k in ['A', 'B', 'C', 'D']}),
                 mi=DataFrame(
                     {
                         'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)
                     },
                     index=MultiIndex.from_tuples(tuple(
                         zip(*[['bar', 'bar', 'baz', 'baz', 'baz'],
                               ['one', 'two', 'one', 'two', 'three']])),
                                                  names=['first', 'second'])),
                 dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                               columns=['A', 'B', 'A']),
                 cat_onecol=DataFrame({'A': Categorical(['foo', 'bar'])}),
                 cat_and_float=DataFrame({
                     'A':
                     Categorical(['foo', 'bar', 'baz']),
                     'B':
                     np.arange(3).astype(np.int64)
                 }),
                 mixed_dup=mixed_dup_df,
                 dt_mixed_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET')
                     },
                     index=range(5)),
                 dt_mixed2_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET'),
                         'C': Timestamp('20130603', tz='UTC')
                     },
                     index=range(5)))

    # Categoricals sized to exercise int8/int16/int32 code storage.
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    # The Timestamp 'offset' kwarg was renamed to 'freq' in pandas 0.19.2.
    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    # One instance of each DateOffset subclass, with non-default params.
    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    return dict(series=series,
                frame=frame,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
def create_data():
    """Create the pickle/msgpack fixture data.

    Builds a nested dict of representative pandas objects (Series, DataFrame,
    Index, MultiIndex, sparse containers, Categoricals, Timestamps and offset
    instances) used to generate legacy serialization-compatibility files.
    """
    raw = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = {
        "timestamp": Timestamp("20130101"),
        "period": Period("2012", "M"),
    }

    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30T", periods=10),
        "range": RangeIndex(10),
    }

    # interval_range only exists from pandas 0.21 onwards.
    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    mi = {
        "reg2": MultiIndex.from_tuples(
            tuple(zip(
                ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                ["one", "two", "one", "two", "one", "two", "one", "two"],
            )),
            names=["first", "second"],
        )
    }

    series = {
        "float": Series(raw["A"]),
        "int": Series(raw["B"]),
        "mixed": Series(raw["E"]),
        "ts": Series(
            np.arange(10).astype(np.int64),
            index=date_range("20130101", periods=10),
        ),
        "mi": Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(
                tuple(zip([1, 1, 2, 2, 2], [3, 4, 3, 4, 5])),
                names=["one", "two"],
            ),
        ),
        # Duplicate index labels on purpose ("A" appears twice).
        "dup": Series(
            np.arange(5).astype(np.float64),
            index=["A", "B", "C", "D", "A"],
        ),
        "cat": Series(Categorical(["foo", "bar", "baz"])),
        "dt": Series(date_range("20130101", periods=5)),
        "dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period": Series([Period("2000Q1")] * 5),
    }

    # Frame with duplicate column labels ("A" appears twice).
    dup_cols_df = DataFrame(raw)
    dup_cols_df.columns = list("ABCDA")

    frame = {
        "float": DataFrame({"A": series["float"], "B": series["float"] + 1}),
        "int": DataFrame({"A": series["int"], "B": series["int"] + 1}),
        "mixed": DataFrame({k: raw[k] for k in ["A", "B", "C", "D"]}),
        "mi": DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64),
            },
            index=MultiIndex.from_tuples(
                tuple(zip(
                    ["bar", "bar", "baz", "baz", "baz"],
                    ["one", "two", "one", "two", "three"],
                )),
                names=["first", "second"],
            ),
        ),
        "dup": DataFrame(
            np.arange(15).reshape(5, 3).astype(np.float64),
            columns=["A", "B", "A"],
        ),
        "cat_onecol": DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float": DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        "mixed_dup": dup_cols_df,
        # Scalar Timestamps broadcast across the range(5) index.
        "dt_mixed_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    }

    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }

    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
        # NOTE(review): the `freq` argument to Timestamp is deprecated/removed
        # in newer pandas; kept here to reproduce legacy fixture data.
        "freq": Timestamp("2011-01-01", freq="D"),
        "both": Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M"),
    }

    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {"float": _create_sp_series(), "ts": _create_sp_tsseries()},
        "sp_frame": {"float": _create_sp_frame()},
        "cat": cat,
        "timestamp": timestamp,
        "offsets": off,
    }
Ejemplo n.º 26
0
    'Zweiter Weihnachtstag':
    Holiday('Zweiter Weihnachtstag', month=12, day=26),
    'Heilige Drei Könige':
    Holiday('Heilige Drei Könige', month=1, day=6),
    'Mariä Himmelfahrt':
    Holiday('Mariä Himmelfahrt', month=8, day=15),
    'Tag der Deutschen Einheit':
    Holiday('Tag der Deutschen Einheit', month=10, day=3),
    'Reformationstag':
    Holiday('Reformationstag', month=10, day=31),
    '500. Reformationstag':
    Holiday('Reformationstag', year=2017, month=10, day=31),
    'Allerheiligen':
    Holiday('Allerheiligen', month=11, day=1),
    'Buß- und Bettag':
    Holiday('Buß- und Bettag', month=11, day=15, offset=[Week(weekday=2)]),
}

HOLIDAY_EXCLUDE_MAP = {
    'BW': {'Mariä Himmelfahrt', 'Reformationstag', 'Buß- und Bettag'},
    'BY': {'Reformationstag', 'Buß- und Bettag'},
    'BE': {
        'Heilige Drei Könige', 'Fronleichnam', 'Mariä Himmelfahrt',
        'Reformationstag', 'Allerheiligen', 'Buß- und Bettag'
    },
    'BB': {
        'Heilige Drei Könige', 'Fronleichnam', 'Mariä Himmelfahrt',
        'Allerheiligen', 'Buß- und Bettag'
    },
    'HB': {
        'Heilige Drei Könige', 'Fronleichnam', 'Mariä Himmelfahrt',
Ejemplo n.º 27
0
def returns(ts, calc_type='D', force=False):
    """ Calculate returns time series of returns for various time windows.

    :param ts: :py:obj:`TimeSeries`, :py:obj:`pandas.Series`, :py:obj:`pandas.DataFrame`
        Time series whose returns will be calculated.
    :param calc_type: {'D', 'W', 'M', '6M', 'Y', '3Y', 'WTD', 'MTD', 'YTD', 'SI'}, optional
        The time window for return calculation. Default is 'D' (daily returns).
        Case-insensitive; an unrecognized value falls through and returns None.
    :param force: bool, optional
        If True, look-back dates falling before the series start are filled
        with the first valid observation. Default is False.
    :return :py:obj:`pandas.Series`, :py:obj:`pandas.DataFrame`
        Series or DataFrame of returns.
    """
    # Accept both the project's TimeSeries wrapper and bare pandas objects.
    if isinstance(ts, TimeSeries):
        df = ts.ts_values
    else:
        df = ts
    if df.empty:
        return df

    first_index = df.first_valid_index()
    last_index = df.last_valid_index()

    def array_return(x):
        # Total return over a window of values.
        return x[-1] / x[0] - 1

    def shift_one_month(x):
        # Date one calendar month before x (QuantLib date arithmetic).
        return to_datetime(to_ql_date(x) - ql.Period(1, ql.Months))

    def shift_six_months(x):
        # Date six calendar months before x.
        return to_datetime(to_ql_date(x) - ql.Period(6, ql.Months))

    calc_type = calc_type.upper()
    if calc_type == 'D':
        # Daily returns: plain percentage change.
        return df.pct_change()
    elif calc_type == 'W':
        df = df.reindex()  # no-arg reindex: keeps a defensive copy of df
        # Pad onto business days, then a 6-observation rolling window
        # (roughly one business week, endpoints inclusive).
        return df.resample(BDay()).fillna(method='pad').rolling(
            6, min_periods=2).apply(array_return)
    elif calc_type == 'M':
        one_month_ago = df.index.map(shift_one_month)
        df_one_month_ago = df.reindex(one_month_ago, method='pad')
        if force:
            # Dates before inception have no look-back value; use the first
            # valid observation instead.
            df_one_month_ago = df_one_month_ago.fillna(df.loc[first_index])
        return pd.Series(index=df.index,
                         data=df.values / df_one_month_ago.values) - 1
    elif calc_type == '6M':
        six_months_ago = df.index.map(shift_six_months)
        df_six_months_ago = df.reindex(six_months_ago, method='pad')
        if force:
            df_six_months_ago = df_six_months_ago.fillna(df.loc[first_index])
        return pd.Series(index=df.index,
                         data=df.values / df_six_months_ago.values) - 1
    elif calc_type == 'Y':
        one_year_ago = df.index - pd.DateOffset(years=1)
        df_one_year_ago = df.reindex(one_year_ago, method='pad')
        if force:
            df_one_year_ago = df_one_year_ago.fillna(df.loc[first_index])
        return pd.Series(index=df.index,
                         data=df.values / df_one_year_ago.values) - 1
    elif calc_type == '3Y':
        # BUG FIX: was `pd.dateOffset(years=3)`, which raises AttributeError
        # (pandas has no `dateOffset`); the correct class is `pd.DateOffset`,
        # matching the 'Y' branch above.
        three_years_ago = df.index - pd.DateOffset(years=3)
        df_three_years_ago = df.reindex(three_years_ago, method='pad')
        if force:
            df_three_years_ago = df_three_years_ago.fillna(df.loc[first_index])
        return pd.Series(index=df.index,
                         data=df.values / df_three_years_ago.values) - 1
    elif calc_type == 'WTD':
        # Week-to-date: return relative to the previous Friday (weekday=4).
        index = pd.date_range(first_index, last_index, freq=Week(weekday=4))
        df_week_end = df.reindex(index, method='pad').reindex(df.index,
                                                              method='pad')
        return df / df_week_end - 1
    elif calc_type == 'MTD':
        # Month-to-date: relative to the previous business month end.
        index = pd.date_range(first_index, last_index, freq=BMonthEnd())
        df_month_end = df.reindex(index, method='pad').reindex(df.index,
                                                               method='pad')
        return df / df_month_end - 1
    elif calc_type == 'YTD':
        # Year-to-date: relative to the previous business year end.
        index = pd.date_range(first_index, last_index, freq=BYearEnd())
        df_year_end = df.reindex(index, method='pad').reindex(df.index,
                                                              method='pad')
        return df / df_year_end - 1
    elif calc_type == 'SI':
        # Since inception: relative to the first valid observation.
        return df / df.loc[first_index] - 1
Ejemplo n.º 28
0
def test_get_offset_legacy():
    # Legacy "w@Sat"-style frequency aliases are no longer accepted and must
    # raise ValueError with the standard invalid-frequency message.
    legacy_aliases = [("w@Sat", Week(weekday=5))]
    for alias, _expected in legacy_aliases:
        with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG):
            _get_offset(alias)
Ejemplo n.º 29
0
# Calculate inverse market equity (1 / ME); the "lag" is applied later via the
# shifted `faux_date` year/month below, not here.
columns['inv_ME_lag'] = 1/(columns['TOTAL_SHARES'] * columns['PRICE_UNADJUSTED'])
# NOTE(review): zero TOTAL_SHARES or PRICE_UNADJUSTED yields inf here, which
# the dropna below will NOT remove — confirm upstream data excludes zeros.

# Create ME dataframe.
# BUG FIX: the column selection returns a slice of `columns`; mutating it
# below (ME['year'] = ..., ME.drop(..., inplace=True)) triggers pandas'
# chained-assignment (SettingWithCopy) hazard. Take an explicit copy.
ME = columns[['DATE', 'DW_INSTRUMENT_ID', 'MAIN_KEY', 'inv_ME_lag']].copy()

# Drop unneeded columns
columns.drop(['TOTAL_SHARES', 'PRICE_UNADJUSTED', 'inv_ME_lag'], axis = 1, inplace = True)

# Obtain year and month of each fundamentals observation
fundamentals['year'] = fundamentals['DATE'].dt.year
fundamentals['month'] = fundamentals['DATE'].dt.month

# For the monthly ones we need to shift the dates a month minus one week
# forward: jump past month end, step back six days, then snap to the next
# Friday (Week(weekday=4)).
faux_date = columns['DATE'] + dt.timedelta(days = 7) + MonthEnd(0) - dt.timedelta(days = 6) + Week(weekday = 4)
columns['year'] = faux_date.dt.year

# Create year and month columns for ME
ME['year'] = faux_date.dt.year
ME['month'] = faux_date.dt.month

# Drop the raw DATE columns now that year/month are derived
columns.drop('DATE', axis = 1, inplace = True)
ME.drop('DATE', axis = 1, inplace = True)

# Drop rows with missing inverse market equity
ME.dropna(subset = ['inv_ME_lag'], axis = 0, how = 'any', inplace = True)

# Merge columns with fundamentals on the (shifted) year and instrument ID
fundamentals = fundamentals.merge(columns, on = ['year', 'DW_INSTRUMENT_ID'])
Ejemplo n.º 30
0
# Load monthly returns (Windows-style path; `path` is defined earlier in the
# script — TODO confirm it has no trailing separator).
stocks_month = pd.read_csv(path + '\\Data\\monthly_ret.csv')

# Convert date column to datetime objects (ISO year-month-day strings)
stocks_week['DATE'] = pd.to_datetime(stocks_week['DATE'].astype(str),
                                     format="%Y-%m-%d")

stocks_month['DATE'] = pd.to_datetime(stocks_month['DATE'].astype(str),
                                      format="%Y-%m-%d")

# Create calendar year and month columns for the weekly data
stocks_week['year'] = stocks_week['DATE'].dt.year
stocks_week['month'] = stocks_week['DATE'].dt.month

# For the monthly ones we need to shift the dates a month minus one week
# forward: jump a week ahead, snap to month end, step back six days, then
# snap to the following Friday (Week(weekday=4)).
faux_date = stocks_month['DATE'] + dt.timedelta(
    days=7) + MonthEnd(0) - dt.timedelta(days=6) + Week(weekday=4)
stocks_month['year'] = faux_date.dt.year
stocks_month['month'] = faux_date.dt.month

# The shifted dates are only needed for year/month extraction
del faux_date

# Only consider observations before 2010
stocks_week = stocks_week.loc[stocks_week['year'] < 2010, :]

# Save the good columns of stocks_month (used for the merge below)
columns = stocks_month[[
    'year', 'month', 'DW_INSTRUMENT_ID', 'MAIN_KEY', 'INDUSTRY',
    'PRICE_UNADJUSTED', 'TOT_EQUITY'
]]

# Merge the columns with stocks_week