def get_from_freq(cls, freq: str, add_trend: bool = DEFAULT_ADD_TREND):
    """
    Construct ``cls`` with the seasonal components selected by a frequency.

    Parameters
    ----------
    freq
        Frequency string; it is parsed with ``to_offset``.
    add_trend
        Forwarded unchanged to the ``cls`` constructor.

    Returns
    -------
    The object produced by
    ``cls(seasonal_issms=seasonal_issms, add_trend=add_trend)``.

    Raises
    ------
    RuntimeError
        If the offset name of ``freq`` matches none of the handled cases.
    """
    offset = to_offset(freq)

    seasonal_issms: List[SeasonalityISSM] = []

    if offset.name == "M":
        seasonal_issms = [MonthOfYearSeasonalISSM()]
    elif norm_freq_str(offset.name) == "W":
        # Weekly names are normalized before comparison; the other branches
        # compare the raw offset name.
        seasonal_issms = [WeekOfYearSeasonalISSM()]
    elif offset.name == "D":
        seasonal_issms = [DayOfWeekSeasonalISSM()]
    elif offset.name == "B":  # TODO: check this case
        seasonal_issms = [DayOfWeekSeasonalISSM()]
    elif offset.name == "H":
        seasonal_issms = [
            HourOfDaySeasonalISSM(),
            DayOfWeekSeasonalISSM(),
        ]
    elif offset.name == "T":
        seasonal_issms = [
            MinuteOfHourSeasonalISSM(),
            HourOfDaySeasonalISSM(),
        ]
    else:
        # The exception has to be raised, not merely constructed; otherwise
        # an unsupported frequency silently yields an instance with an empty
        # ``seasonal_issms`` list.
        raise RuntimeError(f"Unsupported frequency {offset.name}")

    return cls(seasonal_issms=seasonal_issms, add_trend=add_trend)
def get_seasonality(freq: str, seasonalities=DEFAULT_SEASONALITIES) -> int:
    """
    Return the seasonality of a given frequency:

    >>> get_seasonality("2H")
    12

    The base seasonality is looked up in ``seasonalities`` under the
    normalized offset name (defaulting to 1 when the name is absent) and
    then divided by the frequency multiple. When the multiple does not
    divide the base seasonality evenly, a warning is logged and 1 is
    returned instead.
    """
    freq_offset = pd.tseries.frequencies.to_offset(freq)
    base_seasonality = seasonalities.get(norm_freq_str(freq_offset.name), 1)

    quotient, leftover = divmod(base_seasonality, freq_offset.n)
    if leftover:
        # The multiple does not fit a whole number of times into the base
        # period, so no meaningful seasonality can be derived.
        logger.warning(
            f"Multiple {freq_offset.n} does not divide base seasonality "
            f"{base_seasonality}. Falling back to seasonality 1."
        )
        return 1

    return quotient
def fourier_time_features_from_frequency(freq_str: str) -> List[TimeFeature]:
    """
    Build the ``FourierDateFeatures`` associated with a frequency string.

    The frequency is parsed with ``to_offset`` and its name normalized with
    ``norm_freq_str``; the resulting granularity selects a list of names,
    each of which is passed as ``freq`` to one ``FourierDateFeatures``.

    Raises
    ------
    AssertionError
        If the granularity has no entry in the lookup table.
    """
    # Granularity -> names handed to ``FourierDateFeatures(freq=...)``.
    names_by_granularity = {
        "M": ["weekofyear"],
        "W": ["daysinmonth", "weekofyear"],
        "D": ["dayofweek"],
        "B": ["dayofweek", "dayofyear"],
        "H": ["hour", "dayofweek"],
        "min": ["minute", "hour", "dayofweek"],
        "T": ["minute", "hour", "dayofweek"],
    }

    granularity = norm_freq_str(to_offset(freq_str).name)

    assert (
        granularity in names_by_granularity
    ), f"freq {granularity} not supported"

    selected: List[TimeFeature] = []
    for name in names_by_granularity[granularity]:
        selected.append(FourierDateFeatures(freq=name))
    return selected
def longest_period_from_frequency_str(freq_str: str) -> int:
    """
    Return the longest period for a frequency string.

    The value stored in ``FREQ_LONGEST_PERIOD_DICT`` for the normalized
    offset name is floor-divided by the frequency multiple. A ``KeyError``
    propagates if the normalized name is not in that dictionary.
    """
    parsed = to_offset(freq_str)
    base_period = FREQ_LONGEST_PERIOD_DICT[norm_freq_str(parsed.name)]
    return base_period // parsed.n
def get_lags_for_frequency(
    freq_str: str, lag_ub: int = 1200, num_lags: Optional[int] = None
) -> List[int]:
    """
    Generate a list of lags appropriate for the given frequency string.

    Every frequency gets the lags ``[1, 2, 3, 4, 5, 6, 7]``. The remaining
    lags correspond to the same `season` (+/- `delta`) in the previous `k`
    cycles, where `delta` and `k` are fixed per granularity by the helpers
    below.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H",
        "5min", "1D" etc.
    lag_ub
        The maximum value for a lag; seasonal lags above it are dropped.
    num_lags
        Maximum number of lags; by default all generated lags are returned.

    Returns
    -------
    List[int]
        ``[1, ..., 7]`` followed by the distinct seasonal lags greater than
        7 in ascending order, sliced with ``[:num_lags]``.

    Raises
    ------
    AssertionError
        For a quarterly frequency whose multiple is not 1.
    Exception
        If the normalized offset name is not one of the handled cases.
    """

    def _cycle_lags(period, multiple, delta, num_cycles):
        # One window of lags (+/- ``delta``) around the same position in
        # each of the previous ``num_cycles`` cycles of length ``period``.
        return [
            _make_lags(cycle * period // multiple, delta)
            for cycle in range(1, num_cycles + 1)
        ]

    def _make_lags_for_minute(multiple, num_cycles=3):
        # Previous ``num_cycles`` hours.
        return _cycle_lags(60, multiple, 2, num_cycles)

    def _make_lags_for_hour(multiple, num_cycles=7):
        # Previous ``num_cycles`` days.
        return _cycle_lags(24, multiple, 1, num_cycles)

    def _make_lags_for_day(multiple, num_cycles=4):
        # Previous ``num_cycles`` weeks, plus the last month.
        weekly = _cycle_lags(7, multiple, 1, num_cycles)
        monthly = _make_lags(30 // multiple, 1)
        return weekly + [monthly]

    def _make_lags_for_week(multiple, num_cycles=3):
        # Previous ``num_cycles`` years, plus the previous 4, 8, 12 weeks.
        yearly = _cycle_lags(52, multiple, 1, num_cycles)
        recent = [weeks // multiple for weeks in (4, 8, 12)]
        return yearly + [recent]

    def _make_lags_for_month(multiple, num_cycles=3):
        # Previous ``num_cycles`` years.
        return _cycle_lags(12, multiple, 1, num_cycles)

    offset = to_offset(freq_str)
    multiple = offset.n
    # Normalize the offset name, so that both `W` and `W-SUN` refer to `W`.
    offset_name = norm_freq_str(offset.name)

    if offset_name == "A":
        lags = []
    elif offset_name == "Q":
        assert (
            offset.n == 1
        ), "Only multiple 1 is supported for quarterly. Use x month instead."
        lags = _make_lags_for_month(multiple * 3.0)
    elif offset_name == "M":
        lags = _make_lags_for_month(multiple)
    elif offset_name == "W":
        lags = _make_lags_for_week(multiple)
    elif offset_name == "D":
        lags = _make_lags_for_day(multiple)
        lags = lags + _make_lags_for_week(multiple / 7.0)
    elif offset_name == "B":
        # todo find good lags for business day
        lags = []
    elif offset_name == "H":
        lags = _make_lags_for_hour(multiple)
        lags = lags + _make_lags_for_day(multiple / 24.0)
        lags = lags + _make_lags_for_week(multiple / (24.0 * 7))
    elif offset_name == "T":
        # minutes
        lags = _make_lags_for_minute(multiple)
        lags = lags + _make_lags_for_hour(multiple / 60.0)
        lags = lags + _make_lags_for_day(multiple / (60.0 * 24))
        lags = lags + _make_lags_for_week(multiple / (60.0 * 24 * 7))
    else:
        raise Exception("invalid frequency")

    # Flatten the groups, keep only lags in (7, lag_ub], and de-duplicate.
    seasonal = {
        int(lag) for group in lags for lag in group if 7 < lag <= lag_ub
    }
    all_lags = [1, 2, 3, 4, 5, 6, 7] + sorted(seasonal)

    return all_lags[:num_lags]