Esempio n. 1
0
    def get_from_freq(cls, freq: str, add_trend: bool = DEFAULT_ADD_TREND):
        """
        Build an instance whose seasonal components are selected from ``freq``.

        Parameters
        ----------
        freq
            Frequency string. It is parsed with ``to_offset`` and the name of
            the resulting offset selects the seasonal components.
        add_trend
            Forwarded unchanged to the constructor.

        Returns
        -------
        An instance of ``cls`` constructed with the selected
        ``seasonal_issms`` and the given ``add_trend``.

        Raises
        ------
        RuntimeError
            If the offset name matches none of the supported frequencies.
        """
        offset = to_offset(freq)

        seasonal_issms: List[SeasonalityISSM] = []

        if offset.name == "M":
            seasonal_issms = [MonthOfYearSeasonalISSM()]
        elif norm_freq_str(offset.name) == "W":
            # Normalized so that anchored weekly names also match "W".
            seasonal_issms = [WeekOfYearSeasonalISSM()]
        elif offset.name == "D":
            seasonal_issms = [DayOfWeekSeasonalISSM()]
        elif offset.name == "B":  # TODO: check this case
            seasonal_issms = [DayOfWeekSeasonalISSM()]
        elif offset.name == "H":
            seasonal_issms = [
                HourOfDaySeasonalISSM(),
                DayOfWeekSeasonalISSM(),
            ]
        elif offset.name == "T":
            seasonal_issms = [
                MinuteOfHourSeasonalISSM(),
                HourOfDaySeasonalISSM(),
            ]
        else:
            # Fail loudly: building an instance without any seasonal
            # component would hide an unsupported or mistyped frequency.
            raise RuntimeError(f"Unsupported frequency {offset.name}")

        return cls(seasonal_issms=seasonal_issms, add_trend=add_trend)
Esempio n. 2
0
def get_seasonality(freq: str, seasonalities=DEFAULT_SEASONALITIES) -> int:
    """
    Compute the seasonal period implied by a frequency string.

    The base seasonality of the normalized frequency name is looked up in
    ``seasonalities`` (1 when the name is absent) and divided by the
    frequency multiple. When the multiple does not divide the base
    seasonality evenly, a warning is logged and 1 is returned instead.

    >>> get_seasonality("2H")
    12

    """
    freq_offset = pd.tseries.frequencies.to_offset(freq)
    multiple = freq_offset.n

    base_seasonality = seasonalities.get(norm_freq_str(freq_offset.name), 1)

    if base_seasonality % multiple:
        # No whole number of steps fits into one base season.
        logger.warning(f"Multiple {multiple} does not divide base seasonality "
                       f"{base_seasonality}. Falling back to seasonality 1.")
        return 1

    return base_seasonality // multiple
Esempio n. 3
0
def fourier_time_features_from_frequency(freq_str: str) -> List[TimeFeature]:
    """
    Build the Fourier date features associated with a frequency string.

    The frequency is parsed with ``to_offset`` and its normalized name picks
    a list of date attribute names; one ``FourierDateFeatures`` is created
    per attribute name, in the listed order.

    An assertion fails if the normalized name has no entry in the table.
    """
    granularity = norm_freq_str(to_offset(freq_str).name)

    # Date attribute names to turn into Fourier features, per granularity.
    attributes_by_granularity = {
        "M": ["weekofyear"],
        "W": ["daysinmonth", "weekofyear"],
        "D": ["dayofweek"],
        "B": ["dayofweek", "dayofyear"],
        "H": ["hour", "dayofweek"],
        "min": ["minute", "hour", "dayofweek"],
        "T": ["minute", "hour", "dayofweek"],
    }

    assert (
        granularity in attributes_by_granularity
    ), f"freq {granularity} not supported"

    fourier_features: List[TimeFeature] = []
    for attribute in attributes_by_granularity[granularity]:
        fourier_features.append(FourierDateFeatures(freq=attribute))
    return fourier_features
Esempio n. 4
0
def longest_period_from_frequency_str(freq_str: str) -> int:
    """
    Return the longest period for ``freq_str``, scaled by its multiple.

    The ``FREQ_LONGEST_PERIOD_DICT`` entry for the normalized frequency name
    is floor-divided by the multiple of the frequency. A name without an
    entry raises ``KeyError``.
    """
    parsed = to_offset(freq_str)
    base_period = FREQ_LONGEST_PERIOD_DICT[norm_freq_str(parsed.name)]
    return base_period // parsed.n
Esempio n. 5
0
def get_lags_for_frequency(freq_str: str,
                           lag_ub: int = 1200,
                           num_lags: Optional[int] = None) -> List[int]:
    """
    Build the list of lags appropriate for the given frequency string.

    The lags [1, 2, 3, 4, 5, 6, 7] are always included. Further candidates
    are taken around the same `season` (+/- `delta`) in previous cycles;
    only candidates strictly greater than 7 and at most `lag_ub` are kept.

    Parameters
    ----------

    freq_str
        Frequency string of the form [multiple][granularity] such as "12H",
        "5min", "1D" etc.

    lag_ub
        The maximum value for a lag.

    num_lags
        Maximum number of lags; by default all generated lags are returned

    Returns
    -------
    The lags 1 to 7 followed by the distinct seasonal lags in increasing
    order, truncated to the first `num_lags` entries.

    Raises
    ------
    AssertionError
        For a quarterly frequency whose multiple is not 1.
    Exception
        If the normalized frequency name is not one of the handled ones.
    """

    def _cycle_lags(period, multiple, num_cycles, delta):
        # One group of lags around each of the previous ``num_cycles``
        # cycles, where a cycle spans ``period`` units of the granularity.
        groups = []
        for k in range(1, num_cycles + 1):
            groups.append(_make_lags(k * period // multiple, delta))
        return groups

    def _minute_lags(multiple):
        # Previous 3 hours.
        return _cycle_lags(60, multiple, 3, 2)

    def _hour_lags(multiple):
        # Previous 7 days.
        return _cycle_lags(24, multiple, 7, 1)

    def _day_lags(multiple):
        # Previous 4 weeks, plus the last month.
        weekly = _cycle_lags(7, multiple, 4, 1)
        return weekly + [_make_lags(30 // multiple, 1)]

    def _week_lags(multiple):
        # Previous 3 years, plus the previous 4, 8 and 12 weeks.
        yearly = _cycle_lags(52, multiple, 3, 1)
        return yearly + [[4 // multiple, 8 // multiple, 12 // multiple]]

    def _month_lags(multiple):
        # Previous 3 years.
        return _cycle_lags(12, multiple, 3, 1)

    offset = to_offset(freq_str)
    # Normalized name, so that both `W` and `W-SUN` refer to `W`.
    granularity = norm_freq_str(offset.name)
    multiple = offset.n

    if granularity == "A":
        lag_groups = []
    elif granularity == "Q":
        assert (
            multiple == 1
        ), "Only multiple 1 is supported for quarterly. Use x month instead."
        lag_groups = _month_lags(multiple * 3.0)
    elif granularity == "M":
        lag_groups = _month_lags(multiple)
    elif granularity == "W":
        lag_groups = _week_lags(multiple)
    elif granularity == "D":
        lag_groups = _day_lags(multiple) + _week_lags(multiple / 7.0)
    elif granularity == "B":
        # todo find good lags for business day
        lag_groups = []
    elif granularity == "H":
        lag_groups = (_hour_lags(multiple) +
                      _day_lags(multiple / 24.0) +
                      _week_lags(multiple / (24.0 * 7)))
    elif granularity == "T":
        # minutes
        lag_groups = (_minute_lags(multiple) +
                      _hour_lags(multiple / 60.0) +
                      _day_lags(multiple / (60.0 * 24)) +
                      _week_lags(multiple / (60.0 * 24 * 7)))
    else:
        raise Exception("invalid frequency")

    # Flatten the groups, keep candidates in (7, lag_ub] and drop duplicates.
    seasonal_lags = set()
    for group in lag_groups:
        for lag in group:
            if 7 < lag <= lag_ub:
                seasonal_lags.add(int(lag))

    selected = list(range(1, 8)) + sorted(seasonal_lags)
    return selected[:num_lags]