Example #1
0
    def ft_skewness_sdiff(cls,
                          ts: np.ndarray,
                          method: int = 3,
                          unbiased: bool = False,
                          ts_period: t.Optional[int] = None) -> float:
        """Skewness of the seasonally differenced time-series.

        The time-series is differenced once using its period as the lag
        (every observation minus the observation one period earlier), and
        the skewness of the resulting values is returned.

        A non-seasonal time-series is treated as having period 1.

        Parameters
        ----------
        ts : :obj:`np.ndarray`
            One-dimensional time-series values.

        method : int, optional (default=3)
            Strategy used to estimate the skewness. Provided for
            compatibility with the R package `e1071`. Must be one of:

            +--------+-----------------------------------------------+
            |Option  | Formula                                       |
            +--------+-----------------------------------------------+
            |1       | Skew_1 = m_3 / m_2**(3/2)                     |
            |        | (default of ``scipy.stats``)                  |
            +--------+-----------------------------------------------+
            |2       | Skew_2 = Skew_1 * sqrt(n(n-1)) / (n-2)        |
            +--------+-----------------------------------------------+
            |3       | Skew_3 = m_3 / s**3 = Skew_1 ((n-1)/n)**(3/2) |
            +--------+-----------------------------------------------+

            Here `n` is the number of instances in ``ts``, `s` is the
            standard deviation of each attribute in ``ts``, and `m_i` is the
            ith statistical momentum of each attribute in ``ts``.

            If the chosen method cannot be evaluated because of a division
            by zero, the first method is used instead.

        unbiased : bool, optional
            If True, the estimate is corrected for statistical bias.

        ts_period : int, optional
            Time-series period. Used to take advantage of precomputations.

        Returns
        -------
        float
            Skewness of the time-series differenced at a lag equal to its
            own period.
        """
        lag = _period.get_ts_period(ts=ts, ts_period=ts_period)

        # Seasonal differencing: pair each value with the one `lag` steps
        # before it.
        seasonal_diff = ts[lag:] - ts[:-lag]

        # The summary helper takes the opposite flag (`bias`) of this API.
        use_bias = not unbiased
        skewness = _summary.sum_skewness(values=seasonal_diff,
                                         method=method,
                                         bias=use_bias)

        return float(skewness)
Example #2
0
    def precompute_period(cls, ts: np.ndarray, **kwargs) -> t.Dict[str, int]:
        """Precompute the period of the time-series.

        Parameters
        ----------
        ts : :obj:`np.ndarray`
            One-dimensional time-series values.

        kwargs:
            Additional arguments and previously precomputed items. If a
            ``ts_period`` key is already present, nothing is recomputed.

        Returns
        -------
        dict
            Empty if ``ts_period`` was already given in ``kwargs``.
            Otherwise, holds the following precomputed item:
                * ``ts_period`` (:obj:`int`): time-series period.
        """
        if "ts_period" in kwargs:
            # The period is already available: nothing new to contribute.
            return {}

        return {"ts_period": _period.get_ts_period(ts=ts)}
Example #3
0
def decompose(ts: t.Union[np.ndarray, pd.core.series.Series],
              ts_period: t.Optional[int] = None,
              plot: bool = False) -> t.Tuple[np.ndarray, ...]:
    """Decompose a time-series into separated additive components.

    If the time-series is seasonal (period > 1), then it is used the STL
    (Seasonal-Trend Decomposition Procedure Based on Loess) algorithm.
    Otherwise (period <= 1), it is used the Friedman's Super Smoother
    algorithm.

    Parameters
    ----------
    ts : :obj:`np.ndarray` or :obj:`pd.core.series.Series`
        One-dimensional time-series values.

    ts_period : int, optional
        Time-series period. If not given, it is estimated using the minima in
        the autocorrelation function from the detrended time-series using
        Friedman's Super Smoother. If the estimated lag is less or equal 1,
        then the Super Smoother decomposition is returned. Otherwise, the
        STL decomposition is applied afterwards to the original time-series.

    plot : bool, optional (default=False)
        If True, plot the decomposed components. Only the decomposition that
        is actually returned is plotted. When the period had to be estimated
        and turned out to be <= 1, the Super Smoother is run a second time
        with plotting enabled.

    Returns
    -------
    tuple of :obj:`np.ndarray`
        The trend, seasonal and residual components, in this order. Any
        component produced as a :obj:`pd.core.series.Series` is converted to
        its underlying values.

    References
    ----------
    .. [1] Friedman, J. H. 1984, A variable span scatterplot smoother
        Laboratory for Computational Statistics, Stanford University
        Technical Report No. 5. Available at:
        https://www.slac.stanford.edu/pubs/slacpubs/3250/slac-pub-3477.pdf
        Accessed on May 12 2020.
    .. [2] Cleveland, R. B., Cleveland, W. S., McRae, J. E. & Terpenning, I.
        (1990). STL: A Seasonal-Trend Decomposition Procedure Based on
        Loess (with Discussion). Journal of Official Statistics, 6, 3--73.
    """
    ssmoother_comps = None

    if ts_period is None:
        # This pass only feeds the period estimation, so it never plots:
        # the final decomposition may still turn out to be STL.
        ssmoother_comps = _decompose_ssmoother(ts=ts)
        # The third component (residuals) is what the period estimator
        # receives as the detrended series.
        ts_period = _period.get_ts_period(ts, ts_detrended=ssmoother_comps[2])

    if ts_period <= 1:
        # Reuse the estimation pass when possible. It was computed without
        # plotting, so it must be redone when a plot was requested;
        # otherwise `plot=True` would be silently ignored.
        if ssmoother_comps is None or plot:
            ssmoother_comps = _decompose_ssmoother(ts=ts, plot=plot)

        components = ssmoother_comps

    else:
        components = _decompose_stl(ts=ts, ts_period=ts_period, plot=plot)

    # Normalize every component to raw values; exactly three are expected.
    comp_trend, comp_season, comp_resid = (
        comp.values if isinstance(comp, pd.core.series.Series) else comp
        for comp in components)

    return comp_trend, comp_season, comp_resid
Example #4
0
    def ft_kurtosis_sdiff(cls,
                          ts: np.ndarray,
                          method: int = 3,
                          unbiased: bool = False,
                          ts_period: t.Optional[int] = None) -> float:
        """Kurtosis of the seasonally differenced time-series.

        The time-series is differenced once using its period as the lag
        (every observation minus the observation one period earlier), and
        the kurtosis of the resulting values is returned.

        A non-seasonal time-series is treated as having period 1.

        Parameters
        ----------
        ts : :obj:`np.ndarray`
            One-dimensional time-series values.

        method : int, optional (default=3)
            Strategy used to estimate the kurtosis. Provided for total
            compatibility with the R package ``e1071``. Must be one of:

            +--------+-----------------------------------------------+
            |Method  | Formula                                       |
            +--------+-----------------------------------------------+
            |1       | Kurt_1 = (m_4 / m_2**2 - 3)                   |
            |        | (default of `scipy.stats` package)            |
            +--------+-----------------------------------------------+
            |2       | Kurt_2 = (((n+1) * Kurt_1 + 6) * (n-1) / f_2),|
            |        | f_2 = ((n-2)*(n-3))                           |
            +--------+-----------------------------------------------+
            |3       | Kurt_3 = (m_4 / s**4 - 3)                     |
            |        |        = ((Kurt_1+3) * (1 - 1/n)**2 - 3)      |
            +--------+-----------------------------------------------+

            Here `n` is the number of instances in ``ts``, `s` is the
            standard deviation of each attribute in ``ts``, and `m_i` is the
            ith statistical momentum of each attribute in ``ts``.

            If the chosen method cannot be evaluated because of a division
            by zero, the first method is used instead.

        unbiased : bool, optional
            If True, the estimate is corrected for statistical bias.

        ts_period : int, optional
            Time-series period. Used to take advantage of precomputations.

        Returns
        -------
        float
            Kurtosis of the time-series differenced at a lag equal to its
            own period.
        """
        lag = _period.get_ts_period(ts=ts, ts_period=ts_period)

        # Seasonal differencing: pair each value with the one `lag` steps
        # before it.
        seasonal_diff = ts[lag:] - ts[:-lag]

        # The summary helper takes the opposite flag (`bias`) of this API.
        use_bias = not unbiased
        kurtosis = _summary.sum_kurtosis(values=seasonal_diff,
                                         method=method,
                                         bias=use_bias)

        return float(kurtosis)
Example #5
0
def _test() -> None:
    """Smoke-test every precomputation and feature-extraction method.

    Command-line driven: expects ``<data_id> <random_seed> <precomp 0/1>``
    in ``sys.argv``. Exits with status 1 when arguments are missing, and
    with status 2 when ``data_id`` is outside ``[0, 20)``.

    The chosen time-series is loaded and decomposed (with plotting), and a
    pool of named components is built from it. Each listed method is then
    called with the components whose names match its parameter names.
    If ``precomp`` is true, the precomputation methods run first and their
    results are added to the pool. The type of each feature method's return
    value is checked against its return annotation. Exceptions never abort
    the run; they are collected and printed at the end.
    """
    if len(sys.argv) <= 3:
        print("usage:", sys.argv[0], "<data_id> <random_seed> <precomp 0/1>")
        sys.exit(1)

    data_id = int(sys.argv[1])
    random_state = int(sys.argv[2])
    precomp = bool(int(sys.argv[3]))

    if not 0 <= data_id < 20:
        print(f"Require 0 <= data_id < 20 (got {data_id}).")
        sys.exit(2)

    print("Chosen id:", data_id)
    print("Random_state:", random_state)

    ts = load_data(data_id)

    ts_period = _period.get_ts_period(ts)
    ts_trend, ts_season, ts_residuals = _detrend.decompose(ts,
                                                           ts_period=ts_period,
                                                           plot=True)

    ts_detrended = ts - ts_trend
    ts_deseasonalized = ts - ts_season

    score = sklearn.metrics.mean_squared_error

    # Pool of arguments, matched to each method by parameter name.
    components = {
        "ts": ts,
        "ts_trend": ts_trend,
        # Must be the seasonal component: it was previously (wrongly) fed
        # with the residuals, so methods using `ts_season` got wrong data.
        "ts_season": ts_season,
        "ts_residuals": ts_residuals,
        "ts_detrended": ts_detrended,
        "ts_deseasonalized": ts_deseasonalized,
        "random_state": random_state,
        "score": score,
    }
    initial_len = len(components)

    precomps = (
        MFETSGeneral.precompute_walker,
        MFETSGeneral.precompute_embed_caos_method,
        MFETSGeneral.precompute_period,
        MFETSGeneral.precompute_ts_scaled,
        MFETSFreqDomain.precompute_ps_residuals,
        MFETSGlobalStats.precompute_period,
        MFETSAutocorr.precompute_detrended_acf,
        MFETSAutocorr.precompute_gaussian_model,
        MFETSLocalStats.precompute_ts_scaled,
        MFETSLocalStats.precompute_rolling_window,
        MFETSModelBased.precompute_ts_scaled,
        MFETSModelBased.precompute_period,
        MFETSModelBased.precompute_model_ets,
        MFETSModelBased.precompute_ioe_std_linear_model,
        MFETSRandomize.precompute_ts_scaled,
        MFETSRandomize.precompute_itrand_stats,
        MFETSInfoTheory.precompute_ts_scaled,
        MFETSInfoTheory.precompute_detrended_ami,
    )

    methods = (
        MFETSGeneral.ft_emb_lag,
        MFETSGeneral.ft_stick_angles,
        MFETSGeneral.ft_fs_len,
        MFETSGeneral.ft_fnn_prop,
        MFETSGeneral.ft_embed_in_shell,
        MFETSGeneral.ft_force_potential,
        MFETSGeneral.ft_walker_cross_frac,
        MFETSGeneral.ft_walker_path,
        MFETSGeneral.ft_pred,
        MFETSGeneral.ft_moving_threshold,
        MFETSGeneral.ft_turning_points,
        MFETSGeneral.ft_step_changes,
        MFETSGeneral.ft_turning_points_trend,
        MFETSGeneral.ft_step_changes_trend,
        MFETSGeneral.ft_length,
        MFETSGeneral.ft_frac_cp,
        MFETSGeneral.ft_bin_mean,
        MFETSGeneral.ft_period,
        MFETSGeneral.ft_peak_frac,
        MFETSGeneral.ft_trough_frac,
        MFETSGeneral.ft_diff,
        MFETSGeneral.ft_cao_e1,
        MFETSGeneral.ft_cao_e2,
        MFETSGeneral.ft_emb_dim_cao,
        MFETSAutocorr.ft_gen_autocorr,
        MFETSAutocorr.ft_trev,
        MFETSAutocorr.ft_acf_first_nonsig,
        MFETSAutocorr.ft_acf_first_nonpos,
        MFETSAutocorr.ft_tc3,
        MFETSAutocorr.ft_autocorr_out_dist,
        MFETSAutocorr.ft_first_acf_locmin,
        MFETSAutocorr.ft_gresid_autocorr,
        MFETSAutocorr.ft_autocorr_crit_pt,
        MFETSAutocorr.ft_acf_detrended,
        MFETSAutocorr.ft_pacf,
        MFETSAutocorr.ft_acf,
        MFETSAutocorr.ft_acf_diff,
        MFETSAutocorr.ft_pacf_diff,
        MFETSAutocorr.ft_pacf_detrended,
        MFETSAutocorr.ft_gresid_lbtest,
        MFETSFreqDomain.ft_low_freq_power,
        MFETSFreqDomain.ft_ps_residuals,
        MFETSFreqDomain.ft_ps_freqs,
        MFETSFreqDomain.ft_ps_peaks,
        MFETSFreqDomain.ft_ps_entropy,
        MFETSGlobalStats.ft_dfa,
        MFETSGlobalStats.ft_corr_dim,
        MFETSGlobalStats.ft_ioe_tdelta_mean,
        MFETSGlobalStats.ft_t_mean,
        MFETSGlobalStats.ft_opt_boxcox_coef,
        MFETSGlobalStats.ft_sd_diff,
        MFETSGlobalStats.ft_sd_sdiff,
        MFETSGlobalStats.ft_skewness_diff,
        MFETSGlobalStats.ft_skewness_sdiff,
        MFETSGlobalStats.ft_kurtosis_diff,
        MFETSGlobalStats.ft_kurtosis_sdiff,
        MFETSGlobalStats.ft_exp_max_lyap,
        MFETSGlobalStats.ft_exp_hurst,
        MFETSGlobalStats.ft_skewness_residuals,
        MFETSGlobalStats.ft_kurtosis_residuals,
        MFETSGlobalStats.ft_sd_residuals,
        MFETSGlobalStats.ft_trend_strenght,
        MFETSGlobalStats.ft_season_strenght,
        MFETSGlobalStats.ft_spikiness,
        MFETSLocalStats.ft_moving_lilliefors,
        MFETSLocalStats.ft_moving_approx_ent,
        MFETSLocalStats.ft_moving_avg,
        MFETSLocalStats.ft_moving_avg_shift,
        MFETSLocalStats.ft_moving_var_shift,
        MFETSLocalStats.ft_moving_skewness_shift,
        MFETSLocalStats.ft_moving_kurtosis_shift,
        MFETSLocalStats.ft_moving_gmean_shift,
        MFETSLocalStats.ft_moving_sd_shift,
        MFETSLocalStats.ft_moving_acf_shift,
        MFETSLocalStats.ft_moving_kldiv_shift,
        MFETSLocalStats.ft_lumpiness,
        MFETSLocalStats.ft_stability,
        MFETSLocalStats.ft_moving_var,
        MFETSLocalStats.ft_moving_skewness,
        MFETSLocalStats.ft_moving_kurtosis,
        MFETSLocalStats.ft_moving_gmean,
        MFETSLocalStats.ft_moving_sd,
        MFETSLocalStats.ft_moving_acf,
        MFETSLocalStats.ft_moving_kldiv,
        MFETSLocalStats.ft_local_extrema,
        MFETSLocalStats.ft_local_range,
        MFETSModelBased.ft_avg_cycle_period,
        MFETSModelBased.ft_ioe_std_adj_r_sqr,
        MFETSModelBased.ft_ioe_std_slope,
        MFETSModelBased.ft_gaussian_r_sqr,
        MFETSModelBased.ft_linearity,
        MFETSModelBased.ft_curvature,
        MFETSModelBased.ft_des_level,
        MFETSModelBased.ft_des_slope,
        MFETSModelBased.ft_ets_level,
        MFETSModelBased.ft_ets_slope,
        MFETSModelBased.ft_ets_season,
        MFETSLandmarking.ft_model_linear_seasonal,
        MFETSLandmarking.ft_model_linear_embed,
        MFETSLandmarking.ft_model_exp,
        MFETSLandmarking.ft_model_sine,
        MFETSLandmarking.ft_model_loc_median,
        MFETSLandmarking.ft_model_loc_mean,
        MFETSLandmarking.ft_model_naive_seasonal,
        MFETSLandmarking.ft_model_naive_drift,
        MFETSLandmarking.ft_model_gaussian,
        MFETSLandmarking.ft_model_hwes_ada,
        MFETSLandmarking.ft_model_hwes_adm,
        MFETSLandmarking.ft_model_naive,
        MFETSLandmarking.ft_model_mean,
        MFETSLandmarking.ft_model_mean_acf_first_nonpos,
        MFETSLandmarking.ft_model_ses,
        MFETSLandmarking.ft_model_arima_100_c,
        MFETSLandmarking.ft_model_arima_010_c,
        MFETSLandmarking.ft_model_arima_110_c,
        MFETSLandmarking.ft_model_arima_011_nc,
        MFETSLandmarking.ft_model_arima_011_c,
        MFETSLandmarking.ft_model_arima_021_c,
        MFETSLandmarking.ft_model_arima_112_nc,
        MFETSLandmarking.ft_model_linear,
        MFETSLandmarking.ft_model_linear_acf_first_nonpos,
        MFETSRandomize.ft_resample_first_acf_nonpos,
        MFETSRandomize.ft_resample_first_acf_locmin,
        MFETSRandomize.ft_surr_tc3,
        MFETSRandomize.ft_surr_trev,
        MFETSRandomize.ft_itrand_mean,
        MFETSRandomize.ft_itrand_sd,
        MFETSRandomize.ft_itrand_acf,
        MFETSRandomize.ft_resample_std,
        MFETSStatTests.ft_test_lilliefors,
        MFETSStatTests.ft_test_lb,
        MFETSStatTests.ft_test_earch,
        MFETSStatTests.ft_test_adf,
        MFETSStatTests.ft_test_adf_gls,
        MFETSStatTests.ft_test_kpss,
        MFETSStatTests.ft_test_pp,
        MFETSStatTests.ft_test_dw,
        MFETSStatTests.ft_test_za,
        MFETSInfoTheory.ft_ami_detrended,
        MFETSInfoTheory.ft_ami,
        MFETSInfoTheory.ft_lz_complexity,
        MFETSInfoTheory.ft_sample_entropy,
        MFETSInfoTheory.ft_approx_entropy,
        MFETSInfoTheory.ft_control_entropy,
        MFETSInfoTheory.ft_surprise,
        MFETSInfoTheory.ft_ami_curvature,
        MFETSInfoTheory.ft_ami_first_critpt,
        MFETSInfoTheory.ft_hist_entropy,
        MFETSInfoTheory.ft_hist_ent_out_diff,
    )

    # Entries are (stage, exception, method name); stage is "P" for
    # precomputation and "M" for feature-extraction methods.
    errors = []

    if precomp:
        for i, method in enumerate(precomps, 1):
            print(
                f"Precomputation method {i} of {len(precomps)}: {method.__name__}..."
            )

            params = inspect.signature(method).parameters.keys()
            # Recomputed on every iteration: earlier precomputations may
            # have added new components to the pool.
            component_names = frozenset(components.keys())
            intersec = component_names.intersection(params)

            args = {
                name: comp
                for name, comp in components.items() if name in intersec
            }
            print(3 * " ", f"Args {len(args)}: ", args.keys())

            try:
                res = method(**args)
                components.update(res)

            except Exception as ex:
                # Deliberately broad: one failing method must not stop the
                # whole run; every failure is reported at the end.
                errors.append(("P", ex, method.__name__))

    # The pool is final from here on, so its key set is computed once.
    component_names = frozenset(components.keys())

    for i, method in enumerate(methods, 1):
        print(f"method {i} of {len(methods)}: {method.__name__}...")

        sig = inspect.signature(method)
        params = sig.parameters.keys()
        intersec = component_names.intersection(params)

        args = {
            name: comp
            for name, comp in components.items() if name in intersec
        }
        print(3 * " ", f"Args {len(args)}: ", args.keys())

        try:
            res = method(**args)

            # Treat numpy scalars as their builtin counterparts so they
            # match `float` / `int` return annotations.
            type_ = type(res)
            type_ = float if type_ is np.float64 else type_
            type_ = int if type_ is np.int64 else type_

            exp_ret_type = sig.return_annotation
            # Annotations exposing `__args__` (e.g. unions) list several
            # accepted types; plain annotations must match exactly.
            is_single_type = not hasattr(sig.return_annotation, "__args__")

            if type_ is not exp_ret_type and (is_single_type or type_
                                              not in exp_ret_type.__args__):
                raise TypeError(
                    f"Return ({res}) type {type(res)} does not conform to the return type ({sig.return_annotation})."
                )

        except Exception as ex:
            # Deliberately broad, as above; this also captures the
            # TypeError raised by the return-type check.
            errors.append(("M", ex, method.__name__))

    if errors:
        for typ, err, method_name in errors:
            print(f"-> ({typ})", err, method_name)

    print("Time-series estimated period:", ts_period)
    print(f"Total of {len(errors)} exceptions raised.")
    print("Chosen id:", data_id, "Random_state:", random_state)
    print(f"Components got (total of {len(components) - initial_len} new):",
          len(components))
Example #6
0
    def _fit_res_model_ets(
        ts: np.ndarray,
        damped: bool = False,
        grid_search_guess: bool = True,
        ts_period: t.Optional[int] = None,
        ts_scaled: t.Optional[np.ndarray] = None,
    ) -> statsmodels.tsa.holtwinters.HoltWintersResultsWrapper:
        """Fit an additive triple exponential smoothing model.

        The model is fitted on the standardized time-series, with additive
        trend and additive seasonal components. Convergence warnings issued
        by ``statsmodels`` while building and fitting are suppressed.

        Parameters
        ----------
        ts : :obj:`np.ndarray`
            One-dimensional time-series values.

        damped : bool, optional (default=False)
            Whether or not the exponential smoothing model should include a
            damping component.

        grid_search_guess : bool, optional (default=True)
            If True, a grid search (a.k.a. brute force) is used to look for
            good starting parameters. If False, fitting is less
            computationally intensive, but has a higher chance of failing to
            converge.

        ts_period : int, optional
            Time-series period.

        ts_scaled : :obj:`np.ndarray`, optional
            Standardized time-series values. Used to take advantage of
            precomputations.

        Returns
        -------
        :obj:`statsmodels.tsa.holtwinters.HoltWintersResultsWrapper`
            Results of the optimized triple exponential smoothing model.

        References
        ----------
        .. [1] Winters, Peter R. Forecasting Sales by Exponentially Weighted
            Moving Averages, 1960, INFORMS, Linthicum, MD, USA
            https://doi.org/10.1287/mnsc.6.3.324
        .. [2] Charles C. Holt, Forecasting seasonals and trends by
            exponentially weighted moving averages, International Journal of
            Forecasting, Volume 20, Issue 1, 2004, Pages 5-10, ISSN 0169-2070,
            https://doi.org/10.1016/j.ijforecast.2003.09.015.
        """
        std_ts = _utils.standardize_ts(ts=ts, ts_scaled=ts_scaled)

        # The period is resolved from the standardized series, not from the
        # raw one.
        period = _period.get_ts_period(ts=std_ts, ts_period=ts_period)

        with warnings.catch_warnings():
            # Filters set here are discarded when the context exits.
            warnings.filterwarnings(
                "ignore",
                module="statsmodels",
                category=statsmodels.tools.sm_exceptions.ConvergenceWarning)

            ets_model = statsmodels.tsa.holtwinters.ExponentialSmoothing(
                endog=std_ts,
                trend="additive",
                seasonal="additive",
                damped=damped,
                seasonal_periods=period)

            fitted = ets_model.fit(use_brute=grid_search_guess)

        return fitted