Ejemplo n.º 1
0
def get_ts_period(ts: np.ndarray,
                  ts_detrended: t.Optional[np.ndarray] = None,
                  ts_period: t.Optional[int] = None) -> int:
    """Return the time-series periodicity, if any.

    If ``ts_period`` is given, it is returned directly (cast to ``int`` and
    clipped below at 1) and nothing is estimated.

    Otherwise, the period is estimated from the autocorrelation function of
    the detrended time-series, computed with the fast-fourier transform
    method up to ``floor(ts_detrended.size / 2)`` lags. The estimated period
    is the lag at which the autocorrelation function assumes its maximal
    absolute value (lag zero excluded).

    Parameters
    ----------
    ts : :obj:`np.ndarray`
        One-dimensional time-series values.

    ts_detrended : :obj:`np.ndarray`, optional
        Detrended time-series. If None, the third component returned by
        ``_detrend.decompose(ts=ts, ts_period=0)`` is used (the time-series
        is detrended using Friedman's Super Smoother).

    ts_period : int, optional
        User-given period. If not None, no estimation is performed.

    Returns
    -------
    int
        Time-series period (always a builtin ``int`` greater than or equal
        to 1). Time-series with at most one observation have period 1.
    """
    if ts_period is not None:
        return max(int(ts_period), 1)

    if ts.size <= 1:
        return 1

    if ts_detrended is None:
        ts_detrended = _detrend.decompose(ts=ts, ts_period=0)[2]

    # Drop the first entry (lag zero): it is the trivial self-correlation.
    autocorr = statsmodels.tsa.stattools.acf(ts_detrended,
                                             nlags=ts_detrended.size // 2,
                                             fft=True,
                                             unbiased=True)[1:]

    if autocorr.size == 0:
        # A user-given 'ts_detrended' with fewer than two observations has
        # no positive lag to inspect; 'np.argmax' would raise on the empty
        # array. Mirror the behaviour adopted for a too short 'ts'.
        return 1

    # 'np.argmax' returns a NumPy integer; cast it to honour the annotated
    # builtin 'int' return type. The '+ 1' converts the index into a lag.
    period = int(np.argmax(np.abs(autocorr))) + 1

    return period
Ejemplo n.º 2
0
    def _calc_pacf(cls,
                   ts: np.ndarray,
                   nlags: t.Optional[int] = None,
                   method: str = "ols-unbiased",
                   detrend: bool = True,
                   ts_detrended: t.Optional[np.ndarray] = None) -> np.ndarray:
        """Compute the partial autocorrelation function of a time-series.

        Parameters
        ----------
        ts : :obj:`np.ndarray`
            One-dimensional time-series values.

        nlags : int, optional
            Number of lags to calculate the partial autocorrelation function.
            If None, ``1 + ts.size // 10`` is used.

        method : str, optional (default="ols-unbiased")
            Method used to estimate the partial autocorrelations. It is
            forwarded as is to `statsmodels.tsa.stattools.pacf`; check its
            documentation for the complete list of the available methods.

        detrend : bool, optional (default=True)
            If True and ``ts_detrended`` is not given, the time-series is
            detrended (using Friedman's Super Smoother) before calculating
            the partial autocorrelation function. If the detrending raises
            a ``ValueError``, the original time-series is used instead.

        ts_detrended : :obj:`np.ndarray`, optional
            Detrended time-series. Whenever it is given, the partial
            autocorrelation function is calculated from it, regardless of
            the ``detrend`` value.

        Returns
        -------
        :obj:`np.ndarray`
            Partial autocorrelation function up to `nlags` lags, without
            the first value returned by `statsmodels` (the lag zero). It is
            calculated from the detrended time-series if one is available
            (either given or successfully computed), and from ``ts``
            otherwise.
        """
        if nlags is None:
            nlags = 1 + ts.size // 10

        series = ts_detrended

        if series is None and detrend:
            try:
                # Third decomposition component: the detrended values.
                series = _detrend.decompose(ts=ts, ts_period=0)[2]

            except ValueError:
                # Best effort: fall back to the original time-series below.
                pass

        if series is None:
            series = ts

        pacf_values = statsmodels.tsa.stattools.pacf(series,
                                                     method=method,
                                                     nlags=nlags)

        # Discard the first entry (lag zero).
        return pacf_values[1:]
Ejemplo n.º 3
0
def _test() -> None:
    """Command-line smoke test for the time-series meta-feature methods.

    Usage: ``<script> <data_id> <random_seed> <precomp 0/1>``.

    The time-series identified by ``data_id`` is loaded, its period is
    estimated, and it is decomposed into trend, seasonal and residual
    components. Then, every precomputation method (only if ``precomp`` is
    true) and every feature extraction method is called with the subset of
    the available components that matches its signature. The return type of
    each feature extraction method is checked against its return annotation.

    Exceptions raised by the called methods are not propagated: they are
    collected and reported at the end of the execution.

    The process exits with status 1 if fewer than three command-line
    arguments are given, and with status 2 if ``data_id`` is not in the
    [0, 20) range.
    """
    if len(sys.argv) <= 3:
        print("usage:", sys.argv[0], "<data_id> <random_seed> <precomp 0/1>")
        sys.exit(1)

    data_id = int(sys.argv[1])
    random_state = int(sys.argv[2])
    precomp = bool(int(sys.argv[3]))

    if not 0 <= data_id < 20:
        print(f"Require 0 <= data_id < 20 (got {data_id}).")
        sys.exit(2)

    print("Chosen id:", data_id)
    print("Random_state:", random_state)

    ts = load_data(data_id)

    ts_period = _period.get_ts_period(ts)
    ts_trend, ts_season, ts_residuals = _detrend.decompose(ts,
                                                           ts_period=ts_period,
                                                           plot=True)

    ts_detrended = ts - ts_trend
    ts_deseasonalized = ts - ts_season

    score = sklearn.metrics.mean_squared_error

    # Pool of arguments available to the tested methods. The keys must match
    # the parameter names of the methods, since the arguments are selected
    # by name from each method signature.
    components = {
        "ts": ts,
        "ts_trend": ts_trend,
        # Fixed: this key was previously mapped to 'ts_residuals', hence
        # every method taking 'ts_season' received the wrong component.
        "ts_season": ts_season,
        "ts_residuals": ts_residuals,
        "ts_detrended": ts_detrended,
        "ts_deseasonalized": ts_deseasonalized,
        "random_state": random_state,
        "score": score,
    }
    initial_len = len(components)

    precomps = (
        MFETSGeneral.precompute_walker,
        MFETSGeneral.precompute_embed_caos_method,
        MFETSGeneral.precompute_period,
        MFETSGeneral.precompute_ts_scaled,
        MFETSFreqDomain.precompute_ps_residuals,
        MFETSGlobalStats.precompute_period,
        MFETSAutocorr.precompute_detrended_acf,
        MFETSAutocorr.precompute_gaussian_model,
        MFETSLocalStats.precompute_ts_scaled,
        MFETSLocalStats.precompute_rolling_window,
        MFETSModelBased.precompute_ts_scaled,
        MFETSModelBased.precompute_period,
        MFETSModelBased.precompute_model_ets,
        MFETSModelBased.precompute_ioe_std_linear_model,
        MFETSRandomize.precompute_ts_scaled,
        MFETSRandomize.precompute_itrand_stats,
        MFETSInfoTheory.precompute_ts_scaled,
        MFETSInfoTheory.precompute_detrended_ami,
    )

    methods = (
        MFETSGeneral.ft_emb_lag,
        MFETSGeneral.ft_stick_angles,
        MFETSGeneral.ft_fs_len,
        MFETSGeneral.ft_fnn_prop,
        MFETSGeneral.ft_embed_in_shell,
        MFETSGeneral.ft_force_potential,
        MFETSGeneral.ft_walker_cross_frac,
        MFETSGeneral.ft_walker_path,
        MFETSGeneral.ft_pred,
        MFETSGeneral.ft_moving_threshold,
        MFETSGeneral.ft_turning_points,
        MFETSGeneral.ft_step_changes,
        MFETSGeneral.ft_turning_points_trend,
        MFETSGeneral.ft_step_changes_trend,
        MFETSGeneral.ft_length,
        MFETSGeneral.ft_frac_cp,
        MFETSGeneral.ft_bin_mean,
        MFETSGeneral.ft_period,
        MFETSGeneral.ft_peak_frac,
        MFETSGeneral.ft_trough_frac,
        MFETSGeneral.ft_diff,
        MFETSGeneral.ft_cao_e1,
        MFETSGeneral.ft_cao_e2,
        MFETSGeneral.ft_emb_dim_cao,
        MFETSAutocorr.ft_gen_autocorr,
        MFETSAutocorr.ft_trev,
        MFETSAutocorr.ft_acf_first_nonsig,
        MFETSAutocorr.ft_acf_first_nonpos,
        MFETSAutocorr.ft_tc3,
        MFETSAutocorr.ft_autocorr_out_dist,
        MFETSAutocorr.ft_first_acf_locmin,
        MFETSAutocorr.ft_gresid_autocorr,
        MFETSAutocorr.ft_autocorr_crit_pt,
        MFETSAutocorr.ft_acf_detrended,
        MFETSAutocorr.ft_pacf,
        MFETSAutocorr.ft_acf,
        MFETSAutocorr.ft_acf_diff,
        MFETSAutocorr.ft_pacf_diff,
        MFETSAutocorr.ft_pacf_detrended,
        MFETSAutocorr.ft_gresid_lbtest,
        MFETSFreqDomain.ft_low_freq_power,
        MFETSFreqDomain.ft_ps_residuals,
        MFETSFreqDomain.ft_ps_freqs,
        MFETSFreqDomain.ft_ps_peaks,
        MFETSFreqDomain.ft_ps_entropy,
        MFETSGlobalStats.ft_dfa,
        MFETSGlobalStats.ft_corr_dim,
        MFETSGlobalStats.ft_ioe_tdelta_mean,
        MFETSGlobalStats.ft_t_mean,
        MFETSGlobalStats.ft_opt_boxcox_coef,
        MFETSGlobalStats.ft_sd_diff,
        MFETSGlobalStats.ft_sd_sdiff,
        MFETSGlobalStats.ft_skewness_diff,
        MFETSGlobalStats.ft_skewness_sdiff,
        MFETSGlobalStats.ft_kurtosis_diff,
        MFETSGlobalStats.ft_kurtosis_sdiff,
        MFETSGlobalStats.ft_exp_max_lyap,
        MFETSGlobalStats.ft_exp_hurst,
        MFETSGlobalStats.ft_skewness_residuals,
        MFETSGlobalStats.ft_kurtosis_residuals,
        MFETSGlobalStats.ft_sd_residuals,
        MFETSGlobalStats.ft_trend_strenght,
        MFETSGlobalStats.ft_season_strenght,
        MFETSGlobalStats.ft_spikiness,
        MFETSLocalStats.ft_moving_lilliefors,
        MFETSLocalStats.ft_moving_approx_ent,
        MFETSLocalStats.ft_moving_avg,
        MFETSLocalStats.ft_moving_avg_shift,
        MFETSLocalStats.ft_moving_var_shift,
        MFETSLocalStats.ft_moving_skewness_shift,
        MFETSLocalStats.ft_moving_kurtosis_shift,
        MFETSLocalStats.ft_moving_gmean_shift,
        MFETSLocalStats.ft_moving_sd_shift,
        MFETSLocalStats.ft_moving_acf_shift,
        MFETSLocalStats.ft_moving_kldiv_shift,
        MFETSLocalStats.ft_lumpiness,
        MFETSLocalStats.ft_stability,
        MFETSLocalStats.ft_moving_var,
        MFETSLocalStats.ft_moving_skewness,
        MFETSLocalStats.ft_moving_kurtosis,
        MFETSLocalStats.ft_moving_gmean,
        MFETSLocalStats.ft_moving_sd,
        MFETSLocalStats.ft_moving_acf,
        MFETSLocalStats.ft_moving_kldiv,
        MFETSLocalStats.ft_local_extrema,
        MFETSLocalStats.ft_local_range,
        MFETSModelBased.ft_avg_cycle_period,
        MFETSModelBased.ft_ioe_std_adj_r_sqr,
        MFETSModelBased.ft_ioe_std_slope,
        MFETSModelBased.ft_gaussian_r_sqr,
        MFETSModelBased.ft_linearity,
        MFETSModelBased.ft_curvature,
        MFETSModelBased.ft_des_level,
        MFETSModelBased.ft_des_slope,
        MFETSModelBased.ft_ets_level,
        MFETSModelBased.ft_ets_slope,
        MFETSModelBased.ft_ets_season,
        MFETSLandmarking.ft_model_linear_seasonal,
        MFETSLandmarking.ft_model_linear_embed,
        MFETSLandmarking.ft_model_exp,
        MFETSLandmarking.ft_model_sine,
        MFETSLandmarking.ft_model_loc_median,
        MFETSLandmarking.ft_model_loc_mean,
        MFETSLandmarking.ft_model_naive_seasonal,
        MFETSLandmarking.ft_model_naive_drift,
        MFETSLandmarking.ft_model_gaussian,
        MFETSLandmarking.ft_model_hwes_ada,
        MFETSLandmarking.ft_model_hwes_adm,
        MFETSLandmarking.ft_model_naive,
        MFETSLandmarking.ft_model_mean,
        MFETSLandmarking.ft_model_mean_acf_first_nonpos,
        MFETSLandmarking.ft_model_ses,
        MFETSLandmarking.ft_model_arima_100_c,
        MFETSLandmarking.ft_model_arima_010_c,
        MFETSLandmarking.ft_model_arima_110_c,
        MFETSLandmarking.ft_model_arima_011_nc,
        MFETSLandmarking.ft_model_arima_011_c,
        MFETSLandmarking.ft_model_arima_021_c,
        MFETSLandmarking.ft_model_arima_112_nc,
        MFETSLandmarking.ft_model_linear,
        MFETSLandmarking.ft_model_linear_acf_first_nonpos,
        MFETSRandomize.ft_resample_first_acf_nonpos,
        MFETSRandomize.ft_resample_first_acf_locmin,
        MFETSRandomize.ft_surr_tc3,
        MFETSRandomize.ft_surr_trev,
        MFETSRandomize.ft_itrand_mean,
        MFETSRandomize.ft_itrand_sd,
        MFETSRandomize.ft_itrand_acf,
        MFETSRandomize.ft_resample_std,
        MFETSStatTests.ft_test_lilliefors,
        MFETSStatTests.ft_test_lb,
        MFETSStatTests.ft_test_earch,
        MFETSStatTests.ft_test_adf,
        MFETSStatTests.ft_test_adf_gls,
        MFETSStatTests.ft_test_kpss,
        MFETSStatTests.ft_test_pp,
        MFETSStatTests.ft_test_dw,
        MFETSStatTests.ft_test_za,
        MFETSInfoTheory.ft_ami_detrended,
        MFETSInfoTheory.ft_ami,
        MFETSInfoTheory.ft_lz_complexity,
        MFETSInfoTheory.ft_sample_entropy,
        MFETSInfoTheory.ft_approx_entropy,
        MFETSInfoTheory.ft_control_entropy,
        MFETSInfoTheory.ft_surprise,
        MFETSInfoTheory.ft_ami_curvature,
        MFETSInfoTheory.ft_ami_first_critpt,
        MFETSInfoTheory.ft_hist_entropy,
        MFETSInfoTheory.ft_hist_ent_out_diff,
    )

    # Collected as (stage, exception, method name) triples, where the stage
    # is "P" for precomputation methods and "M" for feature methods.
    errors = []

    if precomp:
        for i, method in enumerate(precomps, 1):
            print(
                f"Precomputation method {i} of {len(precomps)}: {method.__name__}..."
            )

            # The component names are recomputed at every iteration because
            # previous precomputations may have added new components.
            params = inspect.signature(method).parameters.keys()
            component_names = frozenset(components.keys())
            intersec = component_names.intersection(params)

            args = {
                name: comp
                for name, comp in components.items() if name in intersec
            }
            print(3 * " ", f"Args {len(args)}: ", args.keys())

            try:
                res = method(**args)
                components.update(res)

            except Exception as ex:
                # Deliberately broad: a failing method must not interrupt
                # the remaining checks.
                errors.append(("P", ex, method.__name__))

    # From now on the pool of components is fixed.
    component_names = frozenset(components.keys())

    for i, method in enumerate(methods, 1):
        print(f"method {i} of {len(methods)}: {method.__name__}...")

        sig = inspect.signature(method)
        params = sig.parameters.keys()
        intersec = component_names.intersection(params)

        args = {
            name: comp
            for name, comp in components.items() if name in intersec
        }
        print(3 * " ", f"Args {len(args)}: ", args.keys())

        try:
            res = method(**args)

            # NumPy scalars are accepted where the corresponding builtin
            # type is annotated.
            type_ = type(res)
            type_ = float if type_ is np.float64 else type_
            type_ = int if type_ is np.int64 else type_

            exp_ret_type = sig.return_annotation
            # Annotations holding '__args__' (such as 't.Union[...]') list
            # more than one acceptable return type.
            is_single_type = not hasattr(sig.return_annotation, "__args__")

            if type_ is not exp_ret_type and (is_single_type or type_
                                              not in exp_ret_type.__args__):
                raise TypeError(
                    f"Return ({res}) type {type(res)} does not conform to the return type ({sig.return_annotation})."
                )

        except Exception as ex:
            # Deliberately broad: a failing method must not interrupt the
            # remaining checks.
            errors.append(("M", ex, method.__name__))

    if errors:
        for typ, err, method in errors:
            print(f"-> ({typ})", err, method)

    print("Time-series estimated period:", ts_period)
    print(f"Total of {len(errors)} exceptions raised.")
    print("Chosen id:", data_id, "Random_state:", random_state)
    print(f"Components got (total of {len(components) - initial_len} new):",
          len(components))
0
    def _calc_acf(cls,
                  ts: np.ndarray,
                  nlags: t.Optional[int] = None,
                  unbiased: bool = True,
                  detrend: bool = True,
                  detrended_acfs: t.Optional[np.ndarray] = None,
                  ts_detrended: t.Optional[np.ndarray] = None) -> np.ndarray:
        """Compute the autocorrelation function of a time-series.

        Parameters
        ----------
        ts : :obj:`np.ndarray`
            One-dimensional time-series values.

        nlags : int, optional
            Number of lags to calculate the autocorrelation function. If
            None, ``ts.size // 2`` is used.

        unbiased : bool, optional (default=True)
            If True, the autocorrelation function is corrected for
            statistical bias. Forwarded to `statsmodels.tsa.stattools.acf`.

        detrend : bool, optional (default=True)
            If True and ``ts_detrended`` is not given, the time-series is
            detrended (using Friedman's Super Smoother) before calculating
            the autocorrelation function. If the detrending raises a
            ``ValueError``, the original time-series is used instead.

        detrended_acfs : :obj:`np.ndarray`, optional
            This method's return value. Used to take advantage of
            precomputations: it is returned as is whenever ``nlags`` is
            None or matches its size.

        ts_detrended : :obj:`np.ndarray`, optional
            Detrended time-series. Whenever it is given, the autocorrelation
            function is calculated from it, regardless of the ``detrend``
            value.

        Returns
        -------
        :obj:`np.ndarray`
            Autocorrelation function up to `nlags` lags, without the first
            value returned by `statsmodels` (the lag zero). It is calculated
            from the detrended time-series if one is available (either given
            or successfully computed), and from ``ts`` otherwise.
        """
        if detrended_acfs is not None:
            # Reuse the precomputed values when they fit the request.
            if nlags is None or detrended_acfs.size == nlags:
                return detrended_acfs

        series = ts_detrended

        if series is None and detrend:
            try:
                # Third decomposition component: the detrended values.
                series = _detrend.decompose(ts=ts, ts_period=0)[2]

            except ValueError:
                # Best effort: fall back to the original time-series below.
                pass

        if series is None:
            series = ts

        if nlags is None:
            nlags = ts.size // 2

        acf_values = statsmodels.tsa.stattools.acf(series,
                                                   fft=True,
                                                   unbiased=unbiased,
                                                   nlags=nlags)

        # Discard the first entry (lag zero).
        return acf_values[1:]
Ejemplo n.º 5
0
    def ft_ami_detrended(
            cls,
            ts: np.ndarray,
            num_bins: int = 64,
            lags: t.Optional[t.Union[int, t.Sequence[int]]] = None,
            return_dist: bool = False,
            max_nlags: t.Optional[int] = None,
            ts_detrended: t.Optional[np.ndarray] = None,
            detrended_acfs: t.Optional[np.ndarray] = None,
            detrended_ami: t.Optional[np.ndarray] = None) -> np.ndarray:
        """Automutual information of the detrended time-series.

        The automutual information AMI is defined as:
        $$
            AMI(ts) = H(ts_A) + H(ts_B) - H(ts_A, ts_B)
        $$
        where `ts` is the time-series, $H$ is the Shannon entropy function,
        and $H(A, B)$ is the Shannon entropy of the joint probability of A
        and B. The value for each lag is computed by ``cls._calc_ami``.

        Parameters
        ----------
        ts : :obj:`np.ndarray`
            One-dimensional time-series values.

        num_bins : int, optional (default=64)
            Number of histogram bins used to estimate both the probability
            density of each lagged component and the joint probability
            distribution, all of which are necessary to the automutual
            information computation.

        lags : int or sequence of int, optional
            Lags to calculate the automutual information.
            If int, calculate the automutual information from lag 1 up to
            the given ``lags`` value (inclusive).
            If sequence of integers, calculate the automutual information
            for each of the given lags.
            If None, an appropriate lag is estimated with
            ``_embed.embed_lag`` (``lag="acf"`` strategy, which uses the lag
            corresponding to the first non-positive value of the
            autocorrelation function), and the automutual information is
            calculated from lag 1 up to the estimated lag.

        return_dist : bool, optional (default=False)
            If True, return the automutual information distance for every
            lag instead. It is documented as:
            $$
                DAMI(ts) = 1 - AMI(ts) / H(ts_A, ts_B)
            $$
            The exact expression is the one implemented in
            ``cls._calc_ami``.

        max_nlags : int, optional
            Maximum number of lags of the autocorrelation function inspected
            while estimating the appropriate lag. Used only if ``lags`` is
            None (and, as documented, only if ``detrended_acfs`` is None).

        ts_detrended : :obj:`np.ndarray`, optional
            Detrended time-series. If None, the time-series will be
            detrended using Friedman's Super Smoother.

        detrended_acfs : :obj:`np.ndarray`, optional
            Array of the autocorrelation function (for distinct ordered
            lags) of the detrended time-series. Used only if ``lags`` is
            None. If not given while that condition is met, the
            autocorrelation function is expected to be calculated up to
            ``max_nlags`` during the lag estimation.

        detrended_ami : :obj:`np.ndarray`, optional
            This method's return value. Used to take advantage of
            precomputations: if given, it is returned as is, and every
            other argument is ignored.

        Returns
        -------
        :obj:`np.ndarray`
            If `return_dist` is False, the automutual information of the
            detrended time-series for all given lags. If `return_dist` is
            True, the distance metric version of the automutual information
            for all given lags.

        References
        ----------
        .. [1] Fraser AM, Swinney HL. Independent coordinates for strange
            attractors from mutual information. Phys Rev A Gen Phys.
            1986;33(2):1134-1140. doi:10.1103/physreva.33.1134
        .. [2] B.D. Fulcher and N.S. Jones, "hctsa: A Computational Framework
            for Automated Time-Series Phenotyping Using Massive Feature
            Extraction, Cell Systems 5: 527 (2017).
            DOI: 10.1016/j.cels.2017.10.001
        .. [3] B.D. Fulcher, M.A. Little, N.S. Jones, "Highly comparative
            time-series analysis: the empirical structure of time series and
            their methods", J. Roy. Soc. Interface 10(83) 20130048 (2013).
            DOI: 10.1098/rsif.2013.0048
        .. [4] Thomas M. Cover and Joy A. Thomas. 1991. Elements of
            information theory. Wiley-Interscience, USA.
        """
        # Precomputed result available: nothing else to do.
        if detrended_ami is not None:
            return detrended_ami

        if lags is None:
            lags = _embed.embed_lag(detrended_acfs=detrended_acfs,
                                    max_nlags=max_nlags,
                                    lag="acf",
                                    ts=ts)

        if np.isscalar(lags):
            # A single value stands for every lag from 1 up to that value.
            lags = np.arange(1, int(lags) + 1)  # type: ignore

        if ts_detrended is None:
            # Third decomposition component: the detrended values.
            ts_detrended = _detrend.decompose(ts=ts, ts_period=0)[2]

        lag_values: t.Sequence[int] = np.asarray(lags, dtype=int)
        ami_per_lag = np.zeros(len(lag_values), dtype=float)

        for pos, cur_lag in enumerate(lag_values):
            ami_per_lag[pos] = cls._calc_ami(ts=ts_detrended,
                                             lag=cur_lag,
                                             num_bins=num_bins,
                                             return_dist=return_dist)

        return ami_per_lag