def get_ts_period(
    ts: np.ndarray,
    ts_detrended: t.Optional[np.ndarray] = None,
    ts_period: t.Optional[int] = None,
) -> int:
    """Return the time-series periodicity, if any.

    The period is estimated as the lag (starting from 1) at which the
    autocorrelation function of the detrended time-series reaches its
    maximal absolute value. The autocorrelation is computed with the
    fast-fourier transform method, for lags up to
    ``floor(ts_detrended.size / 2)``.

    Parameters
    ----------
    ts : :obj:`np.ndarray`
        One-dimensional time-series values.

    ts_detrended : :obj:`np.ndarray`, optional
        Detrended time-series. If None, the third component returned by
        ``_detrend.decompose(ts=ts, ts_period=0)`` is used instead.

    ts_period : int, optional
        User-provided period. If given, no estimation is done: the value
        is cast to ``int`` and clipped from below at 1.

    Returns
    -------
    int
        Time-series period, always a builtin ``int`` greater than or equal
        to 1. Time-series with at most one observation have period 1.
    """
    if ts_period is not None:
        return max(int(ts_period), 1)

    if ts.size <= 1:
        return 1

    if ts_detrended is None:
        ts_detrended = _detrend.decompose(ts=ts, ts_period=0)[2]

    # Lag 0 (always 1.0) is dropped, so index 0 corresponds to lag 1.
    autocorr = statsmodels.tsa.stattools.acf(
        ts_detrended,
        nlags=ts_detrended.size // 2,
        fft=True,
        unbiased=True,
    )[1:]

    # np.argmax returns a NumPy integer; cast so the return value honours
    # the annotated builtin `int` type.
    return int(np.argmax(np.abs(autocorr))) + 1
def _calc_pacf(
    cls,
    ts: np.ndarray,
    nlags: t.Optional[int] = None,
    method: str = "ols-unbiased",
    detrend: bool = True,
    ts_detrended: t.Optional[np.ndarray] = None,
) -> np.ndarray:
    """Compute the partial autocorrelation function (lag 0 excluded).

    Parameters
    ----------
    ts : :obj:`np.ndarray`
        One-dimensional time-series values.

    nlags : int, optional
        Number of lags of the partial autocorrelation function. If None,
        ``1 + ts.size // 10`` is used.

    method : str, optional (default="ols-unbiased")
        Estimation method, forwarded as-is to
        `statsmodels.tsa.stattools.pacf`. Check its documentation for the
        complete list of available methods.

    detrend : bool, optional (default=True)
        If True and ``ts_detrended`` is not given, the time-series is
        detrended with ``_detrend.decompose`` before the computation. If
        the detrending raises a ``ValueError``, the original time-series
        is used as a fallback.

    ts_detrended : :obj:`np.ndarray`, optional
        Detrended time-series. When given, it is always the series the
        partial autocorrelation is computed on, regardless of ``detrend``.

    Returns
    -------
    :obj:`np.ndarray`
        Partial autocorrelation function for lags 1 up to ``nlags`` of
        the detrended time-series (if available), or of the original
        time-series otherwise.
    """
    if nlags is None:
        nlags = 1 + ts.size // 10

    series = ts_detrended

    if series is None and detrend:
        try:
            series = _detrend.decompose(ts=ts, ts_period=0)[2]

        except ValueError:
            # Best-effort detrending: fall back to the raw series below.
            series = None

    if series is None:
        series = ts

    partial_autocorrs = statsmodels.tsa.stattools.pacf(
        series, nlags=nlags, method=method
    )

    # Index 0 holds lag 0, which is discarded.
    return partial_autocorrs[1:]
def _test() -> None:
    """Smoke-test every precomputation and meta-feature extraction method.

    Command-line driven: expects ``<data_id> <random_seed> <precomp 0/1>``
    in ``sys.argv``. Exits with status 1 if arguments are missing, and with
    status 2 if ``data_id`` is not in ``[0, 20)``.

    The chosen time-series is loaded and decomposed, and the resulting
    components are offered as keyword arguments to every listed method:
    each method receives only the components whose names match its
    parameters. Exceptions raised by the methods are collected and
    reported at the end rather than aborting the run. Each extraction
    method's return type is also checked against its return annotation.
    """
    if len(sys.argv) <= 3:
        print("usage:", sys.argv[0], "<data_id> <random_seed> <precomp 0/1>")
        sys.exit(1)

    data_id = int(sys.argv[1])
    random_state = int(sys.argv[2])
    precomp = bool(int(sys.argv[3]))

    if not 0 <= data_id < 20:
        print(f"Require 0 <= data_id < 20 (got {data_id}).")
        sys.exit(2)

    print("Chosen id:", data_id)
    print("Random_state:", random_state)

    ts = load_data(data_id)

    ts_period = _period.get_ts_period(ts)
    ts_trend, ts_season, ts_residuals = _detrend.decompose(
        ts, ts_period=ts_period, plot=True
    )
    ts_detrended = ts - ts_trend
    ts_deseasonalized = ts - ts_season
    score = sklearn.metrics.mean_squared_error

    components = {
        "ts": ts,
        "ts_trend": ts_trend,
        # Fixed: this key previously (wrongly) pointed to `ts_residuals`.
        "ts_season": ts_season,
        "ts_residuals": ts_residuals,
        "ts_detrended": ts_detrended,
        "ts_deseasonalized": ts_deseasonalized,
        "random_state": random_state,
        "score": score,
    }

    # Used at the end to report how many components the precomputations added.
    initial_len = len(components)

    precomps = (
        MFETSGeneral.precompute_walker,
        MFETSGeneral.precompute_embed_caos_method,
        MFETSGeneral.precompute_period,
        MFETSGeneral.precompute_ts_scaled,
        MFETSFreqDomain.precompute_ps_residuals,
        MFETSGlobalStats.precompute_period,
        MFETSAutocorr.precompute_detrended_acf,
        MFETSAutocorr.precompute_gaussian_model,
        MFETSLocalStats.precompute_ts_scaled,
        MFETSLocalStats.precompute_rolling_window,
        MFETSModelBased.precompute_ts_scaled,
        MFETSModelBased.precompute_period,
        MFETSModelBased.precompute_model_ets,
        MFETSModelBased.precompute_ioe_std_linear_model,
        MFETSRandomize.precompute_ts_scaled,
        MFETSRandomize.precompute_itrand_stats,
        MFETSInfoTheory.precompute_ts_scaled,
        MFETSInfoTheory.precompute_detrended_ami,
    )

    methods = (
        MFETSGeneral.ft_emb_lag,
        MFETSGeneral.ft_stick_angles,
        MFETSGeneral.ft_fs_len,
        MFETSGeneral.ft_fnn_prop,
        MFETSGeneral.ft_embed_in_shell,
        MFETSGeneral.ft_force_potential,
        MFETSGeneral.ft_walker_cross_frac,
        MFETSGeneral.ft_walker_path,
        MFETSGeneral.ft_pred,
        MFETSGeneral.ft_moving_threshold,
        MFETSGeneral.ft_turning_points,
        MFETSGeneral.ft_step_changes,
        MFETSGeneral.ft_turning_points_trend,
        MFETSGeneral.ft_step_changes_trend,
        MFETSGeneral.ft_length,
        MFETSGeneral.ft_frac_cp,
        MFETSGeneral.ft_bin_mean,
        MFETSGeneral.ft_period,
        MFETSGeneral.ft_peak_frac,
        MFETSGeneral.ft_trough_frac,
        MFETSGeneral.ft_diff,
        MFETSGeneral.ft_cao_e1,
        MFETSGeneral.ft_cao_e2,
        MFETSGeneral.ft_emb_dim_cao,
        MFETSAutocorr.ft_gen_autocorr,
        MFETSAutocorr.ft_trev,
        MFETSAutocorr.ft_acf_first_nonsig,
        MFETSAutocorr.ft_acf_first_nonpos,
        MFETSAutocorr.ft_tc3,
        MFETSAutocorr.ft_autocorr_out_dist,
        MFETSAutocorr.ft_first_acf_locmin,
        MFETSAutocorr.ft_gresid_autocorr,
        MFETSAutocorr.ft_autocorr_crit_pt,
        MFETSAutocorr.ft_acf_detrended,
        MFETSAutocorr.ft_pacf,
        MFETSAutocorr.ft_acf,
        MFETSAutocorr.ft_acf_diff,
        MFETSAutocorr.ft_pacf_diff,
        MFETSAutocorr.ft_pacf_detrended,
        MFETSAutocorr.ft_gresid_lbtest,
        MFETSFreqDomain.ft_low_freq_power,
        MFETSFreqDomain.ft_ps_residuals,
        MFETSFreqDomain.ft_ps_freqs,
        MFETSFreqDomain.ft_ps_peaks,
        MFETSFreqDomain.ft_ps_entropy,
        MFETSGlobalStats.ft_dfa,
        MFETSGlobalStats.ft_corr_dim,
        MFETSGlobalStats.ft_ioe_tdelta_mean,
        MFETSGlobalStats.ft_t_mean,
        MFETSGlobalStats.ft_opt_boxcox_coef,
        MFETSGlobalStats.ft_sd_diff,
        MFETSGlobalStats.ft_sd_sdiff,
        MFETSGlobalStats.ft_skewness_diff,
        MFETSGlobalStats.ft_skewness_sdiff,
        MFETSGlobalStats.ft_kurtosis_diff,
        MFETSGlobalStats.ft_kurtosis_sdiff,
        MFETSGlobalStats.ft_exp_max_lyap,
        MFETSGlobalStats.ft_exp_hurst,
        MFETSGlobalStats.ft_skewness_residuals,
        MFETSGlobalStats.ft_kurtosis_residuals,
        MFETSGlobalStats.ft_sd_residuals,
        MFETSGlobalStats.ft_trend_strenght,
        MFETSGlobalStats.ft_season_strenght,
        MFETSGlobalStats.ft_spikiness,
        MFETSLocalStats.ft_moving_lilliefors,
        MFETSLocalStats.ft_moving_approx_ent,
        MFETSLocalStats.ft_moving_avg,
        MFETSLocalStats.ft_moving_avg_shift,
        MFETSLocalStats.ft_moving_var_shift,
        MFETSLocalStats.ft_moving_skewness_shift,
        MFETSLocalStats.ft_moving_kurtosis_shift,
        MFETSLocalStats.ft_moving_gmean_shift,
        MFETSLocalStats.ft_moving_sd_shift,
        MFETSLocalStats.ft_moving_acf_shift,
        MFETSLocalStats.ft_moving_kldiv_shift,
        MFETSLocalStats.ft_lumpiness,
        MFETSLocalStats.ft_stability,
        MFETSLocalStats.ft_moving_var,
        MFETSLocalStats.ft_moving_skewness,
        MFETSLocalStats.ft_moving_kurtosis,
        MFETSLocalStats.ft_moving_gmean,
        MFETSLocalStats.ft_moving_sd,
        MFETSLocalStats.ft_moving_acf,
        MFETSLocalStats.ft_moving_kldiv,
        MFETSLocalStats.ft_local_extrema,
        MFETSLocalStats.ft_local_range,
        MFETSModelBased.ft_avg_cycle_period,
        MFETSModelBased.ft_ioe_std_adj_r_sqr,
        MFETSModelBased.ft_ioe_std_slope,
        MFETSModelBased.ft_gaussian_r_sqr,
        MFETSModelBased.ft_linearity,
        MFETSModelBased.ft_curvature,
        MFETSModelBased.ft_des_level,
        MFETSModelBased.ft_des_slope,
        MFETSModelBased.ft_ets_level,
        MFETSModelBased.ft_ets_slope,
        MFETSModelBased.ft_ets_season,
        MFETSLandmarking.ft_model_linear_seasonal,
        MFETSLandmarking.ft_model_linear_embed,
        MFETSLandmarking.ft_model_exp,
        MFETSLandmarking.ft_model_sine,
        MFETSLandmarking.ft_model_loc_median,
        MFETSLandmarking.ft_model_loc_mean,
        MFETSLandmarking.ft_model_naive_seasonal,
        MFETSLandmarking.ft_model_naive_drift,
        MFETSLandmarking.ft_model_gaussian,
        MFETSLandmarking.ft_model_hwes_ada,
        MFETSLandmarking.ft_model_hwes_adm,
        MFETSLandmarking.ft_model_naive,
        MFETSLandmarking.ft_model_mean,
        MFETSLandmarking.ft_model_mean_acf_first_nonpos,
        MFETSLandmarking.ft_model_ses,
        MFETSLandmarking.ft_model_arima_100_c,
        MFETSLandmarking.ft_model_arima_010_c,
        MFETSLandmarking.ft_model_arima_110_c,
        MFETSLandmarking.ft_model_arima_011_nc,
        MFETSLandmarking.ft_model_arima_011_c,
        MFETSLandmarking.ft_model_arima_021_c,
        MFETSLandmarking.ft_model_arima_112_nc,
        MFETSLandmarking.ft_model_linear,
        MFETSLandmarking.ft_model_linear_acf_first_nonpos,
        MFETSRandomize.ft_resample_first_acf_nonpos,
        MFETSRandomize.ft_resample_first_acf_locmin,
        MFETSRandomize.ft_surr_tc3,
        MFETSRandomize.ft_surr_trev,
        MFETSRandomize.ft_itrand_mean,
        MFETSRandomize.ft_itrand_sd,
        MFETSRandomize.ft_itrand_acf,
        MFETSRandomize.ft_resample_std,
        MFETSStatTests.ft_test_lilliefors,
        MFETSStatTests.ft_test_lb,
        MFETSStatTests.ft_test_earch,
        MFETSStatTests.ft_test_adf,
        MFETSStatTests.ft_test_adf_gls,
        MFETSStatTests.ft_test_kpss,
        MFETSStatTests.ft_test_pp,
        MFETSStatTests.ft_test_dw,
        MFETSStatTests.ft_test_za,
        MFETSInfoTheory.ft_ami_detrended,
        MFETSInfoTheory.ft_ami,
        MFETSInfoTheory.ft_lz_complexity,
        MFETSInfoTheory.ft_sample_entropy,
        MFETSInfoTheory.ft_approx_entropy,
        MFETSInfoTheory.ft_control_entropy,
        MFETSInfoTheory.ft_surprise,
        MFETSInfoTheory.ft_ami_curvature,
        MFETSInfoTheory.ft_ami_first_critpt,
        MFETSInfoTheory.ft_hist_entropy,
        MFETSInfoTheory.ft_hist_ent_out_diff,
    )

    # Entries are (stage, exception, method name); stage is "P" for
    # precomputation methods and "M" for extraction methods.
    errors = []

    if precomp:
        for i, method in enumerate(precomps, 1):
            print(
                f"Precomputation method {i} of {len(precomps)}: {method.__name__}..."
            )
            params = inspect.signature(method).parameters.keys()
            # Recomputed on every iteration because previous precomputations
            # may have added new components.
            component_names = frozenset(components.keys())
            intersec = component_names.intersection(params)

            args = {
                name: comp
                for name, comp in components.items()
                if name in intersec
            }

            print(3 * " ", f"Args {len(args)}: ", args.keys())

            try:
                res = method(**args)
                components.update(res)

            except Exception as ex:
                # Deliberately broad: the run must continue so every
                # failure is reported together at the end.
                errors.append(("P", ex, method.__name__))

    component_names = frozenset(components.keys())

    for i, method in enumerate(methods, 1):
        print(f"method {i} of {len(methods)}: {method.__name__}...")
        sig = inspect.signature(method)
        params = sig.parameters.keys()
        intersec = component_names.intersection(params)

        args = {
            name: comp for name, comp in components.items() if name in intersec
        }

        print(3 * " ", f"Args {len(args)}: ", args.keys())

        try:
            res = method(**args)

            # NumPy scalar types are treated as their builtin counterparts
            # for the purpose of the return annotation check.
            type_ = type(res)
            type_ = float if type_ is np.float64 else type_
            type_ = int if type_ is np.int64 else type_

            exp_ret_type = sig.return_annotation
            # Generic annotations (e.g. unions) expose their alternatives
            # in `__args__`; plain types do not have this attribute.
            is_single_type = not hasattr(sig.return_annotation, "__args__")

            if type_ is not exp_ret_type and (
                is_single_type or type_ not in exp_ret_type.__args__
            ):
                raise TypeError(
                    f"Return ({res}) type {type(res)} does not conform to the return type ({sig.return_annotation})."
                )

        except Exception as ex:
            # Deliberately broad, for the same reason as above.
            errors.append(("M", ex, method.__name__))

    if errors:
        for typ, err, method in errors:
            print(f"-> ({typ})", err, method)

    print("Time-series estimated period:", ts_period)
    print(f"Total of {len(errors)} exceptions raised.")
    print("Chosen id:", data_id, "Random_state:", random_state)
    print(
        f"Components got (total of {len(components) - initial_len} new):",
        len(components),
    )
def _calc_acf(
    cls,
    ts: np.ndarray,
    nlags: t.Optional[int] = None,
    unbiased: bool = True,
    detrend: bool = True,
    detrended_acfs: t.Optional[np.ndarray] = None,
    ts_detrended: t.Optional[np.ndarray] = None,
) -> np.ndarray:
    """Compute the autocorrelation function (lag 0 excluded).

    Parameters
    ----------
    ts : :obj:`np.ndarray`
        One-dimensional time-series values.

    nlags : int, optional
        Number of lags of the autocorrelation function. If None,
        ``ts.size // 2`` is used.

    unbiased : bool, optional (default=True)
        If True, the autocorrelation function is corrected for statistical
        bias. Forwarded as-is to `statsmodels.tsa.stattools.acf`.

    detrend : bool, optional (default=True)
        If True and ``ts_detrended`` is not given, the time-series is
        detrended with ``_detrend.decompose`` before the computation. If
        the detrending raises a ``ValueError``, the original time-series
        is used as a fallback.

    detrended_acfs : :obj:`np.ndarray`, optional
        This method's return value. Used to take advantage of
        precomputations: it is returned untouched if ``nlags`` is None or
        matches its size.

    ts_detrended : :obj:`np.ndarray`, optional
        Detrended time-series. When given, it is always the series the
        autocorrelation is computed on, regardless of ``detrend``.

    Returns
    -------
    :obj:`np.ndarray`
        Autocorrelation function for lags 1 up to ``nlags`` of the
        detrended time-series (if available), or of the original
        time-series otherwise.
    """
    if detrended_acfs is not None:
        # Reuse the precomputed values only when they are compatible with
        # the requested number of lags.
        if nlags is None or detrended_acfs.size == nlags:
            return detrended_acfs

    series = ts_detrended

    if series is None and detrend:
        try:
            series = _detrend.decompose(ts=ts, ts_period=0)[2]

        except ValueError:
            # Best-effort detrending: fall back to the raw series below.
            series = None

    if series is None:
        series = ts

    max_lag = ts.size // 2 if nlags is None else nlags

    autocorrs = statsmodels.tsa.stattools.acf(
        series, nlags=max_lag, unbiased=unbiased, fft=True
    )

    # Index 0 holds lag 0, which is discarded.
    return autocorrs[1:]
def ft_ami_detrended(
    cls,
    ts: np.ndarray,
    num_bins: int = 64,
    lags: t.Optional[t.Union[int, t.Sequence[int]]] = None,
    return_dist: bool = False,
    max_nlags: t.Optional[int] = None,
    ts_detrended: t.Optional[np.ndarray] = None,
    detrended_acfs: t.Optional[np.ndarray] = None,
    detrended_ami: t.Optional[np.ndarray] = None,
) -> np.ndarray:
    """Automutual information of the detrended time-series.

    The automutual information AMI is defined as:
    $$
        AMI(ts) = H(ts_A) + H(ts_B) - H(ts_A, ts_B)
    $$
    where `ts` is the time-series, $H$ is the Shannon entropy function,
    and $H(A, B)$ is the Shannon entropy of the joint probability of A
    and B. The per-lag computation is delegated to ``cls._calc_ami``.

    Parameters
    ----------
    ts : :obj:`np.ndarray`
        One-dimensional time-series values.

    num_bins : int, optional (default=64)
        Number of histogram bins used to estimate both the probability
        density of each lagged component and the joint probability
        distribution, all necessary for the automutual information.

    lags : int or sequence of int, optional
        Lags to calculate the automutual information. If int, calculate
        it from lag 1 up to the given ``lags`` value (inclusive). If
        sequence of integers, calculate it for each of the given lags. If
        None, a single lag is estimated with ``_embed.embed_lag`` (using
        its "acf" strategy), and the automutual information is calculated
        from lag 1 up to the estimated lag.

    return_dist : bool, optional (default=False)
        If True, return the automutual information distance for every
        lag, defined as:
        $$
            DAMI(ts) = 1 - AMI(ts) / H(ts_A, ts_B)
        $$

    max_nlags : int, optional
        Maximum number of lags considered while estimating the lag from
        the autocorrelation function. Used only if ``lags`` is None.

    ts_detrended : :obj:`np.ndarray`, optional
        Detrended time-series. If None, the time-series is detrended
        within this method with ``_detrend.decompose``.

    detrended_acfs : :obj:`np.ndarray`, optional
        Array of the autocorrelation function (for distinct ordered lags)
        of the detrended time-series. Used only if ``lags`` is None, in
        which case it is forwarded to the lag estimation.

    detrended_ami : :obj:`np.ndarray`, optional
        This method's return value. Used to take advantage of
        precomputations: if given, it is returned untouched.

    Returns
    -------
    :obj:`np.ndarray`
        If `return_dist` is False, the automutual information of the
        detrended time-series for all given lags. If `return_dist` is
        True, the distance metric version of the automutual information
        for all given lags.

    References
    ----------
    .. [1] Fraser AM, Swinney HL. Independent coordinates for strange
        attractors from mutual information. Phys Rev A Gen Phys.
        1986;33(2):1134-1140. doi:10.1103/physreva.33.1134
    .. [2] B.D. Fulcher and N.S. Jones, "hctsa: A Computational Framework
        for Automated Time-Series Phenotyping Using Massive Feature
        Extraction, Cell Systems 5: 527 (2017).
        DOI: 10.1016/j.cels.2017.10.001
    .. [3] B.D. Fulcher, M.A. Little, N.S. Jones, "Highly comparative
        time-series analysis: the empirical structure of time series and
        their methods", J. Roy. Soc. Interface 10(83) 20130048 (2013).
        DOI: 10.1098/rsif.2013.0048
    .. [4] Thomas M. Cover and Joy A. Thomas. 1991. Elements of
        information theory. Wiley-Interscience, USA.
    """
    if detrended_ami is not None:
        return detrended_ami

    if lags is None:
        lags = _embed.embed_lag(
            ts=ts,
            lag="acf",
            max_nlags=max_nlags,
            detrended_acfs=detrended_acfs,
        )

    if np.isscalar(lags):
        # A single integer means "every lag from 1 up to that value".
        lags = np.arange(1, 1 + int(lags))  # type: ignore

    if ts_detrended is None:
        ts_detrended = _detrend.decompose(ts=ts, ts_period=0)[2]

    lag_values: t.Sequence[int] = np.asarray(lags, dtype=int)
    ami_per_lag = np.zeros(len(lag_values), dtype=float)

    for pos, cur_lag in enumerate(lag_values):
        ami_per_lag[pos] = cls._calc_ami(
            ts=ts_detrended,
            lag=cur_lag,
            num_bins=num_bins,
            return_dist=return_dist,
        )

    return ami_per_lag