Ejemplo n.º 1
0
    def precompute_gaussian_model(cls,
                                  ts: np.ndarray,
                                  random_state: t.Optional[int] = None,
                                  **kwargs) -> t.Dict[str, t.Any]:
        """Precompute a gaussian process model.

        Items already supplied through ``kwargs`` are reused and are not
        included in the returned dictionary; only the items computed by
        this call are returned.

        Parameters
        ----------
        ts : :obj:`np.ndarray`
            One-dimensional time-series values.

        random_state : int, optional
            Random seed to optimize the gaussian process model, to keep
            the results reproducible.

        kwargs:
            Additional arguments and previous precomputed items. May
            speed up this precomputation. The keys ``ts_scaled``,
            ``gaussian_model`` and ``gaussian_resid`` are inspected; a
            value of None is treated the same as a missing key.

        Returns
        -------
        dict
            The following precomputed items are returned (each one only
            if it was not already given in ``kwargs``):
                * ``gaussian_model`` (:obj:`GaussianProcessRegressor`):
                    Gaussian process fitted model.
                * ``gaussian_resid`` (:obj:`np.ndarray`): Gaussian process
                    model residuals (difference from the original
                    time-series).

            The following item is necessary and, therefore, also precomputed
            if necessary:
                * ``ts_scaled`` (:obj:`np.ndarray`): standardized time-series
                    values (z-score).
        """
        precomp_vals = {}  # type: t.Dict[str, t.Any]

        ts_scaled = kwargs.get("ts_scaled")

        if ts_scaled is None:
            ts_scaled = _utils.standardize_ts(ts=ts)
            precomp_vals["ts_scaled"] = ts_scaled

        # Look the model up in kwargs *before* touching precomp_vals. Using
        # ``kwargs.get("gaussian_model", precomp_vals["gaussian_model"])``
        # evaluates the default eagerly and raises KeyError whenever the
        # caller already supplied the model (nothing was stored locally).
        gaussian_model = kwargs.get("gaussian_model")

        if gaussian_model is None:
            gaussian_model = _utils.fit_gaussian_process(
                ts=ts, ts_scaled=ts_scaled, random_state=random_state)
            precomp_vals["gaussian_model"] = gaussian_model

        if kwargs.get("gaussian_resid") is None:
            # The model is forwarded so the residuals come from the same
            # model object that is (or was) precomputed above.
            precomp_vals["gaussian_resid"] = _utils.fit_gaussian_process(
                ts=ts,
                ts_scaled=ts_scaled,
                gaussian_model=gaussian_model,
                return_residuals=True)

        return precomp_vals
Ejemplo n.º 2
0
    def ft_gaussian_r_sqr(
        cls,
        ts: np.ndarray,
        random_state: t.Optional[int] = None,
        ts_scaled: t.Optional[np.ndarray] = None,
        gaussian_model: t.Optional[
            sklearn.gaussian_process.GaussianProcessRegressor] = None,
    ) -> float:
        """Compute the R^2 score of a gaussian process model.

        The model is scored against the standardized time-series, using
        evenly spaced points in the [0, 1] interval (one per observation)
        as the independent variable.

        Parameters
        ----------
        ts : :obj:`np.ndarray`
            One-dimensional time-series values.

        random_state : int, optional
            Random seed forwarded to the gaussian process fitting routine,
            to keep the results reproducible.

        ts_scaled : :obj:`np.ndarray`, optional
            Standardized time-series values. Used to take advantage of
            precomputations.

        gaussian_model : :obj:`GaussianProcessRegressor`, optional
            A fitted model of a gaussian process. Used to take advantage of
            precomputations.

        Returns
        -------
        float
            R^2 of a gaussian process model.

        References
        ----------
        .. [1] B.D. Fulcher and N.S. Jones, "hctsa: A Computational Framework
            for Automated Time-Series Phenotyping Using Massive Feature
            Extraction, Cell Systems 5: 527 (2017).
            DOI: 10.1016/j.cels.2017.10.001
        .. [2] B.D. Fulcher, M.A. Little, N.S. Jones, "Highly comparative
            time-series analysis: the empirical structure of time series and
            their methods", J. Roy. Soc. Interface 10(83) 20130048 (2013).
            DOI: 10.1098/rsif.2013.0048
        """
        standardized = _utils.standardize_ts(ts=ts, ts_scaled=ts_scaled)

        # The standardized series is deliberately passed as both ``ts`` and
        # ``ts_scaled``, mirroring the established call convention here.
        fitted_model = _utils.fit_gaussian_process(
            ts=standardized,
            ts_scaled=standardized,
            gaussian_model=gaussian_model,
            random_state=random_state)

        # Column vector of time positions: one sample per row, one feature.
        time_grid = np.linspace(0, 1, standardized.size).reshape(-1, 1)

        return fitted_model.score(X=time_grid, y=standardized)
Ejemplo n.º 3
0
    def ft_gresid_lbtest(
            cls,
            ts: np.ndarray,
            nlags: int = 8,
            return_pval: bool = True,
            random_state: t.Optional[int] = None,
            ts_scaled: t.Optional[np.ndarray] = None,
            gaussian_resid: t.Optional[np.ndarray] = None,
            gaussian_model: t.Optional[
                sklearn.gaussian_process.GaussianProcessRegressor] = None,
    ) -> np.ndarray:
        """Run the Ljung-Box test on gaussian process model residuals.

        Parameters
        ----------
        ts : :obj:`np.ndarray`
            One-dimensional time-series values.

        nlags : int, optional (default=8)
            Number of lags evaluated in the Ljung-Box test.

        return_pval : bool, optional (default=True)
            If True, return the p-value of the test instead of the test
            statistic.

        random_state : int, optional
            Random seed to optimize the gaussian process model, to keep
            the results reproducible. Used only if ``gaussian_resid`` is
            None.

        ts_scaled : :obj:`np.ndarray`, optional
            Standardized time-series values. Used to take advantage of
            precomputations. Used only if ``gaussian_resid`` is None.

        gaussian_resid : :obj:`np.ndarray`, optional
            Residuals of a gaussian process. Used to take advantage of
            precomputations. When given, no model fitting is requested.

        gaussian_model : :obj:`GaussianProcessRegressor`, optional
            A fitted model of a gaussian process. Used to take advantage of
            precomputations. Used only if ``gaussian_resid`` is None.

        Returns
        -------
        :obj:`np.ndarray`
            If `return_pval` is False, Ljung-Box test statistic for each lag
            of the gaussian process residuals.
            If `return_pval` is True, p-value associated with the Ljung-Box
            test statistic for each lag of the gaussian process residuals.

        References
        ----------
        .. [1] B.D. Fulcher and N.S. Jones, "hctsa: A Computational Framework
            for Automated Time-Series Phenotyping Using Massive Feature
            Extraction, Cell Systems 5: 527 (2017).
            DOI: 10.1016/j.cels.2017.10.001
        .. [2] B.D. Fulcher, M.A. Little, N.S. Jones, "Highly comparative
            time-series analysis: the empirical structure of time series and
            their methods", J. Roy. Soc. Interface 10(83) 20130048 (2013).
            DOI: 10.1098/rsif.2013.0048
        """
        residuals = gaussian_resid

        if residuals is None:
            # No precomputed residuals: request them from the fitting helper.
            residuals = _utils.fit_gaussian_process(
                ts=ts,
                ts_scaled=ts_scaled,
                random_state=random_state,
                gaussian_model=gaussian_model,
                return_residuals=True)

        return stat_tests.MFETSStatTests.ft_test_lb(
            ts_residuals=residuals,
            max_nlags=nlags,
            return_pval=return_pval)
Ejemplo n.º 4
0
    def ft_gresid_autocorr(
            cls,
            ts: np.ndarray,
            nlags: int = 8,
            unbiased: bool = True,
            random_state: t.Optional[int] = None,
            ts_scaled: t.Optional[np.ndarray] = None,
            gaussian_resid: t.Optional[np.ndarray] = None,
            gaussian_model: t.Optional[
                sklearn.gaussian_process.GaussianProcessRegressor] = None,
    ) -> np.ndarray:
        """Compute the autocorrelation of gaussian process model residuals.

        Parameters
        ----------
        ts : :obj:`np.ndarray`
            One-dimensional time-series values.

        nlags : int, optional (default=8)
            Number of lags evaluated in the autocorrelation function.

        unbiased : bool, optional (default=True)
            If True, the autocorrelation function is corrected for statistical
            bias.

        random_state : int, optional
            Random seed to optimize the gaussian process model, to keep
            the results reproducible. Used only if ``gaussian_resid`` is
            None.

        ts_scaled : :obj:`np.ndarray`, optional
            Standardized time-series values. Used to take advantage of
            precomputations. Used only if ``gaussian_resid`` is None.

        gaussian_resid : :obj:`np.ndarray`, optional
            Residuals of a gaussian process. Used to take advantage of
            precomputations. When given, no model fitting is requested.

        gaussian_model : :obj:`GaussianProcessRegressor`, optional
            A fitted model of a gaussian process. Used to take advantage of
            precomputations. Used only if ``gaussian_resid`` is None.

        Returns
        -------
        :obj:`np.ndarray`
            Autocorrelation function of the gaussian process residuals up
            to ``nlags``.

        References
        ----------
        .. [1] B.D. Fulcher and N.S. Jones, "hctsa: A Computational Framework
            for Automated Time-Series Phenotyping Using Massive Feature
            Extraction, Cell Systems 5: 527 (2017).
            DOI: 10.1016/j.cels.2017.10.001
        .. [2] B.D. Fulcher, M.A. Little, N.S. Jones, "Highly comparative
            time-series analysis: the empirical structure of time series and
            their methods", J. Roy. Soc. Interface 10(83) 20130048 (2013).
            DOI: 10.1098/rsif.2013.0048
        """
        # Reuse precomputed residuals when available; otherwise request them
        # from the fitting helper (only evaluated in the fallback branch).
        residuals = (
            gaussian_resid
            if gaussian_resid is not None
            else _utils.fit_gaussian_process(
                ts=ts,
                ts_scaled=ts_scaled,
                random_state=random_state,
                gaussian_model=gaussian_model,
                return_residuals=True)
        )

        return cls._calc_acf(ts=residuals, nlags=nlags, unbiased=unbiased)