Example #1
0
    def transform(self, X, y=None):
        """Join all columns of a panel into one long univariate column.

        The series found in the columns of each row are concatenated in
        time, so multivariate input collapses to a single column.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, n_features]
            Nested dataframe with time-series in cells. Input that is not a
            pandas DataFrame after validation is passed to the 3d-numpy
            converter.
        y : optional (default=None)
            Not used by this method.

        Returns
        -------
        Xt : pandas DataFrame
            Nested dataframe with the same number of rows as `X` and a
            single column.
        """
        self.check_is_fitted()
        X = check_X(X)

        # Concatenation is done by flattening all columns into one 2d table
        # (the converter depends on the container type) and then nesting
        # that table again as a single column.
        to_tabular = (
            from_nested_to_2d_array
            if isinstance(X, pd.DataFrame)
            else from_3d_numpy_to_2d_array
        )
        return from_2d_array_to_nested(to_tabular(X))
Example #2
0
    def transform(self, X, y=None):
        """Apply the fitted PCA to univariate time series.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, 1]
            Nested dataframe with univariate time-series in cells.
        y : optional (default=None)
            Not used by this method.

        Returns
        -------
        Xt : pandas DataFrame
            Nested dataframe with the same number of rows as `X` and the
            (potentially reduced) PCA-transformed column. Time indices of
            the original column are replaced with 0:(n_components - 1).
        """
        self.check_is_fitted()

        # Validate as univariate, coerce to numpy, then drop axis 1 so the
        # PCA receives a 2d array.
        X_arr = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
        X_tab = X_arr.squeeze(1)

        # Project with the fitted PCA and wrap the scores in a dataframe.
        scores = pd.DataFrame(data=self.pca.transform(X_tab))

        # Convert back into the nested time series format.
        return from_2d_array_to_nested(scores)
Example #3
0
    def inverse_transform(self, X, y=None):
        """Convert a tabular dataframe back into a nested dataframe.

        Parameters
        ----------
        X : pandas DataFrame
            Tabular dataframe with primitives in cells.
        y : array-like, optional (default=None)
            Not used by this method.

        Returns
        -------
        Xt : pandas DataFrame
            Nested dataframe with series in cells, using the stored time
            index and column names.

        Raises
        ------
        NotImplementedError
            If `self._columns` holds more than one column.
        """
        self.check_is_fitted()

        n_columns = len(self._columns)
        if n_columns > 1:
            raise NotImplementedError(
                "`inverse-transform` currently only "
                "handles univariate data, but found: "
                f"{n_columns} columns in `fit`"
            )

        # A tabular pd.DataFrame or np.array is expected here, hence
        # scikit-learn's input validation function is used.
        X_tab = check_array(X)

        nested = from_2d_array_to_nested(X_tab, time_index=self._time_index)
        nested.columns = self._columns
        return nested
Example #4
0
def test_pca_results(n_components):
    """Check that PCATransformer matches a plain PCA fit on the same data."""
    np.random.seed(42)
    X = pd.DataFrame(data=np.random.randn(10, 5))

    # sklearn: reference result computed on the tabular data.
    expected = PCA(n_components=n_components).fit_transform(X)

    # sktime: same data in nested format, run through the transformer.
    nested_in = from_2d_array_to_nested(X)
    transformer = PCATransformer(n_components=n_components)
    actual = from_nested_to_2d_array(transformer.fit_transform(nested_in))

    assert np.allclose(np.asarray(expected), np.asarray(actual))
Example #5
0
def test_output_format_dim(len_series, n_instances, n_components):
    """Check output type and dimensions of PCATransformer."""
    np.random.seed(42)
    tabular = pd.DataFrame(data=np.random.randn(n_instances, len_series))
    X = from_2d_array_to_nested(tabular)

    Xt = PCATransformer(n_components=n_components).fit_transform(X)

    # The output is a dataframe with one row per input row.
    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == X.shape[0]

    # The number of output components is capped at the width of the
    # tabularized input.
    n_out = from_nested_to_2d_array(Xt).shape[1]
    n_in = from_nested_to_2d_array(X).shape[1]
    assert n_out == min(n_components, n_in)
Example #6
0
    def inverse_transform(self, X, y=None):
        """Convert a tabular dataframe back into a nested dataframe.

        Parameters
        ----------
        X : pandas DataFrame
            Tabular dataframe with primitives in cells.
        y : array-like, optional (default=None)
            Not used by this method.

        Returns
        -------
        Xt : pandas DataFrame
            Nested dataframe with series in cells.
        """
        self.check_is_fitted()

        # A tabular pd.DataFrame or np.array is expected here, hence
        # scikit-learn's input validation function is used.
        tabular = check_array(X)
        nested = from_2d_array_to_nested(tabular)
        return nested
Example #7
0
def test_pca_kwargs(kwargs):
    """Smoke-test PCATransformer construction and fitting with `kwargs`."""
    np.random.seed(42)
    tabular = pd.DataFrame(data=np.random.randn(10, 5))
    X = from_2d_array_to_nested(tabular)

    # Passes when construction and fit_transform complete without raising.
    transformer = PCATransformer(n_components=1, **kwargs)
    transformer.fit_transform(X)