Example 1
    def transform(self, X, y=None):
        """Project X on the principal components.

        Parameters
        ----------
        X : array-like, shape=[n_samples, n_features]
            Data, where n_samples is the number of samples
            and n_features is the number of features.
        y : Ignored (Compliance with scikit-learn interface)

        Returns
        -------
        X_new : array-like, shape=[n_samples, n_components]
            Projected data.
        """
        tangent_vecs = self.metric.log(X, base_point=self.base_point_fit)
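        # In the matrix case, symmetric tangent vectors get a compact
        # half-vectorization; other matrices are simply flattened.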
        if self.point_type == 'matrix':
            if gs.all(Matrices.is_symmetric(tangent_vecs)):
                X = SymmetricMatrices.vector_from_symmetric_matrix(
                    tangent_vecs)
            else:
                X = gs.reshape(tangent_vecs, (len(X), -1))
        else:
            X = tangent_vecs

        return super(TangentPCA, self).transform(X)
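A minimal usage sketch for this transform (the Hypersphere setup and constructor arguments are illustrative assumptions, not taken from the snippet; exact signatures vary across geomstats versions):

from geomstats.geometry.hypersphere import Hypersphere
from geomstats.learning.pca import TangentPCA

sphere = Hypersphere(dim=2)
data = sphere.random_uniform(n_samples=100)

# fit estimates the Frechet mean and the principal tangent directions.
tpca = TangentPCA(metric=sphere.metric, n_components=2)
tpca.fit(data)

# transform logs the data at the fitted base point and projects it.
X_new = tpca.transform(data)  # expected shape: (100, 2)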
Example 2
    def transform(self, X, y=None, base_point=None):
        """Lift data to a tangent space.

        Compute the log of each data point and reshape it to a
        1d vector if necessary. By default the logs are taken at the
        mean, but any other base point can be passed. Any machine
        learning algorithm can then be used with the output array.

        Parameters
        ----------
        X : array-like, shape=[n_samples, {dim, [n, n]}]
            Data to transform.
        y : Ignored (Compliance with scikit-learn interface)
        base_point : array-like, shape={dim, [n, n]}, optional (default: mean)
            Point on the manifold; the returned samples will be tangent
            vectors at this base point.

        Returns
        -------
        X_new : array-like, shape=[n_samples, dim]
            Lifted data, as tangent vectors reshaped to 1d arrays.
        """
        # TODO(nguis): put this in a dedicated class
        if base_point is None:
            base_point = self.estimate_

            if self.estimate_ is None:
                raise RuntimeError('fit needs to be called first or a '
                                   'base_point passed.')

        tangent_vecs = self.metric.log(X, base_point=base_point)

        if self.point_type == 'vector':
            return tangent_vecs

        if gs.all(Matrices.is_symmetric(tangent_vecs)):
            X = SymmetricMatrices.vector_from_symmetric_matrix(tangent_vecs)
        elif gs.all(Matrices.is_skew_symmetric(tangent_vecs)):
            X = SkewSymmetricMatrices(
                tangent_vecs.shape[-1]).basis_representation(tangent_vecs)
        else:
            X = gs.reshape(tangent_vecs, (len(X), -1))

        return X
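This transform reads like the preprocessing estimator ToTangentSpace from geomstats.learning.preprocessing; the import path and the 'geometry' keyword below are assumptions, since the snippet does not show the class definition. A sketch of the intended workflow, feeding the lifted array to a standard scikit-learn estimator:

import numpy as np
from geomstats.geometry.hypersphere import Hypersphere
from geomstats.learning.preprocessing import ToTangentSpace  # assumed path
from sklearn.linear_model import LogisticRegression

sphere = Hypersphere(dim=2)
X = sphere.random_uniform(n_samples=50)
y = np.random.randint(0, 2, 50)  # hypothetical binary labels

to_tangent = ToTangentSpace(geometry=sphere.metric)  # assumed keyword
to_tangent.fit(X)                 # estimates the Frechet mean (estimate_)
X_flat = to_tangent.transform(X)  # tangent vectors, one 1d row per sample

clf = LogisticRegression().fit(X_flat, y)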
Example 3
class TestSymmetricMatricesMethods(geomstats.tests.TestCase):
    """Test of SymmetricMatrices methods."""

    def setUp(self):
        """Set up the test."""
        warnings.simplefilter('ignore', category=ImportWarning)

        gs.random.seed(1234)

        self.n = 3
        self.space = SymmetricMatrices(self.n)

    def test_belongs(self):
        """Test of belongs method."""
        sym_n = self.space
        mat_sym = gs.array([[1., 2., 3.],
                            [2., 4., 5.],
                            [3., 5., 6.]])
        mat_not_sym = gs.array([[1., 0., 3.],
                                [2., 4., 5.],
                                [3., 5., 6.]])
        result = sym_n.belongs(mat_sym)
        expected = True
        self.assertAllClose(result, expected)

        result = sym_n.belongs(mat_not_sym)
        expected = False
        self.assertAllClose(result, expected)

    @geomstats.tests.np_and_pytorch_only
    def test_basis(self):
        """Test of belongs method."""
        sym_n = SymmetricMatrices(2)
        mat_sym_1 = gs.array([[1., 0.], [0., 0.]])
        mat_sym_2 = gs.array([[0., 1.], [1., 0.]])
        mat_sym_3 = gs.array([[0., 0.], [0., 1.]])
        expected = gs.stack([mat_sym_1, mat_sym_2, mat_sym_3])
        result = sym_n.basis
        self.assertAllClose(result, expected)

    def test_expm(self):
        """Test of expm method."""
        sym_n = SymmetricMatrices(self.n)
        v = gs.array([[0., 1., 0.],
                      [1., 0., 0.],
                      [0., 0., 1.]])
        result = sym_n.expm(v)
        c = math.cosh(1)
        s = math.sinh(1)
        e = math.exp(1)
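        # v swaps the first two coordinates and fixes the third, so its
        # exponential acts as [[cosh 1, sinh 1], [sinh 1, cosh 1]] on the
        # first 2x2 block and as exp(1) on the last diagonal entry.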
        expected = gs.array([[c, s, 0.],
                             [s, c, 0.],
                             [0., 0., e]])
        self.assertAllClose(result, expected)

    def test_powerm(self):
        """Test of powerm method."""
        sym_n = SymmetricMatrices(self.n)
        expected = gs.array(
            [[[1., 1. / 4., 0.], [1. / 4., 2., 0.], [0., 0., 1.]]])
        expected = gs.cast(expected, gs.float64)
        power = gs.array(1. / 2)
        power = gs.cast(power, gs.float64)
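        # powerm(expected, 1/2) is the symmetric square root B of the
        # SPD matrix `expected`, so B @ B^T = B @ B must recover it.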
        result = sym_n.powerm(expected, power)
        result = gs.matmul(result, gs.transpose(result, (0, 2, 1)))
        self.assertAllClose(result, expected)

    @geomstats.tests.np_and_pytorch_only
    def test_vector_from_symmetric_matrix_and_symmetric_matrix_from_vector(
            self):
        """Test for matrix to vector and vector to matrix conversions."""
        sym_mat_1 = gs.array([[1., 0.6, -3.],
                              [0.6, 7., 0.],
                              [-3., 0., 8.]])
        vector_1 = self.space.vector_from_symmetric_matrix(sym_mat_1)
        result_1 = self.space.symmetric_matrix_from_vector(vector_1)
        expected_1 = sym_mat_1

        self.assertAllClose(result_1, expected_1)

        vector_2 = gs.array([1., 2., 3., 4., 5., 6.])
        sym_mat_2 = self.space.symmetric_matrix_from_vector(vector_2)
        result_2 = self.space.vector_from_symmetric_matrix(sym_mat_2)
        expected_2 = vector_2

        self.assertAllClose(result_2, expected_2)

    @geomstats.tests.np_and_pytorch_only
    def test_vector_and_symmetric_matrix_vectorization(self):
        """Test of vectorization."""
        n_samples = 5
        vector = gs.random.rand(n_samples, 6)
        sym_mat = self.space.symmetric_matrix_from_vector(vector)
        result = self.space.vector_from_symmetric_matrix(sym_mat)
        expected = vector

        self.assertAllClose(result, expected)

        vector = self.space.vector_from_symmetric_matrix(sym_mat)
        result = self.space.symmetric_matrix_from_vector(vector)
        expected = sym_mat

        self.assertAllClose(result, expected)

    def test_symmetric_matrix_from_vector(self):
        """Test of symmetric_matrix_from_vector method."""
        vector_2 = gs.array([1., 2., 3., 4., 5., 6.])
        result = self.space.symmetric_matrix_from_vector(vector_2)
        expected = gs.array([[1., 2., 3.], [2., 4., 5.], [3., 5., 6.]])
        self.assertAllClose(result, expected)
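The half-vectorization used in these tests stacks the upper triangle row by row, so a symmetric n x n matrix maps to a vector of length n(n + 1) / 2 (3 for n = 2 in test_basis, 6 for n = 3 elsewhere). A standalone round-trip sketch, assuming the import path geomstats.geometry.symmetric_matrices:

import geomstats.backend as gs
from geomstats.geometry.symmetric_matrices import SymmetricMatrices

space = SymmetricMatrices(3)

vec = gs.array([1., 2., 3., 4., 5., 6.])
mat = space.symmetric_matrix_from_vector(vec)
# [[1., 2., 3.],
#  [2., 4., 5.],
#  [3., 5., 6.]]

assert gs.allclose(space.vector_from_symmetric_matrix(mat), vec)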
Example 4
    def _fit(self, X, base_point=None):
        """Fit the model by computing full SVD on X.

        Parameters
        ----------
        X : array-like, shape=[n_samples, n_features]
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        base_point : array-like, optional
            Point at which to perform the tangent PCA.
            Defaults to the Frechet mean of X if None.

        Returns
        -------
        U, S, V : array-like
            Full SVD of the centered tangent-space data.
        """
        if base_point is None:
            mean = FrechetMean(metric=self.metric, point_type=self.point_type)
            mean.fit(X)
            base_point = mean.estimate_

        tangent_vecs = self.metric.log(X, base_point=base_point)

        if self.point_type == 'matrix':
            if gs.all(Matrices.is_symmetric(tangent_vecs)):
                X = SymmetricMatrices.vector_from_symmetric_matrix(
                    tangent_vecs)
            else:
                X = gs.reshape(tangent_vecs, (len(X), -1))
        else:
            X = tangent_vecs

        X = check_array(X,
                        dtype=[gs.float64, gs.float32],
                        ensure_2d=True,
                        copy=self.copy)

        if self.n_components is None:
            n_components = min(X.shape)
        else:
            n_components = self.n_components
        n_samples, n_features = X.shape

        if n_components == 'mle':
            if n_samples < n_features:
                raise ValueError("n_components='mle' is only supported "
                                 "if n_samples >= n_features")
        elif not 0 <= n_components <= min(n_samples, n_features):
            raise ValueError("n_components=%r must be between 0 and "
                             "min(n_samples, n_features)=%r with "
                             "svd_solver='full'" %
                             (n_components, min(n_samples, n_features)))
        elif n_components >= 1:
            if not isinstance(n_components, numbers.Integral):
                raise ValueError("n_components=%r must be of type int "
                                 "when greater than or equal to 1, "
                                 "was of type=%r" %
                                 (n_components, type(n_components)))

        # Center data - the mean should be 0 if base_point is the Frechet mean
        self.mean_ = gs.mean(X, axis=0)
        X -= self.mean_

        U, S, V = linalg.svd(X, full_matrices=False)
        # flip eigenvectors' sign to enforce deterministic output
        U, V = svd_flip(U, V)

        components_ = V

        # Get variance explained by singular values
        explained_variance_ = (S**2) / (n_samples - 1)
        total_var = explained_variance_.sum()
        explained_variance_ratio_ = explained_variance_ / total_var
        singular_values_ = S.copy()  # Store the singular values.

        # Postprocess the number of components required
        if n_components == 'mle':
            n_components = \
                _infer_dimension_(explained_variance_, n_samples, n_features)
        elif 0 < n_components < 1.0:
            # number of components for which the cumulated explained
            # variance percentage is superior to the desired threshold
            ratio_cumsum = stable_cumsum(explained_variance_ratio_)
            n_components = gs.searchsorted(ratio_cumsum, n_components) + 1

        # Compute noise covariance using the Probabilistic PCA model.
        # The sigma2 maximum likelihood (cf. Bishop, PRML, eq. 12.46)
        if n_components < min(n_features, n_samples):
            self.noise_variance_ = explained_variance_[n_components:].mean()
        else:
            self.noise_variance_ = 0.

        self.base_point_fit = base_point
        self.n_samples_, self.n_features_ = n_samples, n_features
        self.components_ = components_[:n_components]
        self.n_components_ = int(n_components)
        self.explained_variance_ = explained_variance_[:n_components]
        self.explained_variance_ratio_ = \
            explained_variance_ratio_[:n_components]
        self.singular_values_ = singular_values_[:n_components]

        return U, S, V
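The variance bookkeeping above mirrors scikit-learn's PCA: for centered data X with SVD X = U S V^T, the eigenvalues of the sample covariance are S_i^2 / (n_samples - 1). A quick numpy check of that identity, independent of geomstats:

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 5))
X -= X.mean(axis=0)

_, S, _ = np.linalg.svd(X, full_matrices=False)
explained_variance = S ** 2 / (X.shape[0] - 1)

# np.cov uses ddof=1, matching the (n_samples - 1) denominator above.
cov_eigvals = np.linalg.eigvalsh(np.cov(X, rowvar=False))[::-1]
assert np.allclose(explained_variance, cov_eigvals)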