def transform(self, X, y=None): """Project X on the principal components. Parameters ---------- X : array-like, shape=[n_samples, n_features] Data, where n_samples is the number of samples and n_features is the number of features. y : Ignored (Compliance with scikit-learn interface) Returns ------- X_new : array-like, shape=[n_samples, n_components] """ tangent_vecs = self.metric.log(X, base_point=self.base_point_fit) if self.point_type == 'matrix': if Matrices.is_symmetric(tangent_vecs).all(): X = SymmetricMatrices.vector_from_symmetric_matrix( tangent_vecs) else: X = gs.reshape(tangent_vecs, (len(X), -1)) else: X = tangent_vecs return super(TangentPCA, self).transform(X)
def transform(self, X, y=None, base_point=None): """Lift data to a tangent space. Compute the logs of all data point and reshapes them to 1d vectors if necessary. By default the logs are taken at the mean but any other base point can be passed. Any machine learning algorithm can then be used with the output array. Parameters ---------- X : array-like, shape=[n_samples, {dim, [n, n]}] Data to transform. y : Ignored (Compliance with scikit-learn interface) base_point : array-like, shape={dim, [n,n]}, optional (mean) Point on the manifold, the returned samples will be tangent vectors at the base point. Returns ------- X_new : array-like, shape=[n_samples, dim] """ # TODO(nguis): put this in a dedicated class if base_point is None: base_point = self.estimate_ if self.estimate_ is None: raise RuntimeError('fit needs to be called first or a ' 'base_point passed.') tangent_vecs = self.metric.log(X, base_point=base_point) if self.point_type == 'vector': return tangent_vecs if gs.all(Matrices.is_symmetric(tangent_vecs)): X = SymmetricMatrices.vector_from_symmetric_matrix(tangent_vecs) elif gs.all(Matrices.is_skew_symmetric(tangent_vecs)): X = SkewSymmetricMatrices( tangent_vecs.shape[-1]).basis_representation(tangent_vecs) else: X = gs.reshape(tangent_vecs, (len(X), -1)) return X
import math
import warnings

import geomstats.backend as gs
import geomstats.tests
from geomstats.geometry.symmetric_matrices import SymmetricMatrices


class TestSymmetricMatricesMethods(geomstats.tests.TestCase):
    """Test of SymmetricMatrices methods."""

    def setUp(self):
        """Set up the test."""
        warnings.simplefilter('ignore', category=ImportWarning)

        gs.random.seed(1234)

        self.n = 3
        self.space = SymmetricMatrices(self.n)

    def test_belongs(self):
        """Test of belongs method."""
        sym_n = self.space
        mat_sym = gs.array([[1., 2., 3.],
                            [2., 4., 5.],
                            [3., 5., 6.]])
        mat_not_sym = gs.array([[1., 0., 3.],
                                [2., 4., 5.],
                                [3., 5., 6.]])
        result = sym_n.belongs(mat_sym)
        expected = True
        self.assertAllClose(result, expected)

        result = sym_n.belongs(mat_not_sym)
        expected = False
        self.assertAllClose(result, expected)

    @geomstats.tests.np_and_pytorch_only
    def test_basis(self):
        """Test of basis method."""
        sym_n = SymmetricMatrices(2)
        mat_sym_1 = gs.array([[1., 0.], [0., 0.]])
        mat_sym_2 = gs.array([[0., 1.], [1., 0.]])
        mat_sym_3 = gs.array([[0., 0.], [0., 1.]])
        expected = gs.stack([mat_sym_1, mat_sym_2, mat_sym_3])
        result = sym_n.basis
        self.assertAllClose(result, expected)

    def test_expm(self):
        """Test of expm method."""
        sym_n = SymmetricMatrices(self.n)
        v = gs.array([[0., 1., 0.],
                      [1., 0., 0.],
                      [0., 0., 1.]])
        result = sym_n.expm(v)
        c = math.cosh(1)
        s = math.sinh(1)
        e = math.exp(1)
        expected = gs.array([[c, s, 0.],
                             [s, c, 0.],
                             [0., 0., e]])
        self.assertAllClose(result, expected)

    def test_powerm(self):
        """Test of powerm method."""
        sym_n = SymmetricMatrices(self.n)
        expected = gs.array(
            [[[1., 1. / 4., 0.],
              [1. / 4., 2., 0.],
              [0., 0., 1.]]])
        expected = gs.cast(expected, gs.float64)
        power = gs.array(1. / 2.)
        power = gs.cast(power, gs.float64)
        result = sym_n.powerm(expected, power)
        result = gs.matmul(result, gs.transpose(result, (0, 2, 1)))
        self.assertAllClose(result, expected)

    @geomstats.tests.np_and_pytorch_only
    def test_vector_from_symmetric_matrix_and_symmetric_matrix_from_vector(
            self):
        """Test for matrix to vector and vector to matrix conversions."""
        sym_mat_1 = gs.array([[1., 0.6, -3.],
                              [0.6, 7., 0.],
                              [-3., 0., 8.]])
        vector_1 = self.space.vector_from_symmetric_matrix(sym_mat_1)
        result_1 = self.space.symmetric_matrix_from_vector(vector_1)
        expected_1 = sym_mat_1

        self.assertTrue(gs.allclose(result_1, expected_1))

        vector_2 = gs.array([1, 2, 3, 4, 5, 6])
        sym_mat_2 = self.space.symmetric_matrix_from_vector(vector_2)
        result_2 = self.space.vector_from_symmetric_matrix(sym_mat_2)
        expected_2 = vector_2

        self.assertTrue(gs.allclose(result_2, expected_2))

    @geomstats.tests.np_and_pytorch_only
    def test_vector_and_symmetric_matrix_vectorization(self):
        """Test of vectorization."""
        n_samples = 5
        vector = gs.random.rand(n_samples, 6)
        sym_mat = self.space.symmetric_matrix_from_vector(vector)
        result = self.space.vector_from_symmetric_matrix(sym_mat)
        expected = vector

        self.assertTrue(gs.allclose(result, expected))

        vector = self.space.vector_from_symmetric_matrix(sym_mat)
        result = self.space.symmetric_matrix_from_vector(vector)
        expected = sym_mat

        self.assertTrue(gs.allclose(result, expected))

    def test_symmetric_matrix_from_vector(self):
        """Test of symmetric_matrix_from_vector method."""
        vector_2 = gs.array([1, 2, 3, 4, 5, 6])
        result = self.space.symmetric_matrix_from_vector(vector_2)
        expected = gs.array([[1., 2., 3.],
                             [2., 4., 5.],
                             [3., 5., 6.]])
        self.assertAllClose(result, expected)
def _fit(self, X, base_point=None): """Fit the model by computing full SVD on X. Parameters ---------- X : array-like, shape=[n_samples, n_features] Training data, where n_samples is the number of samples and n_features is the number of features. y : Ignored (Compliance with scikit-learn interface) base_point : array-like, shape=[n_samples, n_features] Point at which to perform the tangent PCA Optional, default to Frechet mean if None point_type : str, {'vector', 'matrix'} Optional Returns ------- U, S, V: SVD decomposition """ if base_point is None: mean = FrechetMean(metric=self.metric, point_type=self.point_type) mean.fit(X) base_point = mean.estimate_ tangent_vecs = self.metric.log(X, base_point=base_point) if self.point_type == 'matrix': if Matrices.is_symmetric(tangent_vecs).all(): X = SymmetricMatrices.vector_from_symmetric_matrix( tangent_vecs) else: X = gs.reshape(tangent_vecs, (len(X), -1)) else: X = tangent_vecs X = check_array(X, dtype=[gs.float64, gs.float32], ensure_2d=True, copy=self.copy) if self.n_components is None: n_components = min(X.shape) else: n_components = self.n_components n_samples, n_features = X.shape if n_components == 'mle': if n_samples < n_features: raise ValueError("n_components='mle' is only supported " "if n_samples >= n_features") elif not 0 <= n_components <= min(n_samples, n_features): raise ValueError("n_components=%r must be between 0 and " "min(n_samples, n_features)=%r with " "svd_solver='full'" % (n_components, min(n_samples, n_features))) elif n_components >= 1: if not isinstance(n_components, numbers.Integral): raise ValueError("n_components=%r must be of type int " "when greater than or equal to 1, " "was of type=%r" % (n_components, type(n_components))) # Center data - the mean should be 0 if base_point is the Frechet mean self.mean_ = gs.mean(X, axis=0) X -= self.mean_ U, S, V = linalg.svd(X, full_matrices=False) # flip eigenvectors' sign to enforce deterministic output U, V = svd_flip(U, V) components_ = V # Get variance explained by singular values explained_variance_ = (S**2) / (n_samples - 1) total_var = explained_variance_.sum() explained_variance_ratio_ = explained_variance_ / total_var singular_values_ = S.copy() # Store the singular values. # Postprocess the number of components required if n_components == 'mle': n_components = \ _infer_dimension_(explained_variance_, n_samples, n_features) elif 0 < n_components < 1.0: # number of components for which the cumulated explained # variance percentage is superior to the desired threshold ratio_cumsum = stable_cumsum(explained_variance_ratio_) n_components = gs.searchsorted(ratio_cumsum, n_components) + 1 # Compute noise covariance using Probabilistic PCA model # The sigma2 maximum likelihood (cf. eq. 12.46) if n_components < min(n_features, n_samples): self.noise_variance_ = explained_variance_[n_components:].mean() else: self.noise_variance_ = 0. self.base_point_fit = base_point self.n_samples_, self.n_features_ = n_samples, n_features self.components_ = components_[:n_components] self.n_components_ = int(n_components) self.explained_variance_ = explained_variance_[:n_components] self.explained_variance_ratio_ = \ explained_variance_ratio_[:n_components] self.singular_values_ = singular_values_[:n_components] return U, S, V