def _maximization(self, data, posterior_probabilities):
    """Update function for the means and variances.

    Parameters
    ----------
    data : array-like, shape=[n_samples, n_features,]
        Training data, where n_samples is the number of samples
        and n_features is the number of features.
    posterior_probabilities : array-like, shape=[n_samples, n_gaussians,]
        Probability of a given sample to belong to a component
        of the GMM, computed for all components.
    """
    self.update_posterior_probabilities(posterior_probabilities)

    # A quantity differs from itself only if it is NaN.
    if gs.mean(self.mixture_coefficients) != gs.mean(
            self.mixture_coefficients):
        logging.warning('UPDATE : mixture coefficients '
                        'contain elements that are not numbers')

    self.update_means(data, posterior_probabilities)

    if self.means.mean() != self.means.mean():
        logging.warning('UPDATE : means contain '
                        'elements that are not numbers')

    self.update_variances(data, posterior_probabilities)

    if self.variances.mean() != self.variances.mean():
        logging.warning('UPDATE : variances contain '
                        'elements that are not numbers')
def setup_method(self): gs.random.seed(1234) self.n_samples = 20 # Set up for hypersphere self.dim_sphere = 4 self.shape_sphere = (self.dim_sphere + 1, ) self.sphere = Hypersphere(dim=self.dim_sphere) X = gs.random.rand(self.n_samples) self.X_sphere = X - gs.mean(X) self.intercept_sphere_true = self.sphere.random_point() self.coef_sphere_true = self.sphere.projection( gs.random.rand(self.dim_sphere + 1)) self.y_sphere = self.sphere.metric.exp( self.X_sphere[:, None] * self.coef_sphere_true, base_point=self.intercept_sphere_true, ) self.param_sphere_true = gs.vstack( [self.intercept_sphere_true, self.coef_sphere_true]) self.param_sphere_guess = gs.vstack([ self.y_sphere[0], self.sphere.to_tangent(gs.random.normal(size=self.shape_sphere), self.y_sphere[0]), ]) # Set up for special euclidean self.se2 = SpecialEuclidean(n=2) self.metric_se2 = self.se2.left_canonical_metric self.metric_se2.default_point_type = "matrix" self.shape_se2 = (3, 3) X = gs.random.rand(self.n_samples) self.X_se2 = X - gs.mean(X) self.intercept_se2_true = self.se2.random_point() self.coef_se2_true = self.se2.to_tangent( 5.0 * gs.random.rand(*self.shape_se2), self.intercept_se2_true) self.y_se2 = self.metric_se2.exp( self.X_se2[:, None, None] * self.coef_se2_true[None], self.intercept_se2_true, ) self.param_se2_true = gs.vstack([ gs.flatten(self.intercept_se2_true), gs.flatten(self.coef_se2_true), ]) self.param_se2_guess = gs.vstack([ gs.flatten(self.y_se2[0]), gs.flatten( self.se2.to_tangent(gs.random.normal(size=self.shape_se2), self.y_se2[0])), ])
def _expectation(self, data):
    """Update the posterior probabilities.

    Parameters
    ----------
    data : array-like, shape=[n_samples, n_features]
        Training data, where n_samples is the number of samples and
        n_features is the number of features.
    """
    probability_distribution_function = gmm_pdf(
        data,
        self.means,
        self.variances,
        norm_func=find_normalization_factor,
        metric=self.metric,
        variances_range=self.variances_range,
        norm_func_var=self.normalization_factor_var,
    )

    if gs.isnan(probability_distribution_function.mean()):
        logging.warning("EXPECTATION : probability distribution function "
                        "contains elements that are not numbers")

    num_normalized_pdf = gs.einsum("j,...j->...j",
                                   self.mixture_coefficients,
                                   probability_distribution_function)
    valid_pdf_condition = gs.amin(gs.sum(num_normalized_pdf, -1))

    if valid_pdf_condition <= PDF_TOL:
        num_normalized_pdf[gs.sum(num_normalized_pdf, -1) <= PDF_TOL] = 1

    sum_pdf = gs.sum(num_normalized_pdf, -1)
    posterior_probabilities = gs.einsum("...i,...->...i",
                                        num_normalized_pdf, 1 / sum_pdf)

    if gs.isnan(gs.mean(posterior_probabilities)):
        logging.warning("EXPECTATION : posterior probabilities "
                        "contain elements that are not numbers.")

    if not (1 - SUM_CHECK_PDF
            <= gs.mean(gs.sum(posterior_probabilities, 1))
            <= 1 + SUM_CHECK_PDF):
        logging.warning("EXPECTATION : posterior probabilities "
                        "do not sum to 1.")

    if gs.any(gs.sum(posterior_probabilities, 0) < PDF_TOL):
        logging.warning("EXPECTATION : Gaussian got no elements "
                        "(precision error) reinitialize")
        posterior_probabilities[posterior_probabilities == 0] = PDF_TOL

    return posterior_probabilities
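# A minimal NumPy sketch of the normalization performed in the E-step above:
# each component pdf is weighted by its mixture coefficient and divided by the
# per-sample sum, so responsibilities sum to 1 over components. The pdf values
# below are made-up numbers for illustration, not output of gmm_pdf.
import numpy as np

mixture_coefficients = np.array([0.3, 0.7])
pdf_values = np.array([[0.10, 0.40],      # shape [n_samples, n_gaussians]
                       [0.25, 0.05]])

weighted = np.einsum("j,ij->ij", mixture_coefficients, pdf_values)
responsibilities = weighted / weighted.sum(axis=-1, keepdims=True)

assert np.allclose(responsibilities.sum(axis=-1), 1.0)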
def _circle_mean(points):
    """Determine the mean on a circle.

    Data are expected in radians in the range [-pi, pi). The mean is
    returned in the same range. If the mean is unique, this algorithm is
    guaranteed to find it. It is not vulnerable to local minima of the
    Frechet function. If the mean is not unique, the algorithm only
    returns one of the means. Which mean is returned depends on numerical
    rounding errors.

    References
    ----------
    .. [HH15] Hotz, T. and S. F. Huckemann (2015), "Intrinsic means on the
        circle: Uniqueness, locus and asymptotics", Annals of the Institute
        of Statistical Mathematics 67 (1), 177–193.
        https://arxiv.org/abs/1108.2141
    """
    if points.ndim > 1:
        points_ = Hypersphere.extrinsic_to_angle(points)
    else:
        points_ = gs.copy(points)
    sample_size = points_.shape[0]
    mean0 = gs.mean(points_)
    var0 = gs.sum((points_ - mean0) ** 2)
    sorted_points = gs.sort(points_)
    means = _circle_variances(mean0, var0, sample_size, sorted_points)
    return means[gs.argmin(means[:, 1]), 0]
def setup_method(self): gs.random.seed(1234) self.n_samples = 20 # Set up for hypersphere self.dim_sphere = 2 self.shape_sphere = (self.dim_sphere + 1, ) self.sphere = Hypersphere(dim=self.dim_sphere) self.intercept_sphere_true = gs.array([0.0, -1.0, 0.0]) self.coef_sphere_true = gs.array([1.0, 0.0, 0.5]) # set up the prior self.prior = lambda x: self.sphere.metric.exp( x * self.coef_sphere_true, base_point=self.intercept_sphere_true, ) self.kernel = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(10.0, (1e-2, 1e2)) # generate data X = gs.linspace(0.0, 1.5 * gs.pi, self.n_samples) self.X_sphere = gs.reshape((X - gs.mean(X)), (-1, 1)) # generate the geodesic y = self.prior(self.X_sphere) # Then add orthogonal sinusoidal oscillations o = (1.0 / 20.0) * gs.array([-0.5, 0.0, 1.0]) o = self.sphere.to_tangent(o, base_point=y) s = self.X_sphere * gs.sin(5.0 * gs.pi * self.X_sphere) self.y_sphere = self.sphere.metric.exp(s * o, base_point=y)
def fit(self, X, y, weights=None, compute_training_score=False):
    """Estimate the parameters of the geodesic regression.

    Estimate the intercept and the coefficient defining the
    geodesic regression model.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape=[...,]
        Training input samples.
    y : array-like, shape=[..., {dim, [n,n]}]
        Training target values.
    weights : array-like, shape=[...,]
        Weights associated to the points.
        Optional, default: None.
    compute_training_score : bool
        Whether to compute R^2.
        Optional, default: False.

    Returns
    -------
    self : object
        Returns self.
    """
    times = gs.copy(X)
    if self.center_X:
        self.mean_ = gs.mean(X)
        times -= self.mean_

    if self.method == "extrinsic":
        return self._fit_extrinsic(times, y, weights,
                                   compute_training_score)
    if self.method == "riemannian":
        return self._fit_riemannian(times, y, weights,
                                    compute_training_score)
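# A usage sketch for the fit method above on the hypersphere, mirroring the
# data generation used elsewhere in this collection. The import path of the
# GeodesicRegression estimator is an assumption and may differ between
# versions; hyperparameter values are illustrative only.
import geomstats.backend as gs
from geomstats.geometry.hypersphere import Hypersphere
from geomstats.learning.geodesic_regression import GeodesicRegression

sphere = Hypersphere(dim=2)
X = gs.random.rand(20)
X -= gs.mean(X)
intercept = sphere.random_point()
coef = sphere.to_tangent(gs.random.rand(3), base_point=intercept)
y = sphere.metric.exp(X[:, None] * coef, base_point=intercept)

gr = GeodesicRegression(sphere, metric=sphere.metric, method="extrinsic")
gr.fit(X, y, compute_training_score=True)
print(gr.intercept_, gr.coef_, gr.training_score_)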
def _maximization(self, data, posterior_probabilities, lr_means,
                  conv_factor_mean, max_iter=DEFAULT_MAX_ITER):
    """Update function for the means and variances.

    Parameters
    ----------
    data : array-like, shape=[n_samples, n_features,]
        Training data, where n_samples is the number of samples and
        n_features is the number of features.
    posterior_probabilities : array-like, shape=[n_samples, n_gaussians,]
        Probability of a given sample to belong to a component
        of the GMM, computed for all components.
    lr_means : float
        Learning rate for computing the means.
    conv_factor_mean : float
        Convergence factor for means.
    max_iter : int
        Maximum number of iterations for computing the means.
        Optional, default: 100.
    """
    self.update_posterior_probabilities(posterior_probabilities)

    # A quantity differs from itself only if it is NaN.
    if gs.mean(self.mixture_coefficients) != gs.mean(
            self.mixture_coefficients):
        logging.warning('UPDATE : mixture coefficients '
                        'contain elements that are not numbers')

    self.update_means(data,
                      posterior_probabilities,
                      lr_means=lr_means,
                      tau_means=conv_factor_mean,
                      max_iter=max_iter)

    if self.means.mean() != self.means.mean():
        logging.warning('UPDATE : means contain '
                        'elements that are not numbers')

    self.update_variances(data, posterior_probabilities)

    if self.variances.mean() != self.variances.mean():
        logging.warning('UPDATE : variances contain '
                        'elements that are not numbers')
def _expectation(self, data):
    """Update the posterior probabilities.

    Parameters
    ----------
    data : array-like, shape=[n_samples, n_features]
        Training data, where n_samples is the number of samples and
        n_features is the number of features.
    """
    probability_distribution_function = PoincareBall.gmm_pdf(
        data,
        self.means,
        self.variances,
        norm_func=self.riemannian_metric.find_normalization_factor,
        metric=self.riemannian_metric,
        variances_range=self.variances_range,
        norm_func_var=self.normalization_factor_var)

    if gs.isnan(probability_distribution_function.mean()):
        logging.warning('EXPECTATION : probability distribution function '
                        'contains elements that are not numbers')

    num_normalized_pdf = gs.einsum('j,...j->...j',
                                   self.mixture_coefficients,
                                   probability_distribution_function)
    valid_pdf_condition = gs.amin(gs.sum(num_normalized_pdf, -1))

    if valid_pdf_condition <= PDF_TOL:
        num_normalized_pdf[gs.sum(num_normalized_pdf, -1) <= PDF_TOL] = 1

    sum_pdf = gs.sum(num_normalized_pdf, -1)
    posterior_probabilities = gs.einsum('...i,...->...i',
                                        num_normalized_pdf, 1 / sum_pdf)

    if gs.isnan(gs.mean(posterior_probabilities)):
        logging.warning('EXPECTATION : posterior probabilities '
                        'contain elements that are not numbers.')

    if not (1 - SUM_CHECK_PDF
            <= gs.mean(gs.sum(posterior_probabilities, 1))
            <= 1 + SUM_CHECK_PDF):
        logging.warning('EXPECTATION : posterior probabilities '
                        'do not sum to 1.')

    return posterior_probabilities
def empirical_frechet_var_bubble(n_samples, theta, dim, n_expectation=1000):
    """Variance of the empirical Fréchet mean for a bubble distribution.

    Draw n_samples samples from a bubble distribution, compute its
    empirical Fréchet mean and the square distance to the asymptotic
    mean. This is repeated n_expectation times to compute an approximation
    of its expectation (i.e. its variance) by sampling.

    The bubble distribution is an isotropic distribution on a Riemannian
    hyper sub-sphere of radius 0 < theta < Pi around the north pole of the
    sphere of dimension dim.

    Parameters
    ----------
    n_samples : int
        Number of samples to draw.
    theta : float
        Radius of the bubble distribution.
    dim : int
        Dimension of the sphere (embedded in R^{dim+1}).
    n_expectation : int, optional (defaults to 1000)
        Number of computations for approximating the expectation.

    Returns
    -------
    tuple (variance, std-dev on the computed variance)
    """
    if dim <= 1:
        raise ValueError(
            'Dim > 1 needed to draw a uniform sample on sub-sphere.')
    var = []
    sphere = Hypersphere(dim=dim)
    bubble = Hypersphere(dim=dim - 1)

    north_pole = gs.zeros(dim + 1)
    north_pole[dim] = 1.0
    for _ in range(n_expectation):
        # Sample n points from the uniform distribution on a sub-sphere
        # of radius theta (i.e cos(theta) in ambient space)
        # TODO (nina): Add this code as a method of hypersphere
        data = gs.zeros((n_samples, dim + 1), dtype=gs.float64)
        directions = bubble.random_uniform(n_samples)
        directions = gs.to_ndarray(directions, to_ndim=2)

        for i in range(n_samples):
            for j in range(dim):
                data[i, j] = gs.sin(theta) * directions[i, j]
            data[i, dim] = gs.cos(theta)

        # TODO (nina): Use FrechetMean here
        current_mean = _adaptive_gradient_descent(data,
                                                  metric=sphere.metric,
                                                  max_iter=32,
                                                  init_point=north_pole)

        var.append(sphere.metric.squared_dist(north_pole, current_mean))
    return gs.mean(var), 2 * gs.std(var) / gs.sqrt(n_expectation)
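# Hypothetical usage of empirical_frechet_var_bubble defined above: estimate
# the variance of the empirical Fréchet mean of 10-point samples drawn on a
# bubble of radius 0.3 on the 3-sphere. The argument values are illustrative,
# and n_expectation is kept small here only to keep the sketch fast.
variance, stdev = empirical_frechet_var_bubble(
    n_samples=10, theta=0.3, dim=3, n_expectation=50)
print(f"variance: {variance:.4f} +/- {stdev:.4f}")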
def update_posterior_probabilities(self, posterior_probabilities): """Posterior probabilities update function. Parameters ---------- posterior_probabilities : array-like, shape=[n_samples, n_gaussians,] Probability of a given sample to belong to a component of the GMM, computed for all components. """ self.mixture_coefficients = gs.mean(posterior_probabilities, 0)
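# The update above sets each mixture coefficient to the average responsibility
# of its component over the samples. A small NumPy check with made-up
# responsibilities (each row sums to 1) shows that the resulting coefficients
# also sum to 1.
import numpy as np

posterior_probabilities = np.array([[0.9, 0.1],
                                    [0.2, 0.8],
                                    [0.5, 0.5]])
mixture_coefficients = posterior_probabilities.mean(axis=0)
assert np.isclose(mixture_coefficients.sum(), 1.0)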
def empirical_frechet_var_bubble(n_samples, theta, dim, n_expectation=1000):
    """Variance of the empirical Fréchet mean for a bubble distribution.

    Draw n_samples samples from a bubble distribution, compute its
    empirical Fréchet mean and the square distance to the asymptotic
    mean. This is repeated n_expectation times to compute an approximation
    of its expectation (i.e. its variance) by sampling.

    The bubble distribution is an isotropic distribution on a Riemannian
    hyper sub-sphere of radius 0 < theta < Pi around the north pole of the
    sphere of dimension dim.

    Parameters
    ----------
    n_samples : int
        Number of samples to draw.
    theta : float
        Radius of the bubble distribution.
    dim : int
        Dimension of the sphere (embedded in R^{dim+1}).
    n_expectation : int, optional (defaults to 1000)
        Number of computations for approximating the expectation.

    Returns
    -------
    tuple (variance, std-dev on the computed variance)
    """
    if dim <= 1:
        raise ValueError(
            "Dim > 1 needed to draw a uniform sample on sub-sphere.")
    var = []
    sphere = Hypersphere(dim=dim)
    bubble = Hypersphere(dim=dim - 1)

    north_pole = gs.zeros(dim + 1)
    north_pole[dim] = 1.0
    for _ in range(n_expectation):
        # Sample n points from the uniform distribution on a sub-sphere
        # of radius theta (i.e cos(theta) in ambient space)
        # TODO (nina): Add this code as a method of hypersphere
        last_col = gs.cos(theta) * gs.ones(n_samples)
        last_col = last_col[:, None] if (n_samples > 1) else last_col

        directions = bubble.random_uniform(n_samples)
        rest_col = gs.sin(theta) * directions
        data = gs.concatenate([rest_col, last_col], axis=-1)

        estimator = FrechetMean(sphere.metric,
                                max_iter=32,
                                method="adaptive",
                                init_point=north_pole)
        estimator.fit(data)
        current_mean = estimator.estimate_

        var.append(sphere.metric.squared_dist(north_pole, current_mean))
    return gs.mean(var), 2 * gs.std(var) / gs.sqrt(n_expectation)
def fit(self, X, max_iter=100):
    """Provide clusters centroids and data labels.

    Label each data point with the closest centroid in terms of the
    riemannian_metric distance, and alternate with updating the centroid
    of each cluster, for at most max_iter iterations.

    Parameters
    ----------
    X : array-like, shape=[n_samples, n_features]
        Training data, where n_samples is the number of samples and
        n_features is the number of features.
    max_iter : int
        Maximum number of iterations.

    Returns
    -------
    centroids : array-like, shape=[n_clusters,]
        Centroids array.
    """
    n_samples = X.shape[0]
    belongs = gs.zeros(n_samples)
    self.centroids = [
        gs.expand_dims(X[randint(0, n_samples - 1)], 0)
        for i in range(self.n_clusters)
    ]
    self.centroids = gs.concatenate(self.centroids)
    index = 0
    while index < max_iter:
        index += 1

        dists = [
            gs.to_ndarray(
                self.riemannian_metric.dist(self.centroids[i], X), 2, 1)
            for i in range(self.n_clusters)
        ]
        dists = gs.hstack(dists)
        belongs = gs.argmin(dists, 1)
        old_centroids = gs.copy(self.centroids)
        for i in range(self.n_clusters):
            fold = gs.squeeze(X[belongs == i])
            if len(fold) > 0:
                self.centroids[i] = self.riemannian_metric.mean(fold)
            else:
                self.centroids[i] = X[randint(0, n_samples - 1)]

        centroids_distances = self.riemannian_metric.dist(
            old_centroids, self.centroids)
        if gs.mean(centroids_distances) < self.tol:
            if self.verbose > 0:
                print("Convergence reached after ", index, " iterations")
            return gs.copy(self.centroids)
    return gs.copy(self.centroids)
def test_spd_frechet_mean(self):
    """Test that the Fréchet mean of the samples is close to the mean."""
    mean = gs.eye(self.n)
    cov = gs.eye(self.spd_cov_n)
    data = LogNormal(self.spd, mean, cov).sample(5000)
    _fm = gs.mean(self.spd.logm(data), axis=0)
    fm = self.spd.expm(_fm)
    expected = mean
    result = fm
    self.assertAllClose(result, expected, atol=5 * 1e-2)
def test_euclidean_frechet_mean(self):
    """Test that the Fréchet mean of the samples is close to the mean."""
    mean = gs.zeros(self.n)
    cov = gs.eye(self.n)
    data = LogNormal(self.euclidean, mean, cov).sample(5000)
    log_data = gs.log(data)
    fm = gs.mean(log_data, axis=0)
    expected = mean
    result = fm
    self.assertAllClose(result, expected, atol=5 * 1e-2)
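# The Euclidean test above relies on the fact that if X = exp(Z) with
# Z ~ N(mean, cov), then the sample mean of log(X) converges to `mean` as the
# number of samples grows. A plain NumPy illustration of that identity
# (dimension and sample size are illustrative):
import numpy as np

rng = np.random.default_rng(0)
mean, cov = np.zeros(3), np.eye(3)
data = np.exp(rng.multivariate_normal(mean, cov, size=20_000))
assert np.allclose(np.log(data).mean(axis=0), mean, atol=5e-2)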
def test_linear_mean(self): euclidean = Euclidean(3) point = euclidean.random_point(self.n_samples) estimator = ExponentialBarycenter(euclidean) estimator.fit(point) result = estimator.estimate_ expected = gs.mean(point, axis=0) self.assertAllClose(result, expected)
def test_sample(self): """ Test that the sample method samples variates from beta distributions with the specified parameters, using the law of large numbers """ n_samples = self.n_samples tol = (n_samples * 10)**(-0.5) point = self.beta.random_uniform(n_samples) samples = self.beta.sample(point, n_samples * 10) result = gs.mean(samples, axis=1) expected = point[:, 0] / gs.sum(point, axis=1) self.assertAllClose(result, expected, rtol=tol, atol=tol)
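# The test above uses the law of large numbers together with the identity
# E[Beta(a, b)] = a / (a + b). A plain NumPy check of that identity, with
# illustrative parameter values:
import numpy as np

rng = np.random.default_rng(0)
a, b = 2.0, 5.0
samples = rng.beta(a, b, size=200_000)
assert np.isclose(samples.mean(), a / (a + b), atol=1e-2)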
def main():
    r"""Compute a geodesic regression on Grassmann manifold (2, 3).

    The generative model of the data is:
    :math:`Z = Exp_{\beta_0}(\beta_1.X)` and :math:`Y = Exp_Z(\epsilon)`
    where:
    - :math:`Exp` denotes the Riemannian exponential,
    - :math:`\beta_0` is called the intercept,
    - :math:`\beta_1` is called the coefficient,
    - :math:`\epsilon \sim N(0, 1)` is a standard Gaussian noise,
    - :math:`X` is called the input, :math:`Y` is called the target.
    """
    # Generate data
    n_samples = 10
    data = gs.random.rand(n_samples)
    data -= gs.mean(data)

    intercept = SPACE.random_uniform()
    beta = SPACE.to_tangent(GeneralLinear(3).random_point(), intercept)
    target = METRIC.exp(tangent_vec=gs.einsum("...,jk->...jk", data, beta),
                        base_point=intercept)

    # Fit geodesic regression
    gr = GeodesicRegression(
        SPACE,
        metric=METRIC,
        center_X=False,
        method="riemannian",
        max_iter=50,
        init_step_size=0.1,
        verbose=True,
    )
    gr.fit(data, target, compute_training_score=True)
    intercept_hat, beta_hat = gr.intercept_, gr.coef_

    # Measure Mean Squared Error
    mse_intercept = METRIC.squared_dist(intercept_hat, intercept)
    mse_beta = METRIC.squared_norm(
        METRIC.parallel_transport(beta_hat,
                                  METRIC.log(intercept_hat, intercept),
                                  intercept_hat) - beta,
        intercept,
    )

    # Measure goodness of fit
    r2_hat = gr.training_score_

    print(f"MSE on the intercept: {mse_intercept:.2e}")
    print(f"MSE on the initial velocity beta: {mse_beta:.2e}")
    print(f"Determination coefficient: R^2={r2_hat:.2f}")
def center(point): """Center landmarks around 0. Parameters ---------- point : array-like, shape=[..., k_landmarks, m_ambient] Point in Matrices space. Returns ------- centered : array-like, shape=[..., k_landmarks, m_ambient] Point with centered landmarks. """ mean = gs.mean(point, axis=-2) return point - mean[..., None, :]
def empirical_frechet_var_bubble(n_samples, theta, dim, n_expectation=1000):
    """Variance of the empirical Fréchet mean for a bubble distribution.

    Draw n_samples samples from a bubble distribution, compute its
    empirical Fréchet mean and the square distance to the asymptotic
    mean. This is repeated n_expectation times to compute an approximation
    of its expectation (i.e. its variance) by sampling.

    The bubble distribution is an isotropic distribution on a Riemannian
    hyper sub-sphere of radius 0 < theta < Pi around the north pole of the
    sphere of dimension dim.

    Parameters
    ----------
    n_samples: number of samples to draw
    theta: radius of the bubble distribution
    dim: dimension of the sphere (embedded in R^{dim+1})
    n_expectation: number of computations for approximating the expectation

    Returns
    -------
    tuple (variance, std-dev on the computed variance)
    """
    assert dim > 1, "Dim > 1 needed to draw a uniform sample on sub-sphere"
    var = []
    sphere = Hypersphere(dimension=dim)
    bubble = Hypersphere(dimension=dim - 1)

    north_pole = gs.zeros(dim + 1)
    north_pole[dim] = 1.0
    for _ in range(n_expectation):
        # Sample n points from the uniform distribution on a sub-sphere
        # of radius theta (i.e cos(theta) in ambient space)
        # TODO(nina): Add this code as a method of hypersphere
        data = gs.zeros((n_samples, dim + 1), dtype=gs.float64)
        directions = bubble.random_uniform(n_samples)

        for i in range(n_samples):
            for j in range(dim):
                data[i, j] = gs.sin(theta) * directions[i, j]
            data[i, dim] = gs.cos(theta)

        current_mean = sphere.metric.adaptive_gradientdescent_mean(
            data, n_max_iterations=64, init_points=[north_pole])

        var.append(sphere.metric.squared_dist(north_pole, current_mean))
    return gs.mean(var), 2 * gs.std(var) / gs.sqrt(n_expectation)
def is_centered(point, atol=gs.atol): """Check that landmarks are centered around 0. Parameters ---------- point : array-like, shape=[..., k_landmarks, m_ambient] Point in Matrices space. atol : float Tolerance at which to evaluate mean == 0. Optional, default: backend atol. Returns ------- is_centered : array-like, shape=[...,] Boolean evaluating if point is centered. """ mean = gs.mean(point, axis=-2) return gs.all(gs.isclose(mean, 0.0, atol=atol), axis=-1)
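# A small check tying together the center and is_centered helpers above:
# after centering, the mean over the landmark axis vanishes, so is_centered
# returns True. Assumes both helpers are available in the current namespace as
# defined above; the landmark configuration is random and purely illustrative.
import geomstats.backend as gs

point = gs.random.rand(4, 5, 3)   # 4 configurations of 5 landmarks in R^3
assert gs.all(is_centered(center(point)))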
def to_tangent(self, vector, base_point=None): """Project a vector to the tangent space. Project a vector in Euclidean space on the tangent space of the simplex at a base point. Parameters ---------- vector : array-like, shape=[..., dim + 1] Vector in Euclidean space. base_point : array-like, shape=[..., dim + 1] Point on the simplex defining the tangent space, where the vector will be projected. Returns ------- vector : array-like, shape=[..., dim + 1] Tangent vector in the tangent space of the simplex at the base point. """ geomstats.errors.check_belongs(vector, self.embedding_space) component_mean = gs.mean(vector, axis=-1) return gs.transpose(gs.transpose(vector) - component_mean)
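# The projection above subtracts the component mean, so the resulting tangent
# vector has components summing to zero, which characterises the tangent space
# of the simplex inside the ambient Euclidean space. A NumPy sketch of that
# property, independent of the class above:
import numpy as np

vector = np.array([0.2, 1.5, -0.3, 0.6])
tangent = vector - vector.mean()
assert np.isclose(tangent.sum(), 0.0)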
def main(): r"""Compute and visualize a geodesic regression on the sphere. The generative model of the data is: :math:`Z = Exp_{\beta_0}(\beta_1.X)` and :math:`Y = Exp_Z(\epsilon)` where: - :math:`Exp` denotes the Riemannian exponential, - :math:`\beta_0` is called the intercept, - :math:`\beta_1` is called the coefficient, - :math:`\epsilon \sim N(0, 1)` is a standard Gaussian noise, - :math:`X` is the input, :math:`Y` is the target. """ # Generate noise-free data n_samples = 50 X = gs.random.rand(n_samples) X -= gs.mean(X) intercept = SPACE.random_uniform() coef = SPACE.to_tangent(5.0 * gs.random.rand(EMBEDDING_DIM), base_point=intercept) y = METRIC.exp(X[:, None] * coef, base_point=intercept) # Generate normal noise normal_noise = gs.random.normal(size=(n_samples, EMBEDDING_DIM)) noise = SPACE.to_tangent(normal_noise, base_point=y) / gs.pi / 2 rss = gs.sum(METRIC.squared_norm(noise, base_point=y)) / n_samples # Add noise y = METRIC.exp(noise, y) # True noise level and R2 estimator = FrechetMean(METRIC) estimator.fit(y) variance_ = variance(y, estimator.estimate_, metric=METRIC) r2 = 1 - rss / variance_ # Fit geodesic regression gr = GeodesicRegression(SPACE, center_X=False, method="extrinsic", verbose=True) gr.fit(X, y, compute_training_score=True) intercept_hat, coef_hat = gr.intercept_, gr.coef_ # Measure Mean Squared Error mse_intercept = METRIC.squared_dist(intercept_hat, intercept) tangent_vec_to_transport = coef_hat tangent_vec_of_transport = METRIC.log(intercept, base_point=intercept_hat) transported_coef_hat = METRIC.parallel_transport( tangent_vec=tangent_vec_to_transport, base_point=intercept_hat, direction=tangent_vec_of_transport, ) mse_coef = METRIC.squared_norm(transported_coef_hat - coef, base_point=intercept) # Measure goodness of fit r2_hat = gr.training_score_ print(f"MSE on the intercept: {mse_intercept:.2e}") print(f"MSE on the coef, i.e. initial velocity: {mse_coef:.2e}") print(f"Determination coefficient: R^2={r2_hat:.2f}") print(f"True R^2: {r2:.2f}") # Plot fitted_data = gr.predict(X) fig = plt.figure(figsize=(8, 8)) ax = fig.add_subplot(111, projection="3d") sphere_visu = visualization.Sphere(n_meridians=30) ax = sphere_visu.set_ax(ax=ax) path = METRIC.geodesic(initial_point=intercept_hat, initial_tangent_vec=coef_hat) regressed_geodesic = path( gs.linspace(0.0, 1.0, 100) * gs.pi * 2 / METRIC.norm(coef)) regressed_geodesic = gs.to_numpy(gs.autodiff.detach(regressed_geodesic)) size = 10 marker = "o" sphere_visu.draw_points(ax, gs.array([intercept_hat]), marker=marker, c="r", s=size) sphere_visu.draw_points(ax, y, marker=marker, c="b", s=size) sphere_visu.draw_points(ax, fitted_data, marker=marker, c="g", s=size) ax.plot( regressed_geodesic[:, 0], regressed_geodesic[:, 1], regressed_geodesic[:, 2], c="gray", ) sphere_visu.draw(ax, linewidth=1) ax.grid(False) plt.axis("off") plt.show()
def _fit(self, X, base_point=None, point_type='vector'): """Fit the model by computing full SVD on X""" if point_type == 'matrix': raise NotImplementedError( 'This is currently only implemented for vectors.') if base_point is None: base_point = self.metric.mean(X) tangent_vecs = self.metric.log(X, base_point=base_point) # Convert to sklearn format X = tangent_vecs X = check_array(X, dtype=[gs.float64, gs.float32], ensure_2d=True, copy=self.copy) # Handle n_components==None if self.n_components is None: n_components = min(X.shape) else: n_components = self.n_components n_samples, n_features = X.shape if n_components == 'mle': if n_samples < n_features: raise ValueError("n_components='mle' is only supported " "if n_samples >= n_features") elif not 0 <= n_components <= min(n_samples, n_features): raise ValueError("n_components=%r must be between 0 and " "min(n_samples, n_features)=%r with " "svd_solver='full'" % (n_components, min(n_samples, n_features))) elif n_components >= 1: if not isinstance(n_components, (numbers.Integral, gs.integer)): raise ValueError("n_components=%r must be of type int " "when greater than or equal to 1, " "was of type=%r" % (n_components, type(n_components))) # Center data self.mean_ = gs.mean(X, axis=0) X -= self.mean_ U, S, V = linalg.svd(X, full_matrices=False) # flip eigenvectors' sign to enforce deterministic output U, V = svd_flip(U, V) components_ = V # Get variance explained by singular values explained_variance_ = (S**2) / (n_samples - 1) total_var = explained_variance_.sum() explained_variance_ratio_ = explained_variance_ / total_var singular_values_ = S.copy() # Store the singular values. # Postprocess the number of components required if n_components == 'mle': n_components = \ _infer_dimension_(explained_variance_, n_samples, n_features) elif 0 < n_components < 1.0: # number of components for which the cumulated explained # variance percentage is superior to the desired threshold ratio_cumsum = stable_cumsum(explained_variance_ratio_) n_components = gs.searchsorted(ratio_cumsum, n_components) + 1 # Compute noise covariance using Probabilistic PCA model # The sigma2 maximum likelihood (cf. eq. 12.46) if n_components < min(n_features, n_samples): self.noise_variance_ = explained_variance_[n_components:].mean() else: self.noise_variance_ = 0. self.n_samples_, self.n_features_ = n_samples, n_features self.components_ = components_[:n_components] self.n_components_ = n_components self.explained_variance_ = explained_variance_[:n_components] self.explained_variance_ratio_ = \ explained_variance_ratio_[:n_components] self.singular_values_ = singular_values_[:n_components] return U, S, V
def fit(self, data, max_iter=DEFAULT_MAX_ITER, lr_mean=DEFAULT_LR, conv_factor_mean=DEFAULT_CONV_FACTOR): """Fit a Gaussian mixture model (GMM) given the data. Alternates between Expectation and Maximization steps for some number of iterations. Parameters ---------- data : array-like, shape=[n_samples, n_features] Training data, where n_samples is the number of samples and n_features is the number of features. max_iter : int Optional, default: 100. Maximum number of iterations. lr_mean : float Optional, default: 5e-2. Learning rate for the mean. conv_factor_mean : float Optional, default: 5e-3. Convergence factor for the mean. Returns ------- self : object Return the components of the computed Gaussian mixture model: means, variances and mixture_coefficients. """ self._dimension = data.shape[-1] self.means = (gs.random.rand( self.n_gaussians, self._dimension) - 0.5) / self._dimension self.variances = gs.random.rand(self.n_gaussians) / 10 + 0.8 self.mixture_coefficients = \ gs.ones(self.n_gaussians) / self.n_gaussians posterior_probabilities = gs.ones((data.shape[0], self.means.shape[0])) self.variances_range,\ self.normalization_factor_var, \ self.phi_inv_var =\ self.riemannian_metric.normalization_factor_init( gs.arange( ZETA_LOWER_BOUND, ZETA_UPPER_BOUND, ZETA_STEP)) for epoch in range(max_iter): old_posterior_probabilities = posterior_probabilities posterior_probabilities = self._expectation(data) condition = gs.mean(gs.abs(old_posterior_probabilities - posterior_probabilities)) if(condition < EM_CONV_RATE and epoch > MINIMUM_EPOCHS): logging.info('EM converged in %s iterations', epoch) return self.means, self.variances, self.mixture_coefficients self._maximization(data, posterior_probabilities, lr_means=lr_mean, conv_factor_mean=conv_factor_mean) logging.info('WARNING: EM did not converge \n' 'Please increase MINIMUM_EPOCHS.') return self.means, self.variances, self.mixture_coefficients
def setup_method(self): gs.random.seed(1234) self.n_samples = 20 # Set up for euclidean self.dim_eucl = 3 self.shape_eucl = (self.dim_eucl, ) self.eucl = Euclidean(dim=self.dim_eucl) X = gs.random.rand(self.n_samples) self.X_eucl = X - gs.mean(X) self.intercept_eucl_true = self.eucl.random_point() self.coef_eucl_true = self.eucl.random_point() self.y_eucl = (self.intercept_eucl_true + self.X_eucl[:, None] * self.coef_eucl_true) self.param_eucl_true = gs.vstack( [self.intercept_eucl_true, self.coef_eucl_true]) self.param_eucl_guess = gs.vstack([ self.y_eucl[0], self.y_eucl[0] + gs.random.normal(size=self.shape_eucl) ]) # Set up for hypersphere self.dim_sphere = 4 self.shape_sphere = (self.dim_sphere + 1, ) self.sphere = Hypersphere(dim=self.dim_sphere) X = gs.random.rand(self.n_samples) self.X_sphere = X - gs.mean(X) self.intercept_sphere_true = self.sphere.random_point() self.coef_sphere_true = self.sphere.projection( gs.random.rand(self.dim_sphere + 1)) self.y_sphere = self.sphere.metric.exp( self.X_sphere[:, None] * self.coef_sphere_true, base_point=self.intercept_sphere_true, ) self.param_sphere_true = gs.vstack( [self.intercept_sphere_true, self.coef_sphere_true]) self.param_sphere_guess = gs.vstack([ self.y_sphere[0], self.sphere.to_tangent(gs.random.normal(size=self.shape_sphere), self.y_sphere[0]), ]) # Set up for special euclidean self.se2 = SpecialEuclidean(n=2) self.metric_se2 = self.se2.left_canonical_metric self.metric_se2.default_point_type = "matrix" self.shape_se2 = (3, 3) X = gs.random.rand(self.n_samples) self.X_se2 = X - gs.mean(X) self.intercept_se2_true = self.se2.random_point() self.coef_se2_true = self.se2.to_tangent( 5.0 * gs.random.rand(*self.shape_se2), self.intercept_se2_true) self.y_se2 = self.metric_se2.exp( self.X_se2[:, None, None] * self.coef_se2_true[None], self.intercept_se2_true, ) self.param_se2_true = gs.vstack([ gs.flatten(self.intercept_se2_true), gs.flatten(self.coef_se2_true), ]) self.param_se2_guess = gs.vstack([ gs.flatten(self.y_se2[0]), gs.flatten( self.se2.to_tangent(gs.random.normal(size=self.shape_se2), self.y_se2[0])), ]) # Set up for discrete curves n_sampling_points = 8 self.curves_2d = DiscreteCurves(R2) self.metric_curves_2d = self.curves_2d.srv_metric self.metric_curves_2d.default_point_type = "matrix" self.shape_curves_2d = (n_sampling_points, 2) X = gs.random.rand(self.n_samples) self.X_curves_2d = X - gs.mean(X) self.intercept_curves_2d_true = self.curves_2d.random_point( n_sampling_points=n_sampling_points) self.coef_curves_2d_true = self.curves_2d.to_tangent( 5.0 * gs.random.rand(*self.shape_curves_2d), self.intercept_curves_2d_true) # Added because of GitHub issue #1575 intercept_curves_2d_true_repeated = gs.tile( gs.expand_dims(self.intercept_curves_2d_true, axis=0), (self.n_samples, 1, 1), ) self.y_curves_2d = self.metric_curves_2d.exp( self.X_curves_2d[:, None, None] * self.coef_curves_2d_true[None], intercept_curves_2d_true_repeated, ) self.param_curves_2d_true = gs.vstack([ gs.flatten(self.intercept_curves_2d_true), gs.flatten(self.coef_curves_2d_true), ]) self.param_curves_2d_guess = gs.vstack([ gs.flatten(self.y_curves_2d[0]), gs.flatten( self.curves_2d.to_tangent( gs.random.normal(size=self.shape_curves_2d), self.y_curves_2d[0])), ])
def _ball_gradient_descent(points, metric, weights=None, max_iter=32, lr=1e-3, tau=5e-3): """Perform ball gradient descent.""" points = gs.to_ndarray(points, to_ndim=2) if len(points) == 1: return points[0] if weights is None: iteration = 0 convergence = math.inf barycenter = gs.mean(points, axis=0, keepdims=True) while convergence > tau and max_iter > iteration: iteration += 1 grad_tangent = 2 * metric.log(points, barycenter) cc_barycenter = metric.exp(lr * grad_tangent.sum(0, keepdims=True), barycenter) convergence = metric.dist(cc_barycenter, barycenter).max().item() barycenter = cc_barycenter else: weights = gs.expand_dims(weights, -1) weights = gs.repeat(weights, points.shape[-1], axis=2) barycenter = (points * weights).sum(0, keepdims=True) / weights.sum(0) barycenter_gs = gs.squeeze(barycenter) points_gs = gs.squeeze(points) points_flattened = gs.reshape(points_gs, (-1, points_gs.shape[-1])) convergence = math.inf iteration = 0 while convergence > tau and max_iter > iteration: iteration += 1 barycenter_flattened = gs.repeat(barycenter, len(points_gs), axis=0) barycenter_flattened = gs.reshape( barycenter_flattened, (-1, barycenter_flattened.shape[-1])) grad_tangent = 2 * metric.log(points_flattened, barycenter_flattened) grad_tangent = gs.reshape(grad_tangent, points.shape) grad_tangent = grad_tangent * weights lr_grad_tangent = lr * grad_tangent.sum(0, keepdims=True) lr_grad_tangent_s = lr_grad_tangent.squeeze() cc_barycenter = metric.exp(barycenter_gs, lr_grad_tangent_s) convergence = metric.dist(cc_barycenter, barycenter_gs).max().item() barycenter_gs = cc_barycenter barycenter = gs.expand_dims(cc_barycenter, 0) barycenter = gs.squeeze(barycenter) if iteration == max_iter: logging.warning('Maximum number of iterations {} reached. The ' 'mean may be inaccurate'.format(max_iter)) return barycenter
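# The loop above is Riemannian gradient descent on the Fréchet objective: at
# each step the barycenter moves along the exponential map in the direction of
# the summed log maps of the points. In Euclidean space, where exp(v, p) = p + v
# and log(q, p) = q - p, the same update converges to the arithmetic mean; a
# minimal NumPy sketch of that special case (step size and data illustrative):
import numpy as np

points = np.array([[0.0, 1.0], [2.0, 3.0], [4.0, -1.0]])
barycenter, lr = points[0].copy(), 1e-1
for _ in range(200):
    grad_tangent = (points - barycenter).sum(axis=0)   # sum of log maps
    barycenter = barycenter + lr * grad_tangent        # exponential map step

assert np.allclose(barycenter, points.mean(axis=0), atol=1e-6)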
def fit(self, X, max_iter=100): """Provide clusters centroids and data labels. Alternate between computing the mean of each cluster and labelling data according to the new positions of the centroids. Parameters ---------- X : array-like, shape=[n_samples, n_features] Training data, where n_samples is the number of samples and n_features is the number of features. max_iter : int Maximum number of iterations Returns ------- self : array-like, shape=[n_clusters,] centroids array """ n_samples = X.shape[0] belongs = gs.zeros(n_samples) self.centroids = [ gs.expand_dims(X[randint(0, n_samples - 1)], 0) for i in range(self.n_clusters) ] self.centroids = gs.concatenate(self.centroids) index = 0 while index < max_iter: index += 1 dists = [ gs.to_ndarray( self.riemannian_metric.dist(self.centroids[i], X), 2, 1) for i in range(self.n_clusters) ] dists = gs.hstack(dists) belongs = gs.argmin(dists, 1) old_centroids = gs.copy(self.centroids) for i in range(self.n_clusters): fold = gs.squeeze(X[belongs == i]) if len(fold) > 0: mean = FrechetMean(metric=self.riemannian_metric, method=self.mean_method, max_iter=150) mean.fit(fold) self.centroids[i] = mean.estimate_ else: self.centroids[i] = X[randint(0, n_samples - 1)] centroids_distances = self.riemannian_metric.dist( old_centroids, self.centroids) if gs.mean(centroids_distances) < self.tol: if self.verbose > 0: logging.info('Convergence reached after {} ' 'iterations'.format(index)) return gs.copy(self.centroids) if index == max_iter: logging.warning('K-means maximum number of iterations {} reached. ' 'The mean may be inaccurate'.format(max_iter)) return gs.copy(self.centroids)
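# A hypothetical usage sketch for the k-means fit above on the hypersphere.
# The import path of the RiemannianKMeans estimator is an assumption, and its
# constructor arguments may differ between versions; the data here are drawn
# uniformly and are purely illustrative.
import geomstats.backend as gs
from geomstats.geometry.hypersphere import Hypersphere
from geomstats.learning.kmeans import RiemannianKMeans

gs.random.seed(0)
sphere = Hypersphere(dim=2)
data = sphere.random_uniform(n_samples=100)

kmeans = RiemannianKMeans(metric=sphere.metric, n_clusters=2)
centroids = kmeans.fit(X=data)
labels = kmeans.predict(X=data)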
def fit(self, X): """Provide clusters centroids and data labels. Alternate between computing the mean of each cluster and labelling data according to the new positions of the centroids. Parameters ---------- X : array-like, shape=[..., n_features] Training data, where n_samples is the number of samples and n_features is the number of features. max_iter : int Maximum number of iterations. Optional, default: 100. Returns ------- self : array-like, shape=[n_clusters,] Centroids. """ n_samples = X.shape[0] if self.verbose > 0: logging.info("Initializing...") if self.init == "kmeans++": centroids = [gs.expand_dims(X[randint(0, n_samples - 1)], 0)] for i in range(self.n_clusters - 1): dists = [ gs.to_ndarray(self.metric.dist(centroids[j], X), 2, 1) for j in range(i + 1) ] dists = gs.hstack(dists) dists_to_closest_centroid = gs.amin(dists, 1) indices = gs.arange(n_samples) weights = dists_to_closest_centroid / gs.sum( dists_to_closest_centroid) index = rv_discrete(values=(indices, weights)).rvs() centroids.append(gs.expand_dims(X[index], 0)) else: centroids = [ gs.expand_dims(X[randint(0, n_samples - 1)], 0) for i in range(self.n_clusters) ] self.centroids = gs.concatenate(centroids, axis=0) self.init_centroids = gs.concatenate(centroids, axis=0) dists = [ gs.to_ndarray(self.metric.dist(self.centroids[i], X), 2, 1) for i in range(self.n_clusters) ] dists = gs.hstack(dists) self.labels = gs.argmin(dists, 1) index = 0 while index < self.max_iter: index += 1 if self.verbose > 0: logging.info(f"Iteration {index}...") old_centroids = gs.copy(self.centroids) for i in range(self.n_clusters): fold = gs.squeeze(X[self.labels == i]) if len(fold) > 0: mean = FrechetMean( metric=self.metric, max_iter=self.max_iter_mean, point_type=self.point_type, method=self.mean_method, init_step_size=self.init_step_size, ) mean.fit(fold) self.centroids[i] = mean.estimate_ else: self.centroids[i] = X[randint(0, n_samples - 1)] dists = [ gs.to_ndarray(self.metric.dist(self.centroids[i], X), 2, 1) for i in range(self.n_clusters) ] dists = gs.hstack(dists) self.labels = gs.argmin(dists, 1) dists_to_closest_centroid = gs.amin(dists, 1) self.inertia = gs.sum(dists_to_closest_centroid**2) centroids_distances = self.metric.dist(old_centroids, self.centroids) if self.verbose > 0: logging.info( f"Convergence criterion at the end of iteration {index} " f"is {gs.mean(centroids_distances)}.") if gs.mean(centroids_distances) < self.tol: if self.verbose > 0: logging.info( f"Convergence reached after {index} iterations.") if self.n_clusters == 1: self.centroids = gs.squeeze(self.centroids, axis=0) return gs.copy(self.centroids) if index == self.max_iter: logging.warning( f"K-means maximum number of iterations {self.max_iter} reached. " "The mean may be inaccurate.") if self.n_clusters == 1: self.centroids = gs.squeeze(self.centroids, axis=0) return gs.copy(self.centroids)
def fit(self, data): """Fit a Gaussian mixture model (GMM) given the data. Alternates between Expectation and Maximization steps for some number of iterations. Parameters ---------- data : array-like, shape=[n_samples, n_features] Training data, where n_samples is the number of samples and n_features is the number of features. Returns ------- self : object Return the components of the computed Gaussian mixture model: means, variances and mixture_coefficients. """ self._dimension = data.shape[-1] if self.initialisation_method == 'kmeans': kmeans = RiemannianKMeans(metric=self.metric, n_clusters=self.n_gaussians, init='random', mean_method='batch', lr=self.lr_mean) centroids = kmeans.fit(X=data) labels = kmeans.predict(X=data) self.means = centroids self.variances = gs.zeros(self.n_gaussians) labeled_data = gs.vstack([labels, gs.transpose(data)]) labeled_data = gs.transpose(labeled_data) for label, centroid in enumerate(centroids): label_mask = gs.where(labeled_data[:, 0] == label) grouped_by_label = labeled_data[label_mask][:, 1:] v = variance(grouped_by_label, centroid, self.metric) if grouped_by_label.shape[0] == 1: v += MIN_VAR_INIT self.variances[label] = v else: self.means = (gs.random.rand(self.n_gaussians, self._dimension) - 0.5) / self._dimension self.variances = gs.random.rand(self.n_gaussians) / 10 + 0.8 self.mixture_coefficients = \ gs.ones(self.n_gaussians) / self.n_gaussians posterior_probabilities = gs.ones((data.shape[0], self.means.shape[0])) self.variances_range,\ self.normalization_factor_var, \ self.phi_inv_var =\ self.normalization_factor_init( gs.arange( ZETA_LOWER_BOUND, ZETA_UPPER_BOUND, ZETA_STEP)) for epoch in range(self.max_iter): old_posterior_probabilities = posterior_probabilities posterior_probabilities = self._expectation(data) condition = gs.mean( gs.abs(old_posterior_probabilities - posterior_probabilities)) if condition < EM_CONV_RATE and epoch > MINIMUM_EPOCHS: logging.info('EM converged in %s iterations', epoch) return self.means, self.variances, self.mixture_coefficients self._maximization(data, posterior_probabilities) logging.info('WARNING: EM did not converge \n' 'Please increase MINIMUM_EPOCHS.') return self.means, self.variances, self.mixture_coefficients
def _fit(self, X, base_point=None): """Fit the model by computing full SVD on X. Parameters ---------- X : array-like, shape=[..., n_features] Training data, where n_samples is the number of samples and n_features is the number of features. y : Ignored (Compliance with scikit-learn interface) base_point : array-like, shape=[..., n_features] Point at which to perform the tangent PCA. Optional, default to Frechet mean if None. Returns ------- U, S, V : array-like Matrices of the SVD decomposition """ if base_point is None: mean = FrechetMean(metric=self.metric, point_type=self.point_type) mean.fit(X) base_point = mean.estimate_ tangent_vecs = self.metric.log(X, base_point=base_point) if self.point_type == 'matrix': if Matrices.is_symmetric(tangent_vecs).all(): X = SymmetricMatrices.to_vector(tangent_vecs) else: X = gs.reshape(tangent_vecs, (len(X), -1)) else: X = tangent_vecs if self.n_components is None: n_components = min(X.shape) else: n_components = self.n_components n_samples, n_features = X.shape if n_components == 'mle': if n_samples < n_features: raise ValueError("n_components='mle' is only supported " "if n_samples >= n_features") elif not 0 <= n_components <= min(n_samples, n_features): raise ValueError("n_components=%r must be between 0 and " "min(n_samples, n_features)=%r with " "svd_solver='full'" % (n_components, min(n_samples, n_features))) elif n_components >= 1: if not isinstance(n_components, numbers.Integral): raise ValueError("n_components=%r must be of type int " "when greater than or equal to 1, " "was of type=%r" % (n_components, type(n_components))) # Center data - the mean should be 0 if base_point is the Frechet mean self.mean_ = gs.mean(X, axis=0) X -= self.mean_ U, S, V = gs.linalg.svd(X, full_matrices=False) # flip eigenvectors' sign to enforce deterministic output U, V = svd_flip(U, V) components_ = V # Get variance explained by singular values explained_variance_ = (S**2) / (n_samples - 1) total_var = explained_variance_.sum() explained_variance_ratio_ = explained_variance_ / total_var singular_values_ = gs.copy(S) # Store the singular values. # Postprocess the number of components required if n_components == 'mle': n_components = \ _infer_dimension_(explained_variance_, n_samples, n_features) elif 0 < n_components < 1.0: # number of components for which the cumulated explained # variance percentage is superior to the desired threshold ratio_cumsum = stable_cumsum(explained_variance_ratio_) n_components = gs.searchsorted(ratio_cumsum, n_components) + 1 # Compute noise covariance using Probabilistic PCA model # The sigma2 maximum likelihood (cf. eq. 12.46) if n_components < min(n_features, n_samples): self.noise_variance_ = explained_variance_[n_components:].mean() else: self.noise_variance_ = 0. self.base_point_fit = base_point self.n_samples_, self.n_features_ = n_samples, n_features self.components_ = components_[:n_components] self.n_components_ = int(n_components) self.explained_variance_ = explained_variance_[:n_components] self.explained_variance_ratio_ = \ explained_variance_ratio_[:n_components] self.singular_values_ = singular_values_[:n_components] return U, S, V
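# A usage sketch for the tangent PCA fit above, assuming the estimator is
# exposed as geomstats.learning.pca.TangentPCA and provides a scikit-learn
# style public fit method wrapping _fit (both are assumptions); when no base
# point is given, the Fréchet mean is used, as in the code above.
import geomstats.backend as gs
from geomstats.geometry.hypersphere import Hypersphere
from geomstats.learning.pca import TangentPCA

gs.random.seed(0)
sphere = Hypersphere(dim=2)
data = sphere.random_uniform(n_samples=100)

tpca = TangentPCA(metric=sphere.metric, n_components=2)
tpca.fit(data)
print(tpca.explained_variance_ratio_)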