def find_normalization_factor(
        variances, variances_range, normalization_factor_var):
    """Look up the tabulated normalization factor nearest to each variance.

    For every entry of ``variances`` the closest entry of
    ``variances_range`` (smallest absolute difference) is located and the
    value stored at the same position in ``normalization_factor_var`` is
    returned.

    Parameters
    ----------
    variances : array-like, shape=[n_gaussians,]
        Array of standard deviations for each component of some GMM.
    variances_range : array-like, shape=[n_variances,]
        Array of standard deviations.
    normalization_factor_var : array-like, shape=[n_variances,]
        Array of computed normalization factor.

    Returns
    -------
    norm_factor : array-like, shape=[n_gaussians,]
        Array of normalization factors for the given variances.
    """
    n_components = variances.shape[0]
    n_grid = variances_range.shape[0]

    # Two [n_gaussians, n_variances] tables: the grid repeated on every row
    # and each query value repeated along its own row.
    grid = gs.repeat(gs.expand_dims(variances_range, 0), n_components, axis=0)
    queries = gs.repeat(gs.expand_dims(variances, 1), n_grid, axis=1)

    nearest = gs.argmin(gs.abs(grid - queries), axis=-1)
    return normalization_factor_var[nearest]
def normalization_factor(self, variances):
    """Return normalization factor.

    Parameters
    ----------
    variances : array-like, shape=[n,]
        Array of equally distant values of the variance precision time.

    Returns
    -------
    norm_func : array-like, shape=[n,]
        Normalisation factor for all given variances.
    """
    n_values = variances.shape[0]
    last_order = self.dim - 1

    # Prefix products of [1, 1, 2, ..., dim - 1]; combined with their
    # reversed copy they form the coefficient attached to each order.
    factors = gs.arange(self.dim)
    factors[0] = 1
    full_product = factors.prod()
    prefix_products = gs.concatenate(
        [gs.expand_dims(factors[:stop].prod(), 0)
         for stop in range(1, factors.shape[0] + 1)],
        0)
    reversed_products = gs.flip(prefix_products, 0)
    binomial = full_product / (prefix_products * reversed_products)
    binomial = gs.repeat(gs.expand_dims(binomial, -1), n_values, axis=1)

    # Every quantity below is laid out as [dim, n]: one row per order,
    # one column per variance value.
    sigma = gs.repeat(gs.expand_dims(variances, axis=0), self.dim, axis=0)
    orders = gs.repeat(
        gs.expand_dims(gs.arange(self.dim), -1), n_values, axis=1)
    unit = gs.repeat(gs.expand_dims(gs.ones(self.dim), -1), n_values, axis=1)
    signs = (-unit) ** orders

    scaled = ((last_order - 2 * orders) * sigma) / gs.sqrt(2)
    bracket = (1 + gs.erf(scaled)) * gs.exp(scaled ** 2)
    summands = signs * (binomial * bracket)

    return NORMALIZATION_FACTOR_CST * variances * \
        summands.sum(0) * (1 / (2 ** last_order))
def permute_vectorization_test_data(self):
    """Build smoke data combining single/batched graphs and permutations.

    Four cases are produced: one graph or the whole batch of three random
    graphs, each paired with either a single permutation or a stack of
    identical permutations.
    """
    space = self._PointSet(2)
    points = space.random_point(3)
    permutation = gs.array([0, 1])

    def stacked(n_copies):
        # Same permutation repeated along a new leading axis.
        return gs.repeat(gs.expand_dims(permutation, 0), n_copies, axis=0)

    cases = (
        (points[0], permutation),
        (points[0], stacked(2)),
        (points, permutation),
        (points, stacked(points.shape[0])),
    )
    smoke_data = [
        {"space": space, "graph": graph, "id_permutation": id_permutation}
        for graph, id_permutation in cases
    ]
    return self.generate_tests(smoke_data)
def norm_factor_gradient(self, variances):
    """Compute normalization factor and its gradient.

    Compute normalization factor given current variance
    and dimensionality.

    Parameters
    ----------
    variances : array-like, shape=[n]
        Value of variance.

    Returns
    -------
    norm_factor : array-like, shape=[n]
        Normalisation factor.
    norm_factor_gradient : array-like, shape=[n]
        Gradient of the normalization factor.
        NOTE(review): the value returned is ``1 / sigma + S' / S`` where
        ``S`` is the summed series, i.e. it is not multiplied back by
        ``norm_factor`` - confirm this is what callers expect.
    """
    sigma = gs.transpose(gs.to_ndarray(variances, to_ndim=2))
    n_terms = self.dim
    orders = gs.arange(0, n_terms, 1.0)
    alpha = self._compute_alpha(orders)

    # Coefficients obtained as a running product of successive ratios,
    # then given alternating signs.
    ratios = gs.ones(n_terms)
    ratios[1:] = (n_terms - 1 + 1 - orders[1:]) / orders[1:]
    beta = ((-gs.ones(n_terms)) ** orders) * gs.cumprod(ratios)

    sigma_tiled = gs.repeat(sigma, n_terms, -1)
    alpha_sigma = gs.einsum("ij,j->ij", sigma_tiled, alpha)
    exp_erf = gs.exp(alpha_sigma ** 2) * (1 + gs.erf(alpha_sigma))

    prefactor = gs.sqrt(gs.pi / 2.0) * (1.0 / (2 ** (n_terms - 1)))
    series_sum = gs.sum(
        gs.einsum("ij,j->ij", exp_erf, beta), axis=-1, keepdims=True)
    norm_factor = prefactor * sigma * series_sum

    # Derivative of each series term with respect to sigma.
    d_exp_erf = exp_erf * gs.einsum("ij,j->ij", sigma_tiled, alpha ** 2) * 2
    d_constant = gs.repeat(
        gs.expand_dims((2 / gs.sqrt(gs.pi)) * alpha, axis=0),
        sigma.shape[0],
        axis=0,
    )
    d_series = gs.einsum("ij, j->ij", d_exp_erf + d_constant, beta)
    d_series_sum = gs.sum(d_series, axis=-1, keepdims=True)

    norm_factor_gradient = 1 / sigma + ((1 / series_sum) * d_series_sum)

    return gs.squeeze(norm_factor), gs.squeeze(norm_factor_gradient)
def gmm_pdf(
        data, means, variances, norm_func, metric,
        variances_range, norm_func_var):
    """Return the separate probability density function of GMM.

    The probability density function is computed for
    each component of the GMM separately (i.e., mixture coefficients
    are not taken into account).

    Parameters
    ----------
    data : array-like, shape=[n_samples, dim]
        Points at which the GMM probability density is computed.
    means : array-like, shape=[n_gaussians, dim]
        Means of each component of the GMM.
    variances : array-like, shape=[n_gaussians,]
        Variances of each component of the GMM.
    norm_func : function
        Normalisation factor function. It is called as
        ``norm_func(variances, variances_range, norm_func_var)`` and must
        return one factor per component.
    metric : function
        Distance function associated with the used metric. Only its
        ``dist_broadcast(data, means)`` method is used.
    variances_range : array-like
        Forwarded unchanged to ``norm_func`` as its second argument.
    norm_func_var : array-like
        Forwarded unchanged to ``norm_func`` as its third argument.

    Returns
    -------
    pdf : array-like, shape=[n_samples, n_gaussians,]
        Probability density function computed at each data
        sample and for each component of the GMM.
    """
    n_samples = data.shape[0]
    n_gaussians = means.shape[0]
    # Flattened length of the [n_samples, n_gaussians] tables handled below.
    n_entries = n_samples * variances.shape[0]

    variances_flat = gs.reshape(
        gs.repeat(gs.expand_dims(variances, 0), n_samples, 0), (n_entries,))

    neg_sq_dist = -(metric.dist_broadcast(data, means) ** 2)
    neg_sq_dist = gs.reshape(neg_sq_dist, (n_entries,))

    num = gs.exp(neg_sq_dist / (2 * variances_flat ** 2))

    den = norm_func(variances, variances_range, norm_func_var)
    den = gs.reshape(
        gs.repeat(gs.expand_dims(den, 0), n_samples, axis=0), (-1,))

    pdf = num / den
    return gs.reshape(pdf, (n_samples, n_gaussians))
def weighted_gmm_pdf(
        mixture_coefficients, mesh_data, means, variances, metric):
    """Return the probability density function of a GMM.

    Parameters
    ----------
    mixture_coefficients : array-like, shape=[n_gaussians,]
        Coefficients of the Gaussian mixture model.
    mesh_data : array-like, shape=[n_precision, dim]
        Points at which the GMM probability density is computed.
    means : array-like, shape=[n_gaussians, dim]
        Means of each component of the GMM.
    variances : array-like, shape=[n_gaussians,]
        Variances of each component of the GMM.
    metric : function
        Distance function associated with the used metric.

    Returns
    -------
    weighted_pdf : array-like, shape=[n_precision, n_gaussians,]
        Probability density function computed for each point of
        the mesh data, for each component of the GMM.
    """
    def as_rows(vector, n_rows):
        # Copy a per-component vector onto ``n_rows`` identical rows.
        return gs.repeat(gs.expand_dims(vector, 0), n_rows, axis=0)

    dist = metric.dist_broadcast(mesh_data, means)
    sigma_rows = as_rows(variances, dist.shape[0])
    gaussian = gs.exp(-(dist ** 2) / (2 * sigma_rows ** 2))
    n_rows = len(gaussian)

    # NOTE(review): the erf factor sits inside the exponential's argument;
    # confirm against the intended closed form of the normalization factor.
    zeta_sigma = (PI_2_3 * variances) * gs.exp(
        (variances ** 2 / 2) * gs.erf(variances / gs.sqrt(2)))

    numerator = as_rows(mixture_coefficients, n_rows) * gaussian
    denominator = as_rows(zeta_sigma, n_rows)
    return numerator / denominator
def loss(example_embedding, context_embedding, negative_embedding, manifold):
    """Compute loss and grad.

    Compute loss and grad given embedding of the current example,
    embedding of the context and negative sampling embedding.

    Returns the pair ``(total_loss, example_grad)``.
    """
    n_negatives = negative_embedding.shape[0]
    dim = example_embedding.shape[-1]
    squared_dist = manifold.metric.squared_dist

    example = gs.expand_dims(example_embedding, 0)
    context = gs.expand_dims(context_embedding, 0)

    # Loss terms: one positive (context) pair, n_negatives negative pairs.
    pos_dist = squared_dist(example, context)
    pos_loss = log_sigmoid(-pos_dist)

    example_tiled = gs.repeat(example, n_negatives, axis=0)
    neg_dist = squared_dist(example_tiled, negative_embedding)
    neg_loss = log_sigmoid(neg_dist)

    total_loss = -(pos_loss + neg_loss.sum())

    # Gradient with respect to the example embedding (chain rule).
    pos_sigmoid_grad = -grad_log_sigmoid(-pos_dist)
    pos_dist_grad = grad_squared_distance(example, context)
    pos_grad = gs.repeat(pos_sigmoid_grad, dim, axis=-1) * pos_dist_grad

    neg_dist_grad = grad_squared_distance(example_tiled, negative_embedding)
    neg_dist = gs.to_ndarray(neg_dist, to_ndim=2, axis=-1)
    neg_grad = grad_log_sigmoid(neg_dist) * neg_dist_grad

    example_grad = -(pos_grad + neg_grad.sum(axis=0))
    return total_loss, example_grad
def _ball_to_extrinsic_coordinates(point):
    """Convert ball to extrinsic coordinates.

    Convert the parameterization of a point in hyperbolic space
    from its poincare ball model coordinates, to the extrinsic
    coordinates.

    Parameters
    ----------
    point : array-like, shape=[n_samples, dimension]
        Point in hyperbolic space in Poincare ball coordinates.

    Returns
    -------
    extrinsic : array-like, shape=[n_samples, dimension + 1]
        Point in hyperbolic space in extrinsic coordinates.
    """
    sq_norm = gs.sum(point**2, -1)
    one_minus_sq_norm = 1 - sq_norm

    # Leading coordinate, stored as a column so it can be concatenated.
    first_coord = gs.to_ndarray(
        (1 + sq_norm) / one_minus_sq_norm, to_ndim=2, axis=1)

    divisor = gs.repeat(
        gs.expand_dims(one_minus_sq_norm, -1), point.shape[-1], -1)
    remaining_coords = (2 * point) / divisor

    return gs.concatenate([first_coord, remaining_coords], -1)
def closest_neighbor_index(self, point, neighbors):
    """Closest neighbor of point among neighbors.

    Parameters
    ----------
    point : array-like, shape=[..., dim]
        Point.
    neighbors : array-like, shape=[n_neighbors, dim]
        Neighbors.

    Returns
    -------
    closest_neighbor_index : int
        Index of closest neighbor. When several points are given (``point``
        has as many dimensions as ``neighbors`` and more than one row), one
        index per point is returned instead.
    """
    n_points = 1
    if gs.ndim(point) == gs.ndim(neighbors):
        n_points = point.shape[0]
    n_neighbors = neighbors.shape[0]

    if n_points > 1 and n_neighbors > 1:
        # Enumerate every (neighbor, point) pair: each neighbor is repeated
        # n_points times while the block of points is tiled n_neighbors times.
        neighbors = gs.repeat(neighbors, n_points, axis=0)
        point = gs.concatenate([point] * n_neighbors)

    pair_dists = gs.reshape(
        self.dist(point, neighbors), (n_neighbors, n_points))
    closest = gs.argmin(gs.transpose(pair_dists), axis=1)

    return closest[0] if n_points == 1 else closest
def sample_y(self, X, n_samples=1, random_state=0):
    """Draw samples from Wrapped Gaussian process and evaluate at X.

    A fitted Wrapped Gaussian process can be use to sample
    values through the following steps:

    - Use the stored Gaussian process regression on the dataset
      to sample tangent values
    - Compute the base-points using the prior
    - Flatten (and repeat if needed) both the base-points and the
      tangent samples to benefit from vectorized computation.
    - Map the tangent samples on the manifold via the metric's exp
      with the flattened and repeated base-points yielded by the prior

    Parameters
    ----------
    X : array-like of shape (n_samples_X, n_features) or list of object
        Query points where the WGP is evaluated.
    n_samples : int, default=1
        Number of samples drawn from the Wrapped Gaussian process per query
        point.
    random_state : int, RandomState instance or None, default=0
        Determines random number generation to randomly draw samples.
        Pass an int for reproducible results across multiple function
        calls.

    Returns
    -------
    y_samples : ndarray of shape (n_samples_X, n_samples), or
        (n_samples_X, n_targets, n_samples)
        Values of n_samples samples drawn from wrapped Gaussian process and
        evaluated at query points.
    """
    target_shape = self.y_train_shape_

    def flatten(batch):
        # Swap the two trailing axes, then merge the leading ones so that
        # each row is one target-shaped sample.
        return gs.reshape(gs.transpose(batch, [0, 2, 1]), (-1, *target_shape))

    drawn = self._euclidean_gpr.sample_y(X, n_samples, random_state)
    tangent_flat = flatten(gs.cast(drawn, dtype=X.dtype))

    # One copy of each prior base point per drawn sample.
    base = gs.repeat(gs.expand_dims(self.prior(X), 2), n_samples, axis=2)
    base_flat = flatten(base)

    mapped = self.metric.exp(tangent_flat, base_point=base_flat)
    unflattened = gs.reshape(mapped, (X.shape[0], n_samples, *target_shape))
    return gs.transpose(unflattened, [0, 2, 1])
def test_weighted_frechet_mean(self):
    """Test for weighted mean."""
    samples = gs.array(
        [[0.1, 0.2], [0.25, 0.35], [-0.1, -0.2], [-0.4, 0.3]])
    # Same weight for every sample.
    uniform_weights = gs.repeat([0.5], samples.shape[0])

    estimator = FrechetMean(metric=self.metric, point_type='vector')
    estimator.fit(samples, uniform_weights)

    expected = [-0.03857, 0.15922]
    self.assertAllClose(estimator.estimate_, expected, TOLERANCE)
def find_variance_from_index(
        weighted_distances, variances_range, phi_inv_var):
    r"""Return the variance given weighted distances.

    Each weighted distance is matched to the closest entry of
    ``phi_inv_var``; the variance stored at the same position in
    ``variances_range`` is returned.

    Parameters
    ----------
    weighted_distances : array-like, shape=[n_gaussians,]
        Mean of the weighted distances between training data
        and current barycentres. The weights of each data sample
        corresponds to the probability of belonging to a component
        of the Gaussian mixture model.
    variances_range : array-like, shape=[n_variances,]
        Array of standard deviations.
    phi_inv_var : array-like, shape=[n_variances,]
        Array of the computed inverse of a function phi
        whose expression is closed-form
        :math:`\sigma \mapsto \sigma^3 \times
        \frac{d}{d\sigma} \log \zeta_m(\sigma)`,
        where :math:`\sigma` denotes the variance,
        :math:`\zeta` the normalization coefficient
        and :math:`m` the dimension.

    Returns
    -------
    var : array-like, shape=[n_gaussians,]
        Estimated variances for each component of the GMM.
    """
    n_components = weighted_distances.shape[0]
    n_grid = variances_range.shape[0]

    # [n_gaussians, n_variances] tables: tabulated values on every row,
    # each query value repeated along its own row.
    table = gs.repeat(gs.expand_dims(phi_inv_var, 0), n_components, axis=0)
    queries = gs.repeat(gs.expand_dims(weighted_distances, 1), n_grid, axis=1)

    nearest = gs.argmin(gs.abs(table - queries), -1)
    return variances_range[nearest]
def mobius_add(self, point_a, point_b):
    r"""Compute the Mobius addition of two points.

    Mobius addition operation that is necessary operation
    to compute the log and exp using the 'ball' representation.

    .. math::

        a\oplus b=\frac{(1+2\langle a,b\rangle + ||b||^2)a+
        (1-||a||^2)b}{1+2\langle a,b\rangle + ||a||^2||b||^2}

    Parameters
    ----------
    point_a : array-like, shape=[n_samples, dimension + 1]
        Point in hyperbolic space.
    point_b : array-like, shape=[n_samples, dimension + 1]
        Point in hyperbolic space.

    Returns
    -------
    mobius_add : array-like
        Result of the Mobius addition; it has one entry per coordinate of
        the inputs (the per-sample scalars are repeated along the last
        axis before being combined with the points).
    """
    n_coords = point_a.shape[-1]

    def spread(per_sample):
        # Repeat a keepdims reduction across all coordinates.
        return gs.repeat(per_sample, n_coords, -1)

    sq_norm_a = spread(gs.sum(point_a**2, axis=-1, keepdims=True))
    sq_norm_b = spread(gs.sum(point_b**2, axis=-1, keepdims=True))
    inner_ab = spread(gs.sum(point_a * point_b, axis=-1, keepdims=True))

    numerator = ((1 + 2 * inner_ab + sq_norm_b) * point_a
                 + (1 - sq_norm_a) * point_b)
    denominator = (1 + 2 * inner_ab + sq_norm_a * sq_norm_b)

    return numerator / denominator
def test_unary_op_vec(self, func_name, a):
    """Check a unary backend function gives the same result on a batch."""
    op = get_backend_fnc(func_name)
    expected = op(a)

    # Batch made of two copies of the same input.
    batch = gs.repeat(gs.expand_dims(a, 0), 2, axis=0)
    batch_result = op(batch)

    for row in batch_result:
        self.assertAllClose(row, expected)
def belongs(self, point):
    """Check if point belongs to the Euclidean space.

    The only criterion is that the last dimension of ``point`` equals
    ``self.dimension``; the resulting boolean is repeated once per point
    and returned as a column.
    """
    point_2d = gs.to_ndarray(point, to_ndim=2)
    n_points, point_dim = point_2d.shape

    has_expected_dim = point_dim == self.dimension
    per_point = gs.repeat(has_expected_dim, repeats=n_points, axis=0)
    return gs.to_ndarray(per_point, to_ndim=2, axis=1)
def mobius_add(self, point_a, point_b):
    """Compute the mobius addition of two points.

    Mobius addition is necessary for computation of the log and exp
    using the 'poincare' representation set as point_type.

    Parameters
    ----------
    point_a : array-like, shape=[n_samples, dimension + 1]
                          or shape=[1, dimension + 1]
    point_b : array-like, shape=[n_samples, dimension + 1]
                          or shape=[1, dimension + 1]

    Returns
    -------
    mobius_add : array-like
        Result of the addition, with one entry per coordinate of the
        inputs.
    """
    width = point_a.shape[-1]

    # Per-sample scalars, each repeated over every coordinate.
    a_sq = gs.repeat(gs.sum(point_a**2, axis=-1, keepdims=True), width, -1)
    b_sq = gs.repeat(gs.sum(point_b**2, axis=-1, keepdims=True), width, -1)
    a_dot_b = gs.repeat(
        (point_a * point_b).sum(-1, keepdims=True), width, -1)

    top = ((1 + 2 * a_dot_b + b_sq) * point_a
           + (1 - a_sq) * point_b)
    bottom = (1 + 2 * a_dot_b + a_sq * b_sq)

    return top / bottom
def test_binary_op_vec(self, func_name, a, b):
    """Check a binary backend function broadcasts over batched inputs."""
    op = get_backend_fnc(func_name)
    single = op(a, b)

    a_row = gs.expand_dims(a, 0)
    b_row = gs.expand_dims(b, 0)
    a_batch = gs.repeat(a_row, 2, axis=0)
    b_batch = gs.repeat(b_row, 2, axis=0)

    # All results are computed before anything is asserted.
    only_a_batched = op(a_batch, b)
    only_b_batched = op(a, b_batch)
    both_batched = op(a_batch, b_batch)
    a_row_b_batch = op(a_row, b_batch)
    a_batch_b_row = op(a_batch, b_row)

    for mixed in (
            only_a_batched, only_b_batched, a_row_b_batch, a_batch_b_row):
        self.assertAllClose(mixed, both_batched)

    for row in both_batched:
        self.assertAllClose(row, single)
def belongs(self, point):
    """Evaluate if a point belongs to the Minkowski space.

    The check only compares the last dimension of ``point`` with
    ``self.dimension``.

    Parameters
    ----------
    point : array-like
        Point(s) to test; converted to a 2-D array first.

    Returns
    -------
    belongs : array-like, 2-D
        The comparison result, repeated once per point and stored as a
        column.
    """
    point = gs.to_ndarray(point, to_ndim=2)
    n_points, point_dim = point.shape

    belongs = point_dim == self.dimension
    belongs = gs.repeat(belongs, repeats=n_points, axis=0)
    belongs = gs.to_ndarray(belongs, to_ndim=2, axis=1)
    return belongs
def load_optical_nerves():
    """Load data from data/optical_nerves/optical_nerves.txt.

    Load the dataset of sets of 5 landmarks, labelled S, T, I, N, V, in 3D
    on monkeys' optical nerve heads:

    - 1st landmark (S): superior aspect of the retina,
    - 2nd landmark (T): side of the retina closest to the temporal
      bone of the skull,
    - 3rd landmark (N): nose side of the retina,
    - 4th landmark (I): inferior point,
    - 5th landmarks (V): optical nerve head deepest point.

    NOTE(review): the label list above ("S, T, I, N, V") and the enumerated
    order (N third, I fourth) disagree; confirm against the data file.

    For each monkey, an experimental glaucoma was introduced in one eye,
    while the second eye was kept as control. This dataset can be used to
    investigate a significant difference between the glaucoma and the
    control eyes.

    Label 0 refers to a normal eye, and Label 1 to an eye with glaucoma.

    References
    ----------
    .. [PE2015] V. Patrangenaru and L. Ellingson. Nonparametric Statistics
        on Manifolds and Their Applications to Object Data, 2015.
        https://doi.org/10.1201/b18969

    Returns
    -------
    data : array-like, shape=[22, 5, 3]
        Data representing the 5 landmarks, in 3D, for 11 different monkeys.
    labels : array-like, shape=[22,]
        Labels in {0, 1} classifying the corresponding optical nerve as
        normal (label = 0) or glaucoma (label = 1).
    monkeys : array-like, shape=[22,]
        Indices in 0...10 referencing the index of the monkey to which a
        given optical nerve belongs.
    """
    excluded_files = ["laljn103.12b", "lalj0103.12b"]
    table = (
        pd.read_csv(OPTICAL_NERVES_PATH, sep="\t")
        .set_index("Filename")
        .drop(index=excluded_files)
        .reset_index(drop=True)
    )

    values = gs.array(table.values)
    n_rows = values.shape[0]

    # Each row is split into consecutive (x, y, z) triples.
    data = gs.reshape(values, (n_rows, -1, 3))
    # Labels alternate 0, 1 row after row; monkey ids come in pairs.
    labels = gs.tile([0, 1], [n_rows // 2])
    monkeys = gs.repeat(gs.arange(11), 2)

    return data, labels, monkeys
def update_means(self, data, posterior_probabilities):
    """Update means.

    Fits a weighted Frechet mean per mixture component, using the posterior
    probabilities as weights, and stores the squeezed estimate in
    ``self.means``.
    """
    n_components = posterior_probabilities.shape[-1]

    estimator_options = {
        "metric": self.metric,
        "method": self.mean_method,
        "lr": self.lr_mean,
        "epsilon": self.tol_mean,
        "max_iter": self.max_iter_mean,
        "point_type": self.point_type,
    }
    estimator = FrechetMean(**estimator_options)

    # One copy of the data per component, along a new second axis.
    data_per_component = gs.repeat(
        gs.expand_dims(data, 1), n_components, axis=1)

    estimator.fit(data_per_component, weights=posterior_probabilities)
    self.means = gs.squeeze(estimator.estimate_)
def update_means(self, data, posterior_probabilities, lr_means,
                 tau_means, max_iter=DEFAULT_MAX_ITER):
    """Means update function.

    Fits a weighted Frechet mean per mixture component, using the posterior
    probabilities as weights, and stores the squeezed estimate in
    ``self.means``.
    """
    n_components = posterior_probabilities.shape[-1]

    estimator_options = {
        "metric": self.riemannian_metric,
        "method": self.mean_method,
        "lr": lr_means,
        "tau": tau_means,
        "max_iter": max_iter,
        "point_type": self.point_type,
    }
    estimator = FrechetMean(**estimator_options)

    # One copy of the data per component, along a new second axis.
    data_per_component = gs.repeat(
        gs.expand_dims(data, 1), n_components, axis=1)

    estimator.fit(data_per_component, weights=posterior_probabilities)
    self.means = gs.squeeze(estimator.estimate_)
def test_binary_op_vec_raises_error(self, func_name, a, b):
    """Run the raises-error check on batches of mismatched sizes (2 vs 3)."""
    a_row = gs.expand_dims(a, 0)
    b_row = gs.expand_dims(b, 0)

    a_twice = gs.repeat(a_row, 2, axis=0)
    b_thrice = gs.repeat(b_row, 3, axis=0)

    self.test_binary_op_raises_error(func_name, a_twice, b_thrice)
def log(self, point, base_point):
    """Compute Riemannian logarithm of a point wrt a base point.

    Two representations are handled, selected by ``self.point_type``:
    'extrinsic' and 'ball'.

    Parameters
    ----------
    point : array-like, shape=[n_samples, dimension + 1]
        Point in hyperbolic space.
    base_point : array-like, shape=[n_samples, dimension + 1]
        Point in hyperbolic space.

    Returns
    -------
    log : array-like, shape=[n_samples, dimension + 1]
        Tangent vector at the base point equal to the Riemannian logarithm
        of point at the base point.

    Raises
    ------
    NotImplementedError
        If ``self.point_type`` is neither 'extrinsic' nor 'ball'.
    """
    if self.point_type == 'extrinsic':
        point = gs.to_ndarray(point, to_ndim=2)
        base_point = gs.to_ndarray(base_point, to_ndim=2)

        angle = self.dist(base_point, point) / self.scale
        angle = gs.to_ndarray(angle, to_ndim=1)
        angle = gs.to_ndarray(angle, to_ndim=2)

        mask_0 = gs.isclose(angle, 0.)
        mask_else = ~mask_0

        mask_0_float = gs.cast(mask_0, gs.float32)
        mask_else_float = gs.cast(mask_else, gs.float32)

        coef_1 = gs.zeros_like(angle)
        coef_2 = gs.zeros_like(angle)

        # Near-zero angles use polynomial (Taylor-coefficient) expressions
        # instead of the closed forms, which would divide by zero.
        coef_1 += mask_0_float * (
            1. + INV_SINH_TAYLOR_COEFFS[1] * angle**2
            + INV_SINH_TAYLOR_COEFFS[3] * angle**4
            + INV_SINH_TAYLOR_COEFFS[5] * angle**6
            + INV_SINH_TAYLOR_COEFFS[7] * angle**8)
        coef_2 += mask_0_float * (
            1. + INV_TANH_TAYLOR_COEFFS[1] * angle**2
            + INV_TANH_TAYLOR_COEFFS[3] * angle**4
            + INV_TANH_TAYLOR_COEFFS[5] * angle**6
            + INV_TANH_TAYLOR_COEFFS[7] * angle**8)

        # This avoids dividing by 0.
        angle += mask_0_float * 1.

        coef_1 += mask_else_float * (angle / gs.sinh(angle))
        coef_2 += mask_else_float * (angle / gs.tanh(angle))

        log = (gs.einsum('ni,nj->nj', coef_1, point)
               - gs.einsum('ni,nj->nj', coef_2, base_point))
        return log

    elif self.point_type == 'ball':
        add_base_point = self.mobius_add(-base_point, point)

        norm_add = gs.to_ndarray(
            gs.linalg.norm(add_base_point, axis=-1), 2, -1)
        norm_add = gs.repeat(norm_add, base_point.shape[-1], -1)

        norm_base_point = gs.to_ndarray(
            gs.linalg.norm(base_point, axis=-1), 2, -1)
        norm_base_point = gs.repeat(
            norm_base_point, base_point.shape[-1], -1)

        log = (1 - norm_base_point**2) * gs.arctanh(norm_add)\
            * (add_base_point / norm_add)

        # Samples where point == base_point produce 0 / 0 above. Build one
        # flag per sample (reduce over the coordinate axis only) so that
        # exactly those rows are reset to the zero tangent vector, even
        # when other samples of the batch are non-degenerate.
        mask_0 = gs.all(gs.isclose(norm_add, 0.), axis=-1)
        log[mask_0] = 0
        return log

    else:
        raise NotImplementedError(
            'log is only implemented for ball and extrinsic')
def exp(self, tangent_vec, base_point):
    """Compute the Riemannian exponential of a tangent vector.

    Two representations are handled, selected by ``self.point_type``:
    'extrinsic' and 'ball'.

    Parameters
    ----------
    tangent_vec : array-like, shape=[n_samples, dimension + 1]
        Tangent vector at a base point.
    base_point : array-like, shape=[n_samples, dimension + 1]
        Point in hyperbolic space.

    Returns
    -------
    exp : array-like, shape=[n_samples, dimension + 1]
        Point in hyperbolic space equal to the Riemannian exponential
        of tangent_vec at the base point.

    Raises
    ------
    NotImplementedError
        If ``self.point_type`` is neither 'extrinsic' nor 'ball'.
    """
    if self.point_type == 'extrinsic':
        tangent_vec = gs.to_ndarray(tangent_vec, to_ndim=2)
        base_point = gs.to_ndarray(base_point, to_ndim=2)

        sq_norm_tangent_vec = self.embedding_metric.squared_norm(
            tangent_vec)
        # Negative values are clipped away before taking the square root.
        sq_norm_tangent_vec = gs.clip(sq_norm_tangent_vec, 0, math.inf)
        norm_tangent_vec = gs.sqrt(sq_norm_tangent_vec)

        mask_0 = gs.isclose(sq_norm_tangent_vec, 0.)
        mask_0 = gs.to_ndarray(mask_0, to_ndim=1)
        mask_else = ~mask_0
        mask_else = gs.to_ndarray(mask_else, to_ndim=1)

        mask_0_float = gs.cast(mask_0, gs.float32)
        mask_else_float = gs.cast(mask_else, gs.float32)

        coef_1 = gs.zeros_like(norm_tangent_vec)
        coef_2 = gs.zeros_like(norm_tangent_vec)

        # Near-zero norms use polynomial (Taylor-coefficient) expressions
        # instead of the closed forms, which would divide by zero.
        coef_1 += mask_0_float * (
            1. + COSH_TAYLOR_COEFFS[2] * norm_tangent_vec**2
            + COSH_TAYLOR_COEFFS[4] * norm_tangent_vec**4
            + COSH_TAYLOR_COEFFS[6] * norm_tangent_vec**6
            + COSH_TAYLOR_COEFFS[8] * norm_tangent_vec**8)
        coef_2 += mask_0_float * (
            1. + SINH_TAYLOR_COEFFS[3] * norm_tangent_vec**2
            + SINH_TAYLOR_COEFFS[5] * norm_tangent_vec**4
            + SINH_TAYLOR_COEFFS[7] * norm_tangent_vec**6
            + SINH_TAYLOR_COEFFS[9] * norm_tangent_vec**8)

        # This avoids dividing by 0.
        norm_tangent_vec += mask_0_float * 1.0

        coef_1 += mask_else_float * (gs.cosh(norm_tangent_vec))
        coef_2 += mask_else_float * (
            (gs.sinh(norm_tangent_vec) / (norm_tangent_vec)))

        exp = (gs.einsum('ni,nj->nj', coef_1, base_point)
               + gs.einsum('ni,nj->nj', coef_2, tangent_vec))

        hyperbolic_space = Hyperbolic(dimension=self.dimension)
        exp = hyperbolic_space.regularize(exp)
        return exp

    elif self.point_type == 'ball':
        # The axis must be passed by keyword: with a numpy-like signature
        # the second positional argument of ``norm`` is ``ord``, so a bare
        # ``-1`` would not compute a per-sample Euclidean norm. This also
        # matches the tangent-vector norm computed just below.
        norm_base_point = gs.to_ndarray(
            gs.linalg.norm(base_point, axis=-1), 2, -1)
        norm_base_point = gs.repeat(
            norm_base_point, base_point.shape[-1], -1)
        den = 1 - norm_base_point**2

        norm_tan = gs.to_ndarray(
            gs.linalg.norm(tangent_vec, axis=-1), 2, -1)
        norm_tan = gs.repeat(norm_tan, base_point.shape[-1], -1)

        lambda_base_point = 1 / den

        # NOTE(review): a zero tangent vector gives 0 / 0 here; no masking
        # is applied in this branch.
        direction = tangent_vec / norm_tan
        factor = gs.tanh(lambda_base_point * norm_tan)

        exp = self.mobius_add(base_point, direction * factor)
        return exp

    else:
        raise NotImplementedError(
            'exp is only implemented for ball and extrinsic')
def mean(self, points, weights=None, n_max_iterations=32, epsilon=EPSILON,
         point_type='vector', mean_method='default', verbose=False):
    """Frechet mean of (weighted) points.

    Parameters
    ----------
    points : array-like, shape=[n_samples, dimension]
    weights : array-like, shape=[n_samples, 1], optional
        Defaults to ones. Only used by the 'default' method; the
        'frechet-poincare-ball' method does not read them.
    n_max_iterations : int, optional
        Maximum number of iterations of either method.
    epsilon : float, optional
        Tolerance of the 'default' method: iterations stop once the squared
        distance between iterates is at most ``epsilon * variance``.
    point_type : str, {'vector', 'matrix'}, optional
        Selects the number of dimensions points are converted to in the
        'default' method.
    mean_method : str, {'default', 'frechet-poincare-ball'}, optional
        Any other value falls through and ``None`` is returned.
    verbose : bool, optional
        Print iteration statistics ('default' method only).

    Returns
    -------
    mean : array-like
        the Frechet mean of points, a point on the manifold
    """
    if mean_method == 'default':

        # TODO(nina): Profile this code to study performance,
        # i.e. what to do with sq_dists_between_iterates.
        def while_loop_cond(iteration, mean, variance, sq_dist):
            result = ~gs.logical_or(
                gs.isclose(variance, 0.),
                gs.less_equal(sq_dist, epsilon * variance))
            # The first iteration is always executed.
            return result[0, 0] or iteration == 0

        def while_loop_body(iteration, mean, variance, sq_dist):
            logs = self.log(point=points, base_point=mean)

            tangent_mean = gs.einsum('nk,nj->j', weights, logs)
            tangent_mean /= sum_weights

            mean_next = self.exp(
                tangent_vec=tangent_mean,
                base_point=mean)

            sq_dist = self.squared_dist(mean_next, mean)
            sq_dists_between_iterates.append(sq_dist)

            variance = self.variance(points=points,
                                     weights=weights,
                                     base_point=mean_next)

            mean = mean_next
            iteration += 1
            return [iteration, mean, variance, sq_dist]

        if point_type == 'vector':
            points = gs.to_ndarray(points, to_ndim=2)
        if point_type == 'matrix':
            points = gs.to_ndarray(points, to_ndim=3)
        n_points = gs.shape(points)[0]

        if weights is None:
            weights = gs.ones((n_points, 1))

        weights = gs.array(weights)
        weights = gs.to_ndarray(weights, to_ndim=2, axis=1)

        sum_weights = gs.sum(weights)

        # The first point is the starting iterate.
        mean = points[0]
        if point_type == 'vector':
            mean = gs.to_ndarray(mean, to_ndim=2)
        if point_type == 'matrix':
            mean = gs.to_ndarray(mean, to_ndim=3)

        if n_points == 1:
            return mean

        sq_dists_between_iterates = []
        iteration = 0
        sq_dist = gs.array([[0.]])
        variance = gs.array([[0.]])

        last_iteration, mean, variance, sq_dist = gs.while_loop(
            lambda i, m, v, sq: while_loop_cond(i, m, v, sq),
            lambda i, m, v, sq: while_loop_body(i, m, v, sq),
            loop_vars=[iteration, mean, variance, sq_dist],
            maximum_iterations=n_max_iterations)

        if last_iteration == n_max_iterations:
            # A space separates the two sentences (the original literal
            # ran them together as "reached.The mean ...").
            print('Maximum number of iterations {} reached. '
                  'The mean may be inaccurate'.format(n_max_iterations))

        if verbose:
            print('n_iter: {}, final variance: {}, final dist: {}'.format(
                last_iteration, variance, sq_dist))

        mean = gs.to_ndarray(mean, to_ndim=2)
        return mean

    if mean_method == 'frechet-poincare-ball':

        lr = 1e-3
        tau = 5e-3

        if len(points) == 1:
            return points

        iteration = 0
        convergence = math.inf
        # Zero vector shaped like a single point (kept with a leading axis).
        barycenter = points.mean(0, keepdims=True) * 0

        while convergence > tau and n_max_iterations > iteration:

            iteration += 1

            expand_barycenter = gs.repeat(barycenter, points.shape[0], 0)

            grad_tangent = 2 * self.log(points, expand_barycenter)

            cc_barycenter = self.exp(
                lr * grad_tangent.sum(0, keepdims=True), barycenter)

            convergence = self.dist(cc_barycenter, barycenter).max().item()

            barycenter = cc_barycenter

        if iteration == n_max_iterations:
            warnings.warn(
                'Maximum number of iterations {} reached. The '
                'mean may be inaccurate'.format(n_max_iterations))

        return barycenter
def _ball_gradient_descent(points, metric, weights=None, max_iter=32,
                           lr=1e-3, tau=5e-3):
    """Perform ball gradient descent.

    Parameters
    ----------
    points : array-like
        Points to average; converted to at least 2-D. In the weighted case
        they are indexed as [n_samples, n_components, dim] - presumably the
        data repeated once per component; confirm against the caller.
    metric : object
        Must provide ``log(point, base_point)``,
        ``exp(tangent_vec, base_point)`` and ``dist``.
    weights : array-like, optional
        Per-sample weights. When omitted, an unweighted descent is run.
    max_iter : int, optional
        Maximum number of iterations.
    lr : float, optional
        Step size applied to the summed tangent vectors.
    tau : float, optional
        Stop once the largest distance between successive iterates is not
        greater than this value.

    Returns
    -------
    barycenter : array-like
        Estimated barycenter(s). A single input point is returned as is.
    """
    points = gs.to_ndarray(points, to_ndim=2)
    if len(points) == 1:
        return points[0]

    iteration = 0
    convergence = math.inf

    if weights is None:
        barycenter = gs.mean(points, axis=0, keepdims=True)

        while convergence > tau and max_iter > iteration:
            iteration += 1
            grad_tangent = 2 * metric.log(points, barycenter)
            cc_barycenter = metric.exp(
                lr * grad_tangent.sum(0, keepdims=True), barycenter)
            convergence = metric.dist(cc_barycenter, barycenter).max().item()
            barycenter = cc_barycenter
    else:
        weights = gs.expand_dims(weights, -1)
        weights = gs.repeat(weights, points.shape[-1], axis=2)

        # Start from the weighted Euclidean average of the points.
        barycenter = (points * weights).sum(0, keepdims=True) / weights.sum(0)
        barycenter_gs = gs.squeeze(barycenter)

        points_gs = gs.squeeze(points)
        points_flattened = gs.reshape(points_gs, (-1, points_gs.shape[-1]))

        while convergence > tau and max_iter > iteration:
            iteration += 1

            barycenter_flattened = gs.repeat(
                barycenter, len(points_gs), axis=0)
            barycenter_flattened = gs.reshape(
                barycenter_flattened, (-1, barycenter_flattened.shape[-1]))

            grad_tangent = 2 * metric.log(
                points_flattened, barycenter_flattened)
            grad_tangent = gs.reshape(grad_tangent, points.shape)
            grad_tangent = grad_tangent * weights

            lr_grad_tangent = lr * grad_tangent.sum(0, keepdims=True)
            lr_grad_tangent_s = lr_grad_tangent.squeeze()

            # Tangent step first, base point second - the same argument
            # order as the unweighted branch above. The previous call had
            # the two swapped, i.e. it shot the barycenter from the
            # gradient instead of the gradient from the barycenter.
            cc_barycenter = metric.exp(lr_grad_tangent_s, barycenter_gs)
            convergence = metric.dist(
                cc_barycenter, barycenter_gs).max().item()

            barycenter_gs = cc_barycenter
            barycenter = gs.expand_dims(cc_barycenter, 0)

        barycenter = gs.squeeze(barycenter)

    if iteration == max_iter:
        logging.warning('Maximum number of iterations {} reached. The '
                        'mean may be inaccurate'.format(max_iter))

    return barycenter
def loss(self, example_embedding, context_embedding, negative_embedding):
    """Compute loss and grad.

    Compute loss and grad given embedding of the current example,
    embedding of the context and negative sampling embedding.

    Parameters
    ----------
    example_embedding : array-like, shape=[dim]
        Current data sample embedding.
    context_embedding : array-like, shape=[dim]
        Current context embedding.
    negative_embedding: array-like, shape=[dim]
        Current negative sample embedding.

    Returns
    -------
    total_loss : int
        The current value of the loss function.
    example_grad : array-like, shape=[dim]
        The gradient of the loss function at the embedding
        of the current data sample.
    """
    n_negatives = negative_embedding.shape[0]
    dim = example_embedding.shape[-1]
    squared_dist = self.manifold.metric.squared_dist

    example = gs.expand_dims(example_embedding, 0)
    context = gs.expand_dims(context_embedding, 0)

    # Loss terms: one positive (context) pair, n_negatives negative pairs.
    pos_dist = squared_dist(example, context)
    pos_loss = self.log_sigmoid(-pos_dist)

    example_tiled = gs.repeat(example, n_negatives, axis=0)
    neg_dist = squared_dist(example_tiled, negative_embedding)
    neg_loss = self.log_sigmoid(neg_dist)

    total_loss = -(pos_loss + gs.sum(neg_loss))

    # Gradient with respect to the example embedding (chain rule).
    pos_sigmoid_grad = -self.grad_log_sigmoid(-pos_dist)
    pos_dist_grad = self.grad_squared_distance(example, context)
    pos_grad = (gs.repeat(pos_sigmoid_grad, dim, axis=-1)
                * pos_dist_grad)

    neg_dist_grad = self.grad_squared_distance(
        example_tiled, negative_embedding)
    neg_dist = gs.to_ndarray(neg_dist, to_ndim=2, axis=-1)
    neg_grad = self.grad_log_sigmoid(neg_dist) * neg_dist_grad

    example_grad = -(pos_grad + gs.sum(neg_grad, axis=0))
    return total_loss, example_grad
def origin(self):
    """Return the first row of the diagonal matrix diag(1,..,1,0,..,0).

    The diagonal holds ``self.k`` ones followed by ``self.n - self.k``
    zeros.
    NOTE(review): only row 0 of that matrix is returned - confirm that a
    single row (rather than the full matrix) is what callers expect.
    """
    n_ones = self.k
    n_zeros = self.n - self.k
    diagonal = gs.repeat([1, 0], [n_ones, n_zeros])
    return gs.diag(diagonal)[0]