def find_normalization_factor(variances, variances_range,
                              normalization_factor_var):
    """Find the normalization factor given some variances.

    Parameters
    ----------
    variances : array-like, shape=[n_gaussians,]
        Array of standard deviations for each component
        of some GMM.
    variances_range : array-like, shape=[n_variances,]
        Grid of standard deviations at which the normalization
        factors were precomputed.
    normalization_factor_var : array-like, shape=[n_variances,]
        Array of precomputed normalization factors.

    Returns
    -------
    norm_factor : array-like, shape=[n_gaussians,]
        Array of normalization factors for the given
        variances.
    """
    n_gaussians, precision = variances.shape[0], variances_range.shape[0]

    ref = gs.expand_dims(variances_range, 0)
    ref = gs.repeat(ref, n_gaussians, axis=0)
    val = gs.expand_dims(variances, 1)
    val = gs.repeat(val, precision, axis=1)

    difference = gs.abs(ref - val)

    index = gs.argmin(difference, axis=-1)
    norm_factor = normalization_factor_var[index]

    return norm_factor
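
The helper is a nearest-neighbour table lookup: each variance is matched
against a precomputed grid of standard deviations and the factor stored at
the closest grid point is returned. A minimal usage sketch, with
hypothetical grid values standing in for the real table and geomstats'
numpy backend assumed:

import geomstats.backend as gs

# Hypothetical lookup table: a grid of sigmas with placeholder factors.
variances_range = gs.linspace(0.1, 2.0, 100)
normalization_factor_var = 2.0 * variances_range  # stand-in values

# One sigma per GMM component.
variances = gs.array([0.5, 1.3])

# Each component's factor is read off at the nearest grid point.
factors = find_normalization_factor(
    variances, variances_range, normalization_factor_var)
# factors.shape == (2,)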
Example #2
    def normalization_factor(self, variances):
        """Return normalization factor.

        Parameters
        ----------
        variances : array-like, shape=[n,]
            Array of equally spaced variance values at which
            the normalization factor is evaluated.

        Returns
        -------
        norm_func : array-like, shape=[n,]
            Normalisation factor for all given variances.
        """
        n_samples = variances.shape[0]

        expand_variances = gs.expand_dims(variances, axis=0)
        expand_variances = gs.repeat(expand_variances, self.dim, axis=0)

        # Binomial coefficients C(dim - 1, k) from factorials: dim_range
        # holds [1, 1, 2, ..., dim - 1], so cumulative products yield k!
        # and the full product yields (dim - 1)!.
        dim_range = gs.arange(self.dim)
        dim_range[0] = 1
        n_fact = dim_range.prod()

        k_fact = gs.concatenate([
            gs.expand_dims(dim_range[:i].prod(), 0)
            for i in range(1, dim_range.shape[0] + 1)
        ], 0)

        nmk_fact = gs.flip(k_fact, 0)

        binomial_coefficient = n_fact / (k_fact * nmk_fact)

        binomial_coefficient = gs.expand_dims(binomial_coefficient, -1)
        binomial_coefficient = gs.repeat(binomial_coefficient,
                                         n_samples,
                                         axis=1)

        range_ = gs.expand_dims(gs.arange(self.dim), -1)
        range_ = gs.repeat(range_, n_samples, axis=1)

        ones_ = gs.expand_dims(gs.ones(self.dim), -1)
        ones_ = gs.repeat(ones_, n_samples, axis=1)

        alternate_neg = (-ones_)**(range_)

        erf_arg = ((
            (self.dim - 1) - 2 * range_) * expand_variances) / gs.sqrt(2)
        exp_arg = ((((self.dim - 1) - 2 * range_) * expand_variances) /
                   gs.sqrt(2))**2
        norm_func_1 = (1 + gs.erf(erf_arg)) * gs.exp(exp_arg)
        norm_func_2 = binomial_coefficient * norm_func_1
        norm_func_3 = alternate_neg * norm_func_2

        norm_func = NORMALIZATION_FACTOR_CST * variances * \
            norm_func_3.sum(0) * (1 / (2 ** (self.dim - 1)))

        return norm_func
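
For dim = 2 the alternating sum collapses to
2 * exp(sigma ** 2 / 2) * erf(sigma / sqrt(2)), so the factor reduces to
the closed form sigma * exp(sigma ** 2 / 2) * erf(sigma / sqrt(2)) up to
the global constant. A quick numerical check of that reduction (a sketch
using numpy and scipy in place of gs):

import numpy as np
from scipy.special import erf

sigma = 0.7

def term(k):
    # One term of the alternating sum for dim = 2.
    arg = (1 - 2 * k) * sigma / np.sqrt(2)
    return (1 + erf(arg)) * np.exp(arg ** 2)

summed = term(0) - term(1)
closed_form = 2 * np.exp(sigma ** 2 / 2) * erf(sigma / np.sqrt(2))
assert np.isclose(summed, closed_form)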
Example #3
    def permute_vectorization_test_data(self):
        space = self._PointSet(2)
        points = space.random_point(3)

        permutation = gs.array([0, 1])

        smoke_data = [
            dict(space=space, graph=points[0], id_permutation=permutation),
            dict(
                space=space,
                graph=points[0],
                id_permutation=gs.repeat(gs.expand_dims(permutation, 0),
                                         2,
                                         axis=0),
            ),
            dict(space=space, graph=points, id_permutation=permutation),
            dict(
                space=space,
                graph=points,
                id_permutation=gs.repeat(gs.expand_dims(permutation, 0),
                                         points.shape[0],
                                         axis=0),
            ),
        ]

        return self.generate_tests(smoke_data)
Example #4
    def norm_factor_gradient(self, variances):
        """Compute normalization factor and its gradient.

        Compute normalization factor given current variance
        and dimensionality.

        Parameters
        ----------
        variances : array-like, shape=[n]
            Value of variance.

        Returns
        -------
        norm_factor : array-like, shape=[n]
            Normalisation factor.
        norm_factor_gradient : array-like, shape=[n]
            Gradient of the normalization factor.
        """
        variances = gs.transpose(gs.to_ndarray(variances, to_ndim=2))
        dim_range = gs.arange(0, self.dim, 1.0)
        alpha = self._compute_alpha(dim_range)

        binomial_coefficient = gs.ones(self.dim)
        binomial_coefficient[1:] = (self.dim - 1 + 1 - dim_range[1:]) / dim_range[1:]
        binomial_coefficient = gs.cumprod(binomial_coefficient)

        beta = ((-gs.ones(self.dim)) ** dim_range) * binomial_coefficient

        sigma_repeated = gs.repeat(variances, self.dim, -1)
        prod_alpha_sigma = gs.einsum("ij,j->ij", sigma_repeated, alpha)
        term_2 = gs.exp((prod_alpha_sigma) ** 2) * (1 + gs.erf(prod_alpha_sigma))
        term_1 = gs.sqrt(gs.pi / 2.0) * (1.0 / (2 ** (self.dim - 1)))
        term_2 = gs.einsum("ij,j->ij", term_2, beta)
        norm_factor = term_1 * variances * gs.sum(term_2, axis=-1, keepdims=True)
        grad_term_1 = 1 / variances

        grad_term_21 = 1 / gs.sum(term_2, axis=-1, keepdims=True)

        grad_term_211 = (
            gs.exp((prod_alpha_sigma) ** 2)
            * (1 + gs.erf(prod_alpha_sigma))
            * gs.einsum("ij,j->ij", sigma_repeated, alpha**2)
            * 2
        )

        grad_term_212 = gs.repeat(
            gs.expand_dims((2 / gs.sqrt(gs.pi)) * alpha, axis=0),
            variances.shape[0],
            axis=0,
        )

        grad_term_22 = grad_term_211 + grad_term_212
        grad_term_22 = gs.einsum("ij, j->ij", grad_term_22, beta)
        grad_term_22 = gs.sum(grad_term_22, axis=-1, keepdims=True)

        norm_factor_gradient = grad_term_1 + (grad_term_21 * grad_term_22)

        return gs.squeeze(norm_factor), gs.squeeze(norm_factor_gradient)
Example #5
    def gmm_pdf(
            data, means, variances, norm_func,
            metric, variances_range, norm_func_var):
        """Return the separate probability density function of GMM.

        The probability density function is computed for
        each component of the GMM separately (i.e., mixture coefficients
        are not taken into account).

        Parameters
        ----------
        data : array-like, shape=[n_samples, dim]
            Points at which the GMM probability density is computed.
        means : array-like, shape=[n_gaussians, dim]
            Means of each component of the GMM.
        variances : array-like, shape=[n_gaussians,]
            Variances of each component of the GMM.
        norm_func : function
            Normalisation factor function.
        metric : function
            Distance function associated with the used metric.
        variances_range : array-like, shape=[n_variances,]
            Grid of standard deviations at which the normalization
            factors were precomputed.
        norm_func_var : array-like, shape=[n_variances,]
            Array of precomputed normalization factors.

        Returns
        -------
        pdf : array-like, shape=[n_samples, n_gaussians,]
            Probability density function computed at each data
            sample and for each component of the GMM.
        """
        data_length = data.shape[0]

        variances_expanded = gs.expand_dims(variances, 0)
        variances_expanded = gs.repeat(variances_expanded, data_length, 0)

        variances_flatten = variances_expanded.flatten()

        distances = -(metric.dist_broadcast(data, means) ** 2)
        distances = gs.reshape(
            distances, (data.shape[0] * variances.shape[0],))

        num = gs.exp(
            distances / (2 * variances_flatten ** 2))

        den = norm_func(variances, variances_range, norm_func_var)

        den = gs.expand_dims(den, 0)
        den = gs.repeat(den, data_length, axis=0).flatten()

        pdf = num / den
        pdf = gs.reshape(
            pdf, (data.shape[0], means.shape[0]))

        return pdf
Example #6
    def weighted_gmm_pdf(mixture_coefficients,
                         mesh_data,
                         means,
                         variances,
                         metric):
        """Return the probability density function of a GMM.

        Parameters
        ----------
        mixture_coefficients : array-like, shape=[n_gaussians,]
            Coefficients of the Gaussian mixture model.
        mesh_data : array-like, shape=[n_precision, dim]
            Points at which the GMM probability density is computed.
        means : array-like, shape=[n_gaussians, dim]
            Means of each component of the GMM.
        variances : array-like, shape=[n_gaussians,]
            Variances of each component of the GMM.
        metric : function
            Distance function associated with the used metric.

        Returns
        -------
        weighted_pdf : array-like, shape=[n_precision, n_gaussians,]
            Probability density function computed for each point of
            the mesh data, for each component of the GMM.
        """
        distance_to_mean = metric.dist_broadcast(mesh_data, means)

        variances_units = gs.expand_dims(variances, 0)
        variances_units = gs.repeat(
            variances_units, distance_to_mean.shape[0], axis=0)

        distribution_normal = gs.exp(
            -(distance_to_mean ** 2) / (2 * variances_units ** 2))

        zeta_sigma = PI_2_3 * variances
        zeta_sigma = zeta_sigma * gs.exp(
            variances ** 2 / 2) * gs.erf(variances / gs.sqrt(2))

        result_num = gs.expand_dims(mixture_coefficients, 0)
        result_num = gs.repeat(
            result_num, len(distribution_normal), axis=0)
        result_num = result_num * distribution_normal

        result_denum = gs.expand_dims(zeta_sigma, 0)
        result_denum = gs.repeat(
            result_denum, len(distribution_normal), axis=0)

        weighted_pdf = result_num / result_denum

        return weighted_pdf
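
A hypothetical evaluation on a tiny mesh, assuming geomstats'
PoincareBall, whose metric provides the dist_broadcast these helpers rely
on; all values are illustrative:

import geomstats.backend as gs
from geomstats.geometry.poincare_ball import PoincareBall

ball = PoincareBall(dim=2)

mesh = gs.array([[0.0, 0.0], [0.1, 0.1]])    # n_precision = 2
means = gs.array([[0.2, 0.0], [-0.2, 0.1]])  # n_gaussians = 2
variances = gs.array([0.3, 0.5])
coefficients = gs.array([0.6, 0.4])

pdf = weighted_gmm_pdf(coefficients, mesh, means, variances, ball.metric)
# pdf.shape == (2, 2): one value per mesh point and per component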
Example #7
def loss(example_embedding, context_embedding, negative_embedding, manifold):
    """Compute loss and grad.

    Compute loss and grad given embedding of the current example,
    embedding of the context and negative sampling embedding.
    """
    n_edges, dim =\
        negative_embedding.shape[0], example_embedding.shape[-1]
    example_embedding = gs.expand_dims(example_embedding, 0)
    context_embedding = gs.expand_dims(context_embedding, 0)

    positive_distance =\
        manifold.metric.squared_dist(
            example_embedding, context_embedding)
    positive_loss =\
        log_sigmoid(-positive_distance)

    reshaped_example_embedding =\
        gs.repeat(example_embedding, n_edges, axis=0)

    negative_distance =\
        manifold.metric.squared_dist(
            reshaped_example_embedding, negative_embedding)
    negative_loss = log_sigmoid(negative_distance)

    total_loss = -(positive_loss + negative_loss.sum())

    positive_log_sigmoid_grad =\
        -grad_log_sigmoid(-positive_distance)

    positive_distance_grad =\
        grad_squared_distance(example_embedding, context_embedding)

    positive_grad =\
        gs.repeat(positive_log_sigmoid_grad, dim, axis=-1)\
        * positive_distance_grad

    negative_distance_grad =\
        grad_squared_distance(reshaped_example_embedding, negative_embedding)

    negative_distance = gs.to_ndarray(negative_distance, to_ndim=2, axis=-1)
    negative_log_sigmoid_grad =\
        grad_log_sigmoid(negative_distance)

    negative_grad = negative_log_sigmoid_grad\
        * negative_distance_grad

    example_grad = -(positive_grad + negative_grad.sum(axis=0))

    return total_loss, example_grad
Example #8
    def _ball_to_extrinsic_coordinates(point):
        """Convert ball to extrinsic coordinates.

        Convert the parameterization of a point in hyperbolic space
        from its poincare ball model coordinates, to the extrinsic
        coordinates.

        Parameters
        ----------
        point : array-like, shape=[n_samples, dimension]
            Point in hyperbolic space in Poincare ball coordinates.

        Returns
        -------
        extrinsic : array-like, shape=[n_samples, dimension + 1]
            Point in hyperbolic space in extrinsic coordinates.
        """
        squared_norm = gs.sum(point**2, -1)
        denominator = 1 - squared_norm
        t = gs.to_ndarray((1 + squared_norm) / denominator, to_ndim=2, axis=1)
        expanded_denominator = gs.expand_dims(denominator, -1)
        expanded_denominator = gs.repeat(expanded_denominator, point.shape[-1],
                                         -1)
        intrinsic = (2 * point) / expanded_denominator
        return gs.concatenate([t, intrinsic], -1)
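
The map sends x to ((1 + ||x||^2) / (1 - ||x||^2), 2x / (1 - ||x||^2)),
whose image satisfies the hyperboloid equation t^2 - ||y||^2 = 1. A
sanity-check sketch with numpy standing in for gs:

import numpy as np

point = np.array([[0.3, -0.2], [0.0, 0.5]])  # points in the unit ball
squared_norm = np.sum(point ** 2, axis=-1)
t = (1 + squared_norm) / (1 - squared_norm)
intrinsic = 2 * point / (1 - squared_norm)[..., None]

# Every image point lies on the hyperboloid t^2 - ||y||^2 = 1.
assert np.allclose(t ** 2 - np.sum(intrinsic ** 2, axis=-1), 1.0)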
Example #9
    def closest_neighbor_index(self, point, neighbors):
        """Closest neighbor of point among neighbors.

        Parameters
        ----------
        point : array-like, shape=[..., dim]
            Point.
        neighbors : array-like, shape=[n_neighbors, dim]
            Neighbors.

        Returns
        -------
        closest_neighbor_index : int
            Index of closest neighbor.
        """
        n_points = point.shape[0] if gs.ndim(point) == gs.ndim(
            neighbors) else 1
        n_neighbors = neighbors.shape[0]

        if n_points > 1 and n_neighbors > 1:
            neighbors = gs.repeat(neighbors, n_points, axis=0)

            point = gs.concatenate([point for _ in range(n_neighbors)])

        closest_neighbor_index = gs.argmin(
            gs.transpose(
                gs.reshape(self.dist(point, neighbors),
                           (n_neighbors, n_points)), ),
            axis=1,
        )

        if n_points == 1:
            return closest_neighbor_index[0]

        return closest_neighbor_index
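
A small usage sketch, assuming the method is available on a geomstats
metric object (Euclidean shown here for concreteness):

import geomstats.backend as gs
from geomstats.geometry.euclidean import Euclidean

space = Euclidean(dim=2)

point = gs.array([0.0, 0.0])
neighbors = gs.array([[1.0, 1.0], [0.1, 0.0], [2.0, -1.0]])

index = space.metric.closest_neighbor_index(point, neighbors)
# index == 1: the second neighbor is nearest to the origin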
Example #10
    def sample_y(self, X, n_samples=1, random_state=0):
        """Draw samples from Wrapped Gaussian process and evaluate at X.

        A fitted Wrapped Gaussian process can be used to sample
        values through the following steps:

            - Use the stored Gaussian process regression on the dataset
                to sample tangent values
            - Compute the base-points using the prior
            - Flatten (and repeat if needed) both the base-points and the
                tangent samples to benefit from vectorized computation.
            - Map the tangent samples on the manifold via the metric's exp with the
                flattened and repeated base-points yielded by the prior

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Query points where the WGP is evaluated.
        n_samples : int, default=1
            Number of samples drawn from the Wrapped Gaussian process per query point.
        random_state : int, RandomState instance or None, default=0
            Determines random number generation to randomly draw samples.
            Pass an int for reproducible results across multiple function
            calls.

        Returns
        -------
        y_samples : ndarray of shape (n_samples_X, n_samples), or \
            (n_samples_X, n_targets, n_samples)
            Values of n_samples samples drawn from wrapped Gaussian process and
            evaluated at query points.
        """
        tangent_samples = self._euclidean_gpr.sample_y(X, n_samples,
                                                       random_state)
        tangent_samples = gs.cast(tangent_samples, dtype=X.dtype)
        # flatten the samples
        tangent_samples = gs.reshape(gs.transpose(tangent_samples, [0, 2, 1]),
                                     (-1, *self.y_train_shape_))

        # generate the base_points
        base_points = self.prior(X)
        # repeat the base points in order to match the tangent samples
        base_points = gs.repeat(gs.expand_dims(base_points, 2),
                                n_samples,
                                axis=2)
        # flatten the base_points
        base_points = gs.reshape(gs.transpose(base_points, [0, 2, 1]),
                                 (-1, *self.y_train_shape_))

        # get the flattened samples
        y_samples = self.metric.exp(tangent_samples, base_point=base_points)
        y_samples = gs.transpose(
            gs.reshape(y_samples,
                       (X.shape[0], n_samples, *self.y_train_shape_)),
            [0, 2, 1],
        )

        return y_samples
Example #11
    def test_weighted_frechet_mean(self):
        """Test for weighted mean."""
        data = gs.array([[0.1, 0.2], [0.25, 0.35], [-0.1, -0.2], [-0.4, 0.3]])
        weights = gs.repeat([0.5], data.shape[0])
        mean_o = FrechetMean(metric=self.metric, point_type='vector')
        mean_o.fit(data, weights)
        mean = mean_o.estimate_
        mean_verdict = [-0.03857, 0.15922]
        self.assertAllClose(mean, mean_verdict, TOLERANCE)
Example #12
    def find_variance_from_index(weighted_distances, variances_range,
                                 phi_inv_var):
        r"""Return the variance given weighted distances.

        Parameters
        ----------
        weighted_distances : array-like, shape=[n_gaussians,]
            Mean of the weighted distances between training data
            and current barycentres. The weights of each data sample
            corresponds to the probability of belonging to a component
            of the Gaussian mixture model.
        variances_range : array-like, shape=[n_variances,]
            Array of standard deviations.
        phi_inv_var : array-like, shape=[n_variances,]
            Array of the computed inverse of a function phi
            whose expression is closed-form
            :math:`\sigma \mapsto \sigma^3 \times
            \frac{d}{d\sigma} \log \zeta_m(\sigma)`,
            where :math:`\sigma` denotes the variance,
            :math:`\zeta` the normalization coefficient,
            and :math:`m` the dimension.

        Returns
        -------
        var : array-like, shape=[n_gaussians,]
            Estimated variances for each component of the GMM.
        """
        n_gaussians, precision = \
            weighted_distances.shape[0], variances_range.shape[0]

        ref = gs.expand_dims(phi_inv_var, 0)
        ref = gs.repeat(ref, n_gaussians, axis=0)

        val = gs.expand_dims(weighted_distances, 1)
        val = gs.repeat(val, precision, axis=1)

        abs_difference = gs.abs(ref - val)

        index = gs.argmin(abs_difference, -1)

        var = variances_range[index]

        return var
Example #13
    def mobius_add(self, point_a, point_b):
        r"""Compute the Mobius addition of two points.

        Mobius addition is a necessary operation for computing
        the log and exp maps in the 'ball' representation.

        .. math::

            a\oplus b=\frac{(1+2\langle a,b\rangle + ||b||^2)a+
            (1-||a||^2)b}{1+2\langle a,b\rangle + ||a||^2||b||^2}

        Parameters
        ----------
        point_a : array-like, shape=[n_samples, dimension + 1]
            Point in hyperbolic space.
        point_b : array-like, shape=[n_samples, dimension + 1]
            Point in hyperbolic space.

        Returns
        -------
        mobius_add : array-like, shape=[n_samples, dimension + 1]
            Result of the Mobius addition.
        """
        norm_point_a = gs.sum(point_a**2, axis=-1, keepdims=True)

        # to redefine to use autograd
        norm_point_a = gs.repeat(norm_point_a, point_a.shape[-1], -1)

        norm_point_b = gs.sum(point_b**2, axis=-1, keepdims=True)
        norm_point_b = gs.repeat(norm_point_b, point_a.shape[-1], -1)

        sum_prod_a_b = gs.sum(point_a * point_b, axis=-1, keepdims=True)

        sum_prod_a_b = gs.repeat(sum_prod_a_b, point_a.shape[-1], -1)

        add_numerator = ((1 + 2 * sum_prod_a_b + norm_point_b) * point_a +
                         (1 - norm_point_a) * point_b)

        add_denominator = (1 + 2 * sum_prod_a_b + norm_point_a * norm_point_b)

        mobius_add = add_numerator / add_denominator

        return mobius_add
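
Two identities give a cheap correctness check: 0 is the neutral element
(0 + b = b) and -a is the inverse (a + (-a) = 0). A standalone numpy
sketch of the same formula:

import numpy as np

def mobius_add_np(a, b):
    # Same formula as above, written against plain numpy.
    norm_a = np.sum(a ** 2, axis=-1, keepdims=True)
    norm_b = np.sum(b ** 2, axis=-1, keepdims=True)
    dot_ab = np.sum(a * b, axis=-1, keepdims=True)
    numerator = (1 + 2 * dot_ab + norm_b) * a + (1 - norm_a) * b
    denominator = 1 + 2 * dot_ab + norm_a * norm_b
    return numerator / denominator

a = np.array([[0.3, 0.1]])
assert np.allclose(mobius_add_np(np.zeros_like(a), a), a)  # 0 (+) a = a
assert np.allclose(mobius_add_np(a, -a), 0.0)              # a (+) (-a) = 0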
Example #14
    def test_unary_op_vec(self, func_name, a):
        gs_fnc = get_backend_fnc(func_name)

        res = gs_fnc(a)

        a_expanded = gs.expand_dims(a, 0)
        a_rep = gs.repeat(a_expanded, 2, axis=0)

        res_a_rep = gs_fnc(a_rep)
        for res_ in res_a_rep:
            self.assertAllClose(res_, res)
Example #15
    def belongs(self, point):
        """
        Check if point belongs to the Euclidean space.
        """
        point = gs.to_ndarray(point, to_ndim=2)
        n_points, point_dim = point.shape
        belongs = point_dim == self.dimension
        belongs = gs.repeat(belongs, repeats=n_points, axis=0)
        belongs = gs.to_ndarray(belongs, to_ndim=2, axis=1)

        return belongs
Example #16
    def mobius_add(self, point_a, point_b):
        """Compute the mobius addition of two points.

        Mobius addition is necessary to compute the log and exp maps
        when point_type is set to 'poincare'.

        Parameters
        ----------
        point_a : array-like, shape=[n_samples, dimension + 1]
                              or shape=[1, dimension + 1]
            First point in hyperbolic space.
        point_b : array-like, shape=[n_samples, dimension + 1]
                              or shape=[1, dimension + 1]
            Second point in hyperbolic space.

        Returns
        -------
        mobius_add : array-like, shape=[n_samples, dimension + 1]
                           or shape=[1, dimension + 1]
            Result of the Mobius addition.
        """
        norm_point_a = gs.sum(point_a**2, axis=-1, keepdims=True)

        # to redefine to use autograd
        norm_point_a = gs.repeat(norm_point_a, point_a.shape[-1], -1)

        norm_point_b = gs.sum(point_b**2, axis=-1, keepdims=True)
        norm_point_b = gs.repeat(norm_point_b, point_a.shape[-1], -1)

        sum_prod_a_b = (point_a * point_b).sum(-1, keepdims=True)

        sum_prod_a_b = gs.repeat(sum_prod_a_b, point_a.shape[-1], -1)

        add_numerator = ((1 + 2 * sum_prod_a_b + norm_point_b) * point_a +
                         (1 - norm_point_a) * point_b)

        add_denominator = (1 + 2 * sum_prod_a_b + norm_point_a * norm_point_b)

        mobius_add = add_numerator / add_denominator

        return mobius_add
Example #17
    def test_binary_op_vec(self, func_name, a, b):
        gs_fnc = get_backend_fnc(func_name)

        res = gs_fnc(a, b)

        a_expanded = gs.expand_dims(a, 0)
        b_expanded = gs.expand_dims(b, 0)

        a_rep = gs.repeat(a_expanded, 2, axis=0)
        b_rep = gs.repeat(b_expanded, 2, axis=0)

        res_a_rep = gs_fnc(a_rep, b)
        res_b_rep = gs_fnc(a, b_rep)
        res_a_b_rep = gs_fnc(a_rep, b_rep)
        res_a_expanded = gs_fnc(a_expanded, b_rep)
        res_b_expanded = gs_fnc(a_rep, b_expanded)

        self.assertAllClose(res_a_rep, res_a_b_rep)
        self.assertAllClose(res_b_rep, res_a_b_rep)
        self.assertAllClose(res_a_expanded, res_a_b_rep)
        self.assertAllClose(res_b_expanded, res_a_b_rep)
        for res_ in res_a_b_rep:
            self.assertAllClose(res_, res)
Example #18
    def belongs(self, point):
        """
        Evaluate if a point belongs to the Minkowski space.
        """
        point = gs.to_ndarray(point, to_ndim=2)
        n_points, point_dim = point.shape
        belongs = point_dim == self.dimension
        belongs = gs.repeat(belongs, repeats=n_points, axis=0)
        belongs = gs.to_ndarray(belongs, to_ndim=2, axis=1)

        return belongs

Example #19
def load_optical_nerves():
    """Load data from data/optical_nerves/optical_nerves.txt.

    Load the dataset of sets of 5 landmarks, labelled S, T, N, I, V, in 3D
    on monkeys' optical nerve heads:

    - 1st landmark (S): superior aspect of the retina,
    - 2nd landmark (T): side of the retina closest to the temporal
      bone of the skull,
    - 3rd landmark (N): nose side of the retina,
    - 4th landmark (I): inferior point,
    - 5th landmark (V): optical nerve head deepest point.

    For each monkey, an experimental glaucoma was introduced in one eye,
    while the second eye was kept as control. This dataset can be used to
    investigate a significant difference between the glaucoma and the
    control eyes.

    Label 0 refers to a normal eye, and Label 1 to an eye with glaucoma.

    References
    ----------
    .. [PE2015] V. Patrangenaru and L. Ellingson. Nonparametric Statistics
        on Manifolds and Their Applications to Object Data, 2015.
        https://doi.org/10.1201/b18969


    Returns
    -------
    data : array-like, shape=[22, 5, 3]
        Data representing the 5 landmarks, in 3D, for both eyes of 11
        different monkeys.
    labels : array-like, shape=[22,]
        Labels in {0, 1} classifying the corresponding optical nerve as
        normal (label = 0) or glaucoma (label = 1).
    monkeys : array-like, shape=[22,]
        Indices in 0...10 referencing the index of the monkey to which a given
        optical nerve belongs.
    """
    nerves = pd.read_csv(OPTICAL_NERVES_PATH, sep="\t")
    nerves = nerves.set_index("Filename")
    nerves = nerves.drop(index=["laljn103.12b", "lalj0103.12b"])
    nerves = nerves.reset_index(drop=True)
    nerves_gs = gs.array(nerves.values)

    data = gs.reshape(nerves_gs, (nerves_gs.shape[0], -1, 3))
    labels = gs.tile([0, 1], [nerves_gs.shape[0] // 2])
    monkeys = gs.repeat(gs.arange(11), 2)

    return data, labels, monkeys
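
A short usage sketch, assuming the loader ships with geomstats' datasets
utilities as in recent releases:

data, labels, monkeys = load_optical_nerves()
print(data.shape)    # (22, 5, 3): 22 nerves, 5 landmarks, 3D
print(labels[:4])    # alternating 0 (normal) and 1 (glaucoma)
print(monkeys[:4])   # [0, 0, 1, 1]: two eyes per monkey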
Example #20
    def update_means(self, data, posterior_probabilities):
        """Update means."""
        n_gaussians = posterior_probabilities.shape[-1]

        mean = FrechetMean(metric=self.metric,
                           method=self.mean_method,
                           lr=self.lr_mean,
                           epsilon=self.tol_mean,
                           max_iter=self.max_iter_mean,
                           point_type=self.point_type)

        data_expand = gs.expand_dims(data, 1)
        data_expand = gs.repeat(data_expand, n_gaussians, axis=1)

        mean.fit(data_expand, weights=posterior_probabilities)
        self.means = gs.squeeze(mean.estimate_)
Example #21
    def update_means(self, data, posterior_probabilities,
                     lr_means, tau_means, max_iter=DEFAULT_MAX_ITER):
        """Means update function."""
        n_gaussians = posterior_probabilities.shape[-1]

        mean = FrechetMean(
            metric=self.riemannian_metric,
            method=self.mean_method,
            lr=lr_means,
            tau=tau_means,
            max_iter=max_iter,
            point_type=self.point_type)

        data_expand = gs.expand_dims(data, 1)
        data_expand = gs.repeat(data_expand, n_gaussians, axis=1)

        mean.fit(data_expand, weights=posterior_probabilities)
        self.means = gs.squeeze(mean.estimate_)
Example #22
    def test_binary_op_vec_raises_error(self, func_name, a, b):
        a_rep = gs.repeat(gs.expand_dims(a, 0), 2, axis=0)
        b_rep = gs.repeat(gs.expand_dims(b, 0), 3, axis=0)

        self.test_binary_op_raises_error(func_name, a_rep, b_rep)
Example #23
    def log(self, point, base_point):
        """Compute Riemannian logarithm of a point wrt a base point.

        If point_type = 'ball', then point and base_point belong to
        the Poincare ball, and the returned tangent vector lies in the
        Euclidean space of the same dimension as the ball.

        Parameters
        ----------
        point : array-like, shape=[n_samples, dimension + 1]
            Point in hyperbolic space.
        base_point : array-like, shape=[n_samples, dimension + 1]
            Point in hyperbolic space.

        Returns
        -------
        log : array-like, shape=[n_samples, dimension + 1]
            Tangent vector at the base point equal to the Riemannian logarithm
            of point at the base point.
        """
        if self.point_type == 'extrinsic':
            point = gs.to_ndarray(point, to_ndim=2)
            base_point = gs.to_ndarray(base_point, to_ndim=2)

            angle = self.dist(base_point, point) / self.scale
            angle = gs.to_ndarray(angle, to_ndim=1)
            angle = gs.to_ndarray(angle, to_ndim=2)

            mask_0 = gs.isclose(angle, 0.)
            mask_else = ~mask_0

            mask_0_float = gs.cast(mask_0, gs.float32)
            mask_else_float = gs.cast(mask_else, gs.float32)

            coef_1 = gs.zeros_like(angle)
            coef_2 = gs.zeros_like(angle)

            coef_1 += mask_0_float * (1. + INV_SINH_TAYLOR_COEFFS[1] * angle**2
                                      + INV_SINH_TAYLOR_COEFFS[3] * angle**4 +
                                      INV_SINH_TAYLOR_COEFFS[5] * angle**6 +
                                      INV_SINH_TAYLOR_COEFFS[7] * angle**8)
            coef_2 += mask_0_float * (1. + INV_TANH_TAYLOR_COEFFS[1] * angle**2
                                      + INV_TANH_TAYLOR_COEFFS[3] * angle**4 +
                                      INV_TANH_TAYLOR_COEFFS[5] * angle**6 +
                                      INV_TANH_TAYLOR_COEFFS[7] * angle**8)

            # This avoids dividing by 0.
            angle += mask_0_float * 1.

            coef_1 += mask_else_float * (angle / gs.sinh(angle))
            coef_2 += mask_else_float * (angle / gs.tanh(angle))

            log = (gs.einsum('ni,nj->nj', coef_1, point) -
                   gs.einsum('ni,nj->nj', coef_2, base_point))
            return log

        elif self.point_type == 'ball':

            add_base_point = self.mobius_add(-base_point, point)

            norm_add = gs.to_ndarray(gs.linalg.norm(add_base_point, axis=-1),
                                     2, -1)
            norm_add = gs.repeat(norm_add, base_point.shape[-1], -1)
            norm_base_point = gs.to_ndarray(
                gs.linalg.norm(base_point, axis=-1), 2, -1)
            norm_base_point = gs.repeat(norm_base_point, base_point.shape[-1],
                                        -1)

            log = (1 - norm_base_point**2) * gs.arctanh(norm_add)\
                * (add_base_point / norm_add)

            mask_0 = gs.all(gs.isclose(norm_add, 0.))
            log[mask_0] = 0

            return log
        else:
            raise NotImplementedError(
                'log is only implemented for ball and extrinsic')
Example #24
    def exp(self, tangent_vec, base_point):
        """Compute the Riemannian exponential of a tangent vector.

        Parameters
        ----------
        tangent_vec : array-like, shape=[n_samples, dimension + 1]
            Tangent vector at a base point.
        base_point : array-like, shape=[n_samples, dimension + 1]
            Point in hyperbolic space.

        Returns
        -------
        exp : array-like, shape=[n_samples, dimension + 1]
            Point in hyperbolic space equal to the Riemannian exponential
            of tangent_vec at the base point.
        """
        if self.point_type == 'extrinsic':
            tangent_vec = gs.to_ndarray(tangent_vec, to_ndim=2)
            base_point = gs.to_ndarray(base_point, to_ndim=2)

            sq_norm_tangent_vec = self.embedding_metric.squared_norm(
                tangent_vec)
            sq_norm_tangent_vec = gs.clip(sq_norm_tangent_vec, 0, math.inf)
            norm_tangent_vec = gs.sqrt(sq_norm_tangent_vec)

            mask_0 = gs.isclose(sq_norm_tangent_vec, 0.)
            mask_0 = gs.to_ndarray(mask_0, to_ndim=1)
            mask_else = ~mask_0
            mask_else = gs.to_ndarray(mask_else, to_ndim=1)
            mask_0_float = gs.cast(mask_0, gs.float32)
            mask_else_float = gs.cast(mask_else, gs.float32)

            coef_1 = gs.zeros_like(norm_tangent_vec)
            coef_2 = gs.zeros_like(norm_tangent_vec)

            coef_1 += mask_0_float * (
                1. + COSH_TAYLOR_COEFFS[2] * norm_tangent_vec**2 +
                COSH_TAYLOR_COEFFS[4] * norm_tangent_vec**4 +
                COSH_TAYLOR_COEFFS[6] * norm_tangent_vec**6 +
                COSH_TAYLOR_COEFFS[8] * norm_tangent_vec**8)
            coef_2 += mask_0_float * (
                1. + SINH_TAYLOR_COEFFS[3] * norm_tangent_vec**2 +
                SINH_TAYLOR_COEFFS[5] * norm_tangent_vec**4 +
                SINH_TAYLOR_COEFFS[7] * norm_tangent_vec**6 +
                SINH_TAYLOR_COEFFS[9] * norm_tangent_vec**8)
            # This avoids dividing by 0.
            norm_tangent_vec += mask_0_float * 1.0
            coef_1 += mask_else_float * (gs.cosh(norm_tangent_vec))
            coef_2 += mask_else_float * ((gs.sinh(norm_tangent_vec) /
                                          (norm_tangent_vec)))

            exp = (gs.einsum('ni,nj->nj', coef_1, base_point) +
                   gs.einsum('ni,nj->nj', coef_2, tangent_vec))

            hyperbolic_space = Hyperbolic(dimension=self.dimension)
            exp = hyperbolic_space.regularize(exp)
            return exp

        elif self.point_type == 'ball':
            norm_base_point = gs.to_ndarray(gs.linalg.norm(base_point, -1), 2,
                                            -1)
            norm_base_point = gs.repeat(norm_base_point, base_point.shape[-1],
                                        -1)
            den = 1 - norm_base_point**2

            norm_tan = gs.to_ndarray(gs.linalg.norm(tangent_vec, axis=-1), 2,
                                     -1)
            norm_tan = gs.repeat(norm_tan, base_point.shape[-1], -1)

            lambda_base_point = 1 / den

            direction = tangent_vec / norm_tan

            factor = gs.tanh(lambda_base_point * norm_tan)

            exp = self.mobius_add(base_point, direction * factor)

            return exp
        else:
            raise NotImplementedError(
                'exp is only implemented for ball and extrinsic')
Example #25
    def mean(self,
             points,
             weights=None,
             n_max_iterations=32,
             epsilon=EPSILON,
             point_type='vector',
             mean_method='default',
             verbose=False):
        """Frechet mean of (weighted) points.

        Parameters
        ----------
        points : array-like, shape=[n_samples, dimension]
        weights : array-like, shape=[n_samples, 1], optional
        verbose : bool, optional

        Returns
        -------
        mean : array-like
            the Frechet mean of points, a point on the manifold
        """
        if mean_method == 'default':

            # TODO(nina): Profile this code to study performance,
            # i.e. what to do with sq_dists_between_iterates.
            def while_loop_cond(iteration, mean, variance, sq_dist):
                result = ~gs.logical_or(
                    gs.isclose(variance, 0.),
                    gs.less_equal(sq_dist, epsilon * variance))
                return result[0, 0] or iteration == 0

            def while_loop_body(iteration, mean, variance, sq_dist):

                logs = self.log(point=points, base_point=mean)

                tangent_mean = gs.einsum('nk,nj->j', weights, logs)

                tangent_mean /= sum_weights

                mean_next = self.exp(tangent_vec=tangent_mean, base_point=mean)

                sq_dist = self.squared_dist(mean_next, mean)
                sq_dists_between_iterates.append(sq_dist)

                variance = self.variance(points=points,
                                         weights=weights,
                                         base_point=mean_next)

                mean = mean_next
                iteration += 1
                return [iteration, mean, variance, sq_dist]

            if point_type == 'vector':
                points = gs.to_ndarray(points, to_ndim=2)
            if point_type == 'matrix':
                points = gs.to_ndarray(points, to_ndim=3)
            n_points = gs.shape(points)[0]

            if weights is None:
                weights = gs.ones((n_points, 1))

            weights = gs.array(weights)
            weights = gs.to_ndarray(weights, to_ndim=2, axis=1)

            sum_weights = gs.sum(weights)

            mean = points[0]
            if point_type == 'vector':
                mean = gs.to_ndarray(mean, to_ndim=2)
            if point_type == 'matrix':
                mean = gs.to_ndarray(mean, to_ndim=3)

            if n_points == 1:
                return mean

            sq_dists_between_iterates = []
            iteration = 0
            sq_dist = gs.array([[0.]])
            variance = gs.array([[0.]])

            last_iteration, mean, variance, sq_dist = gs.while_loop(
                lambda i, m, v, sq: while_loop_cond(i, m, v, sq),
                lambda i, m, v, sq: while_loop_body(i, m, v, sq),
                loop_vars=[iteration, mean, variance, sq_dist],
                maximum_iterations=n_max_iterations)

            if last_iteration == n_max_iterations:
                print('Maximum number of iterations {} reached. '
                      'The mean may be inaccurate'.format(n_max_iterations))

            if verbose:
                print('n_iter: {}, final variance: {}, final dist: {}'.format(
                    last_iteration, variance, sq_dist))

            mean = gs.to_ndarray(mean, to_ndim=2)
            return mean

        if mean_method == 'frechet-poincare-ball':

            lr = 1e-3
            tau = 5e-3

            if len(points) == 1:
                return points

            iteration = 0
            convergence = math.inf
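            # Initialize the barycenter at the origin: multiplying the
            # mean by 0 keeps the shape and backend of `points`.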
            barycenter = points.mean(0, keepdims=True) * 0

            while convergence > tau and n_max_iterations > iteration:

                iteration += 1

                expand_barycenter = gs.repeat(barycenter, points.shape[0], 0)

                grad_tangent = 2 * self.log(points, expand_barycenter)

                cc_barycenter = self.exp(
                    lr * grad_tangent.sum(0, keepdims=True), barycenter)

                convergence = self.dist(cc_barycenter, barycenter).max().item()

                barycenter = cc_barycenter

            if iteration == n_max_iterations:
                warnings.warn(
                    'Maximum number of iterations {} reached. The '
                    'mean may be inaccurate'.format(n_max_iterations))

            return barycenter
Example #26
def _ball_gradient_descent(points,
                           metric,
                           weights=None,
                           max_iter=32,
                           lr=1e-3,
                           tau=5e-3):
    """Perform ball gradient descent."""
    points = gs.to_ndarray(points, to_ndim=2)
    if len(points) == 1:
        return points[0]
    if weights is None:

        iteration = 0
        convergence = math.inf
        barycenter = gs.mean(points, axis=0, keepdims=True)

        while convergence > tau and max_iter > iteration:

            iteration += 1
            grad_tangent = 2 * metric.log(points, barycenter)
            cc_barycenter = metric.exp(lr * grad_tangent.sum(0, keepdims=True),
                                       barycenter)

            convergence = metric.dist(cc_barycenter, barycenter).max().item()

            barycenter = cc_barycenter
    else:

        weights = gs.expand_dims(weights, -1)
        weights = gs.repeat(weights, points.shape[-1], axis=2)

        barycenter = (points * weights).sum(0, keepdims=True) / weights.sum(0)
        barycenter_gs = gs.squeeze(barycenter)

        points_gs = gs.squeeze(points)
        points_flattened = gs.reshape(points_gs, (-1, points_gs.shape[-1]))

        convergence = math.inf
        iteration = 0

        while convergence > tau and max_iter > iteration:

            iteration += 1
            barycenter_flattened = gs.repeat(barycenter,
                                             len(points_gs),
                                             axis=0)
            barycenter_flattened = gs.reshape(
                barycenter_flattened, (-1, barycenter_flattened.shape[-1]))

            grad_tangent = 2 * metric.log(points_flattened,
                                          barycenter_flattened)
            grad_tangent = gs.reshape(grad_tangent, points.shape)
            grad_tangent = grad_tangent * weights

            lr_grad_tangent = lr * grad_tangent.sum(0, keepdims=True)
            lr_grad_tangent_s = lr_grad_tangent.squeeze()

            cc_barycenter = metric.exp(lr_grad_tangent_s, barycenter_gs)
            convergence = metric.dist(cc_barycenter,
                                      barycenter_gs).max().item()

            barycenter_gs = cc_barycenter
            barycenter = gs.expand_dims(cc_barycenter, 0)

        barycenter = gs.squeeze(barycenter)

    if iteration == max_iter:
        logging.warning('Maximum number of iterations {} reached. The '
                        'mean may be inaccurate'.format(max_iter))

    return barycenter
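
A hypothetical call, assuming geomstats' PoincareBall metric and a numpy
backend:

import geomstats.backend as gs
from geomstats.geometry.poincare_ball import PoincareBall

ball = PoincareBall(dim=2)
points = gs.array([[0.1, 0.2], [0.2, -0.1], [-0.15, 0.05]])

mean = _ball_gradient_descent(points, ball.metric, lr=1e-2)
# `mean` approximates the Frechet mean of the three ball points.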
Example #27
    def loss(self, example_embedding, context_embedding, negative_embedding):
        """Compute loss and grad.

        Compute loss and grad given embedding of the current example,
        embedding of the context and negative sampling embedding.

        Parameters
        ----------
        example_embedding : array-like, shape=[dim]
            Current data sample embedding.
        context_embedding : array-like, shape=[dim]
            Current context embedding.
        negative_embedding: array-like, shape=[dim]
            Current negative sample embedding.

        Returns
        -------
        total_loss : float
            The current value of the loss function.
        example_grad : array-like, shape=[dim]
            The gradient of the loss function at the embedding
            of the current data sample.
        """
        n_edges, dim = negative_embedding.shape[0], example_embedding.shape[-1]
        example_embedding = gs.expand_dims(example_embedding, 0)
        context_embedding = gs.expand_dims(context_embedding, 0)

        positive_distance = self.manifold.metric.squared_dist(
            example_embedding, context_embedding)
        positive_loss = self.log_sigmoid(-positive_distance)

        reshaped_example_embedding = gs.repeat(example_embedding,
                                               n_edges,
                                               axis=0)

        negative_distance = self.manifold.metric.squared_dist(
            reshaped_example_embedding, negative_embedding)
        negative_loss = self.log_sigmoid(negative_distance)

        total_loss = -(positive_loss + gs.sum(negative_loss))

        positive_log_sigmoid_grad = -self.grad_log_sigmoid(-positive_distance)

        positive_distance_grad = self.grad_squared_distance(
            example_embedding, context_embedding)

        positive_grad = (gs.repeat(positive_log_sigmoid_grad, dim, axis=-1) *
                         positive_distance_grad)

        negative_distance_grad = self.grad_squared_distance(
            reshaped_example_embedding, negative_embedding)

        negative_distance = gs.to_ndarray(negative_distance,
                                          to_ndim=2,
                                          axis=-1)
        negative_log_sigmoid_grad = self.grad_log_sigmoid(negative_distance)

        negative_grad = negative_log_sigmoid_grad * negative_distance_grad

        example_grad = -(positive_grad + gs.sum(negative_grad, axis=0))

        return total_loss, example_grad
Example #28
    def origin(self):
        return gs.diag(gs.repeat([1, 0], [self.k, self.n - self.k]))[0]