Example 1
    def test_sample_von_mises_fisher_arbitrary_mean(self):
        """
        Check that the maximum likelihood estimates of the mean and
        concentration parameter are close to the true values. A first
        estimate of the concentration parameter is obtained from a
        closed-form expression and then refined by Newton's method.
        """
        for dim in [2, 9]:
            n_points = 10000
            sphere = Hypersphere(dim)

            # Check the mean estimate for a concentrated distribution
            # with a randomly drawn mean direction.
            kappa = 1000.
            mean = sphere.random_uniform()
            points = sphere.random_von_mises_fisher(mu=mean,
                                                    kappa=kappa,
                                                    n_samples=n_points)
            sum_points = gs.sum(points, axis=0)
            result = sum_points / gs.linalg.norm(sum_points)
            expected = mean
            self.assertAllClose(result, expected, atol=MEAN_ESTIMATION_TOL)
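For reference, the estimators this test relies on are the standard von Mises-Fisher maximum likelihood formulas (stated here for context; they are not part of the original snippet). With ambient dimension \(p = \text{dim} + 1\) and mean resultant length \(\bar{R}\),

\[
\hat{\mu} = \frac{\sum_{i=1}^{n} x_i}{\bigl\|\sum_{i=1}^{n} x_i\bigr\|},
\qquad
\bar{R} = \frac{\bigl\|\sum_{i=1}^{n} x_i\bigr\|}{n},
\qquad
\hat{\kappa} \approx \frac{\bar{R}\,(p - \bar{R}^2)}{1 - \bar{R}^2}.
\]

This test checks only \(\hat{\mu}\); the closed-form \(\hat{\kappa}\) and its Newton refinement appear in Example 3.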
Example 2
    def test_optimal_quantization(self):
        """
        Check that optimal quantization yields the same result as
        the Karcher flow algorithm when we look for one center.
        """
        dim = 2
        n_points = 1000
        n_centers = 1
        sphere = Hypersphere(dim)
        points = sphere.random_von_mises_fisher(
            kappa=10, n_samples=n_points
        )
        mean = sphere.metric.mean(points)
        centers, weights, clusters, n_iterations = (
            sphere.metric.optimal_quantization(
                points=points, n_centers=n_centers
            )
        )
        error = sphere.metric.dist(mean, centers)
        diameter = sphere.metric.diameter(points)
        result = error / diameter
        expected = 0.0
        self.assertAllClose(
            result, expected, atol=OPTIMAL_QUANTIZATION_TOL)
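For context (a standard definition, not part of the original snippet): with a single center, optimal quantization minimizes the same objective as the Fréchet (Karcher) mean,

\[
\hat{c} = \operatorname*{arg\,min}_{c \in M} \sum_{i=1}^{n} d(x_i, c)^2,
\]

which is why the test compares the quantization center against sphere.metric.mean and normalizes the error by the diameter of the point set.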
Example 3
    def test_sample_random_von_mises_fisher_kappa(self, dim, kappa, n_points):
        # Check the concentration parameter estimate for a dispersed
        # distribution.
        sphere = Hypersphere(dim)
        points = sphere.random_von_mises_fisher(kappa=kappa, n_samples=n_points)
        sum_points = gs.sum(points, axis=0)
        mean_norm = gs.linalg.norm(sum_points) / n_points
        # Closed-form initial estimate of kappa from the mean resultant length.
        kappa_estimate = (
            mean_norm * (dim + 1.0 - mean_norm**2) / (1.0 - mean_norm**2)
        )
        kappa_estimate = gs.cast(kappa_estimate, gs.float64)
        mean_norm = gs.cast(mean_norm, gs.float64)
        p = dim + 1
        n_steps = 100
        # Refine the estimate with Newton's method on the maximum likelihood
        # equation A_p(kappa) = mean_norm (see the note after this example).
        for _ in range(n_steps):
            bessel_func_1 = scipy.special.iv(p / 2.0, kappa_estimate)
            bessel_func_2 = scipy.special.iv(p / 2.0 - 1.0, kappa_estimate)
            ratio = bessel_func_1 / bessel_func_2
            denominator = 1.0 - ratio**2 - (p - 1.0) * ratio / kappa_estimate
            kappa_estimate = kappa_estimate - (ratio - mean_norm) / denominator
        result = kappa_estimate
        expected = kappa
        self.assertAllClose(result, expected, atol=KAPPA_ESTIMATION_TOL)
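The loop above is Newton's method applied to the maximum likelihood equation \(A_p(\kappa) = \bar{R}\), where \(A_p\) is the ratio of modified Bessel functions of the first kind computed by scipy.special.iv (standard von Mises-Fisher theory, stated here for reference):

\[
A_p(\kappa) = \frac{I_{p/2}(\kappa)}{I_{p/2 - 1}(\kappa)},
\qquad
A_p'(\kappa) = 1 - A_p(\kappa)^2 - \frac{p - 1}{\kappa}\,A_p(\kappa),
\qquad
\kappa \leftarrow \kappa - \frac{A_p(\kappa) - \bar{R}}{A_p'(\kappa)}.
\]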
Example 4
class TestOnlineKmeans(geomstats.tests.TestCase):
    def setUp(self):
        gs.random.seed(1234)

        self.dimension = 2
        self.space = Hypersphere(dim=self.dimension)
        self.metric = self.space.metric
        self.data = self.space.random_von_mises_fisher(kappa=100, n_samples=50)

    @geomstats.tests.np_only
    def test_fit(self):
        X = self.data
        clustering = OnlineKMeans(metric=self.metric,
                                  n_clusters=1,
                                  n_repetitions=10)
        clustering.fit(X)

        center = clustering.cluster_centers_
        mean = FrechetMean(metric=self.metric, lr=1.)
        mean.fit(X)

        result = self.metric.dist(center, mean.estimate_)
        expected = 0.
        self.assertAllClose(expected, result, atol=1e-3)

    @geomstats.tests.np_only
    def test_predict(self):
        X = self.data
        clustering = OnlineKMeans(metric=self.metric,
                                  n_clusters=3,
                                  n_repetitions=1)
        clustering.fit(X)

        point = self.data[0, :]
        prediction = clustering.predict(point)

        result = prediction
        expected = clustering.labels_[0]
        self.assertAllClose(expected, result)
Example 5
    def _init_double_cluster(
        seed=10,
        num_of_samples=20,
        size_of_dim=2,
        kappa_value=20,
        orthogonality_of_sphere=3,
        bandwidth=0.3,
        tol=1e-4,
        num_of_centers=2,
    ):
        gs.random.seed(seed)
        sphere = Hypersphere(size_of_dim)
        metric = sphere.metric

        cluster = sphere.random_von_mises_fisher(
            kappa=kappa_value, n_samples=num_of_samples
        )

        special_orthogonal = SpecialOrthogonal(orthogonality_of_sphere)
        rotation1 = special_orthogonal.random_uniform()
        rotation2 = special_orthogonal.random_uniform()

        cluster_1 = cluster @ rotation1
        cluster_2 = cluster @ rotation2

        combined_cluster = gs.concatenate((cluster_1, cluster_2))
        rms = riemannian_mean_shift(
            manifold=sphere,
            metric=metric,
            bandwidth=bandwidth,
            tol=tol,
            n_centers=num_of_centers,
        )

        rms.fit(combined_cluster)

        return combined_cluster, rms
Example 6
    def test_double_cluster_riemannian_mean_shift(self):
        gs.random.seed(10)
        number_of_samples = 20
        sphere = Hypersphere(dim=2)
        metric = HypersphereMetric(2)

        cluster = sphere.random_von_mises_fisher(kappa=20,
                                                 n_samples=number_of_samples)

        special_orthogonal = SpecialOrthogonal(3)
        rotation1 = special_orthogonal.random_uniform()
        rotation2 = special_orthogonal.random_uniform()

        cluster_1 = cluster @ rotation1
        cluster_2 = cluster @ rotation2

        combined_cluster = gs.concatenate((cluster_1, cluster_2))
        rms = riemannian_mean_shift(manifold=sphere,
                                    metric=metric,
                                    bandwidth=0.3,
                                    tol=1e-4,
                                    n_centers=2)

        rms.fit(combined_cluster)
        closest_centers = rms.predict(combined_cluster)

        count_in_first_cluster = 0
        for point in closest_centers:
            if gs.allclose(point, rms.centers[0]):
                count_in_first_cluster += 1

        count_in_second_cluster = 0
        for point in closest_centers:
            if gs.allclose(point, rms.centers[1]):
                count_in_second_cluster += 1

        self.assertEqual(combined_cluster.shape[0],
                         count_in_first_cluster + count_in_second_cluster)
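Both snippets above build two synthetic clusters by rotating a single von Mises-Fisher sample with random elements of SO(3). Below is a minimal sketch of that trick, assuming a standard geomstats installation; the imports and the final belongs check are additions for illustration, not part of the original snippets.

import geomstats.backend as gs
from geomstats.geometry.hypersphere import Hypersphere
from geomstats.geometry.special_orthogonal import SpecialOrthogonal

sphere = Hypersphere(dim=2)
# Sample a cluster concentrated around the distribution's default mean.
points = sphere.random_von_mises_fisher(kappa=20, n_samples=5)

# Draw a uniformly random rotation of the ambient space R^3.
rotation = SpecialOrthogonal(3).random_uniform()
rotated = points @ rotation

# Rotations are isometries of the sphere, so the rotated sample is again
# von Mises-Fisher distributed, centered at the rotated mean direction.
print(gs.all(sphere.belongs(rotated)))  # expected: True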
Example 7
def main():
    sphere = Hypersphere(dim=2)

    data = sphere.random_von_mises_fisher(kappa=10, n_samples=1000)

    n_clusters = 4
    clustering = OnlineKMeans(metric=sphere.metric, n_clusters=n_clusters)
    clustering = clustering.fit(data)

    plt.figure(0)
    ax = plt.subplot(111, projection="3d")
    visualization.plot(points=clustering.cluster_centers_, ax=ax,
                       space='S2', c='r')
    plt.show()

    plt.figure(1)
    ax = plt.subplot(111, projection="3d")
    sphere_plot = visualization.Sphere()
    sphere_plot.draw(ax=ax)
    for i in range(n_clusters):
        cluster = data[clustering.labels_ == i, :]
        sphere_plot.draw_points(ax=ax, points=cluster)
    plt.show()
Example 8
def main():
    """Plot a Kernel Density Estimation Classification on the sphere."""
    sphere = Hypersphere(dim=2)
    sphere_distance = sphere.metric.dist

    n_labels = 2
    n_samples_per_dataset = 10
    n_targets = 200
    radius = np.inf

    kernel = triangular_radial_kernel
    bandwidth = 3

    n_training_samples = n_labels * n_samples_per_dataset
    dataset_1 = sphere.random_von_mises_fisher(
        kappa=10,
        n_samples=n_samples_per_dataset)
    dataset_2 = - sphere.random_von_mises_fisher(
        kappa=10,
        n_samples=n_samples_per_dataset)
    training_dataset = gs.concatenate((dataset_1, dataset_2), axis=0)
    labels_dataset_1 = gs.zeros([n_samples_per_dataset], dtype=gs.int64)
    labels_dataset_2 = gs.ones([n_samples_per_dataset], dtype=gs.int64)
    labels = gs.concatenate((labels_dataset_1, labels_dataset_2))
    target = sphere.random_uniform(n_samples=n_targets)

    labels_colors = gs.zeros([n_labels, 3])
    labels_colors[0, :] = gs.array([0, 0, 1])
    labels_colors[1, :] = gs.array([1, 0, 0])

    kde = KernelDensityEstimationClassifier(
        radius=radius,
        distance=sphere_distance,
        kernel=kernel,
        bandwidth=bandwidth,
        outlier_label='most_frequent')
    kde.fit(training_dataset, labels)
    target_labels = kde.predict(target)
    target_labels_proba = kde.predict_proba(target)

    plt.figure(0)
    ax = plt.subplot(111, projection='3d')
    plt.title('Training set')
    sphere_plot = visualization.Sphere()
    sphere_plot.draw(ax=ax)
    colors = gs.zeros([n_training_samples, 3])
    for i_sample in range(n_training_samples):
        colors[i_sample, :] = labels_colors[labels[i_sample], :]
    sphere_plot.draw_points(ax=ax, points=training_dataset, c=colors)

    plt.figure(1)
    ax = plt.subplot(111, projection='3d')
    plt.title('Classification')
    sphere_plot = visualization.Sphere()
    sphere_plot.draw(ax=ax)
    colors = gs.zeros([n_targets, 3])
    for i_target in range(n_targets):
        colors[i_target, :] = labels_colors[target_labels[i_target], :]
    sphere_plot.draw_points(ax=ax, points=target, c=colors)

    plt.figure(2)
    ax = plt.subplot(111, projection='3d')
    plt.title('Probabilistic classification')
    sphere_plot = visualization.Sphere()
    sphere_plot.draw(ax=ax)
    colors = target_labels_proba @ labels_colors
    sphere_plot.draw_points(ax=ax, points=target, c=colors)

    plt.show()