def do_test_kmeans_results(self, representation, algo, dtype):
        """Check a weighted ``KMeansL1L2`` fit on a four-point toy dataset.

        The model starts from fixed centers and is fitted with sample
        weights; labels, inertia, centers and iteration count are then
        compared with hard-coded reference values.

        :param representation: ``'dense'`` builds the data with ``np.array``,
            ``'sparse'`` with ``sp.csr_matrix``; any other key raises
            ``KeyError``
        :param algo: forwarded to ``KMeansL1L2`` as ``algorithm``
        :param dtype: dtype used for the data, the initial centers and the
            expected centers
        """
        constructors = {
            'dense': np.array,
            'sparse': sp.csr_matrix
        }
        make_array = constructors[representation]
        points = [[0, 0], [0.5, 0], [0.5, 1], [1, 1]]
        X = make_array(points, dtype=dtype)
        # note kept from the original test: these weights will be rescaled
        # to [1.5, 0.5, 0.5, 1.5]
        weights = [3, 1, 1, 3]
        start_centers = np.array([[0, 0], [1, 1]], dtype=dtype)

        # reference values the fitted model is compared against
        want_labels = [0, 0, 1, 1]
        want_inertia = 0.1875
        want_centers = np.array([[0.125, 0], [0.875, 1]], dtype=dtype)
        want_n_iter = 2

        model = KMeansL1L2(
            n_clusters=2, n_init=1, init=start_centers, algorithm=algo)
        model.fit(X, sample_weight=weights)

        assert_array_equal(model.labels_, want_labels)
        assert_almost_equal(model.inertia_, want_inertia)
        assert_array_almost_equal(model.cluster_centers_, want_centers)
        self.assertEqualArray(model.n_iter_, want_n_iter)
Ejemplo n.º 2
0
 def test_kmeans_l2_iris(self):
     """Fit ``KMeansL1L2(4, norm='L2')`` on iris and check the labels.

     The test passes when the set of labels predicted on the training
     data is exactly ``{0, 1, 2, 3}``.
     """
     features = datasets.load_iris().data
     model = KMeansL1L2(4, norm='L2')
     model.fit(features)
     predicted = set(model.predict(features))
     self.assertEqual(set(range(4)), predicted)
 def test_k_means_random_init_not_precomputed(self):
     """Fit with ``init="random"`` and ``precompute_distances=False``.

     Uses the ``n_clusters`` and ``X`` attributes of
     ``TestKMeansL1L2Sklearn`` and hands the object returned by ``fit``
     to ``self._check_fitted_model``.
     """
     suite = TestKMeansL1L2Sklearn
     estimator = KMeansL1L2(
         init="random",
         n_clusters=suite.n_clusters,
         random_state=42,
         precompute_distances=False)
     km = estimator.fit(suite.X)
     self._check_fitted_model(km)
Ejemplo n.º 4
0
 def test_kmeans_l2_random(self):
     """Fit ``KMeansL1L2(4, init="random")`` on iris and check the labels.

     The test passes when the set of labels predicted on the training
     data is exactly ``{0, 1, 2, 3}``.
     """
     features = datasets.load_iris().data
     model = KMeansL1L2(4, init="random")
     model.fit(features)
     predicted = set(model.predict(features))
     self.assertEqual(set(range(4)), predicted)
Ejemplo n.º 5
0
 def test_kmeans_l1_small(self):
     """Fit an L1 ``KMeansL1L2`` on the first six iris rows.

     The model is built as ``KMeansL1L2(4, norm='L1', n_jobs=1)``; the
     test passes when the labels predicted on those six rows are exactly
     ``{0, 1, 2, 3}``.
     """
     # only the first six samples are used for both fit and predict
     sample = datasets.load_iris().data[:6]
     model = KMeansL1L2(4, norm='L1', n_jobs=1)
     model.fit(sample)
     predicted = set(model.predict(sample))
     self.assertEqual(set(range(4)), predicted)
Ejemplo n.º 6
0
    def do_test_kmeans_results(self, representation, algo, dtype, norm, sw):
        """Check a ``KMeansL1L2`` fit on a four-point toy dataset.

        The model starts from fixed centers; labels, inertia, centers and
        iteration count are compared with hard-coded reference values that
        depend on *sw* and *norm*. The test returns silently when
        ``norm == 'L1'`` and the estimator raises ``NotImplementedError``
        with one of the known "not implemented" messages.

        :param representation: ``'dense'`` builds the data with ``np.array``,
            ``'sparse'`` with ``sp.csr_matrix``; any other key raises
            ``KeyError``
        :param algo: forwarded to ``KMeansL1L2`` as ``algorithm``
        :param dtype: dtype used for the data, the initial centers and the
            expected centers
        :param norm: forwarded to ``KMeansL1L2`` as ``norm``; the code
            distinguishes ``'L2'`` and ``'L1'``
        :param sw: if truthy, fit with sample weights ``[3, 1, 1, 3]``,
            otherwise with ``sample_weight=None``
        """
        constructors = {
            'dense': np.array,
            'sparse': sp.csr_matrix
        }
        make_array = constructors[representation]
        X = make_array([[0, 0], [0.5, 0], [0.5, 1], [1, 1]], dtype=dtype)
        start_centers = np.array([[0, 0], [1, 1]], dtype=dtype)

        if sw:
            # note kept from the original test: these weights will be
            # rescaled to [1.5, 0.5, 0.5, 1.5]
            weights = [3, 1, 1, 3]
            # the reference inertia depends on the module-level
            # ``sklearn_023`` flag
            want_inertia = 0.375 if sklearn_023 else 0.1875
            want_centers = np.array([[0.125, 0], [0.875, 1]], dtype=dtype)
            want_n_iter = 2
        else:
            weights = None
            # unweighted: both norms share the same expected centers
            want_centers = np.array([[0.25, 0], [0.75, 1]], dtype=dtype)
            if norm == 'L2':
                want_inertia, want_n_iter = 0.25, 2
            else:
                want_inertia, want_n_iter = 1., 1

        want_labels = [0, 0, 1, 1]

        try:
            model = KMeansL1L2(
                n_clusters=2, n_init=1, init=start_centers,
                algorithm=algo, norm=norm)
        except NotImplementedError as exc:
            # an unsupported algorithm is tolerated for the L1 norm only
            if (norm == 'L1'
                    and "Only algorithm 'full' is implemented" in str(exc)):
                return
            raise exc

        # fit-time limitations that are tolerated for the L1 norm only
        tolerated = ("Non uniform weights are not implemented yet",
                     "Sparse matrix is not implemented")
        try:
            model.fit(X, sample_weight=weights)
        except NotImplementedError as exc:
            text = str(exc)
            if norm == 'L1' and any(msg in text for msg in tolerated):
                return
            raise exc

        assert_array_equal(model.labels_, want_labels)
        assert_almost_equal(model.inertia_, want_inertia)
        assert_array_almost_equal(model.cluster_centers_, want_centers)
        self.assertEqualArray(model.n_iter_, want_n_iter)
Ejemplo n.º 7
0
 def test_kmeans_l1_check(self):
     """Check an L1 ``KMeansL1L2`` fit on six 2-D points.

     The data has two identical columns ``[-10, 1, 2, 3, 4, 10]``. After
     fitting ``KMeansL1L2(2, norm='L1')`` the test checks the predicted
     labels, the shape of the centers, the per-feature maximum of the
     centers, and the shape and minimum of ``transform`` output.
     """
     X = numpy.array([[-10, 1, 2, 3, 4, 10], [-10, 1, 2, 3, 4, 10]]).T
     clr = KMeansL1L2(2, norm='L1')
     clr.fit(X)
     cls = set(clr.predict(X))
     self.assertEqual({0, 1}, cls)
     self.assertEqual(clr.cluster_centers_.shape, (2, 2))
     # Reduce per column so there is one value per feature, matching the
     # two-element expectation instead of comparing a scalar against it.
     self.assertEqualArray(clr.cluster_centers_.max(axis=0), [3, 3])
     tr = clr.transform(X)
     self.assertEqual(tr.shape, (X.shape[0], 2))
     tr = clr.transform([[3, 3]])
     # A single row is transformed, so the row-wise minimum yields exactly
     # one value to compare with the one-element expectation.
     self.assertEqualArray(tr.min(axis=1), [0])
    def test_k_means_new_centers(self):
        """Fit from deliberately bad initial centers, dense and sparse.

        The original comment states the purpose: explore the part of the
        code where a new center is reassigned. The same estimator is
        fitted on the dense array and on its ``sp.coo_matrix`` form; after
        relabelling, the labels must equal ``[0, 1, 2, 1, 1, 2]`` in both
        cases.
        """
        dense = np.array([
            [0, 0, 1, 1],
            [0, 0, 0, 0],
            [0, 1, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 1, 0, 0],
        ])
        want = [0, 1, 2, 1, 1, 2]
        bad_centers = np.array([
            [0, 1, 0, 0],
            [0.2, 0, 0.2, 0.2],
            [0, 0, 0, 0],
        ])

        km = KMeansL1L2(n_clusters=3, init=bad_centers, n_init=1,
                        max_iter=10, random_state=1)
        inputs = (dense, sp.coo_matrix(dense))
        for data in inputs:
            km.fit(data)
            found = km.labels_
            # Map every label to the index of its first occurrence so the
            # first instance is in cluster 0, the second in cluster 1, ...
            _, first_seen = np.unique(found, return_index=True)
            canonical = first_seen[found]
            np.testing.assert_array_equal(canonical, want)
Ejemplo n.º 9
0
 def test_k_means_plus_plus_init_not_precomputed(self):
     """Fit with ``init="k-means++"`` on the shared class data.

     Uses the ``n_clusters`` and ``X`` attributes of
     ``TestKMeansL1L2Sklearn`` and hands the object returned by ``fit``
     to ``self._check_fitted_model``.
     """
     suite = TestKMeansL1L2Sklearn
     estimator = KMeansL1L2(
         init="k-means++",
         n_clusters=suite.n_clusters,
         random_state=42)
     km = estimator.fit(suite.X)
     self._check_fitted_model(km)