def test_locate_elbow(self):
        """
        Check elbow detection and rendering when locate_elbow is enabled

        Fits a calinski_harabasz scored visualizer on synthetic blobs that
        were generated with three centers, then verifies the scores, the
        located elbow value and the rendered image.
        """
        expected_scores = np.array(
            [4286.479848, 12463.383743, 8766.999551, 6950.08391, 5865.79722]
        )

        # Only the feature matrix is used; the generated labels are discarded
        X, _ = make_blobs(
            n_samples=1000, n_features=5, centers=3, shuffle=True, random_state=42
        )

        oz = KElbowVisualizer(
            KMeans(random_state=0),
            k=6,
            metric="calinski_harabasz",
            timings=False,
            locate_elbow=True,
        )
        oz.fit(X)

        # k=6 is expected to produce five scores with the elbow found at 3
        assert len(oz.k_scores_) == 5
        assert oz.elbow_value_ == 3

        oz.finalize()
        self.assert_images_similar(oz, windows_tol=2.2)
        assert_array_almost_equal(oz.k_scores_, expected_scores)
    def test_timings(self):
        """
        Check that fit timings are recorded and drawn on a second axes
        """
        oz = KElbowVisualizer(
            KMeans(random_state=0), k=5, timings=True, locate_elbow=False
        )
        oz.fit(self.clusters.X)

        # Four strictly positive timings should have been recorded
        assert len(oz.k_timers_) == 4
        assert all(elapsed > 0 for elapsed in oz.k_timers_)

        # With timings enabled the visualizer is expected to hold two axes
        assert hasattr(oz, "axes")
        assert len(oz.axes) == 2

        # Remove the timings axes, then overwrite k_timers_ and k_values_
        # with fixed values for the image similarity test
        oz.axes[1].remove()
        oz.k_timers_ = [
            0.01084589958190918,
            0.011144161224365234,
            0.017028093338012695,
            0.010634183883666992,
        ]
        oz.k_values_ = [2, 3, 4, 5]

        # draw is normally invoked from fit, so it has to be repeated here
        oz.draw()
        oz.finalize()

        self.assert_images_similar(oz)
    def test_integrated_mini_batch_kmeans_elbow(self):
        """
        Check that the k-elbow visualizer runs with mini-batch kmeans

        Any exception raised while fitting, finalizing or comparing the
        image is reported through pytest.fail.
        """
        # NOTE #182: cannot use occupancy dataset because of memory usage

        # Synthetic blobs are used instead; the generated labels are unused
        X, _labels = make_blobs(
            n_samples=1000, n_features=12, centers=6, shuffle=True, random_state=42
        )

        try:
            _, ax = plt.subplots()

            oz = KElbowVisualizer(MiniBatchKMeans(random_state=42), k=4, ax=ax)
            oz.fit(X)
            oz.finalize()

            self.assert_images_similar(oz)
        except Exception as err:
            pytest.fail("error during k-elbow: {}".format(err))
Exemple #4
0
    def test_calinski_harabasz_metric(self):
        """
        Check scores and image for the calinski-harabasz k-elbow metric
        """
        expected_scores = np.array(
            [
                81.66272625603568,
                50.992378259195554,
                39.573201061900455,
                37.06865804955547,
            ]
        )

        oz = KElbowVisualizer(
            KMeans(random_state=0),
            k=5,
            metric="calinski_harabasz",
            timings=False,
            locate_elbow=False,
        )
        oz.fit(self.clusters.X)

        # k=5 is expected to produce four scores; with locate_elbow
        # disabled no elbow value should be set
        assert len(oz.k_scores_) == 4
        assert oz.elbow_value_ is None

        oz.finalize()
        self.assert_images_similar(oz)
        assert_array_almost_equal(oz.k_scores_, expected_scores)
    def test_topic_modeling_k_means(self):
        """
        Check k-elbow with k-means on TF-IDF vectors of the hobbies corpus
        """
        hobbies = load_hobbies()
        vectors = TfidfVectorizer().fit_transform(hobbies.data)

        # NOTE(review): KMeans has no fixed random_state here; confirm the
        # image comparison below is stable across runs
        oz = KElbowVisualizer(KMeans(), k=(4, 8))
        oz.fit(vectors)
        oz.finalize()

        self.assert_images_similar(oz)
    def test_silhouette_metric(self):
        """
        Check scores and image for the silhouette k-elbow metric
        """
        expected_scores = np.array([0.691636, 0.456646, 0.255174, 0.239842])

        oz = KElbowVisualizer(
            KMeans(random_state=0),
            k=5,
            metric="silhouette",
            timings=False,
            locate_elbow=False,
        )
        oz.fit(self.clusters.X)

        # k=5 is expected to produce four scores
        assert len(oz.k_scores_) == 4

        oz.finalize()
        self.assert_images_similar(oz)
        assert_array_almost_equal(oz.k_scores_, expected_scores)
    def test_distortion_metric(self):
        """
        Check scores and image for the distortion k-elbow metric
        """
        expected_scores = np.array([69.100065, 54.081571, 43.146921, 34.978487])

        oz = KElbowVisualizer(
            KMeans(random_state=0),
            k=5,
            metric="distortion",
            timings=False,
            locate_elbow=False,
        )
        oz.fit(self.clusters.X)

        # k=5 is expected to produce four scores
        assert len(oz.k_scores_) == 4

        oz.finalize()
        self.assert_images_similar(oz, tol=0.03)
        assert_array_almost_equal(oz.k_scores_, expected_scores)
Exemple #8
0
    def test_calinski_harabasz_metric(self):
        """
        Check scores and image for the calinski-harabasz k-elbow metric
        """
        expected_scores = np.array([81.662726, 50.992378, 40.952179, 35.939494])

        oz = KElbowVisualizer(
            KMeans(random_state=0),
            k=5,
            metric="calinski_harabasz",
            timings=False,
            locate_elbow=False,
        )
        oz.fit(self.clusters.X)

        # k=5 is expected to produce four scores; with locate_elbow
        # disabled no elbow value should be set
        assert len(oz.k_scores_) == 4
        assert oz.elbow_value_ is None

        oz.finalize()
        self.assert_images_similar(oz)
        assert_array_almost_equal(oz.k_scores_, expected_scores)
Exemple #9
0
    def test_silhouette_metric(self):
        """
        Check scores and image for the silhouette k-elbow metric
        """
        expected_scores = np.array(
            [
                0.6916363804000003,
                0.456645663683503,
                0.26918583373704463,
                0.25523298106687914,
            ]
        )

        oz = KElbowVisualizer(
            KMeans(random_state=0),
            k=5,
            metric="silhouette",
            timings=False,
            locate_elbow=False,
        )
        oz.fit(self.clusters.X)

        # k=5 is expected to produce four scores
        assert len(oz.k_scores_) == 4

        oz.finalize()
        self.assert_images_similar(oz)
        assert_array_almost_equal(oz.k_scores_, expected_scores)
Exemple #10
0
    def test_distortion_metric(self):
        """
        Check scores and image for the distortion k-elbow metric
        """
        expected_scores = np.array(
            [
                69.10006514142941,
                54.081571290449936,
                44.491830981793605,
                33.99887993254433,
            ]
        )

        oz = KElbowVisualizer(
            KMeans(random_state=0),
            k=5,
            metric="distortion",
            timings=False,
            locate_elbow=False,
        )
        oz.fit(self.clusters.X)

        # k=5 is expected to produce four scores
        assert len(oz.k_scores_) == 4

        oz.finalize()
        self.assert_images_similar(oz, tol=0.03)
        assert_array_almost_equal(oz.k_scores_, expected_scores)
Exemple #11
0
    def test_set_colors_manually(self):
        """
        Check the k-elbow drawing after its colors are overridden by hand
        """
        visualizer = KElbowVisualizer(KMeans(random_state=0), k=5)

        # Override the metric, timing and vline colors on the instance
        visualizer.metric_color = "r"
        visualizer.timing_color = "y"
        visualizer.vline_color = "c"

        # Assign artificial "fit" results directly; fit is not called here
        visualizer.k_values_ = [1, 2, 3, 4, 5, 6, 7, 8]
        visualizer.k_timers_ = [6.2, 8.3, 10.1, 15.8, 21.2, 27.9, 38.2, 44.9]
        visualizer.k_scores_ = [0.8, 0.7, 0.55, 0.48, 0.40, 0.38, 0.35, 0.30]
        visualizer.elbow_value_ = 5
        visualizer.elbow_score_ = 0.40

        # Render from the artificial state and compare against the baseline
        visualizer.draw()
        visualizer.finalize()
        self.assert_images_similar(visualizer, tol=3.2)