def test_locate_elbow(self):
    """
    Fit the k-elbow visualizer with locate_elbow enabled on synthetic blobs
    and check the located elbow, the scores and the rendered image
    """
    expected_scores = np.array(
        [4286.479848, 12463.383743, 8766.999551, 6950.08391, 5865.79722]
    )

    # Three well separated centers; the labels are not needed here
    features, _ = make_blobs(
        n_samples=1000, n_features=5, centers=3, shuffle=True, random_state=42
    )

    oz = KElbowVisualizer(
        KMeans(random_state=0),
        k=6,
        metric="calinski_harabasz",
        timings=False,
        locate_elbow=True,
    )
    oz.fit(features)

    # Five scores are expected for k=6, with the elbow at the true
    # number of centers
    assert len(oz.k_scores_) == 5
    assert oz.elbow_value_ == 3

    oz.finalize()
    self.assert_images_similar(oz, windows_tol=2.2)
    assert_array_almost_equal(oz.k_scores_, expected_scores)
def test_timings(self):
    """
    Check that fitting with timings enabled records fit times and draws
    them on a second axes, then compare the image using fixed timings
    """
    oz = KElbowVisualizer(
        KMeans(random_state=0), k=5, timings=True, locate_elbow=False
    )
    oz.fit(self.clusters.X)

    # Four strictly positive timings must have been recorded
    recorded = oz.k_timers_
    assert len(recorded) == 4
    assert all(t > 0 for t in recorded)

    # The visualizer must expose two axes once timings are drawn
    assert hasattr(oz, "axes")
    assert len(oz.axes) == 2

    # Remove the timings axes and replace the measured timings and the
    # k values with fixed ones ahead of the image similarity check
    oz.axes[1].remove()
    oz.k_timers_ = [
        0.01084589958190918,
        0.011144161224365234,
        0.017028093338012695,
        0.010634183883666992,
    ]
    oz.k_values_ = [2, 3, 4, 5]

    # draw is normally called in fit, so it has to be repeated here
    oz.draw()
    oz.finalize()
    self.assert_images_similar(oz)
def test_integrated_mini_batch_kmeans_elbow(self):
    """
    Run the k-elbow visualizer with MiniBatchKMeans on synthetic blobs and
    fail the test if any exception is raised along the way
    """
    # NOTE #182: cannot use occupancy dataset because of memory usage

    # Synthetic blobs are used instead; the labels are not needed
    features, _ = make_blobs(
        n_samples=1000, n_features=12, centers=6, shuffle=True, random_state=42
    )

    try:
        _, axes = plt.subplots()
        estimator = MiniBatchKMeans(random_state=42)
        oz = KElbowVisualizer(estimator, k=4, ax=axes)
        oz.fit(features)
        oz.finalize()
        self.assert_images_similar(oz)
    except Exception as err:
        # Any exception is turned into a test failure carrying its message
        pytest.fail("error during k-elbow: {}".format(err))
def test_calinski_harabasz_metric(self):
    """
    Check the calinski-harabasz scores and image of the k-elbow visualizer
    """
    # NOTE(review): a second test_calinski_harabasz_metric with different
    # expected scores is defined later in this file; if both belong to the
    # same class only the later one runs - confirm which one is current.
    expected_scores = np.array(
        [
            81.66272625603568,
            50.992378259195554,
            39.573201061900455,
            37.06865804955547,
        ]
    )

    oz = KElbowVisualizer(
        KMeans(random_state=0),
        k=5,
        metric="calinski_harabasz",
        timings=False,
        locate_elbow=False,
    )
    oz.fit(self.clusters.X)

    # Four scores are expected, and no elbow when locate_elbow is off
    assert len(oz.k_scores_) == 4
    assert oz.elbow_value_ is None

    oz.finalize()
    self.assert_images_similar(oz)
    assert_array_almost_equal(oz.k_scores_, expected_scores)
def test_topic_modeling_k_means(self):
    """
    Run the k-elbow visualizer on TF-IDF features of the hobbies corpus
    """
    documents = load_hobbies().data
    tfidf_matrix = TfidfVectorizer().fit_transform(documents)

    # NOTE(review): KMeans() gets no random_state here, unlike the other
    # tests in this file - confirm the image comparison is stable.
    oz = KElbowVisualizer(KMeans(), k=(4, 8))
    oz.fit(tfidf_matrix)
    oz.finalize()
    self.assert_images_similar(oz)
def test_silhouette_metric(self):
    """
    Check the silhouette scores and image of the k-elbow visualizer
    """
    # NOTE(review): a second test_silhouette_metric with different expected
    # scores is defined later in this file; if both belong to the same
    # class only the later one runs - confirm which one is current.
    expected_scores = np.array([0.691636, 0.456646, 0.255174, 0.239842])

    oz = KElbowVisualizer(
        KMeans(random_state=0),
        k=5,
        metric="silhouette",
        timings=False,
        locate_elbow=False,
    )
    oz.fit(self.clusters.X)

    assert len(oz.k_scores_) == 4

    oz.finalize()
    self.assert_images_similar(oz)
    assert_array_almost_equal(oz.k_scores_, expected_scores)
def test_distortion_metric(self):
    """
    Check the distortion scores and image of the k-elbow visualizer
    """
    # NOTE(review): a second test_distortion_metric with different expected
    # scores is defined later in this file; if both belong to the same
    # class only the later one runs - confirm which one is current.
    expected_scores = np.array([69.100065, 54.081571, 43.146921, 34.978487])

    oz = KElbowVisualizer(
        KMeans(random_state=0),
        k=5,
        metric="distortion",
        timings=False,
        locate_elbow=False,
    )
    oz.fit(self.clusters.X)

    assert len(oz.k_scores_) == 4

    oz.finalize()
    self.assert_images_similar(oz, tol=0.03)
    assert_array_almost_equal(oz.k_scores_, expected_scores)
def test_calinski_harabasz_metric(self):
    """
    Check the calinski-harabasz scores and image of the k-elbow visualizer
    """
    # NOTE(review): an earlier test_calinski_harabasz_metric with different
    # expected scores also exists in this file; if both belong to the same
    # class this later one replaces it - confirm the earlier one can go.
    expected_scores = np.array([81.662726, 50.992378, 40.952179, 35.939494])

    oz = KElbowVisualizer(
        KMeans(random_state=0),
        k=5,
        metric="calinski_harabasz",
        timings=False,
        locate_elbow=False,
    )
    oz.fit(self.clusters.X)

    # Four scores are expected, and no elbow when locate_elbow is off
    assert len(oz.k_scores_) == 4
    assert oz.elbow_value_ is None

    oz.finalize()
    self.assert_images_similar(oz)
    assert_array_almost_equal(oz.k_scores_, expected_scores)
def test_silhouette_metric(self):
    """
    Check the silhouette scores and image of the k-elbow visualizer
    """
    # NOTE(review): an earlier test_silhouette_metric with different
    # expected scores also exists in this file; if both belong to the same
    # class this later one replaces it - confirm the earlier one can go.
    expected_scores = np.array(
        [
            0.6916363804000003,
            0.456645663683503,
            0.26918583373704463,
            0.25523298106687914,
        ]
    )

    oz = KElbowVisualizer(
        KMeans(random_state=0),
        k=5,
        metric="silhouette",
        timings=False,
        locate_elbow=False,
    )
    oz.fit(self.clusters.X)

    assert len(oz.k_scores_) == 4

    oz.finalize()
    self.assert_images_similar(oz)
    assert_array_almost_equal(oz.k_scores_, expected_scores)
def test_distortion_metric(self):
    """
    Check the distortion scores and image of the k-elbow visualizer
    """
    # NOTE(review): an earlier test_distortion_metric with different
    # expected scores also exists in this file; if both belong to the same
    # class this later one replaces it - confirm the earlier one can go.
    expected_scores = np.array(
        [
            69.10006514142941,
            54.081571290449936,
            44.491830981793605,
            33.99887993254433,
        ]
    )

    oz = KElbowVisualizer(
        KMeans(random_state=0),
        k=5,
        metric="distortion",
        timings=False,
        locate_elbow=False,
    )
    oz.fit(self.clusters.X)

    assert len(oz.k_scores_) == 4

    oz.finalize()
    self.assert_images_similar(oz, tol=0.03)
    assert_array_almost_equal(oz.k_scores_, expected_scores)
def test_set_colors_manually(self):
    """
    Test drawing the k-elbow visualizer after setting its colors manually
    """
    oz = KElbowVisualizer(KMeans(random_state=0), k=5)

    # Override the metric, timing and vline colors on the instance
    manual_colors = (
        ("metric_color", "r"),
        ("timing_color", "y"),
        ("vline_color", "c"),
    )
    for attribute, color in manual_colors:
        setattr(oz, attribute, color)

    # Create artificial "fit" data for testing purposes, so that no
    # estimator has to be fitted
    oz.k_values_ = [1, 2, 3, 4, 5, 6, 7, 8]
    oz.k_timers_ = [6.2, 8.3, 10.1, 15.8, 21.2, 27.9, 38.2, 44.9]
    oz.k_scores_ = [0.8, 0.7, 0.55, 0.48, 0.40, 0.38, 0.35, 0.30]
    oz.elbow_value_ = 5
    oz.elbow_score_ = 0.40

    # Execute drawing and compare against the baseline image
    oz.draw()
    oz.finalize()
    self.assert_images_similar(oz, tol=3.2)