def testKMeansFitPredict(self):
    """Check that fit(X) followed by predict(X) matches fit_predict(X).

    Runs both KMeans algorithms over (seed, max_iter, tol) combinations
    covering strict/loose tolerance in both converged and non-converged
    regimes.
    """
    algorithms = ['full', 'elkan']
    configurations = [
        (0, 2, 1e-7),    # strict non-convergence
        (1, 2, 1e-1),    # loose non-convergence
        (3, 300, 1e-7),  # strict convergence
        (4, 300, 1e-1),  # loose convergence
    ]
    for algorithm in algorithms:
        for seed, max_iter, tol in configurations:
            rng = np.random.RandomState(seed)
            X, _ = make_blobs(n_samples=1000, n_features=10, centers=10,
                              random_state=rng)
            estimator = KMeans(algorithm=algorithm, n_clusters=10,
                               random_state=seed, tol=tol,
                               max_iter=max_iter, init='k-means++')
            labels_from_fit = estimator.fit(X).predict(X)
            labels_from_fit_predict = estimator.fit_predict(X)
            # Due to randomness in the order in which chunks of data are
            # processed when using more than one thread, the absolute label
            # ids can differ between the two strategies, but they should
            # correspond to the same clustering — so compare with a
            # permutation-invariant score.
            self.assertAlmostEqual(
                v_measure_score(labels_from_fit, labels_from_fit_predict), 1)
def test_k_means_fit_predict(setup, algo, seed, max_iter, tol):
    """Check that fit(X) followed by predict(X) matches fit_predict(X)
    for the parametrized algorithm / seed / convergence settings."""
    rng = np.random.RandomState(seed)
    X, _ = make_blobs(n_samples=1000, n_features=10, centers=10,
                      random_state=rng)
    estimator = KMeans(algorithm=algo, n_clusters=10, random_state=seed,
                       tol=tol, max_iter=max_iter, init='k-means++')
    labels_from_fit = estimator.fit(X).predict(X)
    labels_from_fit_predict = estimator.fit_predict(X)
    # Due to randomness in the order in which chunks of data are processed
    # when using more than one thread, the absolute label ids can differ
    # between the two strategies, but they should correspond to the same
    # clustering — so compare with a permutation-invariant score.
    score = v_measure_score(labels_from_fit, labels_from_fit_predict)
    assert pytest.approx(score) == 1