def test_kcenters_5():
    # Custom (callable) metric check: clustering with the built-in
    # 'euclidean' metric and with a user-supplied squared euclidean metric
    # should produce the same assignments.
    def squared_euclidean(target, ref, i):
        # squared distance from every row of `target` to the row `ref[i]`
        return np.sum((target - ref[i]) ** 2, axis=1)

    data = np.random.RandomState(0).randn(100, 2)

    model1 = KCenters(n_clusters=10, random_state=0, metric='euclidean')
    model2 = KCenters(n_clusters=10, random_state=0, metric=squared_euclidean)

    euclidean_labels = model1.fit_predict([data])[0]
    squared_labels = model2.fit_predict([data])[0]
    eq(euclidean_labels, squared_labels)
def test_kcenters_3():
    # test for predict using euclidean distance
    #
    # The data generator and the model are both seeded so that a failure
    # can be reproduced from run to run.
    rng = np.random.RandomState(0)
    model = KCenters(n_clusters=10, random_state=0)
    data = rng.randn(100, 2)

    labels1 = model.fit_predict([data])
    labels2 = model.predict([data])
    # predict() on the training data must agree with fit_predict()
    eq(labels1[0], labels2[0])

    # cross-check against a brute-force nearest-center assignment
    # (cdist defaults to the euclidean metric)
    all_pairs = scipy.spatial.distance.cdist(data, model.cluster_centers_)
    eq(labels2[0], np.argmin(all_pairs, axis=1))
def test_kcenters_4():
    # test for predict() using non-euclidean distance. because of the
    # way the code is structured, this takes a different path
    #
    # The data generator and the model are both seeded so that a failure
    # can be reproduced from run to run.
    rng = np.random.RandomState(0)
    model = KCenters(n_clusters=10, random_state=0, metric='cityblock')
    data = rng.randn(100, 2)

    labels1 = model.fit_predict([data])
    labels2 = model.predict([data])
    # predict() on the training data must agree with fit_predict()
    eq(labels1[0], labels2[0])

    # cross-check against a brute-force nearest-center assignment computed
    # with the same metric
    all_pairs = scipy.spatial.distance.cdist(
        data, model.cluster_centers_, metric='cityblock')
    eq(labels2[0], np.argmin(all_pairs, axis=1))
def test_kcenters_1():
    # make sure all the shapes are correct of the fit parameters
    #
    # The data generator and the model are both seeded so that a failure
    # can be reproduced from run to run.
    rng = np.random.RandomState(0)
    m = KCenters(n_clusters=3, random_state=0)
    # two input sequences of different lengths (23 and 10 frames, 2 features)
    m.fit([rng.randn(23, 2), rng.randn(10, 2)])

    # per-sequence results are returned as lists, one entry per sequence
    assert isinstance(m.labels_, list)
    assert isinstance(m.distances_, list)
    assert len(m.labels_) == 2

    # one center per cluster, in the feature space of the input
    eq(m.cluster_centers_.shape, (3, 2))
    # one label / distance per frame of each sequence
    eq(m.labels_[0].shape, (23,))
    eq(m.labels_[1].shape, (10,))
    eq(m.distances_[0].shape, (23,))
    eq(m.distances_[1].shape, (10,))

    # fit_predict on a single new sequence yields one label per frame
    eq(m.fit_predict([rng.randn(10, 2)])[0].shape, (10,))