Beispiel #1
0
def test_kcenters_5():
    """Check that KCenters accepts a user-supplied callable metric.

    One model uses the built-in 'euclidean' metric, the other a callable
    computing the squared euclidean distance. Squaring does not change
    which center is nearest, so both should give the same assignments.
    """
    def squared_euclidean(target, ref, i):
        # Sum of squared differences between `target` and `ref[i]`,
        # reduced along axis 1.
        return np.sum((target - ref[i]) ** 2, axis=1)

    builtin_model = KCenters(n_clusters=10, random_state=0, metric='euclidean')
    custom_model = KCenters(n_clusters=10, random_state=0, metric=squared_euclidean)

    # Fixed seed: both models are fitted on exactly the same points.
    points = np.random.RandomState(0).randn(100, 2)

    builtin_labels = builtin_model.fit_predict([points])[0]
    custom_labels = custom_model.fit_predict([points])[0]
    eq(builtin_labels, custom_labels)
Beispiel #2
0
def test_kcenters_3():
    """Test predict() using euclidean distance.

    Two properties are checked on the first (and only) sequence:

    * predict() on the training data returns the labels fit_predict() gave;
    * every point is labelled with the index of its nearest cluster
      center, as computed independently with scipy's cdist.
    """
    # Seed both the data and the model (as test_kcenters_5 does) so that
    # a failure can be reproduced instead of depending on global RNG state.
    data = np.random.RandomState(0).randn(100, 2)
    model = KCenters(n_clusters=10, random_state=0)

    labels1 = model.fit_predict([data])
    labels2 = model.predict([data])
    eq(labels1[0], labels2[0])

    # cdist defaults to the euclidean metric; argmin over axis 1 picks,
    # for each point, the column (center) with the smallest distance.
    all_pairs = scipy.spatial.distance.cdist(data, model.cluster_centers_)
    eq(labels2[0], np.argmin(all_pairs, axis=1))
Beispiel #3
0
def test_kcenters_4():
    """Test predict() using a non-euclidean ('cityblock') distance.

    Because of the way the code is structured, a non-euclidean metric
    takes a different path than the euclidean one, so it is tested
    separately. Two properties are checked on the first (and only)
    sequence:

    * predict() on the training data returns the labels fit_predict() gave;
    * every point is labelled with the index of its nearest cluster
      center under the cityblock metric, as computed with scipy's cdist.
    """
    # Seed both the data and the model (as test_kcenters_5 does) so that
    # a failure can be reproduced instead of depending on global RNG state.
    data = np.random.RandomState(0).randn(100, 2)
    model = KCenters(n_clusters=10, metric='cityblock', random_state=0)

    labels1 = model.fit_predict([data])
    labels2 = model.predict([data])
    eq(labels1[0], labels2[0])

    # Reference assignment: for each point, the column (center) with the
    # smallest cityblock distance.
    all_pairs = scipy.spatial.distance.cdist(data, model.cluster_centers_, metric='cityblock')
    eq(labels2[0], np.argmin(all_pairs, axis=1))
Beispiel #4
0
def test_kcenters_1():
    """Make sure the attributes set by fit() have the right types and shapes.

    The model is fitted with 3 clusters on two 2-column sequences of
    23 and 10 rows; labels_ and distances_ are expected to be lists
    holding one 1-D array per sequence.
    """
    expected_lengths = (23, 10)

    model = KCenters(n_clusters=3)
    sequences = [np.random.randn(length, 2) for length in expected_lengths]
    model.fit(sequences)

    # Per-sequence results are kept in plain lists.
    assert isinstance(model.labels_, list)
    assert isinstance(model.distances_, list)
    assert len(model.labels_) == 2

    # One center per cluster, each with the dimensionality of the data.
    eq(model.cluster_centers_.shape, (3, 2))

    # One label and one distance per row of each input sequence.
    for index, length in enumerate(expected_lengths):
        eq(model.labels_[index].shape, (length,))
    for index, length in enumerate(expected_lengths):
        eq(model.distances_[index].shape, (length,))

    # fit_predict() on a fresh 10-row sequence returns one label per row.
    fresh_labels = model.fit_predict([np.random.randn(10, 2)])
    eq(fresh_labels[0].shape, (10,))