Beispiel #1
0
def test_kcenters_5():
    """Check ``KCenters`` with a callable metric against a built-in one.

    One model uses the built-in ``'euclidean'`` metric, the other a callable
    that returns the *squared* euclidean distance. Both are expected to give
    the same cluster assignments on the same seeded data.
    """
    def squared_euclidean(target, ref, i):
        # squared distance between each row of `target` and row `i` of `ref`
        return np.sum((target - ref[i]) ** 2, axis=1)

    builtin_model = KCenters(n_clusters=10, random_state=0, metric='euclidean')
    callable_model = KCenters(n_clusters=10, random_state=0, metric=squared_euclidean)

    points = np.random.RandomState(0).randn(100, 2)

    builtin_labels = builtin_model.fit_predict([points])[0]
    callable_labels = callable_model.fit_predict([points])[0]
    eq(builtin_labels, callable_labels)
Beispiel #2
0
def test_kcenters_3():
    """Check that ``predict`` agrees with ``fit_predict`` for the default metric.

    The labels returned by ``fit_predict`` and by a later ``predict`` on the
    same data must match, and each label must be the index of the closest
    cluster center according to ``scipy.spatial.distance.cdist`` with its
    default (euclidean) metric.
    """
    # Seed both the data and the model so that a failure is reproducible;
    # this mirrors the seeding already used by test_kcenters_5.
    data = np.random.RandomState(0).randn(100, 2)
    model = KCenters(n_clusters=10, random_state=0)

    labels1 = model.fit_predict([data])
    labels2 = model.predict([data])

    eq(labels1[0], labels2[0])
    # independent reference: nearest center by brute-force pairwise distances
    all_pairs = scipy.spatial.distance.cdist(data, model.cluster_centers_)
    eq(labels2[0], np.argmin(all_pairs, axis=1))
Beispiel #3
0
def test_kcenters_4():
    """Check that ``predict`` agrees with ``fit_predict`` for ``'cityblock'``.

    Per the original author's note, a non-euclidean metric takes a different
    path through the implementation than the euclidean case, so it is tested
    separately. Each label must also be the index of the closest cluster
    center under the cityblock metric as computed by
    ``scipy.spatial.distance.cdist``.
    """
    # Seed both the data and the model so that a failure is reproducible;
    # this mirrors the seeding already used by test_kcenters_5.
    data = np.random.RandomState(0).randn(100, 2)
    model = KCenters(n_clusters=10, metric='cityblock', random_state=0)

    labels1 = model.fit_predict([data])
    labels2 = model.predict([data])

    eq(labels1[0], labels2[0])
    # independent reference: nearest center by brute-force pairwise distances
    all_pairs = scipy.spatial.distance.cdist(data, model.cluster_centers_, metric='cityblock')
    eq(labels2[0], np.argmin(all_pairs, axis=1))
Beispiel #4
0
def test_kcenters_8():
    """Compare ``opt=True`` with ``opt=False`` for float64 and float32 input.

    For each dtype, the two fitted models must agree on cluster centers,
    distances, labels and predictions. The distances must contain no NaN,
    and predicting on the training data must reproduce the fitted labels.
    """
    samples = np.random.RandomState(1).randn(100, 2)
    for dtype in (np.float64, np.float32):
        samples = samples.astype(dtype)
        with_opt = KCenters(n_clusters=10, random_state=0, opt=True).fit([samples])
        without_opt = KCenters(n_clusters=10, random_state=0, opt=False).fit([samples])

        # fitted state must not depend on the `opt` flag
        eq(with_opt.cluster_centers_, without_opt.cluster_centers_)
        eq(with_opt.distances_[0], without_opt.distances_[0])
        eq(with_opt.labels_[0], without_opt.labels_[0])
        assert not np.any(np.isnan(with_opt.distances_[0]))

        # predictions must agree between the models and with the fitted labels
        eq(with_opt.predict([samples])[0], without_opt.predict([samples])[0])
        eq(with_opt.predict([samples])[0], with_opt.labels_[0])
Beispiel #5
0
def test_kcenters_2():
    """Fit two centers on three point groups and check centers and distances.

    The input has ten points at (0, 0), ten at (1, 1) and ten at (0.5, 0.5).
    """
    origin = np.zeros((10, 2))
    corner = np.ones((10, 2))
    midpoint = 0.5 * np.ones((10, 2))

    m = KCenters(n_clusters=2, random_state=0)
    m.fit([origin, corner, midpoint])

    # The centers are expected to be [0,0] and [1,1], in either order. This
    # relies on random_state=0 seeding the first center at (0,0) or (1,1);
    # another seed could start at [0.5, 0.5] instead.
    accepted_orders = ([[0, 0], [1, 1]], [[1, 1], [0, 0]])
    assert any(np.all(m.cluster_centers_ == np.array(order))
               for order in accepted_orders)

    # every point is either on a center (0) or at the midpoint (sqrt(2)/2)
    observed = np.unique(np.concatenate(m.distances_))
    eq(observed, np.array([0, np.sqrt(2) / 2]))
Beispiel #6
0
def test_kcenters_1():
    """Check the types and shapes of the fitted attributes.

    Fits three clusters on two random 2-D sequences of lengths 23 and 10,
    then verifies that ``labels_`` and ``distances_`` are lists with one
    array per input sequence and that ``cluster_centers_`` is ``(3, 2)``.
    """
    lengths = (23, 10)

    m = KCenters(n_clusters=3)
    m.fit([np.random.randn(n, 2) for n in lengths])

    assert isinstance(m.labels_, list)
    assert isinstance(m.distances_, list)
    assert len(m.labels_) == len(lengths)
    eq(m.cluster_centers_.shape, (3, 2))

    # one label array, then one distance array, per input sequence
    for index, n in enumerate(lengths):
        eq(m.labels_[index].shape, (n,))
    for index, n in enumerate(lengths):
        eq(m.distances_[index].shape, (n,))

    # fit_predict on a fresh single sequence returns one label per row
    eq(m.fit_predict([np.random.randn(10, 2)])[0].shape, (10,))
Beispiel #7
0
def test_kcenters_6():
    """Check a custom metric when the input is not a list of numpy arrays.

    A callable metric is applied directly to an ``md.Trajectory``. It uses
    only the first atom's first coordinate, so the selected centers should
    match those of a ``'sqeuclidean'`` model fitted on that coordinate
    extracted as a plain array.
    """
    # Seed the coordinates so that a failure is reproducible; both models
    # already use random_state=0.
    xyz = np.random.RandomState(0).randn(100, 1, 3)
    x = md.Trajectory(xyz=xyz, topology=None)

    def metric(target, ref, i):
        # squared euclidean distance for the first atom along the first coordinate
        return (target.xyz[:, 0, 0] - ref.xyz[i, 0, 0]) ** 2

    model1 = KCenters(n_clusters=10, metric=metric, random_state=0)
    model1.fit([x])

    model2 = KCenters(n_clusters=10, metric='sqeuclidean', random_state=0)
    model2.fit([x.xyz[:, :, 0]])

    # model1's centers are combined with `+` into one object exposing `.xyz`
    # (presumably single-frame trajectories being concatenated; confirm
    # against KCenters) before comparing with model2's array of centers.
    eq(reduce(operator.add, model1.cluster_centers_).xyz[:, 0, 0],
       model2.cluster_centers_[:, 0])