예제 #1
0
def test_3():
    """Compare labels from metric='euclidean' with an equivalent callable.

    Both models are fit on the same seeded 100x2 sample, and the labels of
    the single input sequence are compared with ``eq``.
    """
    def euclidean_to_landmark(target, ref, i):
        # Row-wise (axis=1) Euclidean norm of ``target - ref[i]``.
        return np.sqrt(np.sum((target - ref[i])**2, axis=1))

    data = np.random.RandomState(0).randn(100, 2)
    shared = dict(n_clusters=10, n_landmarks=20)

    by_name = LandmarkAgglomerative(metric='euclidean', **shared)
    by_callable = LandmarkAgglomerative(metric=euclidean_to_landmark, **shared)

    eq(by_name.fit_predict([data])[0], by_callable.fit_predict([data])[0])
예제 #2
0
def test_callable_metric():
    """Compare a hand-written Euclidean callable with metric='euclidean'.

    The two models share every other setting and see the same seeded data;
    their labels for the single input sequence are compared with ``eq``.
    """
    def euclidean_metric(target, ref, i):
        # Difference between each row of ``target`` and the row ``ref[i]``.
        diff = target - ref[i]
        return np.sqrt(np.sum(diff ** 2, axis=1))

    points = np.random.RandomState(0).randn(100, 2)

    reference = LandmarkAgglomerative(n_clusters=10, n_landmarks=20,
                                      metric='euclidean')
    custom = LandmarkAgglomerative(n_clusters=10, n_landmarks=20,
                                   metric=euclidean_metric)

    reference_labels = reference.fit_predict([points])[0]
    custom_labels = custom.fit_predict([points])[0]
    eq(reference_labels, custom_labels)
예제 #3
0
def test_2():
    """Default and random-landmark models must give the same partition.

    The input is two 20x2 samples, the first shifted by +10, so this should
    be a really easy clustering problem.
    """
    shifted = random.randn(20, 2) + 10
    unshifted = random.randn(20, 2)
    x = [shifted, unshifted]

    n_clusters = 2
    baseline = LandmarkAgglomerative(n_clusters=n_clusters)
    randomized = LandmarkAgglomerative(n_clusters=n_clusters,
                                       n_landmarks=20,
                                       landmark_strategy='random',
                                       random_state=random)

    flat_baseline = np.concatenate(baseline.fit_predict(x))
    flat_randomized = np.concatenate(randomized.fit_predict(x))

    # A score of exactly 1.0 means the partitions match up to relabelling.
    assert adjusted_rand_score(flat_baseline, flat_randomized) == 1.0
예제 #4
0
def test_3():
    """Test using a callable metric; it should give the same results.

    One model uses the built-in 'euclidean' metric and the other a callable
    computing the same quantity. Both are fit on identical seeded data.
    """
    def callable_euclidean(target, ref, i):
        squared = (target - ref[i])**2
        return np.sqrt(np.sum(squared, axis=1))

    models = [
        LandmarkAgglomerative(n_clusters=10, n_landmarks=20, metric=metric)
        for metric in ('euclidean', callable_euclidean)
    ]

    data = np.random.RandomState(0).randn(100, 2)

    # Fit in the same order as the models were built: built-in, then callable.
    labels_builtin, labels_callable = [
        model.fit_predict([data])[0] for model in models
    ]
    eq(labels_builtin, labels_callable)
예제 #5
0
def test_2():
    """Check that two landmark configurations cluster easy data identically.

    This should be a really easy clustering problem: two 20x2 samples, the
    first offset by +10.
    """
    x = [random.randn(20, 2) + 10, random.randn(20, 2)]

    n_clusters = 2
    estimators = (
        LandmarkAgglomerative(n_clusters=n_clusters),
        LandmarkAgglomerative(n_clusters=n_clusters,
                              landmark_strategy='random',
                              random_state=random,
                              n_landmarks=20),
    )

    # Fit each estimator in turn and flatten its per-sequence labels.
    flattened = [np.concatenate(est.fit_predict(x)) for est in estimators]

    score = adjusted_rand_score(flattened[0], flattened[1])
    assert score == 1.0
예제 #6
0
def test_agglom_with_metric_msm():
    """Fit with the ``sym_kl_divergence_msm`` metric and check label shape.

    Builds 100 flattened outputs of ``_get_random_prob_dist(4)``, clusters
    them as one sequence with complete linkage, and checks that one label is
    returned per sample.
    """
    n_samples = 100
    dists = [_get_random_prob_dist(4) for _ in range(n_samples)]
    flat = np.array([dist.flatten() for dist in dists])

    model = LandmarkAgglomerative(n_clusters=2,
                                  linkage='complete',
                                  metric=sym_kl_divergence_msm)
    labels = model.fit_predict([flat])[0]
    assert labels.shape == (n_samples,)
예제 #7
0
def test_cluster_centers():
    """Check that ward linkage yields one 2-D center per requested cluster.

    The input is two 20x2 samples, the first shifted by +10. For each
    ``n_clusters`` in 2..6 a fresh model is fit and ``cluster_centers_`` must
    have shape ``(n_clusters, 2)``.
    """
    x = [random.randn(20, 2) + 10, random.randn(20, 2)]

    # Sweep every cluster count instead of drawing one from the unseeded
    # global RNG, so a failure is reproducible and names the offending value.
    for n_clusters in range(2, 7):
        model = LandmarkAgglomerative(n_clusters=n_clusters,
                                      linkage='ward')
        model.fit_predict(x)
        assert model.cluster_centers_.shape == (n_clusters, 2), n_clusters
예제 #8
0
def test_alanine_dipeptide():
    """Test for rmsd metric compatibility with ward clustering.

    Fits on the ``[0:100]`` slice of the first cached AlanineDipeptide
    trajectory and checks that no more than ``n_clusters`` distinct labels
    come back.
    """
    n_clusters = 4
    dataset = AlanineDipeptide().get_cached()
    subset = dataset.trajectories[0][0:100]

    # keep n_landmarks small or this will get really slow
    model = LandmarkAgglomerative(n_clusters=n_clusters,
                                  n_landmarks=20,
                                  linkage='ward',
                                  metric='rmsd')
    labels = model.fit_predict(subset)

    distinct_labels = np.unique(np.concatenate(labels))
    assert len(distinct_labels) <= n_clusters
예제 #9
0
def test_1():
    """Check that three ways of labelling the same data agree.

    Compares ``fit(x).predict(x)`` on a clone, ``fit_predict(x)`` on the
    default model, and ``fit_predict(x)`` on a model whose ``n_landmarks``
    equals the total number of input rows.
    """
    x = [random.randn(10, 2), random.randn(10, 2)]

    n_clusters = 2
    total_rows = sum(map(len, x))
    model1 = LandmarkAgglomerative(n_clusters=n_clusters)
    model2 = LandmarkAgglomerative(n_clusters=n_clusters,
                                   n_landmarks=total_rows)

    labels0 = clone(model1).fit(x).predict(x)
    labels1 = model1.fit_predict(x)
    labels2 = model2.fit_predict(x)

    # One label array per input sequence.
    for labels in (labels0, labels1, labels2):
        assert len(labels) == 2

    # labels0 is the reference; the other two must match it per sequence.
    for other in (labels1, labels2):
        for expected, actual in zip(labels0, other):
            eq(expected, actual)

    assert len(np.unique(np.concatenate(labels0))) == n_clusters
예제 #10
0
def test_1():
    """Check fit/predict, fit_predict and full-landmark labels for agreement.

    The reference labels come from a clone of the default model fitted and
    then asked to predict. They are compared with ``fit_predict`` on the
    default model and on a model using as many landmarks as there are rows.
    """
    x = [random.randn(10, 2), random.randn(10, 2)]
    n_sequences = 2

    n_clusters = 2
    model1 = LandmarkAgglomerative(n_clusters=n_clusters)
    model2 = LandmarkAgglomerative(n_clusters=n_clusters,
                                   n_landmarks=sum(len(s) for s in x))

    fitted_clone = clone(model1).fit(x)
    labels0 = fitted_clone.predict(x)
    labels1 = model1.fit_predict(x)
    labels2 = model2.fit_predict(x)

    assert len(labels0) == n_sequences
    assert len(labels1) == n_sequences
    assert len(labels2) == n_sequences

    # Compare against model1 for every sequence first, then against model2.
    for seq in range(n_sequences):
        eq(labels0[seq], labels1[seq])
    for seq in range(n_sequences):
        eq(labels0[seq], labels2[seq])

    n_distinct = len(np.unique(np.concatenate(labels0)))
    assert n_distinct == n_clusters