Example 1
def test_similarity():
    filename_a = os.path.join("data", "276.pdb")
    filename_b = os.path.join("data", "4629.pdb")
    activesite_a = io.read_active_site(filename_a)
    activesite_b = io.read_active_site(filename_b)

    # dist(a,b) == expected
    assert round(compute_similarity(activesite_a, activesite_b), 3) == 13.857
    # dist(a,a) == 0
    assert compute_similarity(activesite_a, activesite_a) == 0
    # dist(a,b) == dist(b,a)
    assert compute_similarity(activesite_a,
                              activesite_b) == compute_similarity(
                                  activesite_b, activesite_a)

    # sign(dist(a,b)) == +
    active_sites = read_active_sites("data")
    for i in range(len(active_sites)):
        for j in range(len(active_sites)):
            if i != j:
                assert compute_similarity(active_sites[i], active_sites[j]) > 0
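
The assertions above check the standard distance properties: zero self-distance, symmetry, and a positive distance between distinct sites. Below is a minimal sketch of a distance-style compute_similarity that would satisfy those properties, assuming each active site has already been reduced to a numeric feature vector by a hypothetical featurize helper; the project's real implementation may differ.

import math

def featurize(active_site):
    # Hypothetical helper, assumed for illustration only: reduce an active site
    # to a fixed-length numeric vector (e.g. residue-composition counts).
    # In this sketch the input is already such a vector.
    return active_site

def compute_similarity(site_a, site_b):
    # Euclidean distance between feature vectors: zero on identical inputs,
    # symmetric, and positive whenever the two vectors differ.
    va, vb = featurize(site_a), featurize(site_b)
    return math.sqrt(sum((x - y) ** 2 for x, y in zip(va, vb)))

assert compute_similarity([1.0, 2.0], [1.0, 2.0]) == 0
assert compute_similarity([1.0, 2.0], [4.0, 6.0]) == compute_similarity([4.0, 6.0], [1.0, 2.0])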
Example 2
def test_hierarchical_clustering():
    random.seed(40)
    # tractable subset
    pdb_ids = [276, 4629, 10701]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    # update this assertion
    label = cluster.cluster_hierarchically(active_sites)
    assert all(label[1] == [0, 0, 1])
Example 3
def test_rand():

    pdb_ids = [276, 4629, 10701, 10814, 13052, 14181]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    c1 = [["276", "4629"], ["10701", "10814"], ["13052", "14181"]]
    c2 = [["10701", "10814"], ["276", "4629"], ["13052", "14181"]]

    assert compare_clusters.rand_index(c1, c2, active_sites) == 1
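
For reference, here is a minimal sketch of a Rand index over two clusterings given as lists of clusters of PDB IDs, as in c1 and c2 above. This is an assumed implementation for illustration only; the project's compare_clusters.rand_index also receives the active sites and may compute the score differently.

from itertools import combinations

def rand_index(clustering_a, clustering_b):
    # Map every label to the index of the cluster that contains it.
    def cluster_of(clustering):
        return {label: i for i, group in enumerate(clustering) for label in group}

    in_a, in_b = cluster_of(clustering_a), cluster_of(clustering_b)
    pairs = list(combinations(sorted(in_a), 2))
    # A pair "agrees" when both clusterings put it together or both keep it apart.
    agree = sum((in_a[x] == in_a[y]) == (in_b[x] == in_b[y]) for x, y in pairs)
    return agree / len(pairs)

# Identical clusterings listed in a different order score exactly 1.
assert rand_index([["276", "4629"], ["10701"]], [["10701"], ["276", "4629"]]) == 1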
Example 4
def test_partition_clustering():
    # tractable subset
    pdb_ids = [276, 39299, 38031]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))
    sim_matrix = cluster.similarity_matrix(active_sites)

    assert cluster.cluster_by_partitioning([], {}) == []
    assert cluster.cluster_by_partitioning([active_sites[0]],
                                           {}) == [[active_sites[0]]]
Example 5
def test_distance_to_self():
    # dist(a,a) == 0
    pdb_ids = [276]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    features = find_features.calc_features(active_sites)
    x = scoring.L2(features['276'], features['276'])

    assert x == 0.0
Example 6
def test_hierarchical_clustering():
    # tractable subset
    pdb_ids = [276, 4629, 10701]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    # update this assertion
    # checking that the three sites cluster as expected
    assert np.array_equal(
        cluster.cluster_hierarchically(active_sites, 2)[0], [0, 0, 1])
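
The assertion above expects a flat labelling such as [0, 0, 1] when two clusters are requested. As a point of comparison, here is a minimal sketch that produces the same kind of output with scipy's agglomerative clustering as a stand-in, assuming the active sites have already been turned into numeric feature vectors; the project's cluster.cluster_hierarchically is its own implementation, and only its output format is mirrored here.

import numpy as np
from scipy.cluster.hierarchy import linkage, fcluster

def flat_hierarchical_labels(feature_vectors, k):
    # Average-linkage agglomerative clustering, cut into at most k flat clusters;
    # fcluster returns 1-based labels, so shift to 0-based to match the test above.
    Z = linkage(np.asarray(feature_vectors, dtype=float), method="average")
    return fcluster(Z, t=k, criterion="maxclust") - 1

# Two nearby points share a label and the distant point gets its own.
labels = flat_hierarchical_labels([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0]], 2)
assert labels[0] == labels[1] and labels[0] != labels[2]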
Example 7
def test_partition_clustering():
    # tractable subset
    pdb_ids = [276, 4629, 10701]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    assert cluster.cluster_by_partitioning(active_sites) == [["276"], ["4629"],
                                                             ["10701"]]

    # check empty active sites doesn't crash
    assert cluster.cluster_by_partitioning(None) is None
Example 8
def test_recip_distance():
    # dist(a,b) == dist(b,a)
    pdb_ids = [276, 4629]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    features = find_features.calc_features(active_sites)

    assert scoring.L2(features['276'],
                      features['4629']) == scoring.L2(features['4629'],
                                                      features['276'])
Example 9
def test_hierarchical_clustering():
    # tractable subset
    pdb_ids = [276, 4629, 10701]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    # update this assertion
    assert cluster.test_format_hier(
        cluster.cluster_hierarchically(active_sites)) == [[[276, 4629],
                                                           [10701]],
                                                          [[276, 4629, 10701]]]
Example 10
def test_partition_clustering():
    # tractable subset
    pdb_ids = [276, 4629, 10701, 10701, 10814, 13052, 14181, 15813]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    cluster.get_order_residues(active_sites)
    # update this assertion
    assert len(cluster.cluster_by_partitioning(active_sites, 2)[0]) == 2
    assert len(cluster.cluster_by_partitioning(active_sites, 3)[0]) == 3
Example 11
def test_hierarchical_clustering():
    # tractable subset
    pdb_ids = [276, 52954, 34088]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    sim_matrix = cluster.similarity_matrix(active_sites)

    # update this assertion
    assert cluster.cluster_hierarchically([], {}) == []
    assert cluster.cluster_hierarchically([active_sites[0]],
                                          {}) == [[active_sites[0]]]
Example 12
def test_hierarchical_clustering():
    # tractable subset
    pdb_ids = [276, 4629, 10701]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    features = find_features.calc_features(active_sites)

    for res, feature_vect in features.items():
        hier.hierarchical("V" + res, feature_vect, metric='Euclidian')

    assert hier.hierarchical.Cluster()
Example 13
def test_hierarchical_clustering():
    # tractable subset
    pdb_ids = [276, 4629, 10701]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))
    assignment = cluster.cluster_hierarchically(active_sites)

    assert cluster.convert_indices_to_active_sites(assignment,
                                                   active_sites) == [["276"],
                                                                     ["4629"],
                                                                     ["10701"]]

    pdb_ids = [276, 4629]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    # check empty active sites doesn't crash
    assert cluster.cluster_hierarchically(None, K=10) is None
Example 14
def test_hierarchical_clustering():
    # tractable subset
    pdb_ids = [276, 4629, 10701]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    # clusters more similar clusters together
    clustering = cluster.cluster_hierarchically(active_sites, [2])
    assert get_names(flatten(clustering[0])) in [['4629', '276'], ['10701']]
    assert get_names(flatten(clustering[1])) in [['4629', '276'], ['10701']]

    # len(clustered_list.unique()) == k
    active_sites = read_active_sites("data")
    assert len(cluster.cluster_hierarchically(active_sites, [2])) == 2
    assert len(cluster.cluster_hierarchically(active_sites, [3])) == 3
Example 15
def test_hierarchical_clustering():
    # tractable subset
    pdb_ids = [276, 4629, 10701]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    clusters = []
    for clust in cluster.cluster_hierarchically(active_sites, 1):
        elems = []
        for e in clust:
            elems.append([int(n.name) for n in e])
        clusters.append(elems)

    assert clusters == [[[276], [4629], [10701]], [[276], [10701, 4629]],
                        [[276, 10701, 4629]]]
Example 16
def test_partition_clustering():
    # tractable subset
    pdb_ids = [276, 4629, 10701]

    active_sites = []
    for id in pdb_ids:
        filepath = os.path.join("data", "%i.pdb" % id)
        active_sites.append(io.read_active_site(filepath))

    features = find_features.calc_features(active_sites)

    data = []
    for res, feature_vect in features.items():
        data.append(kmeans.data_point(label=res, data=tuple(feature_vect)))

    km = kmeans.kmeans(data=data, k=3, threshold=3)
    km.cluster()
    # update this assertion
    assert len(km.centroids.items()) == 3
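
The kmeans class used above is project-specific. For orientation, a minimal sketch of Lloyd's k-means over plain feature vectors follows; the helper name and its return value are assumptions made for illustration and do not reproduce the kmeans.data_point / km.centroids API asserted on above.

import random

def lloyd_kmeans(points, k, n_iter=100, seed=0):
    # Plain Lloyd's algorithm: assign each point to its nearest centroid, then
    # recompute centroids as cluster means, until assignments stabilise.
    rng = random.Random(seed)
    centroids = rng.sample(points, k)
    for _ in range(n_iter):
        clusters = [[] for _ in range(k)]
        for p in points:
            dists = [sum((a - b) ** 2 for a, b in zip(p, c)) for c in centroids]
            clusters[dists.index(min(dists))].append(p)
        new_centroids = [
            [sum(dim) / len(group) for dim in zip(*group)] if group else centroids[i]
            for i, group in enumerate(clusters)
        ]
        if new_centroids == centroids:
            break
        centroids = new_centroids
    return clusters

# Three well-separated points split into three singleton clusters when k == 3.
assert sorted(len(c) for c in lloyd_kmeans([[0.0, 0.0], [10.0, 0.0], [0.0, 10.0]], 3)) == [1, 1, 1]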
Example 17
def test_get_order_residues():
    filename_a = os.path.join("data", "46495.pdb")
    activesite_a = io.read_active_site(filename_a)
    assert np.array_equal(activesite_a.newresidues, [])
    cluster.get_order_residues([activesite_a])
    assert np.array_equal(activesite_a.newresidues,
                          ['ASP', 'LYS', 'SER', 'ARG', 'ASP', 'ASP', 'ASP'])