def test_similarity():
    """Distance between two distinct active sites is strictly positive: sign(dist(a, b)) == +."""
    pdb_ids = [276, 4629]
    active_sites = []
    for pdb_id in pdb_ids:  # 'pdb_id', not 'id' — avoid shadowing the builtin
        filepath = os.path.join("data", "%i.pdb" % pdb_id)
        active_sites.append(io.read_active_site(filepath))
    features = find_features.calc_features(active_sites)
    # L2 distance between two different sites must be > 0
    assert scoring.L2(features['276'], features['4629']) > 0
def test_distance_to_self():
    """Distance from an active site to itself is exactly zero: dist(a, a) == 0."""
    pdb_ids = [276]
    active_sites = []
    for pdb_id in pdb_ids:  # 'pdb_id', not 'id' — avoid shadowing the builtin
        filepath = os.path.join("data", "%i.pdb" % pdb_id)
        active_sites.append(io.read_active_site(filepath))
    features = find_features.calc_features(active_sites)
    # Exact equality is intentional here: identical vectors give a 0.0 L2 distance
    # with no floating-point error.
    x = scoring.L2(features['276'], features['276'])
    assert x == 0.0
def test_recip_distance():
    """L2 distance is symmetric: dist(a, b) == dist(b, a)."""
    pdb_ids = [276, 4629]
    active_sites = []
    for pdb_id in pdb_ids:  # 'pdb_id', not 'id' — avoid shadowing the builtin
        filepath = os.path.join("data", "%i.pdb" % pdb_id)
        active_sites.append(io.read_active_site(filepath))
    features = find_features.calc_features(active_sites)
    # Both orderings compute the same pairwise terms, so equality is exact.
    assert scoring.L2(features['276'], features['4629']) == scoring.L2(features['4629'], features['276'])
def test_hierarchical_clustering():
    """Run hierarchical clustering on a tractable three-site subset."""
    pdb_ids = [276, 4629, 10701]
    active_sites = []
    for pdb_id in pdb_ids:  # 'pdb_id', not 'id' — avoid shadowing the builtin
        filepath = os.path.join("data", "%i.pdb" % pdb_id)
        active_sites.append(io.read_active_site(filepath))
    features = find_features.calc_features(active_sites)
    for res, feature_vect in features.items():
        # NOTE(review): 'Euclidian' spelling kept as-is — confirm it matches the
        # metric name hier.hierarchical actually expects ('Euclidean'?).
        hier.hierarchical("V" + res, feature_vect, metric='Euclidian')
    # NOTE(review): this only asserts truthiness of Cluster(); presumably it should
    # inspect the resulting cluster structure — verify against hier's API.
    assert hier.hierarchical.Cluster()
def test_partition_clustering():
    """Run k-means partition clustering on a tractable three-site subset; expect k=3 centroids."""
    pdb_ids = [276, 4629, 10701]
    active_sites = []
    for pdb_id in pdb_ids:  # 'pdb_id', not 'id' — avoid shadowing the builtin
        filepath = os.path.join("data", "%i.pdb" % pdb_id)
        active_sites.append(io.read_active_site(filepath))
    features = find_features.calc_features(active_sites)
    data = []
    for res, feature_vect in features.items():
        data.append(kmeans.data_point(label=res, data=tuple(feature_vect)))
    km = kmeans.kmeans(data=data, k=3, threshold=3)
    km.cluster()
    # update this assertion
    # len(dict) equals len(dict.items()); the .items() call was redundant.
    assert len(km.centroids) == 3