def test_similarity():
    """Check symmetry and self-similarity of ``cluster.compute_similarity``.

    Loads the active sites stored in ``data/276.pdb`` and ``data/4629.pdb``
    and asserts that swapping the arguments yields the same score, and that
    each site compared with itself scores exactly 1.
    """
    site_a, site_b = (
        io.read_active_site(os.path.join("data", pdb_name))
        for pdb_name in ("276.pdb", "4629.pdb")
    )

    # Argument order must not change the score.
    forward = cluster.compute_similarity(site_a, site_b)
    backward = cluster.compute_similarity(site_b, site_a)
    assert forward == backward

    # A site compared with itself scores 1.
    for site in (site_a, site_b):
        assert cluster.compute_similarity(site, site) == 1
def test_similarity():
    """Check ``cluster.compute_similarity`` against known reference values.

    Loads the active sites stored in ``data/276.pdb`` and ``data/4629.pdb``
    and asserts that:

    * the score between the two sites matches the recorded reference value;
    * the score of a site against itself is zero.

    Floating-point results are compared with a tolerance rather than ``==``
    so that harmless changes in summation order or platform arithmetic do
    not break the test.
    """
    import math  # local import: keeps this test independent of file-level imports

    filename_a = os.path.join("data", "276.pdb")
    filename_b = os.path.join("data", "4629.pdb")

    activesite_a = io.read_active_site(filename_a)
    activesite_b = io.read_active_site(filename_b)

    # Reference value recorded from a previous run of compute_similarity.
    assert math.isclose(
        cluster.compute_similarity(activesite_a, activesite_b),
        0.959349593495935,
    )

    # A relative tolerance is meaningless around zero, so use an absolute one.
    assert math.isclose(
        cluster.compute_similarity(activesite_a, activesite_a),
        0.0,
        abs_tol=1e-12,
    )
def test_similarity():
    """Check the infinite cross-score and the (rounded) zero self-score.

    Loads the active sites stored in ``data/276.pdb`` and ``data/4629.pdb``.
    The score between the two different sites is expected to be positive
    infinity; the score of the first site against itself is expected to be
    0.0 after rounding to one decimal place.
    """
    path_a = os.path.join("data", "276.pdb")
    path_b = os.path.join("data", "4629.pdb")
    site_a = io.read_active_site(path_a)
    site_b = io.read_active_site(path_b)

    cross_score = cluster.compute_similarity(site_a, site_b)
    assert cross_score == float("inf")

    # Rounded before comparing, so a tiny non-zero residue still passes.
    self_score = cluster.compute_similarity(site_a, site_a)
    assert round(self_score, 1) == 0.0
def test_similarity():
    """Check ``cluster.compute_similarity`` against known reference values.

    Loads the active sites stored in ``data/276.pdb`` and ``data/4629.pdb``
    and asserts that:

    * the distance between the two sites matches the recorded reference
      value;
    * the distance between a site and itself is zero.

    Floating-point results are compared with a tolerance rather than ``==``
    so that harmless changes in summation order or platform arithmetic do
    not break the test.
    """
    import math  # local import: keeps this test independent of file-level imports

    filename_a = os.path.join("data", "276.pdb")
    filename_b = os.path.join("data", "4629.pdb")

    activesite_a = io.read_active_site(filename_a)
    activesite_b = io.read_active_site(filename_b)

    # The distance between the two sites is as expected.
    assert math.isclose(
        cluster.compute_similarity(activesite_a, activesite_b),
        26.464581285329228,
    )

    # The distance between a site and itself is 0. A relative tolerance is
    # meaningless around zero, so use an absolute one.
    assert math.isclose(
        cluster.compute_similarity(activesite_a, activesite_a),
        0.0,
        abs_tol=1e-12,
    )
def test_similarity():
    """Check basic metric properties of ``cluster.compute_similarity``.

    Loads the active sites stored in ``data/276.pdb`` and ``data/4629.pdb``
    and asserts symmetry, zero self-distance, and non-negativity of the
    distance between the two different sites.
    """
    filename_a = os.path.join("data", "276.pdb")
    filename_b = os.path.join("data", "4629.pdb")

    activesite_a = io.read_active_site(filename_a)
    activesite_b = io.read_active_site(filename_b)

    distance_ab = cluster.compute_similarity(activesite_a, activesite_b)
    distance_ba = cluster.compute_similarity(activesite_b, activesite_a)

    # The distance between A and B is the same as between B and A.
    assert distance_ab == distance_ba

    # The distance between A and A is 0.
    assert cluster.compute_similarity(activesite_a, activesite_a) == 0.0

    # The distance is never negative. This is checked on the A-B pair:
    # checking A-A here would be vacuous, because it was just asserted to
    # be exactly 0.0 above.
    assert distance_ab >= 0.0
def test_similarity():
    """Check ``cluster.compute_similarity`` on the sites' ``counts`` values.

    Loads the active sites stored in ``data/276.pdb`` and ``data/4629.pdb``
    and, using each site's ``counts`` attribute, asserts that the result
    equals ``np.linalg.norm`` of the difference of the two ``counts``, that
    a ``counts`` value compared with itself gives 0, and that swapping the
    arguments gives the same result.
    """
    site_a = io.read_active_site(os.path.join("data", "276.pdb"))
    site_b = io.read_active_site(os.path.join("data", "4629.pdb"))
    counts_a = site_a.counts
    counts_b = site_b.counts

    # Matches the norm of the element-wise difference.
    observed = cluster.compute_similarity(counts_a, counts_b)
    expected = np.linalg.norm(counts_a - counts_b)
    assert observed == expected

    # Identical inputs give 0.
    assert cluster.compute_similarity(counts_a, counts_a) == 0

    # Argument order does not matter.
    forward = cluster.compute_similarity(counts_a, counts_b)
    backward = cluster.compute_similarity(counts_b, counts_a)
    assert forward == backward
def test_distance():
    """Check that two copies of the same site get a similarity of 100.0.

    ``data/276.pdb`` is read twice, producing two separately loaded active
    sites, which are then compared with ``cluster.compute_similarity``.
    """
    pdb_path = os.path.join("data", "276.pdb")
    first_copy = io.read_active_site(pdb_path)
    second_copy = io.read_active_site(pdb_path)

    # Identical sites are expected to be 100% similar.
    score = cluster.compute_similarity(first_copy, second_copy)
    assert score == 100.0
def test_similarity():
    """Check basic metric properties of ``cluster.compute_similarity``.

    Loads the active sites stored in ``data/276.pdb`` and ``data/4629.pdb``
    and asserts zero self-distance, symmetry, and non-negativity.
    """
    filename_a = os.path.join("data", "276.pdb")
    filename_b = os.path.join("data", "4629.pdb")

    activesite_a = io.read_active_site(filename_a)
    activesite_b = io.read_active_site(filename_b)

    # First test: a site is at distance 0 from itself.
    assert cluster.compute_similarity(activesite_a, activesite_a) == 0.0

    # Second test: symmetry.
    dist_ab = cluster.compute_similarity(activesite_a, activesite_b)
    dist_ba = cluster.compute_similarity(activesite_b, activesite_a)
    assert dist_ab == dist_ba

    # Third test: non-negativity. Each value gets its own assert;
    # ``assert x, y >= 0`` would only test the truthiness of ``x`` and use
    # ``y >= 0`` as the failure message.
    assert dist_ab >= 0
    assert dist_ba >= 0
def test_similarity():
    """Check the ``"hydrophobicity"`` comparator against a known value.

    Loads the active sites stored in ``data/276.pdb`` and ``data/4629.pdb``
    and asserts that ``cluster.compute_similarity`` called with
    ``comparator="hydrophobicity"`` returns exactly 11.1556.
    """
    site_a, site_b = [
        io.read_active_site(os.path.join("data", pdb_name))
        for pdb_name in ("276.pdb", "4629.pdb")
    ]

    score = cluster.compute_similarity(
        site_a, site_b, comparator="hydrophobicity"
    )
    assert score == 11.1556
def test_similarity():
    """Check the score between sites 46495 and 23812 against a known value.

    Loads the active sites stored in ``data/46495.pdb`` and
    ``data/23812.pdb``, passes each one (wrapped in a one-element list) to
    ``cluster.get_order_residues``, and then asserts that
    ``cluster.compute_similarity`` returns 13 for the pair.
    """
    site_a = io.read_active_site(os.path.join("data", "46495.pdb"))
    site_b = io.read_active_site(os.path.join("data", "23812.pdb"))

    # The return value is discarded, so this presumably prepares each site
    # in place before comparison -- TODO confirm against get_order_residues.
    for site in (site_a, site_b):
        cluster.get_order_residues([site])

    score = cluster.compute_similarity(site_a, site_b)
    assert score == 13
def assign_single_site_to_cluster(site, centroids):
    '''
    Return the key of the centroid closest to the given site.

    The site's ``counts`` is compared with every centroid value using
    ``cl.compute_similarity``; the key whose centroid gives the smallest
    result is returned.

    In: site (object with a ``counts`` attribute),
        centroids (mapping of cluster key -> centroid value)
    Out: the key of the closest centroid

    Notes:
    - Results are indexed by the computed value, so when two centroids give
      the same value the one visited later replaces the earlier one.
    - ``min`` raises ValueError when ``centroids`` is empty.
    '''
    position = site.counts
    # Map each computed value to the key of the centroid that produced it.
    key_by_distance = {
        cl.compute_similarity(position, center): key
        for key, center in centroids.items()
    }
    return key_by_distance[min(key_by_distance)]
def test_similarity():
    """Check ``compute_similarity`` on a known pair and on the whole data set.

    Loads the active sites stored in ``data/276.pdb`` and ``data/4629.pdb``
    and asserts the expected value (to three decimals), zero self-distance
    and symmetry. Then reads every active site via
    ``read_active_sites("data")`` and asserts that the value for every
    ordered pair of sites at different positions is strictly positive.
    """
    site_a = io.read_active_site(os.path.join("data", "276.pdb"))
    site_b = io.read_active_site(os.path.join("data", "4629.pdb"))

    # dist(a, b) == expected
    reference = compute_similarity(site_a, site_b)
    assert round(reference, 3) == 13.857

    # dist(a, a) == 0
    self_distance = compute_similarity(site_a, site_a)
    assert self_distance == 0

    # dist(a, b) == dist(b, a)
    forward = compute_similarity(site_a, site_b)
    backward = compute_similarity(site_b, site_a)
    assert forward == backward

    # dist(x, y) > 0 for every pair of sites at different positions
    active_sites = read_active_sites("data")
    for row, first in enumerate(active_sites):
        for col, second in enumerate(active_sites):
            if row == col:
                continue
            assert compute_similarity(first, second) > 0
def compute_similarity_matrix(sites):
    '''
    Build the all-against-all similarity table for a list of sites
    (copy of the similarity-matrix computation from utils).

    Entry (row i, column j) is ``cl.compute_similarity`` applied to the
    ``counts`` of site i and the ``counts`` of site j.

    In: list of sites (each with ``name`` and ``counts`` attributes)
    Out: pandas DataFrame whose rows and columns are labelled by site name
    '''
    labels = [site.name for site in sites]
    table = [
        [cl.compute_similarity(row_site.counts, col_site.counts)
         for col_site in sites]
        for row_site in sites
    ]
    return pd.DataFrame(table, columns=labels, index=labels)
def compute_new_cluster_sim(new_clust_avg, simMat_update, sites_dict, clusters):
    '''
    Compute the similarity of the newly created cluster to everything
    currently labelled in the similarity matrix.

    For each column label of ``simMat_update``:
    - if the label is a key of ``sites_dict``, that site's ``counts`` is
      used;
    - otherwise the label is looked up in ``clusters`` and reduced with
      ``compute_cluster_center``.
    The chosen value is compared with ``new_clust_avg`` using
    ``cl.compute_similarity``.

    In: new_clust_avg, simMat_update (object with ``columns``),
        sites_dict (label -> site), clusters (label -> cluster members)
    Out: list with one value per column, followed by a trailing 0.0
         (presumably the new cluster against itself -- confirm with caller)

    NOTE(review): ``aa_df`` is not a parameter; it is resolved from the
    enclosing/module scope at call time -- confirm it is defined there.
    '''
    similarities = []
    for label in simMat_update.columns:
        if label in sites_dict:
            profile = sites_dict[label].counts
        else:
            profile = compute_cluster_center(clusters[label], sites_dict, aa_df)
        similarities.append(cl.compute_similarity(new_clust_avg, profile))

    similarities.append(0.0)
    return similarities
def compute_similarity_matrix(sites):
    '''
    Compute the all-by-all similarity matrix of the given sites using the
    similarity function from the cluster module (``cl.compute_similarity``).

    Entry (row i, column j) is the result for the ``counts`` of site i and
    the ``counts`` of site j.

    In: list of sites (each with ``name`` and ``counts`` attributes)
    Out: similarity matrix as a pandas DataFrame, with rows and columns
         labelled by site name
    '''
    site_names = [entry.name for entry in sites]

    def similarity_row(anchor):
        # One row: the anchor site compared against every site, in order.
        return [cl.compute_similarity(anchor.counts, other.counts)
                for other in sites]

    matrix_rows = [similarity_row(anchor) for anchor in sites]
    return pd.DataFrame(matrix_rows, columns=site_names, index=site_names)
def check_change_in_centroids(old_centroids, new_centroids):
    '''
    Measure how far the centroids have moved between two iterations.

    For every key of ``old_centroids`` the old and new centroid values are
    compared with ``cl.compute_similarity`` and the results are summed.
    Every key of ``old_centroids`` must also be present in
    ``new_centroids``.

    In: old_centroids, new_centroids (mappings of cluster key -> centroid)
    Out: the summed movement (0 when ``old_centroids`` is empty)
    '''
    return sum(
        cl.compute_similarity(previous, new_centroids[key])
        for key, previous in old_centroids.items()
    )