def test_node_distance(tree):
    """
    Check the distance calculations of the tree and its nodes
    (`distance_to()` on nodes and `get_distance()` on the tree; the
    latter presumably relies on `lowest_common_ancestor()` -- confirm
    against the tree implementation).
    """
    n_leaves = len(tree)
    root = tree.root

    def leaf_distance_matrix(some_tree):
        # Collect the distance of every ordered pair of leaf indices
        matrix = np.zeros((n_leaves, n_leaves))
        for row, col in np.ndindex(matrix.shape):
            matrix[row, col] = some_tree.get_distance(row, col)
        return matrix

    # The tree comes from UPGMA
    # -> every leaf node must have the same distance to the root
    expected_root_dist = root.distance_to(tree.leaves[0])
    for leaf_node in tree.leaves:
        assert leaf_node.distance_to(root) == expected_root_dist

    # Spot checks for topological distances
    assert tree.get_distance(0, 19, True) == 9
    assert tree.get_distance(4, 2, True) == 10

    # The complete set of pairwise leaf distances should allow UPGMA
    # to rebuild an equivalent tree
    ref_dist_mat = leaf_distance_matrix(tree)
    assert np.allclose(ref_dist_mat, ref_dist_mat.T)

    new_tree = phylo.upgma(ref_dist_mat)
    test_dist_mat = leaf_distance_matrix(new_tree)
    assert np.allclose(test_dist_mat, ref_dist_mat)
# The UPGMA method works on distances, whereas a substitution matrix
# describes similarities, so a conversion function is defined first.
# Here, the distance of two symbols is the mean of their
# self-similarities minus their similarity to each other.
#
# At the end the resulting (phylogenetic) tree is drawn as dendrogram.
def get_distance(similarities, i, j):
    self_similarity_mean = (similarities[i, i] + similarities[j, j]) / 2
    return self_similarity_mean - similarities[i, j]


distances = np.zeros(similarities.shape)
# Visit every (row, column) position of the matrix in row-major order
for i, j in np.ndindex(distances.shape):
    distances[i, j] = get_distance(similarities, i, j)

tree = phylo.upgma(distances)

fig = plt.figure(figsize=(8.0, 5.0))
ax = fig.add_subplot(111)
# The 3-letter amino acid codes are used as labels
labels = [
    seq.ProteinSequence.convert_letter_1to3(symbol).capitalize()
    for symbol in matrix.get_alphabet1()
]
graphics.plot_dendrogram(ax, tree, orientation="top", labels=labels)
ax.set_ylabel("Distance")
# A grid makes the distances easier to read off
ax.yaxis.grid(color="lightgray")
plt.show()
def tree(distances):
    """
    Build a tree from the given distances by passing them to
    `phylo.upgma()` and return the result.
    """
    upgma_tree = phylo.upgma(distances)
    return upgma_tree