def test_neighbor_joining():
    """
    Check that `phylo.neighbor_joining()` reproduces a known reference
    tree for a fixed 6x6 distance matrix.
    """
    # Symmetric pairwise distance matrix with a zero diagonal
    distance_rows = [
        [0,  5,  4,  7,  6,  8],
        [5,  0,  7, 10,  9, 11],
        [4,  7,  0,  7,  6,  8],
        [7, 10,  7,  0,  5,  9],
        [6,  9,  6,  5,  0,  8],
        [8, 11,  8,  9,  8,  0],
    ]
    dist = np.array(distance_rows)

    # Assemble the expected tree bottom-up: first one leaf per matrix
    # row/column index, then the intermediate nodes joining them.
    # The second positional argument of an intermediate node is
    # presumably the list of distances to its children, in the same
    # order as the children -- confirm against the `TreeNode` API.
    leaves = [phylo.TreeNode(index=i) for i in range(6)]
    node_0_1 = phylo.TreeNode([leaves[0], leaves[1]], [1, 4])
    node_0_1_2 = phylo.TreeNode([node_0_1, leaves[2]], [1, 2])
    node_3_4 = phylo.TreeNode([leaves[3], leaves[4]], [3, 2])
    root = phylo.TreeNode([node_0_1_2, node_3_4, leaves[5]], [1, 1, 5])
    ref_tree = phylo.Tree(root)

    test_tree = phylo.neighbor_joining(dist)

    assert test_tree == ref_tree
fasta_file = fasta.FastaFile.read( entrez.fetch_single_file(ids, file_name=None, db_name="protein", ret_type="fasta")) sequences = [seq.ProteinSequence(seq_str) for seq_str in fasta_file.values()] # Create multiple sequence alignment with Clustal Omega alignment = clustalo.ClustalOmegaApp.align(sequences) # The distance measure required for the tree calculation is the # percentage of non-identical amino acids in the respective two # sequences distances = 1 - align.get_pairwise_sequence_identity(alignment, mode="shortest") # Create tree via neighbor joining tree = phylo.neighbor_joining(distances) # Convert to NetworkX graph #For the graph visualization, the edge directions are unnecessary graph = tree.as_graph().to_undirected() fig = plt.figure(figsize=(8.0, 8.0)) ax = fig.gca() ax.axis("off") # Calculate position of nodes in the plot pos = nx.kamada_kawai_layout(graph) # Assign the gene names to the nodes that represent a reference index node_labels = {i: name for i, name in enumerate(genes)} nx.draw_networkx( graph, pos, ax=ax,