# A substitution matrix expresses similarity, whereas the UPGMA method
# requires distances, so a conversion function must be defined first.
# In this case, the distance of two symbols is the mean of the
# similarities of the two symbols to themselves minus the similarity
# of the two symbols to each other.
#
# Finally the obtained (phylogenetic) tree is plotted as dendrogram.


def get_distance(similarities, i, j):
    """
    Convert the similarity of the symbols `i` and `j` into a distance.

    Parameters
    ----------
    similarities
        Similarity table, indexed as ``similarities[row, column]``
        (presumably the 2D score matrix of the substitution matrix --
        confirm against the caller).
    i, j
        Indices of the two symbols to compare.

    Returns
    -------
    The mean of ``similarities[i, i]`` and ``similarities[j, j]``
    minus ``similarities[i, j]``.
    """
    mean_self_similarity = (similarities[i, i] + similarities[j, j]) / 2
    return mean_self_similarity - similarities[i, j]


# Fill the distance matrix cell by cell (row-major order)
distances = np.zeros(similarities.shape)
for i, j in np.ndindex(*distances.shape):
    distances[i, j] = get_distance(similarities, i, j)

tree = phylo.upgma(distances)

fig = plt.figure(figsize=(8.0, 5.0))
ax = fig.add_subplot(111)
# Use the 3-letter amino acid code as label
labels = [
    seq.ProteinSequence.convert_letter_1to3(symbol).capitalize()
    for symbol in matrix.get_alphabet1()
]
graphics.plot_dendrogram(
    ax,
    tree,
    orientation="top",
    labels=labels,
)
ax.set_ylabel("Distance")
# Add grid for clearer distance perception
ax.yaxis.grid(color="lightgray")
plt.show()
# Build the tree, starting from its root node
tree = phylo.Tree(root=root)
# A tree can be written in Newick notation
print("Tree:", tree.to_newick(labels=fruits))
# The distances may be left out of the notation
print(
    "Tree w/o distances:",
    tree.to_newick(labels=fruits, include_distance=False),
)
# The distance between two leaves can be measured
distance = tree.get_distance(
    fruits.index("Apple"),
    fruits.index("Banana"),
)
print("Distance Apple-Banana:", distance)

########################################################################
# A tree can also be plotted as dendrogram.

fig, ax = plt.subplots(figsize=(6.0, 6.0))
graphics.plot_dendrogram(ax, tree, labels=fruits)
fig.tight_layout()

########################################################################
# From distances to trees
# ^^^^^^^^^^^^^^^^^^^^^^^
#
# When you want to create a :class:`Tree` from distances, obtained for
# example from sequence alignments, you can use the UPGMA algorithm,
# which is implemented in the function of the same name,
# :func:`upgma()`.

# Pairwise distance matrix, one row per item
distances = np.array(
    [
        [0, 17, 21, 31, 23],
        [17, 0, 30, 34, 21],
        [21, 30, 0, 28, 39],
        [31, 34, 28, 0, 43],
        [23, 21, 39, 43, 0],
    ]
)
tree = phylo.upgma(distances)
fig, ax = plt.subplots(figsize=(6.0, 3.0))
def _show_tree(tree):
    """
    Plot `tree` as a dendrogram on a new figure and display it.

    The plotting packages are imported inside the function instead of
    at module level.
    """
    import biotite.sequence.graphics as graphics
    import matplotlib.pyplot as plt

    _figure, axes = plt.subplots()
    graphics.plot_dendrogram(axes, tree)
    plt.show()
app.join()
alignment = app.get_alignment()
print(alignment)

########################################################################
# Most MSA software provides more information than the mere alignment.
# For instance, the guide tree that was used for the alignment can be
# obtained from the MUSCLE output.

import matplotlib.pyplot as plt
import biotite.sequence.graphics as graphics

tree = app.get_guide_tree()
fig, ax = plt.subplots()
# Label each leaf with the string form of its sequence
labels = [str(sequence) for sequence in (seq1, seq2, seq3, seq4)]
graphics.plot_dendrogram(ax, tree, labels=labels)
ax.set_xlabel("Distance")
fig.tight_layout()

########################################################################
# For the lazy people there is also a convenience method
# that handles the :class:`Application` execution internally.
# However, this shortcut returns only the :class:`Alignment`.

alignment = muscle.MuscleApp.align([seq1, seq2, seq3, seq4])

########################################################################
# The alternatives to MUSCLE are Clustal-Omega and MAFFT.
# To use them, simply replace :class:`MuscleApp` with
# :class:`ClustalOmegaApp` or :class:`MafftApp` and you are done.
for name, seq_str in zip(UNIPROT_IDS.keys(), fasta_file.values()) } ### create a simple phylogenetic tree # create MSA alignment = clustalo.ClustalOmegaApp.align(list(sequences.values())) # build simple tree based on deviation from sequence identity distances = 1 - align.get_pairwise_sequence_identity(alignment, mode="shortest") tree = phylo.upgma(distances) ### plot the tree fig, ax = plt.subplots(1, 1, figsize=(8, 5)) graphics.plot_dendrogram(ax, tree, orientation="left", labels=list(UNIPROT_IDS.keys()), show_distance=False, linewidth=2) ax.grid(False) ax.set_xticks([]) # distance indicator indicator_len = 0.1 indicator_start = (ax.get_xlim()[0] + ax.get_xlim()[1] * 0.02, ax.get_ylim()[1] - ax.get_ylim()[1] * 0.15) indicator_stop = (indicator_start[0] + indicator_len, indicator_start[1]) indicator_center = ((indicator_start[0] + indicator_stop[0]) / 2, (indicator_start[1] + 0.25)) ax.annotate("", xy=indicator_start, xytext=indicator_stop,