def get_dn_ds_tree(self, dn_ds_method="NG86", tree_method="UPGMA", codon_table=default_codon_table):
    """Build one tree from the dN matrix and one from the dS matrix.

    Arguments:
     - dn_ds_method - forwarded as ``method`` to ``self.get_dn_ds_matrix``;
       the original documentation lists NG86, LWL85, YN00 and ML.
     - tree_method  - "UPGMA" or "NJ".
     - codon_table  - forwarded to ``self.get_dn_ds_matrix``.

    Returns a ``(dn_tree, ds_tree)`` tuple.
    Raises RuntimeError when ``tree_method`` is neither "UPGMA" nor "NJ".
    """
    from Bio.Phylo.TreeConstruction import DistanceTreeConstructor

    # The matrices are computed before the tree method is validated.
    dn_matrix, ds_matrix = self.get_dn_ds_matrix(
        method=dn_ds_method, codon_table=codon_table
    )
    dn_builder = DistanceTreeConstructor()
    ds_builder = DistanceTreeConstructor()

    if tree_method not in ("UPGMA", "NJ"):
        raise RuntimeError("Unknown tree method ({0}). Only NJ and UPGMA "
                           "are accepted.".format(tree_method))
    if tree_method == "UPGMA":
        return dn_builder.upgma(dn_matrix), ds_builder.upgma(ds_matrix)
    return dn_builder.nj(dn_matrix), ds_builder.nj(ds_matrix)
def get_dn_ds_tree(self, dn_ds_method="NG86", tree_method="UPGMA", codon_table=None):
    """Build one tree from the dN matrix and one from the dS matrix.

    Arguments:
     - dn_ds_method - forwarded as ``method`` to ``self.get_dn_ds_matrix``;
       the original documentation lists NG86, LWL85, YN00 and ML.
     - tree_method  - "UPGMA" or "NJ".
     - codon_table  - codon table to use; when None, the table stored under
       id 1 in ``CodonTable.generic_by_id`` is used.

    Returns a ``(dn_tree, ds_tree)`` tuple.
    Raises RuntimeError when ``tree_method`` is neither "UPGMA" nor "NJ".
    """
    from Bio.Phylo.TreeConstruction import DistanceTreeConstructor

    table = CodonTable.generic_by_id[1] if codon_table is None else codon_table
    # The matrices are computed before the tree method is validated.
    dn_matrix, ds_matrix = self.get_dn_ds_matrix(
        method=dn_ds_method, codon_table=table
    )
    dn_builder = DistanceTreeConstructor()
    ds_builder = DistanceTreeConstructor()

    if tree_method not in ("UPGMA", "NJ"):
        raise RuntimeError(f"Unknown tree method ({tree_method})."
                           " Only NJ and UPGMA are accepted.")
    if tree_method == "UPGMA":
        return dn_builder.upgma(dn_matrix), ds_builder.upgma(ds_matrix)
    return dn_builder.nj(dn_matrix), ds_builder.nj(ds_matrix)
def upgma_tree_constructor(x):
    """Print a UPGMA tree built from alignment *x*.

    Distances come from ``DistanceCalculator("identity")``. The tree is
    printed with ``print`` and then rendered with ``Phylo.draw_ascii``.
    Nothing is returned.
    """
    distances = DistanceCalculator("identity").get_distance(x)
    tree = DistanceTreeConstructor().upgma(distances)
    print(tree)
    Phylo.draw_ascii(tree)
def nj_tree_constructor(x):
    """Print a neighbor-joining tree built from alignment *x*.

    Distances come from ``DistanceCalculator("identity")``. The tree is
    printed with ``print`` and then rendered with ``Phylo.draw_ascii``.
    Nothing is returned.
    """
    distances = DistanceCalculator("identity").get_distance(x)
    tree = DistanceTreeConstructor().nj(distances)
    print(tree)
    Phylo.draw_ascii(tree)
def fastaToNJTree(fastaFile, outputFile):
    """Read a FASTA alignment and write its neighbor-joining tree as Newick.

    :param fastaFile: path (or handle) passed to ``AlignIO.read`` as 'fasta'.
    :param outputFile: destination passed to ``Phylo.write``.
    """
    aln = AlignIO.read(fastaFile, 'fasta')
    calculator = DistanceCalculator('identity')
    # build_tree() computes the distance matrix itself through the supplied
    # calculator, so no separate get_distance() call is needed here.
    constructor = DistanceTreeConstructor(calculator, 'nj')
    tree = constructor.build_tree(aln)
    Phylo.write(tree, outputFile, 'newick')
def dna(file_path, file_format, algorithm):
    """Print an alignment, its distance matrix and the resulting tree.

    :param file_path: alignment file passed to ``AlignIO.read``.
    :param file_format: alignment format name passed to ``AlignIO.read``.
    :param algorithm: 'upgma' or 'nj' (case-insensitive).

    For any other algorithm an error is echoed through ``click`` and the
    function returns without drawing anything.
    """
    # Read the alignment and show it
    aln = AlignIO.read(file_path, file_format)
    print(aln)

    # Calculate the distance matrix
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)

    # Print the matrix itself, not the calculator object
    print('\nDistance Matrix\n===================')
    print(dm)

    # Construct the phylogenetic tree using the chosen algorithm
    constructor = DistanceTreeConstructor()
    method = algorithm.lower()
    if method == 'upgma':
        tree = constructor.upgma(dm)
    elif method == 'nj':
        tree = constructor.nj(dm)
    else:
        click.echo('Invalid algorithm!')
        # No tree exists on this path; stop instead of failing on an
        # unbound name below.
        return

    # Draw the phylogenetic tree
    Phylo.draw(tree)

    # Print the phylogenetic tree in the terminal
    print('\nPhylogenetic Tree\n===================')
    Phylo.draw_ascii(tree)
def build_trees(filename, tree_name):
    """Align ``<filename>.fa`` with ClustalW and plot UPGMA and NJ trees.

    :param filename: path stem; ``<filename>.fa`` is the ClustalW input and
        ``<filename>.aln`` is read back as the alignment.
    :param tree_name: text placed in front of the method name in each title.
    """
    def leaf_only_label(clade):
        # Clades whose name starts with "Inner" get an empty label; every
        # other clade is returned as-is.
        if clade.name.startswith("Inner"):
            return ""
        return clade

    # Run the ClustalW command line, then load the alignment it produced
    run_clustalw = ClustalwCommandline("clustalw", infile="{}.fa".format(filename))
    run_clustalw()
    msa = AlignIO.read("{}.aln".format(filename), format="clustal")

    # One distance matrix feeds both tree-building methods
    distances = DistanceCalculator('blosum62').get_distance(msa)
    builder = DistanceTreeConstructor()
    tree_by_upgma = builder.upgma(distances)
    tree_by_nj = builder.nj(distances)

    # Draw each tree in its own figure, titled before being shown
    Phylo.draw(tree_by_upgma, label_func=leaf_only_label, do_show=False)
    plt.title("{} × upgma".format(tree_name))
    plt.show()

    Phylo.draw(tree_by_nj, label_func=leaf_only_label, do_show=False)
    plt.title("{} × nj".format(tree_name))
    plt.show()
def make_newick_tree(dm, outgroup="KE136308.1"):
    """Build UPGMA and NJ trees from *dm*, both rooted on the same outgroup.

    :param dm: distance matrix accepted by ``DistanceTreeConstructor``.
    :param outgroup: name of the clade used to root both trees; the default
        keeps the accession this function was originally hard-wired to.
    :return: ``(upgma_tree, nj_tree)``.
    """
    constructor = DistanceTreeConstructor()
    upgmatree = constructor.upgma(dm)
    njtree = constructor.nj(dm)
    upgmatree.root_with_outgroup({'name': outgroup})
    njtree.root_with_outgroup({'name': outgroup})
    return upgmatree, njtree
def get_tree(self, chrom, start=1, end=None, samples=None, return_format="tree_obj"):
    """Build a neighbor-joining tree for a region.

    The request is printed, the names and matrix are fetched with
    ``self.get_matrix(..., return_format="Phylo")``, and a tree is built
    from them.

    :return: the tree object when ``return_format`` is "tree_obj"; the
        stripped Newick text when it is "newick"; None for any other value.
    """
    print("chrom: {} start: {} end: {} samples: {}".format(
        chrom, start, end, samples))
    names, matrix = self.get_matrix(
        chrom, start=start, end=end, samples=samples, return_format="Phylo")
    nj_tree = DistanceTreeConstructor().nj(_DistanceMatrix(names, matrix))

    if return_format == "tree_obj":
        return nj_tree
    if return_format != "newick":
        return None

    # Serialise through an in-memory buffer and drop surrounding whitespace
    buffer = StringIO()
    Phylo.write(nj_tree, buffer, "newick")
    return buffer.getvalue().strip()
def consensus(msa):
    """Print the neighbor-joining tree of the records in *msa*.

    :param msa: records wrapped in a ``MultipleSeqAlignment``.

    Distances use the 'identity' model. Nothing is returned.
    """
    alignment = MultipleSeqAlignment(msa)
    calculator = DistanceCalculator('identity')
    # build_tree() derives the distance matrix through the calculator, so it
    # is not computed separately here.
    constructor = DistanceTreeConstructor(calculator, 'nj')
    tree = constructor.build_tree(alignment)
    # Function-call form works on both Python 2 and Python 3.
    print(tree)
def printGeneTree(self):
    """Print the distance matrix and UPGMA gene tree of ``self.newPhylip``.

    The PHYLIP alignment at ``self.newPhylip`` is read, an 'identity'
    distance matrix is computed and printed, and a UPGMA tree is built
    from it. The tree is drawn with ``Phylo.draw`` (matplotlib) and then
    with ``Phylo.draw_ascii`` in the terminal.

    input:  ``self.newPhylip``, a .phy file of aligned sequences (per the
            original documentation, coronavirus gene sequences).
    output: nothing is returned; everything goes to stdout and a plot.
    """
    matrix_banner = '\n======================================== DISTANCE MATRIX =======================================\n'
    tree_banner = '\n========================================= GENE TREE ===========================================\n'

    alignment = AlignIO.read(self.newPhylip, 'phylip')
    distances = DistanceCalculator('identity').get_distance(alignment)

    print(matrix_banner)
    print(distances, "\n\n")

    gene_tree = DistanceTreeConstructor().upgma(distances)

    print(tree_banner)
    Phylo.draw(gene_tree)        # graphical rendering
    Phylo.draw_ascii(gene_tree)  # terminal rendering
def summarise_dist(self, rf_results: RfResults, dir_out):
    """Write a neighbor-joining tree and a clustered heatmap of RF distances.

    Runs twice, first with the normalised value and then with the raw value
    of each pair in ``rf_results.data``. Each pass writes a Newick tree and
    an SVG heatmap into ``dir_out``.

    :param rf_results: object whose ``data`` maps ``(tid_a, tid_b)`` pairs to
        ``(rf, norm_rf)`` tuples.
    :param dir_out: directory that receives the four output files.
    """
    for use_norm in (True, False):
        # Pick the output paths and plot title for this pass
        if use_norm:
            path_out = os.path.join(dir_out, 'rf_normed.tree')
            path_hm = os.path.join(dir_out, 'rf_normed_heatmap.svg')
            plt_title = 'Normalised Robinson-Foulds Distance'
        else:
            path_out = os.path.join(dir_out, 'rf_un_normed.tree')
            path_hm = os.path.join(dir_out, 'rf_un_normed_heatmap.svg')
            plt_title = '(un)Normalised Robinson-Foulds Distance'
        # Symmetric lookup of the chosen metric, plus the set of all ids seen
        metrics = defaultdict(dict)
        names = set()
        for (tid_a, tid_b), (rf, norm_rf) in rf_results.data.items():
            if use_norm:
                metrics[tid_a][tid_b] = norm_rf
                metrics[tid_b][tid_a] = norm_rf
            else:
                metrics[tid_a][tid_b] = rf
                metrics[tid_b][tid_a] = rf
            names.add(tid_a)
            names.add(tid_b)
        labels = sorted(list(names))
        # mat_vals holds the lower-triangular rows (diagonal included) used
        # for the distance matrix; mat is the dense array used for the heatmap.
        mat_vals = list()
        mat = np.zeros((len(labels), len(labels)))
        for i in range(len(labels)):
            cur_row = list()
            tid_a = labels[i]
            for j in range(i + 1):
                tid_b = labels[j]
                if tid_a == tid_b:
                    cur_row.append(0.0)
                else:
                    # Raises KeyError if this pair is absent from rf_results.data
                    cur_row.append(metrics[tid_a][tid_b])
                    mat[i, j] = metrics[tid_a][tid_b]
            mat_vals.append(cur_row)
        # Only the lower triangle was filled (diagonal is zero), so adding
        # the transpose mirrors it into a full symmetric matrix.
        mat = mat + mat.T
        # Newick
        dm = DistanceMatrix(names=labels, matrix=mat_vals)
        constructor = DistanceTreeConstructor()
        tree = constructor.nj(dm)
        Phylo.write(tree, path_out, 'newick')
        # Heatmap
        cmap = sns.cubehelix_palette(100, reverse=True)
        sns.set(font_scale=1)
        fig_size = (15, 15)
        rf_df = pd.DataFrame(mat, columns=labels, index=labels)
        sns.clustermap(rf_df, annot=True, fmt='.3f', cmap=cmap, figsize=fig_size).fig.suptitle(plt_title)
        plt.savefig(path_hm)
def plot_phylo_tree(align: MultipleSeqAlignment, accession_numbers: dict):
    """
    Plots a phylogenetic tree

    :param align: MultipleSeqAlignment with the alignment result to be plotted
    :param accession_numbers: dict mapping accession numbers (the part of each
        terminal name before its last '.') to human-readable names
    :return: figure handle of the plotted phylogenetic tree
    :raises KeyError: if an accession is missing from ``accession_numbers``
    :raises TypeError: if a terminal name contains no '.', so the pattern
        does not match
    """
    # calculate distance - https://biopython.org/wiki/Phylo
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(align)

    # construct a tree
    constructor = DistanceTreeConstructor()
    tree = constructor.upgma(dm)

    # remove the names of the non-terminals for better visual appeal
    for non_terminal in tree.get_nonterminals():
        non_terminal.name = ''

    # change accession numbers into more understandable names; the raw
    # string keeps the pattern unchanged while avoiding an invalid "\S"
    # escape in a normal string literal
    for terminal in tree.get_terminals():
        terminal.name = accession_numbers[re.match(r"(^\S*)(?=\.)", terminal.name)[0]]

    # draw_ascii writes the tree itself and returns None, so it is not
    # wrapped in print() (that would emit a stray "None" line)
    Phylo.draw_ascii(tree)

    # plot the tree
    fig, ax = plt.subplots(1, 1)

    # draw the resulting tree
    Phylo.draw(tree, show_confidence=False, axes=ax, do_show=False)
    ax.set_xlim(right=0.8)
    return fig
def construct_tree(gene_name, with_marburg=1, algorithm='UPGMA'):
    """Build, save and return a tree from a stored edit-distance matrix.

    :param gene_name: gene whose matrix is read from
        ``./Output/edit_matrices/<gene_name>.csv``.
    :param with_marburg: when equal to 1 the five-virus set is used; any
        other value adds Marburg, aligns the Marburg genome first and
        switches to the ``<gene_name>_with_marburg`` matrix.
    :param algorithm: 'NJ' selects neighbor joining; anything else UPGMA.
    :return: the constructed tree (also passed to ``save_tree``).

    NOTE(review): ``with_marburg == 1`` selects the set *without* Marburg,
    which reads inverted relative to the parameter name - confirm with the
    callers before relying on it.
    """
    names = ['Bundibugyo', 'Reston', 'Sudan', 'TaiForest', 'Zaire']
    filename = algorithm + '_' + gene_name

    if with_marburg == 1:
        print('Constructing Tree with All Viruses without Marburg')
    else:
        print("Constructing {0}'s Tree with All Viruses with Marburg".format(gene_name))
        filename = filename + '_with_Marburg'
        names.append('Marburg')
        marburg_genome = SeqIO.read("./Data/Marburg_genome.fasta", "fasta")
        Alignment.read_data()
        print('Aligning Genes for marburg_genome')
        gene_name += '_with_marburg'
        Alignment.read_genes(marburg_genome)

    print('Reading edit matrix and construct tree')
    csv_path = "./Output/edit_matrices/" + gene_name + ".csv"
    raw_matrix = pd.read_csv(csv_path, header=None)
    builder = DistanceTreeConstructor()
    # The distance matrix is built from the lower-triangular form
    lower_triangular = convert_tu_lower_triangular(raw_matrix)
    distance_matrix = DistanceMatrix(names=names, matrix=lower_triangular)

    build = builder.nj if algorithm == 'NJ' else builder.upgma
    tree = build(distance_matrix)

    save_tree(tree, filename)
    return tree
def get_tree(aln, kind='nj'):
    """Build a distance tree from alignment *aln*.

    :param aln: alignment passed to ``DistanceCalculator.get_distance``.
    :param kind: 'upgma' (case-insensitive) selects UPGMA; every other
        value, including the default 'nj', selects neighbor joining as
        before.
    :return: ``(dm, tree)`` - the identity distance matrix and the tree.
    """
    from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor

    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)
    constructor = DistanceTreeConstructor()
    # Honour the requested method instead of always using NJ.
    if isinstance(kind, str) and kind.lower() == 'upgma':
        tree = constructor.upgma(dm)
    else:
        tree = constructor.nj(dm)
    return dm, tree
def buildTree(FASTAFile): myAlignment = AlignIO.read(FASTAFile, "fasta") # Create a tip mapping from the fasta file tipMapping = {} for record in myAlignment: tipMapping[record.id] = str(record.seq) # Compute a distance matrix and construct tree calculator = DistanceCalculator("identity") myMatrix = calculator.get_distance(myAlignment) constructor = DistanceTreeConstructor() upgmaTree = constructor.nj(myMatrix) upgmaTree.root_at_midpoint() Phylo.draw(upgmaTree) # Convert phyloxml tree to newick # biopython does not provide a function to do this so it was necessary # to write to a buffer in newick to convert then get rid of unneeded info for clade in upgmaTree.get_terminals(): clade.name = "\"" + clade.name + "\"" buf = cStringIO.StringIO() Phylo.write(upgmaTree, buf, 'newick', plain = True) tree = buf.getvalue() tree = re.sub(r'Inner\d*', '', tree) tree = tree.replace(";", "") tree = literal_eval(tree) #newick format # RLR tree required for maxParsimony function tree = NewicktoRLR(tree) return tree
def tree_reconstruction(phy_file, method, model, phyformat):
    '''Construct tree with given method and model.

    :param phy_file: PHYLIP alignment to read.
    :param method: 'upgma' or 'nj'.
    :param model: distance model name passed to ``DistanceCalculator``.
    :param phyformat: suffix appended to 'phylip-' to form the format name.
    :raises ValueError: if ``method`` is neither 'upgma' nor 'nj'.

    Writes ``tree.nwk`` and ``tree.svg`` under the module-level
    ``args.output`` directory.
    '''
    # Reject unsupported methods up front with a clear message instead of
    # failing later on an unbound ``tree``.
    if method not in ('upgma', 'nj'):
        raise ValueError(
            "Unknown tree method ({0}). Only 'upgma' and 'nj' "
            "are accepted.".format(method))

    aln = AlignIO.read(phy_file, 'phylip-' + phyformat)
    constructor = DistanceTreeConstructor()
    calculator = DistanceCalculator(model)
    dm = calculator.get_distance(aln)
    if method == 'upgma':
        tree = constructor.upgma(dm)
    else:
        tree = constructor.nj(dm)
    tree.ladderize()

    # Blank the generated "Inner<N>" labels; unnamed clades are skipped
    for c in tree.find_clades():
        if c.name and 'Inner' in c.name:
            c.name = ''
    Phylo.write(tree, args.output + '/tree.nwk', 'newick')

    # Figure styling for the rendering
    plt.rcParams['font.style'] = 'italic'
    plt.rc('font', size=8)
    plt.rc('axes', titlesize=14)
    plt.rc('xtick', labelsize=10)
    plt.rc('ytick', labelsize=10)
    plt.rc('figure', titlesize=18)
    draw(tree, do_show=False)
    plt.savefig(args.output + "/tree.svg", format='svg', dpi=1200)
def build_phylogeny_trees():
    """Align each homologous-gene file and build a UPGMA tree from it.

    For every entry in ``out/homologous_gene_sequences/`` the function runs
    the Clustal Omega command through ``os.system``, reads the aligned FASTA
    back, builds a UPGMA tree from 'identity' distances, draws and prints
    it, and writes it as NEXUS under ``out/phylogenetic_trees/``.
    """
    source_dir = "out/homologous_gene_sequences/"
    aligned_dir = "out/aligned_homologous_gene_sequences/"

    for gene_file in os.listdir(source_dir):
        unaligned_path = source_dir + gene_file
        aligned_path = aligned_dir + gene_file

        # Run the aligner as a shell command
        command = ClustalOmegaCommandline(
            infile=unaligned_path, outfile=aligned_path, verbose=True, auto=True)
        os.system(str(command))

        # Distance matrix -> UPGMA tree
        alignment = AlignIO.read(aligned_path, 'fasta')
        distances = DistanceCalculator('identity').get_distance(alignment)
        tree = DistanceTreeConstructor().upgma(distances)

        Phylo.draw(tree)
        print('\nPhylogenetic Tree\n', gene_file)
        Phylo.draw_ascii(tree)
        nexus_path = 'out/phylogenetic_trees/{}_tree.nex'.format(gene_file)
        Phylo.write([tree], nexus_path, 'nexus')
def NJ(thatdm):
    """Reconstruct a neighbor-joining tree from *thatdm* as an ``XTree``.

    Each terminal clade is mapped to a one-element set holding its own name.
    """
    nj_tree = DistanceTreeConstructor().nj(thatdm)
    leaf_labels = {leaf: {leaf.name} for leaf in nj_tree.get_terminals()}
    return XTree(nj_tree, leaf_labels)
def construct_tree(matrix, nj=True):
    """Build a tree from a distance matrix.

    Can either use neighbor-joining (nj) or UPGMA.

    :param matrix: non-empty list of rows; taxa are named "0", "1", ... by
        row position.
    :param nj: True for neighbor joining, False for UPGMA.
    :return: the tree with inner-node names blanked, or None (after printing
        a message) when *matrix* is not a non-empty list.
    """
    if not isinstance(matrix, list) or not matrix:
        # Function-call form works on both Python 2 and Python 3.
        print("matrix has invalid value")
        return None

    dm = _DistanceMatrix(names=[str(i) for i in range(len(matrix))],
                         matrix=matrix)
    constructor = DistanceTreeConstructor()
    tree = constructor.nj(dm) if nj else constructor.upgma(dm)

    # this will remove the names from the inner nodes
    # this is critical for seq-gen to read in the tree
    for clade in tree.get_nonterminals():
        clade.name = ''
    return tree
def main(argv):
    """Draw a UPGMA tree for the 5x5 table stored in the CSV at ``argv[1]``.

    The table is reduced to lower-triangular rows, labelled with the five
    hard-coded taxon names, and passed to ``DistanceTreeConstructor.upgma``.
    """
    # Test table data and corresponding labels
    M_labels = [
        'Wuttagoonaspis', 'Romundina', 'Brindabellaspis', 'Eurycaraspis',
        'Entelognathus'
    ]
    print(M_labels)

    table = np.loadtxt(open(argv[1], "rb"), delimiter=",")

    # Keep the lower triangle, mark the upper triangle with ones, then zero
    # the diagonal.
    lower = np.tril(table)
    lower += np.triu(np.ones((5, 5)))
    np.fill_diagonal(lower, 0)

    # Drop every entry equal to 1 from each row. This removes the upper
    # triangle marker, and also any genuine value of exactly 1.
    rows = lower.tolist()
    for _ in range(0, 5):
        for idx in range(0, 5):
            rows[idx] = [value for value in rows[idx] if value != 1]

    m = _Matrix(M_labels, rows)
    print(type(m))
    tree = DistanceTreeConstructor().upgma(m)
    Phylo.draw(tree)
def main():
    """Time neighbor joining and UPGMA on ``data/coding.fa`` and draw both.

    The FASTA records are collected into one gapped-DNA alignment, a single
    'identity' distance matrix is computed, and each method is timed and its
    tree drawn with ``get_label`` as the label function.
    """
    file_name = "data/coding.fa"  # alternative input: "data/cons_noncode.fa"

    alignment = MultipleSeqAlignment([], Gapped(IUPAC.unambiguous_dna, "-"))
    for record in SeqIO.parse(file_name, "fasta"):
        alignment.extend([record])

    print("Number of characters in alignment:", len(alignment[0]))

    distances = DistanceCalculator('identity').get_distance(alignment)
    builder = DistanceTreeConstructor()

    # Neighbor joining first, then UPGMA, both on the same matrix
    for label, build in (("Neighbor joining", builder.nj),
                         ("UPGMA", builder.upgma)):
        began = time.time()
        tree = build(distances)
        finished = time.time()
        print("{} ran in {} seconds.".format(label, finished - began))
        Phylo.draw(tree, label_func=get_label)
def nj_wordlist(
        wordlist,
        column="Value",
        method=DistanceTreeConstructor.nj):
    """Create a tree using Hamming distances.

    *wordlist* is read as a tab-separated table and grouped by
    "Language_ID"; each language is represented by the set of values it has
    in *column*. The distance between two languages is the size of the
    symmetric difference of their sets. *method* is an unbound
    ``DistanceTreeConstructor`` method (neighbor joining by default, or
    UPGMA) that turns the distance matrix into the returned tree.
    """
    table = pandas.read_csv(wordlist, sep="\t")

    languages = []
    value_sets = []
    for language, rows in table.groupby("Language_ID"):
        languages.append(language)
        value_sets.append(set(rows[column]))

    # Lower-triangular rows, diagonal included
    triangle = []
    for i, current in enumerate(value_sets):
        triangle.append([len(current ^ value_sets[j]) for j in range(i + 1)])

    dm = _DistanceMatrix(languages, triangle)
    return method(DistanceTreeConstructor(), dm)
def main():
    """Draw the ladderized UPGMA tree of the alignment in ``protein.fasta``.

    Distances use the 'identity' model.
    """
    # Close the handle as soon as the alignment has been read.
    with open("protein.fasta") as handle:
        alignment = AlignIO.read(handle, "fasta")

    calculator = DistanceCalculator('identity')
    # build_tree() derives the distance matrix through the calculator, so it
    # is not computed separately here.
    constructor = DistanceTreeConstructor(calculator, 'upgma')
    tree = constructor.build_tree(alignment)
    tree.ladderize()
    Phylo.draw(tree)
def test_bootstrap_consensus(self):
    """Check that a bootstrap consensus over ``self.msa`` yields a Tree.

    Uses 100 replicates, a blosum62/NJ constructor and majority consensus,
    then writes the tree as Newick into ``temp_dir``.
    """
    nj_builder = DistanceTreeConstructor(DistanceCalculator("blosum62"), "nj")
    consensus_tree = Consensus.bootstrap_consensus(
        self.msa, 100, nj_builder, Consensus.majority_consensus
    )
    self.assertTrue(isinstance(consensus_tree, BaseTree.Tree))
    target = os.path.join(temp_dir, "bootstrap_consensus.tre")
    Phylo.write(consensus_tree, target, "newick")
def test_bootstrap_consensus(self):
    """Check that a bootstrap consensus over ``self.msa`` yields a Tree.

    Uses 100 replicates, a blosum62/NJ constructor and majority consensus,
    then writes the tree as Newick under ``./TreeConstruction/``.
    """
    nj_builder = DistanceTreeConstructor(DistanceCalculator('blosum62'), 'nj')
    consensus_tree = Consensus.bootstrap_consensus(
        self.msa, 100, nj_builder, Consensus.majority_consensus
    )
    self.assertTrue(isinstance(consensus_tree, BaseTree.Tree))
    target = './TreeConstruction/bootstrap_consensus.tre'
    Phylo.write(consensus_tree, target, 'newick')
def create_tree_distance_impl(msa, algorithm):
    """Build a distance tree from *msa* and save it as NEXUS and PNG.

    :param msa: alignment passed to ``build_tree``.
    :param algorithm: method name passed to ``DistanceTreeConstructor`` and
        embedded in the PNG file name.
    :return: path of the PNG that was written.

    The NEXUS file name carries a random integer suffix between 0 and
    10000000.
    """
    identity = DistanceCalculator('identity')
    builder = DistanceTreeConstructor(distance_calculator=identity, method=algorithm)
    tree = builder.build_tree(msa)

    nexus_path = "../../data/created/tree" + str(random.randint(0,10000000)) + ".nex"
    Phylo.write(tree, nexus_path, "nexus")

    Phylo.draw(tree, do_show=False)
    image_path = "../../data/created/createdTree" + algorithm + ".png"
    plt.savefig(image_path)
    return image_path
def get_tree():
    """Return the neighbor-joining tree of the Clustal alignment 'agc.aln'.

    Distances use the 'identity' model.
    """
    alignment = AlignIO.read('agc.aln', 'clustal')
    distances = DistanceCalculator('identity').get_distance(alignment)
    return DistanceTreeConstructor().nj(distances)
def build_nj_tree(self):
    """Return the Newick text of the neighbor-joining tree.

    The tree is built from ``self.distance_matrix()`` and serialised through
    an in-memory buffer.
    """
    distances = self.distance_matrix()
    nj_tree = DistanceTreeConstructor().nj(distances)

    buffer = StringIO.StringIO()
    Phylo.write(nj_tree, buffer, 'newick')
    newick_text = buffer.getvalue()
    buffer.close()
    return newick_text
def build_tree(aln, kind='nj'):
    """Build a tree with bio.phylo module.

    :param aln: alignment passed to ``DistanceCalculator.get_distance``.
    :param kind: 'upgma' (case-insensitive) selects UPGMA; every other
        value, including the default 'nj', selects neighbor joining as
        before.
    :return: ``(dm, tree)`` - the identity distance matrix and the tree.
    """
    from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor

    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)
    constructor = DistanceTreeConstructor()
    # Honour the requested method instead of always using NJ.
    if isinstance(kind, str) and kind.lower() == 'upgma':
        tree = constructor.upgma(dm)
    else:
        tree = constructor.nj(dm)
    return dm, tree