def build(self, root='midpoint', raxml=True, raxml_time_limit=0.5):
    """Infer a tree from ``self.aln`` with FastTree, optionally refined by RAxML.

    All work happens inside ``self.run_dir``, which is created first and
    removed again after a successful run.

    Parameters:
        root: passed through unchanged to ``self.tt_from_file``.
        raxml: when true, run a RAxML branch-length optimisation (and, if
            ``raxml_time_limit > 0``, a time-limited topology search) on the
            FastTree result.
        raxml_time_limit: topology-search budget in hours; ``<= 0`` skips
            the topology search.
    """
    from Bio import Phylo, AlignIO
    import subprocess, glob, shutil

    make_dir(self.run_dir)
    # Remember where we came from: chdir('..') is only right for a
    # single-level relative run_dir, and we must restore the cwd on errors.
    start_dir = os.getcwd()
    os.chdir(self.run_dir)
    try:
        for seq in self.aln:
            seq.name = seq.id
        AlignIO.write(self.aln, 'temp.fasta', 'fasta')

        tree_cmd = ["fasttree"]
        if self.nuc:
            tree_cmd.append("-nt")
        tree_cmd.append("temp.fasta")
        # Redirect stdout ourselves instead of going through a shell.
        with open("initial_tree.newick", "w") as tree_out:
            subprocess.call(tree_cmd, stdout=tree_out)

        out_fname = "tree_infer.newick"
        if raxml:
            # Both RAxML steps read temp.phyx, so write it unconditionally
            # (previously it was missing when raxml_time_limit <= 0).
            AlignIO.write(self.aln, "temp.phyx", "phylip-relaxed")
            if raxml_time_limit > 0:
                tmp_tree = Phylo.read('initial_tree.newick', 'newick')
                resolve_iter = 0
                resolve_polytomies(tmp_tree)
                while (not tmp_tree.is_bifurcating()) and (resolve_iter < 10):
                    resolve_iter += 1
                    resolve_polytomies(tmp_tree)
                Phylo.write(tmp_tree, 'initial_tree.newick', 'newick')

                print("RAxML tree optimization with time limit", raxml_time_limit, "hours")
                end_time = time.time() + int(raxml_time_limit * 3600)
                # Argument list, no shell: terminate() then hits raxml itself.
                process = subprocess.Popen(
                    ["raxml", "-f", "d", "-T", str(self.nthreads), "-j",
                     "-s", "temp.phyx", "-n", "topology", "-c", "25",
                     "-m", "GTRCAT", "-p", "344312987",
                     "-t", "initial_tree.newick"])
                while time.time() < end_time:
                    # Stop waiting once RAxML has finished or died.
                    if os.path.isfile('RAxML_result.topology') or process.poll() is not None:
                        break
                    time.sleep(10)
                if process.poll() is None:
                    process.terminate()
                process.wait()  # reap the child

                if os.path.isfile('RAxML_result.topology'):
                    last_tree_file = 'RAxML_result.topology'
                else:
                    checkpoint_files = glob.glob("RAxML_checkpoint*")
                    # glob order is arbitrary; take the most recently written.
                    last_tree_file = (max(checkpoint_files, key=os.path.getmtime)
                                      if checkpoint_files else "initial_tree.newick")
                shutil.copy(last_tree_file, 'raxml_tree.newick')
            else:
                shutil.copy("initial_tree.newick", 'raxml_tree.newick')

            try:
                print("RAxML branch length optimization")
                subprocess.call(
                    ["raxml", "-f", "e", "-T", str(self.nthreads),
                     "-s", "temp.phyx", "-n", "branches", "-c", "25",
                     "-m", "GTRGAMMA", "-p", "344312987",
                     "-t", "raxml_tree.newick"])
                shutil.copy('RAxML_result.branches', out_fname)
            except (IOError, OSError):
                # raxml missing or it produced no result: keep the topology tree.
                print("RAxML branch length optimization failed")
                shutil.copy('raxml_tree.newick', out_fname)
        else:
            shutil.copy('initial_tree.newick', out_fname)

        self.tt_from_file(out_fname, root)
    finally:
        os.chdir(start_dir)
    remove_dir(self.run_dir)
    self.is_timetree = False
def convert_boottrees(fname_trees):
    """Split a multi-tree Newick file into one file per tree.

    Tree number ``i`` is written to ``<fname_trees>.codeml-<i>``; the list of
    written file names is returned in parse order.
    """
    written = []
    index = 0
    for boot_tree in Phylo.parse(fname_trees, "newick"):
        target = "%s.codeml-%d" % (fname_trees, index)
        Phylo.write(boot_tree, target, "newick")
        written.append(target)
        index += 1
    return written
def test_root_with_outgroup(self):
    """Tree.root_with_outgroup: reroot at a given clade."""
    # Large realistic tree: tip count and total length must survive rerooting.
    big_tree = Phylo.read(EX_APAF, 'phyloxml')
    tip_count = len(big_tree.get_terminals())
    total_len = big_tree.total_branch_length()

    def reroot_and_check(*outgroup, **options):
        big_tree.root_with_outgroup(*outgroup, **options)
        self.assertEqual(tip_count, len(big_tree.get_terminals()))
        self.assertAlmostEqual(total_len, big_tree.total_branch_length())

    # Deep internal node, then an external node.
    reroot_and_check('19_NEMVE', '20_NEMVE')
    reroot_and_check('1_BRAFL')
    # An explicit outgroup branch length mustn't change the total tree size.
    reroot_and_check('2_BRAFL', outgroup_branch_length=0.5)
    reroot_and_check('36_BRAFL', '37_BRAFL', outgroup_branch_length=0.5)

    # Small contrived trees covering edge cases: reroot at every clade.
    edge_cases = (
        '(A,B,(C,D));',
        '((E,F),((G,H)),(I,J));',
        '((Q,R),(S,T),(U,V));',
        '(X,Y);',
    )
    for newick_text in edge_cases:
        small_tree = Phylo.read(StringIO(newick_text), 'newick')
        expected_len = small_tree.total_branch_length()
        # Snapshot the clades first; rerooting mutates the tree.
        for clade in list(small_tree.find_clades()):
            small_tree.root_with_outgroup(clade)
            self.assertAlmostEqual(expected_len, small_tree.total_branch_length())
def initUI(self):
    """Build the widget: a read-only text area showing ``self.tree`` as ASCII art."""
    # Text field the ASCII tree is rendered into.
    self.textEdit = QtGui.QTextEdit()
    self.textEdit.setReadOnly(True)
    self.textEdit.setFontFamily('Courier')
    self.textEdit.setWordWrapMode(True)

    # Layout.
    self.layout = QtGui.QVBoxLayout(self)
    self.layout.addWidget(self.textEdit)
    self.setLayout(self.layout)

    # Render the tree through a scratch file. The write handle is closed
    # explicitly (by the with-block) so the data is flushed before re-reading,
    # rather than relying on the garbage collector.
    with open('/tmp/ascii.txt', 'w') as self.tmpf:
        Phylo.draw_ascii(self.tree, self.tmpf)
    with open('/tmp/ascii.txt', 'r') as self.tmpf:
        self.data = self.tmpf.read()
    self.textEdit.setText(self.data)

    self.setGeometry(200, 200, 700, 400)
    self.setWindowTitle('Tekstowe wyswietlanie')
    self.show()
def test_newick_read_scinot(self):
    """Parse Newick branch lengths in scientific notation."""
    tree = Phylo.read(StringIO("(foo:1e-1,bar:0.1)"), 'newick')
    clade_a = tree.clade[0]
    self.assertEqual(clade_a.name, 'foo')
    self.assertAlmostEqual(clade_a.branch_length, 0.1)

    # Additional checks: negative branch lengths, named tips.
    tree = Phylo.read(StringIO("(A:1, B:-2, (C:3, D:4):-2)"), 'newick')
    self.assertEqual(tree.distance('A'), 1)
    self.assertEqual(tree.distance('B'), -2)
    self.assertEqual(tree.distance('C'), 1)
    self.assertEqual(tree.distance('D'), 2)

    tree = Phylo.read(StringIO("((A:1, B:-2):-5, (C:3, D:4):-2)"), 'newick')
    self.assertEqual(tree.distance('A'), -4)
    self.assertEqual(tree.distance('B'), -7)
    self.assertEqual(tree.distance('C'), 1)
    self.assertEqual(tree.distance('D'), 2)

    # Unnamed tip, with and without embedded newlines: each expected
    # root-to-tip distance must occur exactly once. (The previous
    # "x -= x" bookkeeping always yielded 0 and so could never fail.)
    for newick_text in (
        "((:1, B:-2):-5, (C:3, D:4):-2)",
        "((:\n1\n,\n B:-2):-5, (C:3, D:4):-2);",
    ):
        tree = Phylo.read(StringIO(newick_text), 'newick')
        observed = sorted(int(tree.distance(tip)) for tip in tree.get_terminals())
        self.assertEqual(observed, [-7, -4, 1, 2])
def test_draw_ascii(self):
    """Draw a tree as ASCII art at the default and at a custom column width."""
    handle = StringIO()
    try:
        tree = Phylo.read(EX_APAF, 'phyloxml')
        Phylo.draw_ascii(tree, file=handle)
        Phylo.draw_ascii(tree, file=handle, column_width=120)
    finally:
        # Release the buffer even when parsing or drawing fails.
        handle.close()
def test_convert_phyloxml_filename(self):
    """Write phyloxml to a given filename."""
    trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
    # mkstemp creates the file atomically (mktemp is race-prone); we only
    # need the name, so close the descriptor straight away.
    fd, tmp_filename = tempfile.mkstemp()
    os.close(fd)
    try:
        count = Phylo.write(trees, tmp_filename, "phyloxml")
    finally:
        # Clean up even if writing raises.
        os.remove(tmp_filename)
    self.assertEqual(13, count)
def reroot_tree_with_outgroup(tree_name, outgroups):
    """Reroot the Newick tree in ``tree_name`` on ``outgroups`` and rewrite it in place.

    The outgroup is first reduced by ``GubbinsCommon.get_monophyletic_outgroup``.
    The tree is rerooted with Biopython, then reloaded with DendroPy, derooted
    and written back to ``tree_name`` with all single quotes stripped.
    """
    clade_outgroups = GubbinsCommon.get_monophyletic_outgroup(tree_name, outgroups)
    outgroups = [{"name": taxon_name} for taxon_name in clade_outgroups]

    tree = Phylo.read(tree_name, "newick")
    tree.root_with_outgroup(*outgroups)
    Phylo.write(tree, tree_name, "newick")

    tree = dendropy.Tree.get_from_path(tree_name, "newick", preserve_underscores=True)
    tree.deroot()
    tree.update_bipartitions()
    output_tree_string = tree.as_string(
        schema="newick",
        suppress_leaf_taxon_labels=False,
        suppress_leaf_node_labels=True,
        suppress_internal_taxon_labels=False,
        suppress_internal_node_labels=False,
        suppress_rooting=True,
        suppress_edge_lengths=False,
        unquoted_underscores=True,
        preserve_spaces=False,
        store_tree_weights=False,
        suppress_annotations=True,
        annotations_as_nhx=False,
        suppress_item_comments=True,
        node_label_element_separator=" ",
    )
    # Write-only access is enough; the with-block closes the file.
    with open(tree_name, "w") as output_file:
        output_file.write(output_tree_string.replace("'", ""))
def phylo2newick(self, t):
    """Return the Newick serialisation of tree ``t`` as a string."""
    buffer = StringIO()
    Phylo.write(t, buffer, "newick")
    newick_text = buffer.getvalue()
    return newick_text
def make_tree_figure(wanted_seqs, trop_dict, tree_file):
    """Draw the Newick tree in ``tree_file`` as a network coloured by ``trop_dict``.

    Unnamed nodes are relabelled ``Clade-<n>`` and drawn white; named nodes are
    green when ``trop_dict[name]`` is truthy and red otherwise.
    """
    # NOTE(review): result unused; call kept in case it has side effects - confirm.
    get_pairwise_distances(wanted_seqs, tree_file=tree_file)

    # Passing the path lets Biopython open and close the file itself.
    tree = Phylo.read(tree_file, 'newick')
    net = Phylo.to_networkx(tree)

    node_mapping = {}
    clade = 1
    for node in net.nodes():
        if node.name is None:
            node_mapping[node] = 'Clade-%i' % clade
            clade += 1
        else:
            node_mapping[node] = node.name
    new_net = networkx.relabel_nodes(net, node_mapping)

    colors = []
    for node in new_net.nodes():
        if node.startswith('Clade'):
            colors.append('w')
        else:
            # Truthy/falsy covers every case; the old third branch was dead.
            colors.append('g' if trop_dict[node] else 'r')

    pos = networkx.graphviz_layout(new_net, 'twopi')
    networkx.draw_networkx(new_net, pos, with_labels=False, node_color=colors)
def tree(from_cluster, to_cluster, grupa):
    """Build majority-consensus NJ trees for clusters ``from_cluster..to_cluster-1``.

    For each cluster, 50 bootstrap trees are built from its alignment; trees
    with any internal node supported below 50 are discarded and the rest are
    merged into a consensus. All consensus trees are written in Newick format
    to ``drzewa_wynikowe_<grupa>``.
    """
    consensus_trees = []
    for i in range(from_cluster, to_cluster):
        msa = AlignIO.read('msa\\msa_rodzina_' + str(i) + '_s.fasta', 'fasta')
        print(i)
        calculator = DistanceCalculator('identity')
        try:
            # Result unused; kept so a bad alignment fails here as before.
            calculator.get_distance(msa)
            constructor = DistanceTreeConstructor(calculator, 'nj')
            trees_list = list(bootstrap_trees(msa, 50, constructor))

            not_included = set()
            for j, target_tree in enumerate(trees_list):
                support_tree = get_support(target_tree, trees_list)
                for node in support_tree.get_nonterminals():
                    if node.confidence < 50:
                        not_included.add(j)
            trees = [t for k, t in enumerate(trees_list) if k not in not_included]
            if trees:
                consensus_trees.append(majority_consensus(trees))
        except ValueError as err:
            # The original "except: ValueError" swallowed everything silently;
            # catch what was evidently meant and report the skipped cluster.
            print("Skipping cluster %s: %s" % (i, err))
    Phylo.write(consensus_trees, "drzewa_wynikowe_" + str(grupa), "newick")
def genTaxTree(resolver, namesdict, logger, taxonomy=None, draw=False):
    """Return Phylo from TaxonNamesResolver class.

    Query names (whitespace replaced by ``_``) that are keys of ``namesdict``
    are treated as resolved; the rest are logged at debug level. A taxonomic
    tree is built from the resolved names and an ``outgroup`` tip is added
    before parsing. When ``draw`` is true the tree is also printed as ASCII.
    """
    ranks = resolver.retrieve('classification_path_ranks')
    qnames = resolver.retrieve('query_name')
    lineages = resolver.retrieve('classification_path')
    # replace ' ' with '_' for taxon tree (raw string: "\s" is an invalid escape)
    qnames = [re.sub(r"\s", "_", e) for e in qnames]
    resolved_names_bool = [e in namesdict for e in qnames]
    ranks = [ranks[ei] for ei, e in enumerate(resolved_names_bool) if e]
    lineages = [lineages[ei] for ei, e in enumerate(resolved_names_bool) if e]
    # identify unresolved names
    unresolved_names = [q for q, ok in zip(qnames, resolved_names_bool) if not ok]
    idents = [q for q, ok in zip(qnames, resolved_names_bool) if ok]
    statement = "Unresolved names: " + "".join(" " + each for each in unresolved_names)
    logger.debug(statement)
    # make taxdict
    taxdict = TaxDict(idents=idents, ranks=ranks, lineages=lineages,
                      taxonomy=taxonomy)
    # make treestring
    treestring = taxTree(taxdict)
    if not taxonomy:
        d = 22  # default_taxonomy + 1 in tnr
    else:
        d = len(taxonomy) + 1
    # add outgroup; treestring[:-1] drops the final character before wrapping
    treestring = '({0},outgroup:{1});'.format(treestring[:-1], float(d))
    tree = Phylo.read(StringIO(treestring), "newick")
    if draw:
        Phylo.draw_ascii(tree)
    return tree
def draw_tree(tree, node_label = node_label_func, branch_label = lambda x:None, cmap = cm.jet, axes = None, cb = True):
    '''
    Plot a tree on an empty canvas, including a scale bar of length 0.005.

    Parameters:
        tree: tree handed to ``Bio.Phylo.draw``.
        node_label: label function for nodes (``label_func``).
        branch_label: label function for branches (``branch_labels``).
        cmap: colormap used for the fake colorbar.
        axes: axes to draw on; a new 8x6 figure is created when None.
        cb: when true, add a 'worst'..'best' colorbar.

    Returns the axes that were drawn on.
    '''
    import matplotlib.pyplot as plt
    from Bio import Phylo
    if axes is None:
        plt.figure(figsize=(8, 6))
        axes = plt.subplot(111)
    Phylo.draw(tree, label_func=node_label, show_confidence=False,
               branch_labels=branch_label, axes=axes)
    axes.axis('off')
    xlimits = axes.get_xlim()
    ylimits = axes.get_ylim()
    x0 = xlimits[0] + (xlimits[1] - xlimits[0]) * 0.05
    x1 = x0 + 0.005
    y0 = ylimits[0] + (ylimits[1] - ylimits[0]) * 0.05
    # Draw on the requested axes, not on whatever pyplot considers current.
    axes.plot([x0, x1], [y0, y0], lw=2, c='k')
    axes.text(x0 + 0.0025, y0 + (ylimits[1] - y0) * 0.01, '0.005', ha='center')
    # fake a colorbar
    if cb:
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=0, vmax=100))
        sm.set_array([])  # public equivalent of the old "sm._A = []"
        cbar = plt.colorbar(sm, ax=axes, ticks=[0, 100], shrink=0.5, aspect=10, pad=-0.05)
        cbar.set_ticklabels(['worst', 'best'])
    plt.draw()
    return axes
def get_tree(tree_file, name_tree):
    """Load a tree, rename its nodes and return its edges with branch lengths.

    Internal nodes are named ``N0, N1, ...``; terminals take their names,
    position by position, from the tree in ``name_tree``. Every clade's
    ``branch_length`` is overwritten with its ``confidence`` value.

    Returns ``(edge_to_blen, edge_list)``: a dict mapping
    ``(parent_name, child_name)`` to the edge weight, and the list of edges
    sorted by the integer after the first character of the parent name.
    """
    with open(tree_file, 'r') as handle:
        tree = Phylo.read(handle, "newick")
    with open(name_tree, 'r') as handle:
        tree_name = Phylo.read(handle, "newick")

    # set node number for nonterminal nodes and specify root node
    for numInternalNode, clade in enumerate(tree.get_nonterminals()):
        clade.name = 'N' + str(numInternalNode)
        clade.branch_length = clade.confidence

    # Collect both tip lists once instead of once per iteration.
    terminals = tree.get_terminals()
    name_terminals = tree_name.get_terminals()
    for clade_iter, clade in enumerate(terminals):
        clade.branch_length = clade.confidence
        # Indexing keeps the IndexError if the name tree has fewer tips.
        clade.name = name_terminals[clade_iter].name

    tree_phy = tree.as_phyloxml(rooted='True')
    tree_nx = Phylo.to_networkx(tree_phy)

    # data=True exposes the branch length as 'weight'
    edge_to_blen = {}
    for u, v, d in tree_nx.edges(data=True):
        edge_to_blen[(u.name, v.name)] = d['weight']

    # sorted() returns a list on Python 2 and 3; dict.keys().sort() is Py2-only.
    edge_list = sorted(edge_to_blen, key=lambda edge: int(edge[0][1:]))
    return edge_to_blen, edge_list
def measure_D_net(G, qmod, qcon):
    """Build a lower-triangular distance matrix over ``G`` and write its UPGMA tree.

    The distance for a pair ``(u, v)`` with ``v <= u`` is
    ``1.0 - G.dmc_likelihood(u, v, qmod, qcon)``. The UPGMA tree is written to
    ``ph_dmc.nre`` in Newick format and the distance matrix is returned.
    """
    ordered = sorted(G)
    D_net = []
    for u in ordered:
        # Row u holds entries for every v <= u (diagonal included).
        row = [1.0 - G.dmc_likelihood(u, v, qmod, qcon) for v in ordered if not u < v]
        print(" ".join(str(d) for d in row))
        print("")
        D_net.append(row)

    # Names must follow the same (sorted) order as the rows; using G's own
    # iteration order could label rows with the wrong taxon.
    names = ['Taxon' + str(u) for u in ordered]
    print(names)
    print(D_net)
    D_net_final = _DistanceMatrix(names, D_net)

    constructor = DistanceTreeConstructor()
    tree_dmc = constructor.upgma(D_net_final)
    Phylo.write(tree_dmc, 'ph_dmc.nre', 'newick')
    return D_net_final
def distance_matrix(cls, cluster_list):
    """Return a ladderized neighbour-joining tree (Newick string) for ``cluster_list``.

    Pairwise distances are read from ``Distance`` rows whose two accession
    numbers are both in ``cluster_list``; a pair may be stored in either order.

    Raises:
        ValueError: if no distance is stored for some pair.
    """
    print(cluster_list)
    dists = Distance.objects.filter(rep_accnum1__in=cluster_list, rep_accnum2__in=cluster_list)
    # Tuple keys cannot collide the way '_'-joined strings can when an
    # accession itself contains '_'.
    distance_pairs = {(g.rep_accnum1, g.rep_accnum2): g.distance for g in dists.all()}

    matrix = []
    for i in range(len(cluster_list)):
        matrix_iteration = []
        for j in range(i + 1):
            first, second = cluster_list[i], cluster_list[j]
            if i == j:
                matrix_iteration.append(0)
            elif (first, second) in distance_pairs:
                matrix_iteration.append(distance_pairs[(first, second)])
            elif (second, first) in distance_pairs:
                matrix_iteration.append(distance_pairs[(second, first)])
            else:
                # raise("...") raised a TypeError: strings are not exceptions.
                raise ValueError("Error, can't find pair!")
        matrix.append(matrix_iteration)

    cluster_list = [s.encode('ascii', 'ignore') for s in cluster_list]
    matrix_obj = _DistanceMatrix(names=cluster_list, matrix=matrix)
    constructor = DistanceTreeConstructor()
    tree = constructor.nj(matrix_obj)
    tree.ladderize()

    output = StringIO.StringIO()
    Phylo.write(tree, output, 'newick')
    tree_str = output.getvalue()
    return tree_str
def rootTree(f, root, output):
    """Reroot the Newick tree in ``f`` and write the result to ``output``.

    ``root`` is a taxon name, or a comma-separated list of names whose common
    ancestor becomes the outgroup.
    """
    phylogeny = Phylo.read(f, 'newick')
    if ',' in root:
        outgroup = phylogeny.common_ancestor(root.split(','))
    else:
        outgroup = root
    phylogeny.root_with_outgroup(outgroup)
    Phylo.write(phylogeny, output, 'newick')
def make_trees(self, force=False):
    """Build a phyloXML tree for each sub-directory of ``self.seed_directory``.

    For every directory visited after the base path, all ``.fasta`` seeds are
    ungapped and merged, aligned with muscle, passed to clustalw2 with
    ``-tree`` and converted to phyloXML. Existing outputs are skipped unless
    ``force`` is true.
    """
    walker = os.walk(self.seed_directory)
    for depth, (root, _, files) in enumerate(walker):
        # The first entry is the base path itself.
        if depth == 0:
            continue
        hist_type = os.path.basename(root)
        print("Creating tree for %s" % hist_type)

        final_tree_name = os.path.join(self.trees_path, "{}_no_features.xml".format(hist_type))
        already_built = os.path.isfile(final_tree_name)
        if already_built and not force:
            continue

        if not os.path.exists(self.trees_path):
            os.makedirs(self.trees_path)

        # Merge all variants of this core histone type into one unaligned FASTA.
        combined_seed_file = os.path.join(self.trees_path, "{}.fasta".format(hist_type))
        combined_seed_aligned = os.path.join(self.trees_path, "{}_aligned.fasta".format(hist_type))
        with open(combined_seed_file, "w") as combined_seed:
            fasta_seeds = [name for name in files if name.endswith(".fasta")]
            for seed in fasta_seeds:
                seed_path = os.path.join(self.seed_directory, hist_type, seed)
                for s in SeqIO.parse(seed_path, "fasta"):
                    s.seq = s.seq.ungap("-")
                    SeqIO.write(s, combined_seed, "fasta")

        # Align, build the tree, then convert it to phyloXML.
        tree = os.path.join(self.trees_path, "{}_aligned.ph".format(hist_type))
        subprocess.call(["muscle", "-in", combined_seed_file, '-out', combined_seed_aligned])
        clustal_cmd = ["clustalw2",
                       "-infile={}".format(combined_seed_aligned),
                       "-outfile={}".format(final_tree_name),
                       '-tree']
        print(" ".join(clustal_cmd))
        subprocess.call(clustal_cmd)
        Phylo.convert(tree, 'newick', final_tree_name, 'phyloxml')
def get_tree(newicktree):
    """Return the leaf names and the outgroup branch of a Newick tree.

    Internal nodes are renamed ``N0, N1, ...``. The outgroup branch is the
    first edge from ``N0`` to a child whose name (minus its first character)
    is not all digits.

    Returns ``(leaves, out_group_branch)``: a list of the degree-1 node names
    and a ``(parent_name, child_name)`` tuple.
    """
    tree = Phylo.read(newicktree, "newick")
    # set node number for nonterminal nodes and specify root node
    for numInternalNode, clade in enumerate(tree.get_nonterminals()):
        clade.name = 'N' + str(numInternalNode)

    tree_phy = tree.as_phyloxml(rooted='True')
    tree_nx = Phylo.to_networkx(tree_phy)

    # data=True exposes the branch length as 'weight'
    T = nx.DiGraph()
    edge_to_blen = {}
    for u, v, d in tree_nx.edges(data=True):
        edge = (u.name, v.name)
        T.add_edge(*edge)
        edge_to_blen[edge] = d['weight']

    # dict(...) works for both the networkx 1.x dict and the 2.x DegreeView.
    leaves = set(v for v, degree in dict(T.degree()).items() if degree == 1)

    nEdge = len(edge_to_blen)  # number of edges
    l = nEdge // 2 + 1         # number of leaves (integer division intended)
    k = l - 1                  # number of internal nodes

    leaf_branch = [edge for edge in edge_to_blen
                   if edge[0][0] == 'N'
                   and edge[0][1:].isdigit()
                   and not edge[1][1:].isdigit()]
    out_group_branch = [edge for edge in leaf_branch
                        if edge[0] == 'N0' and not edge[1][1:].isdigit()][0]
    leaf_branch_set = set(leaf_branch)
    internal_branch = [x for x in edge_to_blen if x not in leaf_branch_set]
    # sanity check: one fewer internal branch than internal nodes
    assert len(internal_branch) == k - 1
    return list(leaves), out_group_branch
def write_clusters(seqfname, tree, clusters, unclustered):
    """Write output files: clusters & unique as FASTA, tree as phyloXML.

    Clusters are numbered from largest to smallest and each cluster's clade is
    given a distinct colour and a width of 2 before the tree is written.
    """
    is_aln = seqfname.endswith('.aln')
    seq_format = 'clustal' if is_aln else 'fasta'
    seq_idx = SeqIO.to_dict(SeqIO.parse(seqfname, seq_format))
    base_name = os.path.basename(seqfname)

    def write_cluster(cluster, fname):
        """Write the sequences of cluster tips to a FASTA file."""
        records = [seq_idx[seqid] for seqid in sorted(cluster)]
        with open(fname, 'w+') as handle:
            for rec in records:
                write_fasta(rec, handle, do_ungap=is_aln)
        logging.info("Wrote %s (%d sequences)", fname, len(records))

    colors = []
    for rgb in ColorSpiral().get_colors(len(clusters)):
        colors.append(BranchColor(*[int(channel * 255) for channel in rgb]))

    by_size = sorted(clusters.iteritems(), reverse=True,
                     key=lambda kv: len(kv[1]))
    for rank, (clade, cluster) in enumerate(by_size):
        write_cluster(cluster, base_name + '.' + str(rank))
        clade.color = colors[rank]
        clade.width = 2

    if unclustered:
        write_cluster(unclustered, base_name + '.Unique')

    treefname = base_name + '.xml'
    Phylo.write(tree, treefname, 'phyloxml')
    logging.info("Wrote %s", treefname)
def plot_tree(tree):
    """Draw ``tree`` with Biopython, coloured by its ``cHI`` attribute.

    Nodes with a truthy ``serum`` attribute are labelled 'X'; all others get
    an empty label.
    """
    from Bio import Phylo
    from tree_util import to_Biopython, color_BioTree_by_attribute

    def serum_label(node):
        if node.serum:
            return 'X'
        # Both outcomes are empty, but the attribute is still read as before.
        if node.HI_info:
            return ''
        return ''

    def identity(value):
        return value

    btree = to_Biopython(tree)
    color_BioTree_by_attribute(btree, "cHI", transform=identity)
    Phylo.draw(btree, label_func=serum_label, show_confidence=False)
def serialize_trees(self, tree_uri='', format='newick', trees=None, handle=None):
    '''Retrieve trees serialized to any format supported by Biopython.

    Current options include 'newick', 'nexus', 'phyloxml', 'nexml', and 'cdao'
    ('ascii' draws the first tree as ASCII art).

    When ``trees`` is None, the first tree returned by
    ``self.get_trees(tree_uri)`` is used. Output goes to ``handle`` if one is
    given (and None is returned); otherwise the text is returned as a string.

    Raises Exception if there is no tree to serialize.

    Example:
    >>> treestore.serialize_trees('http://www.example.org/test/')
    '''
    if handle:
        s = handle
    else:
        s = StringIO()

    if tree_uri:
        tree_uri = self.uri_from_id(tree_uri)

    if trees is None:
        # next(..., None) works on Python 2 and 3 and, unlike .next(), lets
        # the "not found" check below fire instead of leaking StopIteration.
        first = next(iter(self.get_trees(tree_uri)), None)
        trees = [] if first is None else [first]
    if not trees:
        raise Exception('Tree to be serialized not found.')

    if format == 'cdao':
        bp.write(trees, s, format, tree_uri=tree_uri)
    elif format == 'ascii':
        bp._utils.draw_ascii(next(iter(trees)), file=s)
    else:
        bp.write(trees, s, format)

    if handle:
        return
    return s.getvalue()
def generate_new_files(fname):
    """Write a copy of the FASTA file ``fname`` with short ids ``flyg1, flyg2, ...``.

    The copy's name is ``fname`` with every '.' replaced by '_.'. Unless
    ``RUN_RAXML`` is set, the matching tree file gets the same renaming.
    Returns the new FASTA file name.
    """
    # Short gene names are needed so that slr can handle them.
    renamed_fasta = fname.replace(".", "_.")

    id_map = {}
    renamed_records = []
    for serial, rec in enumerate(SeqIO.parse(fname, 'fasta'), 1):
        short_id = "flyg%s" % serial
        id_map[rec.id] = short_id
        rec.id = short_id
        rec.name = ""
        rec.description = ""
        renamed_records.append(rec)
    SeqIO.write(renamed_records, renamed_fasta, "fasta")

    if RUN_RAXML:
        return renamed_fasta

    # Apply the same id mapping to the tips of the existing tree file.
    source_tree_path = fname.replace("fasta", "tree")
    renamed_tree_path = renamed_fasta.replace("fasta", "tree")
    phylogeny = Phylo.read(source_tree_path, 'newick')
    for tip in phylogeny.get_terminals():
        tip.name = id_map[tip.name]
    Phylo.write(phylogeny, renamed_tree_path, 'newick')
    return renamed_fasta
def GetExec():
    """Load every tree file found in the current working directory.

    Files whose extension starts with ``.dnd`` are read as Newick and those
    starting with ``.xml`` as phyloXML; each tree is marked rooted.

    Returns ``[trees, index]``: ``trees`` is a list of ``[tree, 'ok']`` pairs
    and ``index`` maps each position to ``(position, file_name)``.
    """
    readers = (('.dnd', 'newick'), ('.xml', 'phyloxml'))
    loaded = []
    index_to_file = dict()
    for entry in os.listdir(os.getcwd()):
        extension = os.path.splitext(entry)[1]
        for prefix, tree_format in readers:
            if extension.startswith(prefix):
                parsed = Phylo.read(entry, tree_format)
                parsed.rooted = True
                position = len(loaded)
                loaded.append([parsed, 'ok'])
                index_to_file[position] = position, str(entry)
                break
    return [loaded, index_to_file]
def reroot_tree_with_outgroup(tree_name, outgroups):
    """Reroot the Newick tree in ``tree_name`` on ``outgroups`` and rewrite it in place.

    The outgroup is first reduced by ``GubbinsCommon.get_monophyletic_outgroup``.
    The tree is rerooted with Biopython, then reloaded with DendroPy, derooted
    and written back to ``tree_name`` with all single quotes stripped.
    """
    clade_outgroups = GubbinsCommon.get_monophyletic_outgroup(tree_name, outgroups)
    outgroups = [{'name': taxon_name} for taxon_name in clade_outgroups]

    tree = Phylo.read(tree_name, 'newick')
    tree.root_with_outgroup(*outgroups)
    Phylo.write(tree, tree_name, 'newick')

    tree = dendropy.Tree.get_from_path(tree_name, 'newick', preserve_underscores=True)
    tree.deroot()
    tree.update_splits()
    output_tree_string = tree.as_string(
        'newick',
        taxon_set=None,
        suppress_leaf_taxon_labels=False,
        suppress_leaf_node_labels=True,
        suppress_internal_taxon_labels=False,
        suppress_internal_node_labels=False,
        suppress_rooting=True,
        suppress_edge_lengths=False,
        unquoted_underscores=True,
        preserve_spaces=False,
        store_tree_weights=False,
        suppress_annotations=True,
        annotations_as_nhx=False,
        suppress_item_comments=True,
        node_label_element_separator=' ',
        node_label_compose_func=None
    )
    # The original ended with "output_file.closed" (an attribute read, not a
    # call), so the file was never closed; the with-block closes it.
    with open(tree_name, 'w') as output_file:
        output_file.write(output_tree_string.replace('\'', ''))
def showMatplotlibTreeWindow(self):
    """Draw ``self.tree`` with matplotlib, labelling branches with their lengths.

    Opens the file dialog first when no file has been chosen yet; nothing is
    drawn while ``self.tree`` is still 0.
    """
    no_file_chosen = self.chosenFileName == ''
    if no_file_chosen:
        self.showOpenFileDialog()

    if self.tree != 0:
        def length_label(clade):
            return clade.branch_length

        self.tree.root.color = '#808080'
        Phylo.draw(self.tree, branch_labels=length_label)
def drawConsensusTreeBioNexus(self):
    """Draw the Bio.Nexus consensus of the trees in ``self.path1`` and ``self.path2``.

    Does nothing unless both paths are set.
    """
    if self.path1 != '' and self.path2 != '':
        # get files extensions
        self.fileExtension1 = (os.path.splitext(self.path1)[1])[1:]
        self.fileExtension2 = (os.path.splitext(self.path2)[1])[1:]

        # Read both tree files; the with-block closes each handle even when
        # parsing raises.
        with open(self.path1, 'r') as self.f:
            self.tree1 = Trees.Tree(self.f.read())
        with open(self.path2, 'r') as self.f:
            self.tree2 = Trees.Tree(self.f.read())
        self.trees = [self.tree1, self.tree2]

        # generate consensus tree
        self.consensus_tree = Trees.consensus(self.trees)

        # draw tree
        self.handle = StringIO(self.consensus_tree.to_string(plain_newick=True))
        self.tree = Phylo.read(self.handle, 'newick')
        self.tree.root.color = '#808080'
        Phylo.draw(self.tree)
def main():
    """Colour the leaves of a tree by group membership and write it as phyloXML.

    The colouring scheme comes from ``args.zchemat_kolorowania`` ('eba',
    'fungi' or 'opisto'). Internal nodes that carry a confidence get it tagged
    as bootstrap and copied into their name.

    Raises:
        ValueError: if the colouring scheme is not one of the three known ones.
    """
    args = parse_args()
    tree = Phylo.read(args.input_file, args.input_type)
    tree = tree.as_phyloxml()

    if args.zchemat_kolorowania == 'eba':
        get_colors_and_groups = get_eukariota_group
    elif args.zchemat_kolorowania == 'fungi':
        get_colors_and_groups = get_fungus_groups
    elif args.zchemat_kolorowania == 'opisto':
        get_colors_and_groups = get_opisto_groups
    else:
        # Fail here with a clear message rather than with an
        # UnboundLocalError further down.
        raise ValueError(
            "Unknown colouring scheme: %r" % (args.zchemat_kolorowania,))

    for branch in tree.get_nonterminals():
        try:
            branch.confidence.type = "bootstrap"
            branch.name = branch.confidence.value
        except AttributeError:
            # No confidence on this node.
            pass

    colors, list_of_groups = get_colors_and_groups()
    for leaf in tree.get_terminals():
        name = leaf.name.strip()
        try:
            index = name.index(".")
            name = name[index + 3:]
        except ValueError:
            # No '.' in the name: use the last two '_'-separated fields.
            name = "_".join(name.split("_")[-2:])
        for color, members in zip(colors, list_of_groups):
            if name in members:
                leaf.color = color
    Phylo.write(tree, args.output_file, "phyloxml")
def drawConsensusTreeDendropy(self):
    """Draw the DendroPy consensus (min_freq=0.2) of the two selected trees.

    Does nothing unless both ``self.path1`` and ``self.path2`` are set; each
    file's extension is used as its DendroPy schema name.
    """
    if self.path1 == '' or self.path2 == '':
        return

    # File extensions without the leading dot.
    self.fileExtension1 = os.path.splitext(self.path1)[1][1:]
    self.fileExtension2 = os.path.splitext(self.path2)[1][1:]

    # Load both trees.
    self.tree1 = dendropy.Tree.get_from_path(self.path1, self.fileExtension1)
    self.tree2 = dendropy.Tree.get_from_path(self.path2, self.fileExtension2)

    # Collect them and build the consensus.
    self.trees = dendropy.TreeList()
    for loaded in (self.tree1, self.tree2):
        self.trees.append(loaded)
    self.consensus_tree = self.trees.consensus(min_freq=0.2)

    # Fixed bug with conversion to a buffer: use the Newick string directly.
    newick_text = self.consensus_tree._as_newick_string()
    self.handle = StringIO(newick_text)
    self.tree = Phylo.read(self.handle, 'newick')
    self.tree.root.color = '#808080'
    Phylo.draw(self.tree)
def write(self, phytrees_file):
    """
    Save all trees stored in this PhyTrees object to 'phytrees_file' (newick
    format). A detailed report is written next to it, with the extension of
    'phytrees_file' replaced by ".rep". A relative 'phytrees_file' is resolved
    through get_abspath. Existing files are overwritten without warning.

    Arguments :
        phytrees_file  ( string )
            New PhyTrees tree file.

    Raises :
        IOError
            If the path provided doesn't exist.
    """
    data_filepath = get_abspath(phytrees_file)
    stem, _extension = os.path.splitext(data_filepath)
    report_filepath = stem + '.rep'

    # One report line per history entry, fields separated by spaces.
    report_lines = [' '.join(entry) for entry in self._report]
    str_report = '\n'.join(report_lines)

    try:
        Phylo.write(self.data, data_filepath, 'newick')
        with open(report_filepath, 'w') as report_file:
            report_text = 'Num. trees: {:d}\nHistory:\n{:s}'.format(len(self), str_report)
            report_file.write(report_text)
    except IOError:
        raise
    except:
        # Any other failure: remove partial output, then propagate.
        for leftover in (data_filepath, report_filepath):
            if os.path.isfile(leftover):
                os.remove(leftover)
        raise
confidences = [] for c in clade.clades: #if not c.confidence is None: confidences.append(c.confidence) confidences += parse(c) return confidences texDir = '/Users/ahenschel/Github/FreqRT/ManuscriptBootstrapping/Tables' nwkFiles = [n for n in sys.argv[1:] if n.endswith('.nwk')] if not nwkFiles: print("Provide newick files through command line, e.g.: Data/*.nwk") else: data = [] for nwkFile in nwkFiles: tree = Phylo.read(nwkFile, format='newick') rec = infoFromName(nwkFile) confidences = np.array(parse(tree.root), dtype=np.float) nrNans = np.sum(np.isnan(confidences)) nrNansPct = nrNans / len(confidences) rec += (np.nanmean(confidences), np.nanmedian(confidences), nrNans, nrNansPct) data.append(rec) if True: plt.clf() Phylo.draw(tree) plt.savefig(f'{nwkFile[:-4]}.svg', figsize=(18, 12), dpi=300) plt.close() #input("Press enter") #break columns = 'Loci NrLoci Thresh Build Pops AvgConf MedConf Miss MissPct'.split(
def __init__(self, newick_tree):
    """Parse ``newick_tree`` as a rooted Newick tree whose node values are confidences."""
    read_options = {"values_are_confidence": True, "rooted": True}
    self.tree = Phylo.read(newick_tree, "newick", **read_options)
def test_get_score(self):
    """Check ParsimonyScorer.get_score with no matrix and with two step matrices."""
    aln = AlignIO.read("TreeConstruction/msa.phy", "phylip")
    tree = Phylo.read("./TreeConstruction/upgma.tre", "newick")
    # No step matrix supplied.
    scorer = ParsimonyScorer()
    score = scorer.get_score(tree, aln)
    self.assertEqual(score, 2 + 1 + 2 + 2 + 1 + 1 + 1 + 3)
    # Four-letter alphabet with a lower-triangular step matrix.
    alphabet = ["A", "T", "C", "G"]
    step_matrix = [[0], [2.5, 0], [2.5, 1, 0], [1, 2.5, 2.5, 0]]
    matrix = _Matrix(alphabet, step_matrix)
    scorer = ParsimonyScorer(matrix)
    score = scorer.get_score(tree, aln)
    self.assertEqual(score, 3.5 + 2.5 + 3.5 + 3.5 + 2.5 + 1 + 2.5 + 4.5)
    # 23-symbol alphabet; the lower-triangular matrix has one row per symbol.
    alphabet = [
        "A",
        "C",
        "D",
        "E",
        "F",
        "G",
        "H",
        "I",
        "K",
        "L",
        "M",
        "N",
        "P",
        "Q",
        "R",
        "1",
        "2",
        "T",
        "V",
        "W",
        "Y",
        "*",
        "-",
    ]
    step_matrix = [
        [0],
        [2, 0],
        [1, 2, 0],
        [1, 2, 1, 0],
        [2, 1, 2, 2, 0],
        [1, 1, 1, 1, 2, 0],
        [2, 2, 1, 2, 2, 2, 0],
        [2, 2, 2, 2, 1, 2, 2, 0],
        [2, 2, 2, 1, 2, 2, 2, 1, 0],
        [2, 2, 2, 2, 1, 2, 1, 1, 2, 0],
        [2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0],
        [2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 0],
        [1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 0],
        [2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 1, 0],
        [2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 0],
        [1, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 0],
        [2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 0],
        [1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 0],
        [1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0],
        [2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 3, 2, 2, 1, 1, 2, 2, 2, 0],
        [2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 3, 1, 2, 2, 2, 1, 2, 2, 2, 2, 0],
        [2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 0],
        [
            3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0
        ],
    ]
    matrix = _Matrix(alphabet, step_matrix)
    scorer = ParsimonyScorer(matrix)
    score = scorer.get_score(tree, aln)
    self.assertEqual(score, 3 + 1 + 3 + 3 + 2 + 1 + 2 + 5)
def readtrees(self):
    """Return the result of parsing ``self.infile`` as Newick trees."""
    parsed_trees = Phylo.parse(self.infile, 'newick')
    return parsed_trees
def test_newick_read_scinot(self):
    """Parse Newick branch lengths in scientific notation."""
    newick_source = StringIO("(foo:1e-1,bar:0.1)")
    parsed = Phylo.read(newick_source, 'newick')
    first_child = parsed.clade[0]
    self.assertEqual(first_child.name, 'foo')
    # 1e-1 must parse to the same value as the plain decimal 0.1.
    self.assertAlmostEqual(first_child.branch_length, 0.1)
def test_newick_read_multiple(self):
    """Parse a Nexus file with multiple trees."""
    parsed = [one_tree for one_tree in Phylo.parse(EX_NEXUS, 'nexus')]
    self.assertEqual(len(parsed), 3)
    # Every tree in the file has the same nine tips.
    for one_tree in parsed:
        tip_count = len(one_tree.get_terminals())
        self.assertEqual(tip_count, 9)
def setUp(self):
    """Load every phylogeny from the example phyloXML file into a list."""
    parsed = Phylo.parse(EX_PHYLO, 'phyloxml')
    self.phylogenies = [phylogeny for phylogeny in parsed]
def test_newick(self):
    """Read a Newick file with one tree."""
    parsed = Phylo.read(EX_NEWICK, 'newick')
    tips = parsed.get_terminals()
    self.assertEqual(len(tips), 28)
print(err.decode("utf-8")) print(p2_status) f = open(sequence_ids_outfile, 'r') sequence_ids_output_file = csv.reader(f) sequence_header_list = {} for entry in sequence_ids_output_file: sequence_id = entry[0] description = entry[1] seq_type = entry[2] sequence_header_list[sequence_id] = [description, seq_type] f.close() if (p2_status == 0): best_tree = Phylo.read(mega_best_tree_infile, 'newick') print(best_tree) best_tree.root_with_outgroup( {'name': 'b30065_LXYB01000004_Acholeplasma_laidlawii_PG8R10'}) best_tree.rooted = True #Phylo.draw(best_tree) #pylab.savefig(mega_tree_filename + '.png') Phylo.write(best_tree, mega_tree_filename + '.nwk', 'newick') Phylo.write(best_tree, mega_tree_filename + '.xml', 'phyloxml') f_out = open(subgroups_tree_outfile, 'w') for fasta_record in SeqIO.parse(subgroups_tree_file, "fasta"): seq_id = fasta_record.id
if __name__ == "__main__": options = get_options() import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt import seaborn as sns sns.set_style('white') import os import pandas as pd import numpy as np from Bio import Phylo t = Phylo.read(options.tree, 'newick') print ' Max distance to create better plots' mdist = max([t.distance(t.root, x) for x in t.get_terminals()]) print ' Load roary' roary = pd.read_table(options.spreadsheet, sep=',', low_memory=False) print ' Set index (group name)' roary.set_index('Gene', inplace=True) print ' Find tagged genes' if options.tag is not None: if "Inference" in roary.columns: roary_info = roary[["Inference"]].copy() roary.drop(["Inference"], axis=1, inplace=True) VF = roary_info[roary_info.applymap( lambda x: options.tag.upper() in x.upper()).values].index
from Bio import Phylo selgenes = ["rrs", "atpB"] sgstat = { "rrs": [1499, 18], "psaB": [1790, 21], "tufA": [1225, 13], "atpB": [3510, 66], "rpoB": [3765, 56] } #rnconv = { "p9" : "tufA", "p34" : "rpl19", "p4" : "psaB", "p6" : "atpB", "p2" : "rpoB" } fig, ax = plt.subplots(1, len(selgenes), figsize=(40, 20), dpi=80) for k in range(len(selgenes)): gene = selgenes[k] tree = Phylo.read("fragments_" + gene + ".nwk", 'newick') matplotlib.rcParams["font.size"] = 28 matplotlib.rcParams["lines.linewidth"] = 3 leafs = tree.get_terminals(order="postorder") bspnum = 0 spid = "bsponge" labelconv = { "p_salinaru": "Picocystis salinarum", "myrmecia": "Myrmecia israeliensis", "botyococcu": "Botryococcus braunii", "coccomyxa": "Coccomyxa subellipsoidea", "c_parasiti": "Choricystis parasitica", "bsponge": "Sponge symbiont", "h_reticala": "Hydrodictyon reticulatum", "m_jurisii": "Mychonastes jurisii",
return colors['darkblue'] elif c.split(" ")[0] == "GR": return colors['blue'] elif c.split(" ")[0] == "G": return colors['seagreen'] elif c.split(" ")[0] == "GH": return colors['green'] elif c.split(" ")[0] == "S": return colors['lime'] elif c.split(" ")[0] == "O": return colors['gold'] elif c.split(" ")[0] == "GV": return colors['darkorange'] elif c.split(" ")[0] == "V": return colors['tomato'] elif c.split(" ")[0] == "L": return colors['red'] matplotlib.rc('font', size=30) fig = plt.figure(figsize=(35, 25), dpi=100) axes = fig.add_subplot(1, 1, 1) Phylo.draw(tree, axes=axes, do_show=False, label_func=labels, label_colors=labels_col) plt.axis('off') plt.savefig("plots/tree3.svg", format="svg", transparent=True) Phylo.draw_ascii(tree)
# Multiple alignment and phylogenetic tree
# Input/output locations used by the steps below.
# NOTE(review): the ClustalW input is spelled "Probitina_MA.fasta" while the
# MUSCLE input is "Proibitina_MA.fasta" - confirm which spelling is intended.
clustalw_input = "C:/Users/Zé Freitas/Desktop/Mestrado/Labs_Bioinf/Trabalho prático/scripts/Labs_Bioinf/Proibitina/Probitina_MA.fasta"
muscle_input = "C:/Users/Zé Freitas/Desktop/Mestrado/Labs_Bioinf/Trabalho prático/scripts/Labs_Bioinf/Proibitina/Proibitina_MA.fasta"
aligned_fasta = "C:/Users/Zé Freitas/Desktop/Mestrado/Labs_Bioinf/Trabalho prático/scripts/Labs_Bioinf/Proibitina/Proibitinalinhados.fasta"
tree_file = "C:/Users/Zé Freitas/Desktop/Mestrado/Labs_Bioinf/Trabalho prático/scripts/Labs_Bioinf/Proibitina/Proibitinarvore.dnd"

# Show the wrapper's help text, then build (and only print) both command lines.
help(ClustalwCommandline)
cline = ClustalwCommandline("clustalw2", infile=clustalw_input)
print(cline)
cline = MuscleCommandline(input=muscle_input, out="Proibitina_MA.aln", clw=True)
print(cline)

# Read the multiple-alignment file
alignment = AlignIO.read(aligned_fasta, "fasta")
print(alignment)

# Read the phylogenetic-tree file and render it as ASCII art
arvore = Phylo.read(tree_file, "newick")
print(arvore)
Phylo.draw_ascii(arvore)
ax.tick_params(labelsize=fs * 0.8) ax.set_ylim([0, 1.1 * np.max(yi)]) plt.tight_layout() plt.legend(fontsize=fs * 0.8) if __name__ == '__main__': import matplotlib.pyplot as plt import time plt.ion() # tree_file = '../data/H3N2_NA_allyears_NA.20.nwk' # date_file = '../data/H3N2_NA_allyears_NA.20.metadata.csv' tree_file = '../data/ebola.nwk' date_file = '../data/ebola.metadata.csv' T = Phylo.read(tree_file, 'newick') #T.root_with_outgroup('A/Canterbury/58/2000|CY009150|09/05/2000|New_Zealand||H3N2/8-1416') dates = {} with open(date_file) as ifile: ifile.readline() for line in ifile: if line[0] != '#': fields = line.strip().split(',') dates[fields[0]] = float(fields[1]) for l in T.get_terminals(): l.numdate = dates[l.name] branch_variance = lambda x: (x.branch_length + (0.0005 if x.is_terminal() else 0.0)) / 19000.0
def test_newick_read_single3(self):
    """Parse the single-tree Nexus example and check its number of leaves."""
    terminals = Phylo.read(EX_NEXUS2, "nexus").get_terminals()
    leaf_count = len(terminals)
    self.assertEqual(leaf_count, 658)
def GRAViTyDendrogramAndHeatmapConstruction(
        ShelveDir,
        IncludeIncompleteGenomes=False,
        SimilarityMeasurementScheme="PG",
        p=1,
        Dendrogram=True,
        Dendrogram_LinkageMethod="average",  # options listed by the author: "single", "complete", "average", "weighted"
        Bootstrap=True,
        N_Bootstrap=10,
        Bootstrap_method="booster",  # "booster" or "sumtrees"
        Bootstrap_N_CPUs=20,
        Heatmap=False,
        Heatmap_VirusOrderScheme=None,  # path to a virus-order file, or None
        Heatmap_WithDendrogram=True,
        Heatmap_DendrogramFile=None,
        Heatmap_DendrogramSupport_Cutoff=0.75,
        VirusGrouping=True,
):
    # Build the GRAViTy dendrogram, optional bootstrap support, heat maps and
    # virus grouping table from previously shelved annotation results.
    #
    # Python 2 only: uses print statements, dict.iteritems(), and relies on
    # map() returning a list when wrapped in np.array(...).
    #
    # Parameters (as used by the code below):
    #   ShelveDir                  -- base directory; all inputs and outputs live in ShelveDir + "/Shelves".
    #   IncludeIncompleteGenomes   -- True reads the "*.AllGenomes.shelve" files, False the
    #                                 "*.CompleteGenomes.shelve" files; also encoded in output file names.
    #   SimilarityMeasurementScheme, p
    #                              -- forwarded to SimilarityMat_Constructor; if the scheme string
    #                                 contains "G", the GOM signature table is rebuilt per bootstrap round.
    #   Dendrogram                 -- build the dendrogram and write it as Newick text.
    #   Dendrogram_LinkageMethod   -- forwarded to DistMat2Tree and VirusGrouping_Estimator.
    #   Bootstrap, N_Bootstrap     -- resample PPHMM columns N_Bootstrap times (only when Dendrogram is True).
    #   Bootstrap_method           -- external tool used to summarise the replicates.
    #   Bootstrap_N_CPUs           -- passed to booster's "-@" option.
    #   Heatmap                    -- write a distance heat map PDF.
    #   Heatmap_VirusOrderScheme   -- file with one integer row index per line; None (or a path that
    #                                 is not a file) keeps the natural order.
    #   Heatmap_WithDendrogram     -- write a heat map PDF with the dendrogram drawn alongside.
    #   Heatmap_DendrogramFile     -- Newick file to draw; None falls back to the dendrogram produced here.
    #   Heatmap_DendrogramSupport_Cutoff
    #                              -- internal-node confidences below this are blanked.
    #   VirusGrouping              -- write the virus grouping table.
    #
    # Returns None. Side effects: prints progress, writes/removes files under the
    # shelve directory, runs external programs through the shell, and injects the
    # shelved variables into this module's globals().
    #
    # Raises SystemExit when a dendrogram heat map is requested but no dendrogram
    # file is available.
    print "################################################################################"
    print "#Generate GRAViTy dendrogram and heat map #"
    print "################################################################################"
    ''' Generate GRAViTy dendrogram and heat map --------------------------------------------- '''
    # ---- Define dir/file paths -------------------------------------------------
    print "- Define dir/file paths"
    print "\tto program output shelve"
    VariableShelveDir = ShelveDir + "/Shelves"
    if Dendrogram == True:
        print "\t\tto virus dendrogram file"
        VirusDendrogramFile = VariableShelveDir + "/Dendrogram.IncompleteGenomes=%s.Scheme=%s.Method=%s.p=%s.nwk" % (
            str(int(IncludeIncompleteGenomes)), SimilarityMeasurementScheme,
            Dendrogram_LinkageMethod, p)
        if Bootstrap == True:
            print "\t\tto dendrogram distribution file"
            VirusDendrogramDistFile = VariableShelveDir + "/DendrogramDist.IncompleteGenomes=%s.Scheme=%s.Method=%s.p=%s.nwk" % (
                str(int(IncludeIncompleteGenomes)),
                SimilarityMeasurementScheme, Dendrogram_LinkageMethod, p)
            # Replicates are appended later, so a stale file must be removed first.
            if os.path.isfile(VirusDendrogramDistFile):
                os.remove(VirusDendrogramDistFile)
            print "\t\tto bootstrapped dendrogram file"
            BootstrappedVirusDendrogramFile = VariableShelveDir + "/BootstrappedDendrogram.IncompleteGenomes=%s.Scheme=%s.Method=%s.p=%s.nwk" % (
                str(int(IncludeIncompleteGenomes)),
                SimilarityMeasurementScheme, Dendrogram_LinkageMethod, p)
            if os.path.isfile(BootstrappedVirusDendrogramFile):
                os.remove(BootstrappedVirusDendrogramFile)
    if Heatmap == True:
        print "\t\tto heat map file"
        HeatmapFile = VariableShelveDir + "/Heatmap.IncompleteGenomes=%s.Scheme=%s.p=%s.pdf" % (
            str(int(IncludeIncompleteGenomes)), SimilarityMeasurementScheme, p)
    if Heatmap_WithDendrogram == True:
        print "\t\tto heat map with dendrogram file"
        HeatmapWithDendrogramFile = VariableShelveDir + "/HeatmapWithDendrogram.IncompleteGenomes=%s.Scheme=%s.Method=%s.p=%s.support_cutoff=%s.pdf" % (
            str(int(IncludeIncompleteGenomes)), SimilarityMeasurementScheme,
            Dendrogram_LinkageMethod, p, Heatmap_DendrogramSupport_Cutoff)
        # Choose which Newick file is drawn next to the heat map: the caller's
        # file if given, otherwise the (bootstrapped) dendrogram built below.
        if Heatmap_DendrogramFile == None:
            if Dendrogram == True:
                if Bootstrap == True:
                    Heatmap_DendrogramFile = BootstrappedVirusDendrogramFile
                elif Bootstrap == False:
                    Heatmap_DendrogramFile = VirusDendrogramFile
            else:
                raise SystemExit("The dendrogram file is missing.")
        elif not os.path.isfile(Heatmap_DendrogramFile):
            raise SystemExit("Can't find %s." % Heatmap_DendrogramFile)
    if VirusGrouping == True:
        VirusGroupingFile = VariableShelveDir + "/VirusGrouping.IncompleteGenomes=%s.Scheme=%s.p=%s.txt" % (
            str(int(IncludeIncompleteGenomes)), SimilarityMeasurementScheme, p)
    # ---- Retrieve variables ----------------------------------------------------
    print "- Retrieve variables"
    # NOTE(review): VariableShelveFile is only bound when IncludeIncompleteGenomes
    # compares equal to True or False; any other value leaves it undefined here.
    if IncludeIncompleteGenomes == True:
        print "\tfrom ReadGenomeDescTable.AllGenomes.shelve"
        VariableShelveFile = VariableShelveDir + "/ReadGenomeDescTable.AllGenomes.shelve"
    elif IncludeIncompleteGenomes == False:
        print "\tfrom ReadGenomeDescTable.CompleteGenomes.shelve"
        VariableShelveFile = VariableShelveDir + "/ReadGenomeDescTable.CompleteGenomes.shelve"
    Parameters = shelve.open(VariableShelveFile)
    # Each shelved entry is published as a module-level global of the same name;
    # the rest of this function reads them as plain names.
    for key in [
            "SeqIDLists",
            "FamilyList",
            "GenusList",
            "VirusNameList",
            "TaxoGroupingList",
    ]:
        globals()[key] = Parameters[key]
        print "\t\t" + key
    Parameters.close()
    if IncludeIncompleteGenomes == True:
        print "\tfrom RefVirusAnnotator.AllGenomes.shelve"
        VariableShelveFile = VariableShelveDir + "/RefVirusAnnotator.AllGenomes.shelve"
    elif IncludeIncompleteGenomes == False:
        print "\tfrom RefVirusAnnotator.CompleteGenomes.shelve"
        VariableShelveFile = VariableShelveDir + "/RefVirusAnnotator.CompleteGenomes.shelve"
    Parameters = shelve.open(VariableShelveFile)
    # Keys absent from this shelve are skipped silently (KeyError is swallowed).
    for key in [
            "PPHMMSignatureTable",
            "PPHMMLocationTable",
            "PPHMMSignatureTable_coo",
            "PPHMMLocationTable_coo",
            "GOMIDList",
            "GOMSignatureTable",
    ]:
        try:
            globals()[key] = Parameters[key]
            print "\t\t" + key
        except KeyError:
            pass
    Parameters.close()
    # When a "*_coo" variant is present in globals, its .toarray() result
    # replaces the corresponding table global.
    if "PPHMMSignatureTable_coo" in globals().keys():
        globals()["PPHMMSignatureTable"] = PPHMMSignatureTable_coo.toarray()
    if "PPHMMLocationTable_coo" in globals().keys():
        globals()["PPHMMLocationTable"] = PPHMMLocationTable_coo.toarray()
    # ---- Estimate virus pairwise distances -------------------------------------
    print "- Estimate virus pairwise distances"
    SimMat = SimilarityMat_Constructor(
        PPHMMSignatureTable=PPHMMSignatureTable,
        GOMSignatureTable=GOMSignatureTable,
        PPHMMLocationTable=PPHMMLocationTable,
        SimilarityMeasurementScheme=SimilarityMeasurementScheme,
        p=p,
    )
    # Distance is one minus similarity, with negative values clipped to zero.
    DistMat = 1 - SimMat
    DistMat[DistMat < 0] = 0
    if Dendrogram == True:
        # ---- Estimate the GRAViTy dendrogram -----------------------------------
        print "- Estimate the GRAViTy dendrogram"
        # Make TaxoLabelList (leaf labels for the tree)
        TaxoLabelList = TaxoLabel_Constructor(SeqIDLists=SeqIDLists,
                                              FamilyList=FamilyList,
                                              GenusList=GenusList,
                                              VirusNameList=VirusNameList)
        # Make dendrogram; the result is written to the file as text.
        VirusDendrogram = DistMat2Tree(
            DistMat=DistMat,
            LeafList=TaxoLabelList,
            Dendrogram_LinkageMethod=Dendrogram_LinkageMethod)
        with open(VirusDendrogramFile, "w") as VirusDendrogram_txt:
            VirusDendrogram_txt.write(VirusDendrogram)
        if Bootstrap == True:
            # ---- Compute bootstrap support -------------------------------------
            print "- Compute bootstrap support"
            N_PPHMMs = PPHMMSignatureTable.shape[1]
            for Bootstrap_i in range(0, N_Bootstrap):
                print "\tRound %d" % (Bootstrap_i + 1)
                print "\t\tConstruct bootstrapped PPHMMSignatureTable and PPHMMLocationTable"
                # Resample column indices with replacement; the same indices
                # are applied to both tables so they stay aligned.
                PPHMM_IndexList = np.random.choice(range(N_PPHMMs),
                                                   N_PPHMMs,
                                                   replace=True)
                BootstrappedPPHMMSignatureTable = PPHMMSignatureTable[:, PPHMM_IndexList]
                BootstrappedPPHMMLocationTable = PPHMMLocationTable[:, PPHMM_IndexList]
                BootstrappedGOMSignatureTable = None
                if "G" in SimilarityMeasurementScheme:
                    print "\t\tConstruct bootstrapped GOMSignatureTable"
                    BootstrappedGOMDB = GOMDB_Constructor(
                        TaxoGroupingList=TaxoGroupingList,
                        PPHMMLocationTable=BootstrappedPPHMMLocationTable,
                        GOMIDList=GOMIDList)
                    BootstrappedGOMSignatureTable = GOMSignatureTable_Constructor(
                        PPHMMLocationTable=BootstrappedPPHMMLocationTable,
                        GOMDB=BootstrappedGOMDB,
                        GOMIDList=GOMIDList)
                print "\t\tConstruct a dendrogram from the bootstrapped data"
                BootstrappedSimMat = SimilarityMat_Constructor(
                    PPHMMSignatureTable=BootstrappedPPHMMSignatureTable,
                    GOMSignatureTable=BootstrappedGOMSignatureTable,
                    PPHMMLocationTable=BootstrappedPPHMMLocationTable,
                    SimilarityMeasurementScheme=SimilarityMeasurementScheme,
                    p=p,
                )
                BootstrappedDistMat = 1 - BootstrappedSimMat
                BootstrappedDistMat[BootstrappedDistMat < 0] = 0
                BootstrappedVirusDendrogram = DistMat2Tree(
                    DistMat=BootstrappedDistMat,
                    LeafList=TaxoLabelList,
                    Dendrogram_LinkageMethod=Dendrogram_LinkageMethod)
                # One replicate tree per line, appended to the distribution file.
                with open(VirusDendrogramDistFile,
                          "a") as VirusDendrogramDist_txt:
                    VirusDendrogramDist_txt.write(BootstrappedVirusDendrogram +
                                                  "\n")
            print "\tCreat a bootstrapped dendrogram"
            # The external tool's stdout/stderr are captured into out/err, which
            # are not inspected afterwards.
            if Bootstrap_method == "booster":
                _ = subprocess.Popen(
                    "booster -i %s -b %s -o %s -@ %d " %
                    (VirusDendrogramFile, VirusDendrogramDistFile,
                     BootstrappedVirusDendrogramFile, Bootstrap_N_CPUs),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    shell=True)
                out, err = _.communicate()
            elif Bootstrap_method == "sumtrees":
                _ = subprocess.Popen(
                    "sumtrees.py --decimals=2 --no-annotations --preserve-underscores --force-rooted --output-tree-format=newick --output-tree-filepath=%s --target=%s %s"
                    % (BootstrappedVirusDendrogramFile, VirusDendrogramFile,
                       VirusDendrogramDistFile),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    shell=True)
                out, err = _.communicate()
            else:
                # Only a message is printed here; this function does not write
                # BootstrappedVirusDendrogramFile in this branch.
                print "'Bootstrap_method' can either be 'booster' or 'sumtrees'."
    if Heatmap == True:
        # ---- Construct GRAViTy heatmap -----------------------------------------
        print "- Construct GRAViTy heatmap"
        # Determine virus order
        N_Viruses = len(DistMat)
        if Heatmap_VirusOrderScheme == None:
            VirusOrder = range(N_Viruses)
        elif os.path.isfile(Heatmap_VirusOrderScheme):
            # One integer index per line; both "\r\n" and "\n" endings are stripped.
            with open(Heatmap_VirusOrderScheme,
                      "r") as Heatmap_VirusOrderScheme_txt:
                VirusOrder = [
                    int(Virus_i.split("\r\n")[0].split("\n")[0])
                    for Virus_i in Heatmap_VirusOrderScheme_txt
                ]
        else:
            VirusOrder = range(N_Viruses)
        # Re-order the distance matrix (rows first, then columns)
        OrderedDistMat = DistMat[VirusOrder][:, VirusOrder]
        # Labels, label positions, and ticks.
        # Groupings starting with "_" or "*" are reduced to the text after that
        # first separator. TaxoGroupingList is indexed with a list here, so it
        # presumably is a numpy array - TODO confirm.
        ClassLabelList = np.array([
            re.split("_|\*", TaxoGrouping)[1] if TaxoGrouping.startswith(
                ("_", "*")) else TaxoGrouping
            for TaxoGrouping in TaxoGroupingList[VirusOrder]
        ])
        # Indices where consecutive labels differ mark the class boundaries.
        LineList = np.where(ClassLabelList[0:-1] != ClassLabelList[1:])[0]
        ClassLabelList = np.hstack(
            (ClassLabelList[LineList], ClassLabelList[-1]))
        LineList = LineList + 0.5
        # Tick positions are the midpoints between consecutive boundaries.
        TickLocList = np.array(
            map(np.mean, (zip(np.hstack(([-0.5], LineList)),
                              np.hstack((LineList,
                                         [len(TaxoGroupingList) - 0.5]))))))
        # Plot configuration: sizes are summed into the figure size, then each
        # axes rectangle is expressed as a fraction of the figure.
        Heatmap_width = float(12)
        Heatmap_height = Heatmap_width
        TaxoLable_space = 1.00
        CBar_Heatmap_gap = 0.05
        CBar_width = Heatmap_width
        CBar_height = 0.25
        CBarLable_space = 0.25
        Outer_margin = 0.5
        FontSize = 6
        Fig_width = Outer_margin + Heatmap_width + TaxoLable_space + Outer_margin
        Fig_height = Outer_margin + CBarLable_space + CBar_height + CBar_Heatmap_gap + Heatmap_height + TaxoLable_space + Outer_margin
        ax_Heatmap_L = (Outer_margin + TaxoLable_space) / Fig_width
        ax_Heatmap_B = (Outer_margin + CBarLable_space + CBar_height +
                        CBar_Heatmap_gap) / Fig_height
        ax_Heatmap_W = Heatmap_width / Fig_width
        ax_Heatmap_H = Heatmap_height / Fig_height
        ax_CBar_L = (Outer_margin + TaxoLable_space) / Fig_width
        ax_CBar_B = (Outer_margin + CBarLable_space) / Fig_height
        ax_CBar_W = CBar_width / Fig_width
        ax_CBar_H = CBar_height / Fig_height
        # Plot the heat map
        fig = plt.figure(figsize=(Fig_width, Fig_height), dpi=300)
        ax_Heatmap = fig.add_axes(
            [ax_Heatmap_L, ax_Heatmap_B, ax_Heatmap_W, ax_Heatmap_H],
            frame_on=True,
            facecolor="white")
        Heatmap_Graphic = ax_Heatmap.imshow(OrderedDistMat,
                                            cmap='magma',
                                            aspect='auto',
                                            vmin=0,
                                            vmax=1,
                                            interpolation='none')
        # Thin black lines separate the classes in both directions.
        for l in LineList:
            ax_Heatmap.axvline(l, color='k', lw=0.2)
            ax_Heatmap.axhline(l, color='k', lw=0.2)
        ax_Heatmap.set_xticks(TickLocList)
        ax_Heatmap.set_xticklabels(ClassLabelList, rotation=90, size=FontSize)
        ax_Heatmap.set_yticks(TickLocList)
        ax_Heatmap.set_yticklabels(ClassLabelList, rotation=0, size=FontSize)
        ax_Heatmap.tick_params(top='on',
                               bottom='off',
                               left='off',
                               right='on',
                               labeltop='on',
                               labelbottom='off',
                               labelleft='off',
                               labelright='on',
                               direction='out')
        ax_CBar = fig.add_axes([ax_CBar_L, ax_CBar_B, ax_CBar_W, ax_CBar_H],
                               frame_on=True,
                               facecolor="white")
        CBar_Graphic = fig.colorbar(Heatmap_Graphic,
                                    cax=ax_CBar,
                                    orientation="horizontal",
                                    ticks=[0, 0.25, 0.50, 0.75, 1])
        CBar_Graphic.ax.set_xticklabels(
            ['0.00', '0.25', '0.50', '0.75', '1.00'], size=FontSize)
        CBar_Graphic.ax.set_xlabel('Distance', rotation=0, size=FontSize + 2)
        CBar_Graphic.ax.tick_params(top='off',
                                    bottom='on',
                                    left='off',
                                    right='off',
                                    labeltop='off',
                                    labelbottom='on',
                                    labelleft='off',
                                    labelright='off',
                                    direction='out')
        # Save the plot to file
        plt.savefig(HeatmapFile, format="pdf")
    if Heatmap_WithDendrogram == True:
        # ---- Construct GRAViTy heat map with dendrogram ------------------------
        print "- Construct GRAViTy heat map with dendrogram"
        N_Viruses = len(DistMat)
        # Load the tree
        VirusDendrogram = Phylo.read(Heatmap_DendrogramFile, "newick")
        # Determine virus order: rows follow the ladderized tree's leaf order,
        # mapped back to matrix indices through the leaf labels.
        TaxoLabelList = TaxoLabel_Constructor(SeqIDLists=SeqIDLists,
                                              FamilyList=FamilyList,
                                              GenusList=GenusList,
                                              VirusNameList=VirusNameList)
        _ = VirusDendrogram.ladderize(reverse=True)
        OrderedTaxoLabelList = [
            Clade.name for Clade in VirusDendrogram.get_terminals()
        ]
        VirusOrder = [
            TaxoLabelList.index(TaxoLabel)
            for TaxoLabel in OrderedTaxoLabelList
        ]
        # Re-order the distance matrix
        OrderedDistMat = DistMat[VirusOrder][:, VirusOrder]
        # Remove clade support values that are < Heatmap_DendrogramSupport_Cutoff
        # (or NaN); the remaining ones are rounded to two decimals.
        # get_nonterminals() is re-evaluated on every access in this loop.
        N_InternalNodes = len(VirusDendrogram.get_nonterminals())
        for InternalNode_i in range(N_InternalNodes):
            if VirusDendrogram.get_nonterminals(
            )[InternalNode_i].confidence < Heatmap_DendrogramSupport_Cutoff or np.isnan(
                    VirusDendrogram.get_nonterminals()
                [InternalNode_i].confidence):
                VirusDendrogram.get_nonterminals(
                )[InternalNode_i].confidence = ""
            else:
                VirusDendrogram.get_nonterminals(
                )[InternalNode_i].confidence = round(
                    VirusDendrogram.get_nonterminals()
                    [InternalNode_i].confidence, 2)
        # Labels, label positions, and ticks
        Taxo2ClassDict = {
            TaxoLabel: TaxoGrouping
            for TaxoLabel, TaxoGrouping in zip(TaxoLabelList, TaxoGroupingList)
        }
        # NOTE(review): if `copy` here is copy.copy (shallow), the clades are
        # shared, so the renaming below also renames VirusDendrogram's leaves -
        # confirm against the file's imports.
        ClassDendrogram = copy(VirusDendrogram)
        for Clade in ClassDendrogram.find_clades(terminal=True):
            Clade.name = Taxo2ClassDict[Clade.name]
        ClassLabelList = []
        LineList = [-1]
        TerminalNodeList = [
            TerminalNode for TerminalNode in ClassDendrogram.get_terminals()
        ]
        # Walk the leaves left to right. For the left-most unprocessed leaf,
        # take the first clade on the root-to-leaf path whose leaves all share
        # one class label, record that label and block size, then skip past it.
        while len(TerminalNodeList) != 0:
            FarLeftNode = TerminalNodeList[0]
            for Clade in ([ClassDendrogram] +
                          ClassDendrogram.get_path(FarLeftNode)):
                DescendantNodeList = Clade.get_terminals()
                DescendantClassLabelList = list(
                    set(map(lambda c: c.name, DescendantNodeList)))
                if len(DescendantClassLabelList) == 1:
                    ClassLabelList.append(DescendantClassLabelList[0])
                    LineList.append(LineList[-1] + len(DescendantNodeList))
                    TerminalNodeList = TerminalNodeList[len(DescendantNodeList
                                                            ):]
                    break
        ClassLabelList = np.array(ClassLabelList)
        LineList = np.array(LineList) + 0.5
        TickLocList = np.array(map(np.mean, zip(LineList[0:-1], LineList[1:])))
        # Plot configuration: as for the plain heat map, plus a dendrogram panel
        # and its scale bar to the left.
        Heatmap_width = float(12)
        Heatmap_height = Heatmap_width
        TaxoLable_space = 1.00
        CBar_Heatmap_gap = 0.05
        CBar_width = Heatmap_width
        CBar_height = 0.25
        CBarLable_space = 0.25
        Dendrogram_width = Heatmap_width / 3
        Dendrogram_height = Heatmap_height
        Dendrogram_Heatmap_gap = 0.1
        ScaleBar_Dendrogram_gap = CBar_Heatmap_gap
        ScaleBar_width = Dendrogram_width
        ScaleBar_height = CBar_height
        ScaleBarLable_space = CBarLable_space
        Outer_margin = 0.5
        FontSize = 6
        Fig_width = Outer_margin + Dendrogram_width + Dendrogram_Heatmap_gap + Heatmap_width + TaxoLable_space + Outer_margin
        Fig_height = Outer_margin + CBarLable_space + CBar_height + CBar_Heatmap_gap + Heatmap_height + TaxoLable_space + Outer_margin
        ax_Dendrogram_L = Outer_margin / Fig_width
        ax_Dendrogram_B = (Outer_margin + ScaleBarLable_space +
                           ScaleBar_height +
                           ScaleBar_Dendrogram_gap) / Fig_height
        ax_Dendrogram_W = Dendrogram_width / Fig_width
        ax_Dendrogram_H = Dendrogram_height / Fig_height
        ax_ScaleBar_L = Outer_margin / Fig_width
        ax_ScaleBar_B = (Outer_margin + ScaleBarLable_space) / Fig_height
        ax_ScaleBar_W = ScaleBar_width / Fig_width
        ax_ScaleBar_H = ScaleBar_height / Fig_height
        ax_Heatmap_L = (Outer_margin + Dendrogram_width +
                        Dendrogram_Heatmap_gap) / Fig_width
        ax_Heatmap_B = (Outer_margin + CBarLable_space + CBar_height +
                        CBar_Heatmap_gap) / Fig_height
        ax_Heatmap_W = Heatmap_width / Fig_width
        ax_Heatmap_H = Heatmap_height / Fig_height
        ax_CBar_L = (Outer_margin + Dendrogram_width +
                     Dendrogram_Heatmap_gap) / Fig_width
        ax_CBar_B = (Outer_margin + CBarLable_space) / Fig_height
        ax_CBar_W = CBar_width / Fig_width
        ax_CBar_H = CBar_height / Fig_height
        # Plot the dendrogram, its scale bar, the heat map and the colour bar
        fig = plt.figure(figsize=(Fig_width, Fig_height), dpi=300)
        ax_Dendrogram = fig.add_axes([
            ax_Dendrogram_L, ax_Dendrogram_B, ax_Dendrogram_W, ax_Dendrogram_H
        ],
                                     frame_on=False,
                                     facecolor="white")
        # Leaf labels are suppressed; only the tree shape is drawn.
        Phylo.draw(VirusDendrogram,
                   label_func=lambda x: "",
                   do_show=False,
                   axes=ax_Dendrogram)
        VirusDendrogramDepth = max(
            [v for k, v in VirusDendrogram.depths().iteritems()])
        # Show only the last unit of depth before the deepest node, and flip
        # the y axis so the first leaf is at the top.
        ax_Dendrogram.set_xlim([(VirusDendrogramDepth - 1),
                                VirusDendrogramDepth])
        ax_Dendrogram.set_ylim([N_Viruses + 0.5, 0.5])
        ax_Dendrogram.set_axis_off()
        ax_ScaleBar = fig.add_axes(
            [ax_ScaleBar_L, ax_ScaleBar_B, ax_ScaleBar_W, ax_ScaleBar_H],
            frame_on=False,
            facecolor="white")
        ax_ScaleBar.plot([0, 1], [0, 0], 'k-')
        ScaleBarTicks = [0, 0.25, 0.5, 0.75, 1]
        for Tick in ScaleBarTicks:
            ax_ScaleBar.plot([Tick, Tick], [-0.05, 0.05], 'k-')
        # Reversed x limits: the scale runs from 1 on the left to 0 on the right.
        ax_ScaleBar.set_xlim([1, 0])
        ax_ScaleBar.set_xticks(ScaleBarTicks)
        ax_ScaleBar.set_xticklabels(map(str, ScaleBarTicks),
                                    rotation=0,
                                    size=FontSize)
        ax_ScaleBar.set_xlabel('Distance', rotation=0, size=FontSize + 2)
        ax_ScaleBar.xaxis.set_label_position('bottom')
        ax_ScaleBar.tick_params(top='off',
                                bottom='off',
                                left='off',
                                right='off',
                                labeltop='off',
                                labelbottom='on',
                                labelleft='off',
                                labelright='off',
                                direction='out')
        ax_Heatmap = fig.add_axes(
            [ax_Heatmap_L, ax_Heatmap_B, ax_Heatmap_W, ax_Heatmap_H],
            frame_on=True,
            facecolor="white")
        Heatmap_Graphic = ax_Heatmap.imshow(OrderedDistMat,
                                            cmap='magma',
                                            aspect='auto',
                                            vmin=0,
                                            vmax=1,
                                            interpolation='none')
        for l in LineList:
            ax_Heatmap.axvline(l, color='k', lw=0.2)
            ax_Heatmap.axhline(l, color='k', lw=0.2)
        ax_Heatmap.set_xticks(TickLocList)
        ax_Heatmap.set_xticklabels(ClassLabelList, rotation=90, size=FontSize)
        ax_Heatmap.set_yticks(TickLocList)
        ax_Heatmap.set_yticklabels(ClassLabelList, rotation=0, size=FontSize)
        ax_Heatmap.tick_params(top='on',
                               bottom='off',
                               left='off',
                               right='on',
                               labeltop='on',
                               labelbottom='off',
                               labelleft='off',
                               labelright='on',
                               direction='out')
        ax_CBar = fig.add_axes([ax_CBar_L, ax_CBar_B, ax_CBar_W, ax_CBar_H],
                               frame_on=True,
                               facecolor="white")
        CBar_Graphic = fig.colorbar(Heatmap_Graphic,
                                    cax=ax_CBar,
                                    orientation="horizontal",
                                    ticks=[0, 0.25, 0.50, 0.75, 1])
        CBar_Graphic.ax.set_xticklabels(['0', '0.25', '0.50', '0.75', '1'],
                                        rotation=0,
                                        size=FontSize)
        CBar_Graphic.ax.set_xlabel('Distance', rotation=0, size=FontSize + 2)
        CBar_Graphic.ax.tick_params(top='off',
                                    bottom='on',
                                    left='off',
                                    right='off',
                                    labeltop='off',
                                    labelbottom='on',
                                    labelleft='off',
                                    labelright='off',
                                    direction='out')
        # Save the plot to file
        plt.savefig(HeatmapWithDendrogramFile, format="pdf")
    if VirusGrouping == True:
        # ---- Virus grouping ----------------------------------------------------
        print "- Virus grouping"
        # Imported here rather than at module level; OrderedSet is not used in
        # the code below.
        from GRAViTy.Utilities.OrderedSet import OrderedSet
        from GRAViTy.Utilities.VirusGrouping_Estimator import VirusGrouping_Estimator
        (VirusGroupingList, OptDistance_Cutoff, CorrelationScore,
         Theils_u_TaxoGroupingListGivenPred,
         Theils_u_PredGivenTaxoGroupingList) = VirusGrouping_Estimator(
             DistMat, Dendrogram_LinkageMethod, TaxoGroupingList)
        # Tab-separated table, one row per virus; sequence IDs of a virus are
        # joined with ", ".
        np.savetxt(
            fname=VirusGroupingFile,
            X=np.column_stack((
                map(", ".join, SeqIDLists),
                FamilyList,
                GenusList,
                VirusNameList,
                TaxoGroupingList,
                VirusGroupingList,
            )),
            fmt='%s',
            delimiter="\t",
            header=
            "Sequence identifier\tFamily\tGenus\tVirus name\tClass\tGrouping")
        # Summary statistics are appended after the table.
        with open(VirusGroupingFile, "a") as VirusGrouping_txt:
            VirusGrouping_txt.write(
                "\n" + "Distance cut off: %s\n" % OptDistance_Cutoff +
                "Theil's uncertainty correlation for the reference assignments given the predicted grouping U(Ref|Pred): %s\n"
                % Theils_u_TaxoGroupingListGivenPred +
                "Theil's uncertainty correlation for the predicted grouping given the reference assignments U(Pred|Ref): %s\n"
                % Theils_u_PredGivenTaxoGroupingList +
                "Symmetrical Theil's uncertainty correlation between the reference assignments and the predicted grouping U(Ref, Pred): %s\n"
                % CorrelationScore +
                "U(X|Y) == 1 means that knowing Y implies a perfect knowledge of X, but not vice-versa\n"
                +
                "U(X,Y) == 1 means that knowing Y implies a perfect knowledge of X and vice-versa\n"
            )
def test_int_labels(self):
    """Leaf labels that look like integers must come back as strings."""
    newick_text = "(((0:0.1,1:0.1)0.99:0.1,2:0.1)0.98:0.0);"
    parsed = Phylo.read(StringIO(newick_text), "newick")
    leaf_names = set()
    for terminal in parsed.get_terminals():
        leaf_names.add(terminal.name)
    self.assertEqual(leaf_names, {"0", "1", "2"})
def test_unicode_exception(self):
    """A Newick file that starts with a UTF-8 byte order mark still parses."""
    with open(EX_NEWICK_BOM, encoding="utf-8") as bom_file:
        parsed = Phylo.read(bom_file, "newick")
    leaf_count = len(parsed.get_terminals())
    self.assertEqual(leaf_count, 3)
def test_convert_phyloxml_binary(self):
    """Writing phyloxml to a handle opened in binary mode raises TypeError."""
    example_trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
    with tempfile.NamedTemporaryFile(mode="wb") as binary_handle:
        with self.assertRaises(TypeError):
            Phylo.write(example_trees, binary_handle, "phyloxml")
def test_newick_read_single1(self):
    """Parse the first single-tree Newick example and check its leaf count."""
    terminals = Phylo.read(EX_NEWICK, "newick").get_terminals()
    leaf_count = len(terminals)
    self.assertEqual(leaf_count, 28)
def get_newick_tree_str(filename):
    """Return the first tree in a Newick file, re-serialised as Newick text.

    Parameters:
       filename: path (or handle) accepted by ``Phylo.parse``.

    Returns:
       str: the Newick text that ``Phylo.write`` produces for the first tree.

    Raises:
       StopIteration: if the file contains no tree.
    """
    # Use the built-in next(): generator objects have no .next() method on
    # Python 3, and the io.StringIO buffer below already targets Python 3.
    first_tree = next(Phylo.parse(filename, 'newick'))
    output = io.StringIO()
    Phylo.write(first_tree, output, 'newick')
    return output.getvalue()
def test_convert_phyloxml_text(self):
    """Writing the phyloxml examples to a text-mode handle reports 13 trees."""
    example_trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
    with tempfile.NamedTemporaryFile(mode="w") as text_handle:
        written = Phylo.write(example_trees, text_handle, "phyloxml")
        self.assertEqual(13, written)
type=int, default=100, help='Resolution for numerical solution of ODE.') parser.add_argument('-delimiter', default=None, help='Field separator for node names in tree.') parser.add_argument('-position', type=int, default=-1, help='Python index of field with tip date.') args = parser.parse_args() # open and parse tree try: tree = Phylo.read(args.tree, 'newick') except: print 'ERROR: Failed to parse tree from file', args.tree raise tree_height = max(tree.depths().values()) tips = tree.get_terminals() ntips = len(tips) if args.delimiter is None: tip_heights = [0.] * ntips else: maxdate = 0 tipdates = [] for tip in tips: try:
print('Save alignment in PHYLIP formats') fn_ali_trim = '/tmp/largest_clone_ali_trim.phy' AlignIO.write(ali_unique, fn_ali_trim, 'phylip') print('Infer tree') res = sp.run( 'phyml -i {:} -d nt'.format(fn_ali_trim), shell=True, stdout=sp.PIPE, ) print('Rename and bush up leaves') fn_tree = '/tmp/largest_clone_ali_trim.phy_phyml_tree.txt' from Bio import Phylo tree = Phylo.read('/tmp/largest_clone_ali_trim.phy_phyml_tree.txt', format='newick') leaves = tree.get_terminals() for leaf in leaves: lid = seqs_unique[int(leaf.name.split('_')[-1]) - 1] if len(seqs_red[lid]) == 1: leaf.name = seqs_red[lid][0] else: # Grow a hanging subtree for lid in seqs_red[lid]: leaf.clades.append( leaf.__class__( branch_length=1e-9, name=lid, )) tree.root.name = 'root'
geneID, geneFct = [], [] outfile1 = open("single_origin.txt", "w") outfile2 = open("double_origin.txt", "w") for line in IDfile: line = line.rstrip() geneID.append(line.split('\t')[0]) geneFct.append(line.split('\t')[1]) for i in os.listdir(inDIR): if 'best' not in i: continue mynode, mynode1, mynode2 = False, False, False infile = open(inDIR + i, "r") line = infile.readline().rstrip() handle = StringIO(line) tree = Phylo.read(handle, "newick") for node in tree.get_nonterminals(): subnodes = set([x.name for x in node.get_terminals()]) if subnodes == campan1: mynode1 = True elif subnodes == campan2: mynode2 = True elif subnodes == campan: mynode = True if mynode == True: for x in xrange(len(geneID)): if geneID[x] in i: outfile1.write("%s\t%s\t%s\n" % (geneID[x], geneFct[x], line))
def newick2phylo(self, nwk):
    """Parse the Newick string *nwk* and return the resulting Phylo tree."""
    return Phylo.read(StringIO(nwk), 'newick')
#!/usr/bin/env python import sys from Bio import Phylo Phylo.convert(sys.argv[1], 'newick', sys.stdout, 'nexus')
def phylo_from_str(nwk):
    """Parse a Newick string and return the tree that ``Phylo.read`` builds.

    The text is written into an in-memory buffer, which is then rewound so
    the parser reads it from the start.
    """
    buffer = StringIO()
    buffer.write(nwk)
    buffer.seek(0)
    parsed_tree = Phylo.read(buffer, "newick")
    return parsed_tree
def build_tree_fasttree(filename_or_ali, rootname=None, VERBOSE=0): '''Build phylogenetic tree using FastTree Parameters: filename_or_ali: filename of a FASTA multiple sequence alignment, or a Biopython alignment itself rootname (str): name of the leaf that should be the new root (outgroup) VERBOSE (int): verbosity level ''' import os import subprocess as sp import StringIO from Bio import Phylo import numpy as np from ..filenames import fasttree_bin if isinstance(filename_or_ali, basestring): filename = filename_or_ali else: from Bio import AlignIO ali = filename_or_ali tmp_folder = os.getenv('HOME') + '/tmp/' filename = tmp_folder + 'tmp_fasttree_' + str( np.random.randint(1000000000)) + '.fasta' AlignIO.write(ali, filename, 'fasta') try: if VERBOSE >= 3: output = sp.check_output([fasttree_bin, '-nt', filename]) else: output = sp.check_output([fasttree_bin, '-nt', filename], stderr=sp.STDOUT) tree_string = output.split('\n')[-2] tree = Phylo.read(StringIO.StringIO(tree_string), 'newick') tree.root.branch_length = 0.001 if rootname is not None: if VERBOSE >= 2: print 'Reroot' for leaf in tree.get_terminals(): if leaf.name == rootname: root = leaf break else: raise ValueError('Initial reference not found in tree') tree.root_with_outgroup(leaf) finally: if filename_or_ali != filename: os.remove(filename) # NOTE: nice fasttree trims sequence names at the first bracket, restore them if VERBOSE >= 2: print 'Check leaf labels integrity' if filename_or_ali == filename: from Bio import AlignIO ali = AlignIO.read(filename_or_ali, 'fasta') else: ali = filename_or_ali seq_names = set(seq.name for seq in ali) leaves_miss = set() for leaf in tree.get_terminals(): if leaf.name in seq_names: seq_names.remove(leaf.name) else: leaves_miss.add(leaf) if len(leaves_miss): if VERBOSE >= 2: print 'Correcting leaf integrity' for leaf in leaves_miss: for name in seq_names: if name.split('(')[0] == leaf.name: leaf.name = name seq_names.remove(name) break else: print 'Leaf has unexpected 
(truncated?) name:', leaf.name return tree
def load_tree(tree_path: str, patient_zero: str):
    """Read the first Newick tree in *tree_path* and return its coordinates.

    The tree is rerooted with `patient_zero` (a user-specified reference
    name) as outgroup before being handed to ``get_tree_coords``.
    """
    newick_trees = Phylo.parse(tree_path, 'newick')
    first_tree = next(newick_trees)
    first_tree.root_with_outgroup(patient_zero)
    coords = get_tree_coords(first_tree)
    return coords