def run():
    """Draw every Newick tree found in the input files.

    One tree is read per input line (files named on the command line, or
    standard input via ``fileinput``).  Each leaf gets a text face holding
    the attribute named by ``--label``; when ``--attribute`` is given, each
    leaf additionally gets a coloured sphere keyed on that attribute's value.

    With ``--output`` the tree is rendered to a file (numbered per tree when
    ``--multiple`` is set); otherwise it is shown interactively.  Unless
    ``--multiple`` is set, only the first tree is processed.

    Returns:
        0 in all cases.
    """
    # Parse options
    parser = optparse.OptionParser(__doc__)
    parser.add_option('-a', '--attribute', dest="attribute", default=None)
    parser.add_option('-d', '--dpi', type="int", default=None)
    parser.add_option('-H', '--height', type="int", dest="h", default=None)
    parser.add_option('-l', '--label', default="name")
    parser.add_option('-m', '--multiple', default=False, action="store_true")
    parser.add_option('-o', '--output', default=None)
    parser.add_option('-u', '--units', default="px")
    parser.add_option('-w', '--width', type="int", dest="w", default=None)
    options, files = parser.parse_args()

    # Setup TreeStyle
    tree_style = ete2.TreeStyle()
    tree_style.show_scale = False
    tree_style.show_branch_support = True

    # Size options are forwarded to render() only when a height or a width
    # was requested; they do not depend on the tree, so build them once.
    render_kwargs = {}
    if options.output and (options.h or options.w):
        for opt_name in ("h", "w", "units", "dpi"):
            opt_value = getattr(options, opt_name)
            if opt_value:
                render_kwargs[opt_name] = opt_value

    # Read trees
    for index, newick_line in enumerate(fileinput.input(files), 1):
        tree = ete2.Tree(newick_line)

        # Add faces
        if options.attribute:
            attr_values = set(
                [getattr(leaf, options.attribute)
                 for leaf in tree.get_leaves()])
            palette = get_colour_set(len(attr_values))
            colour_of = dict(zip(attr_values, palette))
            for leaf in tree.iter_leaves():
                sphere = ete2.CircleFace(
                    radius=10,
                    color=colour_of[getattr(leaf, options.attribute)],
                    style="sphere")
                leaf.add_face(sphere, 0)

        for leaf in tree.iter_leaves():
            leaf.add_face(ete2.TextFace(getattr(leaf, options.label)), 1)

        # Plot or save
        if not options.output:
            tree.show(ultrametric, tree_style=tree_style)
        else:
            if options.multiple:
                stem, suffix = os.path.splitext(options.output)
                target = stem + ("_%06d" % index) + suffix
            else:
                target = options.output
            tree.render(target, ultrametric, tree_style=tree_style,
                        **render_kwargs)

        if not options.multiple:
            return 0

    return 0
def draw_ete2_tree(organism, snplist, tree_file_name, config, c):
    '''Draws a phylogenetic tree using ETE2

    Keyword arguments:
    organism -- the organism of which to make a tree
    snplist -- a sequence of SNP records; index 0 is compared with the node
               name, index 1 with index 3, and index 3 with "-" (gap)
    tree_file_name -- path of the rendered output file
    config -- mapping; config["dev"] enables a debug print of the file name
    c -- passed through to tree_to_newick (presumably a database cursor;
         verify against caller)

    '''
    newick = tree_to_newick(organism, config, c)
    tree = ete2.Tree(newick, format=1)
    farthest_leaf = tree.get_farthest_leaf()[0]
    tree_depth = int(tree.get_distance(farthest_leaf))

    # Default look: red node, solid black branch lines
    default_look = (
        ("fgcolor", "#BE0508"),
        ("size", 10),
        ("vt_line_color", "#000000"),
        ("hz_line_color", "#000000"),
        ("vt_line_type", 0),
        ("hz_line_type", 0),
        ("vt_line_width", 2),
        ("hz_line_width", 2),
    )
    # SNP is Derived in snplist: larger green node
    derived_look = (
        ("fgcolor", "#99FF66"),
        ("size", 15),
        ("vt_line_color", "#000000"),
        ("hz_line_color", "#000000"),
        ("vt_line_type", 0),
        ("hz_line_type", 0),
    )
    # SNP is missing due to a gap: grey node and grey branch lines
    gap_look = (
        ("fgcolor", "#DDDDDD"),
        ("size", 10),
        ("vt_line_color", "#DDDDDD"),
        ("hz_line_color", "#DDDDDD"),
        ("vt_line_type", 1),
        ("hz_line_type", 1),
    )

    for node in tree.traverse():
        node_style = ete2.NodeStyle()
        for key, value in default_look:
            node_style[key] = value

        for snp in snplist:
            if node.name != snp[0]:
                continue
            if snp[1] == snp[3]:
                overrides = derived_look
            elif snp[3] == "-":
                overrides = gap_look
            else:
                continue
            for key, value in overrides:
                node_style[key] = value

        node.set_style(node_style)

    tree_style = ete2.TreeStyle()
    # Leaf names are not printed here, they are added in the layout
    tree_style.show_leaf_name = False
    tree_style.show_scale = False  # Do not show the scale
    tree_style.layout_fn = CanSNPer_tree_layout  # Use the custom layout
    # Fully expand the branches of the tree
    tree_style.optimal_scale_level = 'full'

    if config["dev"]:
        print("#[DEV] Tree file: %s" % tree_file_name)
    tree.render(tree_file_name, tree_style=tree_style, w=tree_depth * 500)
def render_tree(self):
    """Build an ete2 tree from this object and open it in the viewer.

    The Newick text comes from ``make_newick(self)`` with a terminating
    ``;`` appended and is parsed with ete2 ``format=8``.  The parsed tree is
    stored on ``self.newick`` and shown with the tree style rotated by 90.
    """
    newick_text = make_newick(self) + ';'
    self.newick = ete2.Tree(newick_text, format=8)

    tree_style = ete2.TreeStyle()
    tree_style.rotation = 90

    self.newick.show(tree_style=tree_style)
def save_tree_to_file(self, filepath):
    """Build an ete2 tree from this object and render it to ``filepath``.

    The Newick text comes from ``make_newick(self)`` with a terminating
    ``;`` appended and is parsed with ete2 ``format=1``.  The parsed tree is
    stored on ``self.newick`` and rendered 500 units wide with the tree
    style rotated by 90.

    filepath -- destination passed straight to ``Tree.render``.
    """
    newick_text = make_newick(self) + ';'
    self.newick = ete2.Tree(newick_text, format=1)

    tree_style = ete2.TreeStyle()
    tree_style.rotation = 90

    self.newick.render(filepath, w=500, tree_style=tree_style)
def _make_tree_figure(self, tree, fig, colors, orders, root_name, scale=None,
                      branch_vert_margin=None, fontsize=12, show_names=True,
                      name_field='seq_id', rename_function=None,
                      color_node_labels=False, label_colors=None,
                      tree_orientation=0, min_order_fraction=0.1,
                      show_root_name=False, chain=None, linked_alignment=None,
                      alignment_fontsize=11, alignment_height=50,
                      alignment_width=50, compact_alignment=False,
                      scale_factor=1, linewidth=1, show_scale=False):
    """Style a tree with ete2 and render it to ``fig``.

    Args:
        tree: Newick input handed to ``ete2.Tree`` / ``ete2.PhyloTree``.
        fig: Output path handed to ``Tree.render``.
        colors: Mapping from order (when ``orders`` is given) or from node
            name (otherwise) to a branch colour.
        orders: Optional mapping from leaf name to a sortable order.  A
            node takes the colour of the lowest order whose share of the
            node's leaves is at least ``min_order_fraction``.
        root_name: Name of the node used as outgroup.
        scale, branch_vert_margin, tree_orientation, show_scale: Forwarded
            to the ``TreeStyle`` (scale and margin only when not None).
        fontsize: Font size of the node labels.
        show_names: ``True`` (label every sequence of the selected chain in
            ``self.pairs``), ``False`` (no labels) or a collection of node
            names to label.
        name_field: Key read from each pair's heavy/light record when
            ``show_names`` is ``True``.
        rename_function: Optional callable applied to a node name before it
            is displayed.
        color_node_labels, label_colors: Control label colouring;
            ``label_colors`` may be a dict (name -> colour) or a list/tuple
            of names that take their branch colour.
        show_root_name: Also label the root node.
        chain: ``'heavy'`` selects heavy chains; anything else selects
            light chains.
        linked_alignment: Optional FASTA alignment; when given a
            ``PhyloTree`` is built and alignment features are attached to
            every node.
        alignment_fontsize, alignment_height, alignment_width,
        scale_factor: Stored as node features when ``linked_alignment`` is
            given.
        compact_alignment: Accepted for interface compatibility; not used
            in this function.
        linewidth: Branch line width.
    """
    default_color = '#000000'

    if show_names is True:
        if chain == 'heavy':
            show_names = [p.heavy[name_field] for p in self.pairs
                          if p.heavy is not None]
        else:
            show_names = [p.light[name_field] for p in self.pairs
                          if p.light is not None]
    elif show_names is False:
        show_names = []
    if show_root_name is True:
        # Build a new list instead of appending in place so that a list
        # supplied by the caller is never modified.
        show_names = list(show_names) + [root_name]

    if linked_alignment is not None:
        t = ete2.PhyloTree(tree, alignment=linked_alignment,
                           alg_format='fasta')
        ete2.faces.SequenceItem = MySequenceItem
    else:
        t = ete2.Tree(tree)
    t.set_outgroup(t & root_name)

    # style the nodes
    for node in t.traverse():
        if orders is not None:
            leaves = node.get_leaf_names()
            order_count = Counter([orders[l] for l in leaves])
            # Fall back to the default colour when no order reaches the
            # threshold; otherwise the colour of the previously visited
            # node would leak into this one (or be unbound on the first).
            color = default_color
            for order in sorted(order_count):
                fraction = float(order_count[order]) / len(leaves)
                if fraction >= min_order_fraction:
                    color = colors[order]
                    break
        else:
            color = colors.get(node.name, default_color)

        if linked_alignment is not None:
            node.add_feature('aln_fontsize', alignment_fontsize)
            node.add_feature('aln_height', alignment_height)
            node.add_feature('aln_width', alignment_width)
            node.add_feature('fontsize', fontsize)
            node.add_feature('format', 'seq')
            node.add_feature('scale_factor', scale_factor)

        style = ete2.NodeStyle()
        style['size'] = 0
        style['vt_line_width'] = float(linewidth)
        style['hz_line_width'] = float(linewidth)
        style['vt_line_color'] = color
        style['hz_line_color'] = color
        style['vt_line_type'] = 0
        style['hz_line_type'] = 0

        if node.name in show_names:
            node_color = default_color
            if color_node_labels:
                if label_colors is None:
                    node_color = color
                elif isinstance(label_colors, dict):
                    node_color = label_colors.get(node.name, default_color)
                elif isinstance(label_colors, (list, tuple)):
                    if node.name in label_colors:
                        node_color = color
            if rename_function is None:
                node_name = node.name
            else:
                node_name = rename_function(node.name)
            tf = ete2.TextFace(node_name, fsize=fontsize, fgcolor=node_color)
            node.add_face(tf, column=0)

        node.set_style(style)

    t.dist = 0
    ts = ete2.TreeStyle()
    if linked_alignment is not None:
        ts.layout_fn = self._phyloalignment_layout_function
    ts.orientation = tree_orientation
    ts.show_leaf_name = False
    if scale is not None:
        ts.scale = int(scale)
    if branch_vert_margin is not None:
        ts.branch_vertical_margin = float(branch_vert_margin)
    ts.show_scale = show_scale

    # ladderize
    t.ladderize()

    # render the tree
    t.render(fig, tree_style=ts)
def tree_draw(tree_file, tree_name=None, order_vector_file=None,
              cell_colors_file=None, clustering_colors_file=None,
              clustering_sizes_file=None, intermediate_node_sizes_file=None,
              intermediate_node_labels_file=None, leaf_labels_file=None,
              legend_file=None, duplicate_file=None, tree_scale='linear',
              tree_rotation=True, font_size=7, font_legend=7, node_size=3,
              scale_rate=None, distance_factor=1, y_scale=False):
    """Load a Newick tree and decorate it for drawing with ete2.

    Args:
        tree_file: Newick input handed to ``ete2.Tree`` (``format=1``).
        tree_name: Optional title placed above the tree.
        order_vector_file: Optional file read by ``utils.get_leaf_order``;
            used to swap the two children of a node so leaves follow the
            given order.
        cell_colors_file: Optional file read by ``utils.get_cells_colors``
            (node name -> colour).
        clustering_colors_file: Optional file handed to
            ``color_clustering``.
        clustering_sizes_file: Optional file handed to ``size_clustering``.
        intermediate_node_sizes_file: Optional file read by
            ``utils.get_bootsrtap_size`` (node name -> node size).
        intermediate_node_labels_file: Accepted for interface
            compatibility; not used in this function.
        leaf_labels_file: Optional file read by ``utils.get_cells_labels``
            (node name -> label); replaces the default leaf names.
        legend_file: Optional file read by ``utils.get_legend``
            (label -> colour).
        duplicate_file: Optional file read by ``utils.get_dup_labels``
            (node name -> colour of the ``*`` marker).
        tree_scale: ``'linear'`` or ``'log'`` branch-length transform.
        tree_rotation: Rotate the drawing by 90 when true.
        font_size: Font size of the replacement leaf labels.
        font_legend: Font size of the legend text.
        node_size: Size of leaf nodes.
        scale_rate: Multiplier applied to the transformed branch lengths;
            derived from the largest branch length when falsy.
        distance_factor: Multiplier applied to root distances before the
            log transform.
        y_scale: Fill in ``ts.y_axis`` scale information when true.

    Returns:
        ``(t, ts)``: the decorated tree and its ``TreeStyle``.
    """
    t = ete2.Tree(newick=tree_file, format=1)
    ts = ete2.TreeStyle()
    if tree_rotation:
        ts.rotation = 90
    ts.show_leaf_name = True
    ts.show_scale = False
    ts.scale = 1
    if tree_name:
        ts.title.add_face(ete2.TextFace(tree_name, fsize=20), column=0)

    styles = {}
    max_dist = 0

    # initialize all nodes and branches
    # NOTE: styles are keyed by node name, so nodes sharing a name share
    # one style entry.
    for n in t.traverse():
        styles[n.name] = dict()
        styles[n.name]['style'] = ete2.NodeStyle()
        styles[n.name]['style']['fgcolor'] = 'black'
        max_dist = max(max_dist, n.dist)

    # calculate the scale for the tree (log, linear and right size)
    if tree_scale == 'log':
        max_dist = 0

    root = t.get_tree_root()
    # NOTE(review): stock ete2 returns a (node, distance) tuple from
    # get_farthest_leaf(); the attribute access below assumes a node-like
    # return value -- confirm against the ete2 build in use.
    last_leaf = root.get_farthest_leaf()
    ts.y_axis['scale_min_value'] = root.dist
    ts.y_axis['scale_max_value'] = last_leaf.dist

    for n in t.traverse():
        if tree_scale == 'log':
            if n == root:
                styles[n.name]['dist'] = 0
            else:
                father_path = 0
                for ancestor in n.get_ancestors():
                    father_path += styles[ancestor.name]['dist']

                dist = math.log10(
                    n.get_distance(root) * distance_factor + 1) - father_path
                if dist < 0:
                    dist = 0
                styles[n.name]['dist'] = dist
                max_dist = max(max_dist, dist)

        elif tree_scale == 'linear':
            if max_dist > 1:
                styles[n.name]['dist'] = round(n.dist / max_dist)
            else:
                styles[n.name]['dist'] = n.dist

    # leaf styles and update distance
    if not scale_rate:
        if max_dist > 0:
            scale_rate = max(1000, round(1 / max_dist))
        else:
            # Every branch length is zero: avoid dividing by zero and use
            # the lower bound of the computed rate.
            scale_rate = 1000
    for n in t.traverse():
        if 'dist' in styles[n.name]:
            n.dist = styles[n.name]['dist'] * scale_rate
        if not n.is_leaf():
            styles[n.name]['style']["size"] = 0
        else:
            styles[n.name]['style']["size"] = node_size

    # add bootstrap values to the branches (size of the node)
    if intermediate_node_sizes_file:
        bootsrtap_sizes = utils.get_bootsrtap_size(
            intermediate_node_sizes_file)
        for branch, size in bootsrtap_sizes.iteritems():
            styles[branch]['style']["size"] = size
            styles[branch]['style']['fgcolor'] = 'black'

    # add colors to the leafs
    # Bound unconditionally because the leaf-label section below consults
    # it even when no colour file was supplied.
    cells_colors = {}
    if cell_colors_file:
        cells_colors = utils.get_cells_colors(cell_colors_file)
        for name, color in cells_colors.iteritems():
            styles[name]['style']['fgcolor'] = color

    # reorder the tree by pre-proses if possible
    if order_vector_file:
        leaf_order = utils.get_leaf_order(order_vector_file)
        for n in t.traverse('postorder'):
            if n.get_descendants():
                # a: first leaf in postorder; b: last node in preorder
                a = ''
                for leaf in n.get_descendants(strategy='postorder'):
                    if leaf.is_leaf():
                        if not a:
                            a = leaf
                b = n.get_descendants(strategy='preorder')[-1]

                if a.is_leaf() and b.is_leaf():
                    if leaf_order[a.name] > leaf_order[b.name]:
                        left, right = n.children
                        n.children = [right, left]

    # add width to branches
    if clustering_sizes_file:
        t, styles = size_clustering(t, styles, clustering_sizes_file)

    # add colors to branches
    if clustering_colors_file:
        t, ts, styles = color_clustering(t, ts, styles,
                                         clustering_colors_file)

    # add new leaf labels
    if leaf_labels_file:
        cells_labels = utils.get_cells_labels(leaf_labels_file)
        ts.show_leaf_name = False
        for name, label in cells_labels.iteritems():
            nodes = t.search_nodes(name=name)
            assert len(nodes) == 1
            node = nodes[0]
            if name in cells_colors:
                name_face = ete2.faces.TextFace(cells_labels[name],
                                                fsize=font_size,
                                                fgcolor=cells_colors[name])
            else:
                name_face = ete2.faces.TextFace(cells_labels[name],
                                                fsize=font_size)
            name_face.margin_left = 3
            node.add_face(name_face, column=0)

    # add duplicate tags to nodes
    if duplicate_file:
        dup_labels = utils.get_dup_labels(duplicate_file)
        for name, color in dup_labels.iteritems():
            node = node_check(name, t)
            if not node:
                continue
            dup_face = ete2.faces.TextFace('*', fsize=10, fgcolor=color)
            dup_face.margin_left = 5
            node.add_face(dup_face, column=1)

    # add legend to the tree
    if legend_file:
        legend = utils.get_legend(legend_file)
        for mark in legend.keys():
            ts.legend.add_face(ete2.faces.CircleFace(2, legend[mark]),
                               column=0)
            legend_txt = ete2.faces.TextFace(mark, fsize=font_legend)
            legend_txt.margin_left = 5
            ts.legend.add_face(legend_txt, column=1)
        ts.legend_position = 4

    # add y-scale to the picture
    if y_scale:
        ts.y_axis['scale_type'] = tree_scale
        ts.y_axis['scale_length'] = last_leaf.dist - root.dist

    # set all the styles
    for n in t.traverse():
        if n.name == 'IDroot':
            n.dist = 0
            n.delete()
        if n.is_root():
            n.dist = 0
            n.delete()
        n.set_style(styles[n.name]['style'])

    root = ete2.faces.CircleFace(2, 'white')
    root.border.width = 1
    root.border.color = 'black'
    t.add_face(root, column=0, position='float')

    # t.render("%%inline", tree_style=ts)
    return t, ts
def main():
    """Run the ortholog clustering and phylogeny pipeline.

    Steps, in order:
      1. Parse the command line and the per-species gene list (one
         tab-separated line per species: species name, then its genes).
      2. Concatenate the peptide, nucleotide and pairwise-ortholog inputs
         with ``concat_files``.
      3. Cluster the orthologs with ``mcl`` (skipped when
         ``ortholog_markov_clusters.txt`` already exists).
      4. For every cluster holding exactly one gene from each species,
         write ``GeneSet<N>.nucf``.
      5. Run ``process_geneset`` on each gene set and concatenate the
         successful alignments into ``CONCAT_align_{pep,nuc}.fasta`` plus
         matching partition files.
      6. Build a tree with ``runFastTreeMP``, root it on ``--outgroup`` and
         write ``CONCAT_align_nuc.nwk`` / ``Phylogeny_rooted.pdf``.
      7. Write PartitionFinder config files and convert the FASTA files to
         phylip.

    All output files are written to the current working directory.  The
    process exits with status 1 when called without arguments, 2 on a
    usage error, and with a message when MCL returns a non-zero code.
    """

    class MyParser(argparse.ArgumentParser):
        """ArgumentParser that prints the full help text on a usage error."""

        def error(self, message):
            sys.stderr.write('[ERROR]: error: %s\n' % message)
            self.print_help()
            sys.exit(2)

    def emit(prefix, message):
        """Write ``prefix`` + current timestamp + ``message`` to stdout."""
        sys.stdout.write(prefix + TIMESTAMP(time.time()) + message)

    # parse command line
    argparser = MyParser()
    argparser.usage = '------------\n%(prog)s -l [FILE] -g [PATH] -p [PATH] -n [PATH] -t [THREADS] --noFilter/--Filter'
    argparser.description = 'Accepts MultiMSOAR2 output and all_vs_all blast to classify genes into summarized orthogroups and supergroups and gene duplications and birth.'
    argparser.epilog = '--------------'
    argparser.add_argument(
        "-l", required=True, metavar='FILE', dest='genelist',
        help="File with list of all genes in each species - format is 1 line per species")
    argparser.add_argument(
        "-g", required=True, metavar='PATH', dest='ORGPATH',
        help="Path to all pairwise ortholog files {WKDIR/MultiMSOAR_inputs")
    argparser.add_argument(
        "-p", required=True, metavar='PATH', dest='PEPPATH',
        help="Path to all peptide files {WKDIR")
    argparser.add_argument(
        "-n", required=True, metavar='PATH', dest='NUCPATH',
        help="Path to all nucleotide files {WKDIR")
    argparser.add_argument(
        "-t", default=1, metavar='NUM', dest='threads',
        help="Number of threads to run on")
    argparser.add_argument(
        "--noFilter", action='store_false', default=False, dest='filter',
        help="Set to switch off dynamic filtering of ortholog groups")
    # The help text used to be a copy of --noFilter's; this flag turns the
    # filter ON.
    argparser.add_argument(
        "--Filter", action='store_true', default=False, dest='filter',
        help="Set to switch on dynamic filtering of ortholog groups")
    # NOTE(review): this help text looks copied from --noFilter, and
    # args.PF is not read anywhere in this function -- confirm intent.
    argparser.add_argument(
        "--PF-prep", action='store_true', default=False, dest='PF',
        help="Set to switch off dynamic filtering of ortholog groups")
    argparser.add_argument(
        "--outgroup", required=True, metavar='PATH', dest='outgroup',
        help="Comma separated list of outgroups . eg. SP1 if 1 species\nOR\nSP1,SP2 for more than 1 outgroup")

    if len(sys.argv) == 1:
        argparser.print_help()
        sys.exit(1)
    args = argparser.parse_args()
    threads = int(args.threads)

    PEPFILES = glob.glob(args.PEPPATH + "/*.pep")
    NUCFILES = glob.glob(args.NUCPATH + "/*.nuc")
    ORGFILES = glob.glob(args.ORGPATH + "/S*_S?*")

    ########################################################
    # Parse the list of genes in each species
    with open(args.genelist) as gene_list:
        gene_lines = gene_list.readlines()
    print(ODIR)
    spp = []
    gene_dict = {}  # DICT{GENE:SPP}
    for raw_line in gene_lines:
        # Drop the line terminator first: otherwise the last gene of every
        # line is stored as "gene\n" and never matches the stripped names
        # read from the MCL output below.
        record = raw_line.rstrip("\r\n")
        if not record:
            # A blank line would otherwise register an empty species name.
            continue
        fields = record.split("\t")
        species = fields[0]
        spp.append(species)
        for gene_rec in fields[1:]:
            gene_dict[gene_rec] = species

    sys.stdout.write("#\ntotal number of species catalogued:" + str(len(spp)))
    sys.stdout.write("#\nTotal genes in gene_dict =" + str(len(gene_dict)) +
                     "\n")

    NUMSPP = len(spp)
    # Protein list (the return value is not needed in this function)
    concat_files(PEPFILES, "all_prots.fa", type="fasta")

    # Nucleotide list
    NUCS = concat_files(NUCFILES, "all_nucs.fa", type="fasta")
    #remove_files(["all_nucs.fa","all_prots.fa"])

    ########################################################
    # Create the MCL clusters
    OGF = concat_files(ORGFILES, 'orthologs_cat.txt', type="orthologs")

    # MCL command:
    emit("[LOG:] ",
         ": Completed concatenating %d pairwise orthologs. Proceeding to run MCL\n" % OGF)
    command = "mcl orthologs_cat.txt --abc -te %d -I 2.0 -o ortholog_markov_clusters.txt" % threads
    if os.path.isfile('ortholog_markov_clusters.txt'):
        emit("[LOG:] ", ": MCL was run earlier\n")
    else:
        retcode_MCL = runCMD(command)
        if retcode_MCL:
            sys.stderr.write('[ERROR]: MCL did not return 0')
            sys.exit('Something went wrong while running MCL')
        else:
            emit("[LOG:] ",
                 ": Completed MCL run - ortholog_markov_clusters.txt\n")

    ########################################################
    # Parse the MCL clusters
    sys.stdout.write("Opening MCL file: ortholog_markov_clusters.txt\n")
    ORX = 0  # clusters with exactly one gene from every species
    PMX = 0  # clusters whose size equals the number of species
    if args.filter:
        emit("[LOG:] ",
             ": dynamic ortholog length distribution filter switched ON.\n")
    else:
        emit("[WARNING:] ",
             ": dynamic ortholog length distribution filter switched OFF. Only filtering by species participation\n")

    with open("ortholog_markov_clusters.txt") as MCL_FILE:
        for line in MCL_FILE:
            listor = line.strip().split("\t")
            if len(listor) != NUMSPP:
                continue
            # species -> gene; duplicate species collapse to one entry,
            # which makes the size check below fail for such clusters.
            Dlistor = {}
            for gene in listor:
                Dlistor[gene_dict[gene]] = gene
            PMX += 1
            if len(Dlistor) != NUMSPP:
                continue
            ORX += 1
            # NOTE(review): the second number is PMX (all size-matched
            # clusters), not PMX - ORX -- confirm what "failed" should mean.
            emit("\r[LOG:] ",
                 ":%d clusters passed length filters - and %d failed" %
                 (ORX, PMX))
            sys.stdout.flush()
            with open("GeneSet" + str(ORX) + ".nucf", 'w') as OUTN:
                for sx in spp:
                    gx = Dlistor[sx]
                    OUTN.write('>' + sx + " " + gx + "\n" + NUCS[gx] + "\n")
    sys.stdout.write("\n")
    emit("[LOG:] ",
         ": Number of markov clusters passing species number filter - %d | %d FAILED \n" %
         (ORX, PMX))

    ########################################################
    # Align every one of the [ORX] number of alignments and pick the
    # successes
    emit("[LOG:] ", ":Starting processing of successful clusters\n")
    CODON_ALN_NUC = {}
    CODON_ALN_PEP = {}
    SUCCESSES = 0
    FAILS = 0
    sys.stdout.write("\n")
    #serial implementation
    for IDX in range(1, ORX + 1):
        status = process_geneset(IDX, CODON_ALN_NUC, CODON_ALN_PEP, threads)
        if status == "failed":
            FAILS += 1
        elif status == "success":
            SUCCESSES += 1
        sys.stdout.write(
            "\r[CODON ALIGNMENT]: %s clusters successful || %s failed " %
            (SUCCESSES, FAILS))
        sys.stdout.flush()
    sys.stdout.write("\n")
    emit("[LOG:] ",
         ":Finished processing all clusters. %s clusters produced successful alignments and %s failed \n" %
         (SUCCESSES, FAILS))
    emit("[LOG:] ",
         ":Concatenating nucleotide and protein alignments for successful clusters\n")

    ########################################################
    # Combine all successful alignments
    successful_sets = set(CODON_ALN_NUC.keys())
    CAT_P = {}
    CAT_N = {}
    partitions = []
    partitionsN = []
    NOS = 0
    PARTBREAK = 1
    PARTBREAKN = 1
    for sp in spp:
        CAT_P[sp] = ''
        CAT_N[sp] = ''
    for sets in successful_sets:
        emit("\r[[LOG:] ", ": processing ID %s" % sets)
        sys.stdout.flush()
        DICP = CODON_ALN_PEP[sets]
        DICN = CODON_ALN_NUC[sets]
        for sp in spp:
            CAT_P[sp] += DICP[sp]
            CAT_N[sp] += DICN[sp]
            # Lengths after appending; the values from the last species
            # are the ones used for the partition boundaries below.
            lenp = len(CAT_P[sp])
            lenN = len(CAT_N[sp])
        NOS = NOS + 1
        partition = "Gene%d = %d-%d;" % (NOS, PARTBREAK, lenp)
        # One nucleotide partition per codon position, e.g.
        #   Gene1_pos1 = 1-789\3;
        #   Gene1_pos2 = 2-789\3;
        #   Gene1_pos3 = 3-789\3;
        #   Gene2_pos1 = 790-1449\3;
        for cod in [1, 2, 3]:
            PBN = PARTBREAKN + (cod - 1)
            partitionsN.append(
                "Gene%d_pos%d = %d-%d\\3;" % (NOS, cod, PBN, lenN))
        PARTBREAK = lenp + 1
        PARTBREAKN = lenN + 1
        partitions.append(partition)
    sys.stdout.write("\n")

    with open("CONCAT_align_pep.fasta", 'w') as OUTP, \
            open("CONCAT_align_nuc.fasta", 'w') as OUTN, \
            open("CONCAT_align_pep.partitions", "w") as OUTPART, \
            open("CONCAT_align_nuc.partitions", "w") as OUTPARTN:
        for sx in spp:
            OUTP.write('>' + sx + "\n" + CAT_P[sx] + "\n")
            OUTN.write('>' + sx + "\n" + CAT_N[sx] + "\n")
        emit("[LOG:] ",
             ":Writing partitions file for PartitionFinder2 by Rob Lanfear's group\n")
        OUTPART.write('\n'.join(partitions))
        OUTPARTN.write('\n'.join(partitionsN))

    emit("\n[LOG:] ", ":Concatenation completed for all combined fasta\n")
    emit("[OUTPUTS:] ",
         ":Concatenated peptide sequence for all successful alignments: -> CONCAT_align_pep.fasta\n")
    emit("[OUTPUTS:] ",
         ":Concatenated nucleotide sequence for all successful alignments: -> CONCAT_align_nuc.fasta\n")
    emit("[OUTPUTS:] ",
         ":Partitions for CONCAT_align_pep.fasta: -> CONCAT_align_pep.partitions\n")
    emit("[OUTPUTS:] ",
         ":Partitions for CONCAT_align_nuc.fasta: -> CONCAT_align_nuc.partitions\n")

    ########################################################
    # run FastTreeMP max cores to create a ML phylogeny topology..
    emit("[LOG:] ",
         ":Running FastTreeMP on multiple cores using CONCAT_align_nuc.fasta\n")
    retcode = runFastTreeMP("CONCAT_align_nuc.fasta", "CONCAT_align_nuc.tree")
    if retcode != 0:
        emit("[LOG:] ",
             ":FastTreeMP Failed... try command: FastTreeMP -wag -nosupport -bionj CONCAT_align_pep.fasta > CONCAT_align_pep.tree\n")
    else:
        emit("[LOG:] ",
             ":Completed running FastTreeMP on multiple cores using CONCAT_align_nuc.fasta\n")
    emit("[OUTPUTS:] ",
         ":Raw ML newick tree based on successful alignments in CONCAT_align_nuc.fasta : -> CONCAT_align_nuc.tree\n")

    ########################################################
    # Create the newick topology of the tree and set outgroups
    emit("[LOG:] ", ":Rooting input tree based on outgroups\n")
    with open("CONCAT_align_nuc.tree") as tree_file:
        t = tree_file.readlines()[0]
    tree = ete2.Tree(t)
    outgroup = args.outgroup.split(',')
    if len(outgroup) == 1:
        tree.set_outgroup(outgroup[0])
    if len(outgroup) >= 2:
        anc = tree.get_common_ancestor(outgroup)
        tree.set_outgroup(anc)
    ts = ete2.TreeStyle()
    ts.show_leaf_name = True
    ts.show_branch_length = True
    ts.show_branch_support = True
    tree.write(format=9, outfile="CONCAT_align_nuc.nwk")
    tree.render("Phylogeny_rooted.pdf", tree_style=ts, dpi=300)
    emit("[LOG:] ", ": Finished rooting phylogeny\n")
    emit("[OUTPUTS:] ",
         ":Rooted ML tree for phylogeny : -> CONCAT_align_nuc.nwk\n")
    emit("[OUTPUTS:] ",
         ":Rooted ML tree for phylogeny : -> Phylogeny_rooted.pdf\n")

    ########################################################
    # Create the phylip file and also PartitionFinder config files for
    # protein and nucleotide
    emit("[LOG:] ", ":Preparing PartitionFinder input\n")
    with open("CONCAT_align_pep.partitionfinder.cfg", "w") as OUTPART, \
            open("CONCAT_align_nuc.partitionfinder.cfg", "w") as OUTPARTN:
        OUTPART.write(
            "alignment = CONCAT_align_pep.phy\nuser_tree_topology = CONCAT_align_nuc.nwk;\nbranchlengths = linked;\nmodels = LG+G, LG+G+F;\nmodel_selection = AICc;\n[data_blocks]\n")
        OUTPARTN.write(
            "alignment = CONCAT_align_nuc.phy\nuser_tree_topology = CONCAT_align_nuc.nwk;\nbranchlengths = linked;\nmodels = LG+G, LG+G+F;\nmodel_selection = AICc;\n[data_blocks]\n")
        #search = rcluster\n
        OUTPART.write('\n'.join(partitions))
        OUTPARTN.write('\n'.join(partitionsN))
        OUTPART.write("[schemes]\nsearch=greedy;")
        OUTPARTN.write("[schemes]\nsearch=rcluster;")

    cmd = 'bash convertFasta2Phylip.sh CONCAT_align_pep.fasta > CONCAT_align_pep.phy'
    retcode = runCMD(cmd)
    if retcode != 0:
        emit("[LOG:] ", ":Conversion of FASTA to phylip failed\n")
    else:
        emit("[LOG:] ",
             ":Conversion of FASTA to phylip succeeded for PEPTIDE\n")
    cmd = 'bash convertFasta2Phylip.sh CONCAT_align_nuc.fasta > CONCAT_align_nuc.phy'
    retcode = runCMD(cmd)
    if retcode != 0:
        emit("[LOG:] ", ":Conversion of FASTA to phylip failed\n")
    else:
        emit("[LOG:] ",
             ":Conversion of FASTA to phylip succeeded for NUCLEOTIDE\n")
    emit("[OUTPUTS:] ",
         ":Input config file for PartitionFinder : -> CONCAT_align_pep.partitionfinder.cfg\n")
    emit("[OUTPUTS:] ",
         ":Input config file for PartitionFinder : -> CONCAT_align_nuc.partitionfinder.cfg\n")
    emit("[OUTPUTS:] ",
         ":Input sequence file for PartitionFinder : -> CONCAT_align_pep.phy\n")
    emit("[OUTPUTS:] ",
         ":Input sequence file for PartitionFinder : -> CONCAT_align_nuc.phy\n")

    emit("[LOG:] ", ": COMPLETEEEEE! \n")
raise Exception('Tree with equal depth leaf nodes required') layers = [set(leaves)] while next(iter(layers[-1])).up is not tree: layers.append(list(set([node.up for node in layers[-1]]))) layers = layers[::-1] for l, layer in enumerate(layers): for k, node in enumerate(layer): node.layer = l node.layer_index = k return layers def change_tree_node_size(tree, size): for node in tree.traverse(): node.img_style['size'] = size ts = ete2.TreeStyle() ts.mode = 'c' #ts.show_leaf_name = False #ts.layout_fn = named_internal_node_layout ts.scale = None ts.optimal_scale_level = 'full' tree = build_tree_from_dict(taxonomy) eq_tree_td = make_equal_depth_tree(tree) change_tree_node_size(eq_tree_td, 10) eq_tree_bu = make_equal_depth_tree_bottom_up(tree) change_tree_node_size(eq_tree_bu, 10) #layers = get_equal_depth_tree_layers(eq_tree) eq_tree_td.show(tree_style=ts) eq_tree_bu.show(tree_style=ts)
def make_figure(tree, timepoints, delimiter, scale, branch_vert_margin,
                fontsize, show_name, tree_orientation, show_scale=False):
    """Render a timepoint-coloured tree to a PDF next to the Newick file.

    Args:
        tree: Path of the Newick file; the output path is this value with
            ``'_tree.nw'`` replaced by ``'_tree.pdf'``.
        timepoints: Iterable of objects exposing ``name``, ``order`` and
            ``color``.
        delimiter: Separator used inside node names; for ``'mab'`` and
            ``'input'`` nodes the part before the first delimiter is
            dropped from the displayed label.
        scale: Optional value converted with ``int`` for ``TreeStyle.scale``.
        branch_vert_margin: Optional value converted with ``float`` for
            ``TreeStyle.branch_vertical_margin``.
        fontsize: Font size of the node labels.
        show_name: ``'none'``, ``'all'``, ``'no-root'``, a single node type
            as a string, or a collection of node types to label.
        tree_orientation: Value for ``TreeStyle.orientation``.
        show_scale: Whether the scale bar is drawn (default ``False``).
    """
    fig = tree.replace('_tree.nw', '_tree.pdf')
    orders = {tp.name: tp.order for tp in timepoints}
    colors = {tp.name: tp.color for tp in timepoints}

    # settings for name showing
    if show_name == 'none':
        show_name = []
    if show_name == 'all':
        show_name = ['mab', 'root', 'input']
    elif show_name == 'no-root':
        show_name = ['input', 'mab']
    elif isinstance(show_name, (str, unicode)):
        show_name = [show_name, ]

    # make the tree
    t = ete2.Tree(tree)
    t.set_outgroup(t & "root")

    # style the nodes based on timepoint
    for node in t.traverse():
        earliest = get_earliest_leaf(node.get_leaf_names(), orders, delimiter)
        color = colors[earliest]
        node_type = get_node_type(node.name)
        style = ete2.NodeStyle()
        style['size'] = 0
        style['vt_line_width'] = 1.0
        style['hz_line_width'] = 1.0
        style['vt_line_color'] = color
        style['hz_line_color'] = color
        style['vt_line_type'] = 0
        style['hz_line_type'] = 0
        if node_type in show_name:
            if node_type in ['mab', 'input']:
                name = ' ' + delimiter.join(node.name.split(delimiter)[1:])
            else:
                name = ' ' + node.name
            tf = ete2.TextFace(name)
            tf.fsize = fontsize
            node.add_face(tf, column=0)
            style['fgcolor'] = '#000000'
        node.set_style(style)

    # style the full tree
    t.dist = 0
    ts = ete2.TreeStyle()
    ts.orientation = tree_orientation
    ts.show_leaf_name = False
    if scale:
        ts.scale = int(scale)
    if branch_vert_margin:
        ts.branch_vertical_margin = float(branch_vert_margin)
    # Honour the caller's choice; this used to be hard-coded to False,
    # which silently ignored the show_scale argument.
    ts.show_scale = show_scale

    # ladderize
    t.ladderize()

    # render the tree
    t.render(fig, tree_style=ts)
] specieslist = xistaltspeciestree #starttree = "ensembl_amniota23.tree" starttree = "hg38_100way.tree" outpdf = "hsXIST_alt1_tree.pdf" tree = ete.Tree(starttree) tree.prune(specieslist, preserve_branch_length=False) for n in tree.traverse(): style = ete.NodeStyle() #style['hz_line_width'] = 1 #style['vt_line_width'] = 1 style['size'] = 0 n.set_style(style) ts = ete.TreeStyle() ts.mode = 'r' ts.show_leaf_name = False ts.show_scale = False tree.render(outpdf, tree_style=ts, h=200) tree.show(tree_style=ts) """ grep -f <(grep ">" hg38_multiz100_RPL8_group1_mlocarna.fa | \ cut -c2-) ~/Documents/chang/psoralen/covariation/hg38_100way.name | \ cut -f5 | tr '\n' ',' | sed 's/,/", "/g' multiz100tree.prune(rpl8specieslist, preserve_branch_length = False) for n in multiz100tree.traverse(): style = ete.NodeStyle() #style['hz_line_width'] = 1