Esempio n. 1
0
def run():
    """Draw every Newick tree read from the input files (or stdin).

    Each input line is parsed as one tree.  Leaves get a text face taken
    from the attribute named by --label and, when --attribute is given, a
    coloured sphere with one colour per distinct attribute value.  Trees are
    rendered to --output when it is set and shown interactively otherwise.
    Unless --multiple is given, only the first tree is processed.

    Returns 0.
    """
    # Command-line options, declared in the order they appear in the help.
    option_table = (
        (('-a', '--attribute'), dict(dest="attribute", default=None)),
        (('-d', '--dpi'), dict(type="int", default=None)),
        (('-H', '--height'), dict(type="int", dest="h", default=None)),
        (('-l', '--label'), dict(default="name")),
        (('-m', '--multiple'), dict(default=False, action="store_true")),
        (('-o', '--output'), dict(default=None)),
        (('-u', '--units'), dict(default="px")),
        (('-w', '--width'), dict(type="int", dest="w", default=None)),
    )
    parser = optparse.OptionParser(__doc__)
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)
    options, files = parser.parse_args()

    # One tree style shared by every tree.
    style = ete2.TreeStyle()
    style.show_scale = False
    style.show_branch_support = True

    for index, newick in enumerate(fileinput.input(files)):
        tree = ete2.Tree(newick)

        # Column 0: coloured sphere keyed on the chosen attribute.
        if options.attribute:
            distinct = set(
                getattr(leaf, options.attribute)
                for leaf in tree.get_leaves())
            palette = get_colour_set(len(distinct))
            colour_of = dict(zip(distinct, palette))
            for leaf in tree.iter_leaves():
                sphere = ete2.CircleFace(
                    radius=10,
                    color=colour_of[getattr(leaf, options.attribute)],
                    style="sphere")
                leaf.add_face(sphere, 0)
        # Column 1: the text label.
        for leaf in tree.iter_leaves():
            leaf.add_face(ete2.TextFace(getattr(leaf, options.label)), 1)

        if not options.output:
            tree.show(ultrametric, tree_style=style)
        else:
            # Size-related keywords are forwarded only when a height or a
            # width was requested, and only those that are set.
            size_kwargs = {}
            if options.h or options.w:
                size_kwargs = dict(
                    (key, getattr(options, key))
                    for key in ("h", "w", "units", "dpi")
                    if getattr(options, key))
            target = options.output
            if options.multiple:
                # Number the output files: name_000001.ext, name_000002.ext, ...
                stem, suffix = os.path.splitext(target)
                target = "%s_%06d%s" % (stem, index + 1, suffix)
            tree.render(target, ultrametric, tree_style=style, **size_kwargs)

        # Without --multiple only the first tree is handled.
        if not options.multiple:
            break

    return 0
Esempio n. 2
0
def draw_ete2_tree(organism, snplist, tree_file_name, config, c):
    '''Render the phylogenetic tree of an organism to a file using ETE2.

    Keyword arguments:
    organism -- the organism of which to make a tree
    snplist -- a sequence of SNP records; element 0 of a record is matched
               against node names, elements 1 and 3 decide the node style
    tree_file_name -- the path of the image file to render
    config -- mapping handed to tree_to_newick; config["dev"] enables a
              debug print of the output path
    c -- handed to tree_to_newick unchanged

    '''
    # Default look of every node (red), as ordered (key, value) pairs.
    default_look = (
        ("fgcolor", "#BE0508"),
        ("size", 10),
        ("vt_line_color", "#000000"),
        ("hz_line_color", "#000000"),
        ("vt_line_type", 0),
        ("hz_line_type", 0),
        ("vt_line_width", 2),
        ("hz_line_width", 2),
    )
    # Look used when elements 1 and 3 of the SNP record agree
    # (the SNP is Derived in snplist).
    derived_look = (
        ("fgcolor", "#99FF66"),
        ("size", 15),
        ("vt_line_color", "#000000"),
        ("hz_line_color", "#000000"),
        ("vt_line_type", 0),
        ("hz_line_type", 0),
    )
    # Look used when element 3 is "-" (SNP missing due to a gap): grey.
    gap_look = (
        ("fgcolor", "#DDDDDD"),
        ("size", 10),
        ("vt_line_color", "#DDDDDD"),
        ("hz_line_color", "#DDDDDD"),
        ("vt_line_type", 1),
        ("hz_line_type", 1),
    )

    phylogeny = ete2.Tree(tree_to_newick(organism, config, c), format=1)
    farthest_leaf = phylogeny.get_farthest_leaf()[0]
    depth = int(phylogeny.get_distance(farthest_leaf))

    for node in phylogeny.traverse():
        look = ete2.NodeStyle()
        for key, value in default_look:
            look[key] = value
        # Every record naming this node is applied in list order, so the
        # last matching record decides the final appearance.
        for record in snplist:
            if node.name != record[0]:
                continue
            if record[1] == record[3]:
                overrides = derived_look
            elif record[3] == "-":
                overrides = gap_look
            else:
                continue
            for key, value in overrides:
                look[key] = value
        node.set_style(look)

    layout = ete2.TreeStyle()
    layout.show_leaf_name = False  # leaf names are added by the layout
    layout.show_scale = False  # no scale bar
    layout.layout_fn = CanSNPer_tree_layout  # custom layout function
    layout.optimal_scale_level = 'full'  # fully expand the branches
    if config["dev"]:
        print("#[DEV] Tree file: %s" % tree_file_name)
    # The image width grows with the integer depth of the tree.
    phylogeny.render(tree_file_name, tree_style=layout, w=depth * 500)
Esempio n. 3
0
	def render_tree(self):
		"""Show this object's tree in the interactive ete2 viewer.

		The Newick text comes from make_newick(self) with a terminating
		';' appended and is parsed with format=8.  The parsed tree is kept
		on self.newick and displayed rotated by 90 degrees.
		"""
		newick_text=make_newick(self)+';'
		self.newick=ete2.Tree(newick_text,format=8)
		rotated_style=ete2.TreeStyle()
		rotated_style.rotation=90
		self.newick.show(tree_style=rotated_style)
Esempio n. 4
0
	def save_tree_to_file(self,filepath):
		"""Render this object's tree to the image file at filepath.

		The Newick text comes from make_newick(self) with a terminating
		';' appended and is parsed with format=1.  The parsed tree is kept
		on self.newick and rendered 500 units wide, rotated by 90 degrees.
		"""
		newick_text=make_newick(self)+';'
		self.newick=ete2.Tree(newick_text,format=1)
		rotated_style=ete2.TreeStyle()
		rotated_style.rotation=90
		self.newick.render(filepath,w=500,tree_style=rotated_style)
Esempio n. 5
0
 def _make_tree_figure(self,
                       tree,
                       fig,
                       colors,
                       orders,
                       root_name,
                       scale=None,
                       branch_vert_margin=None,
                       fontsize=12,
                       show_names=True,
                       name_field='seq_id',
                       rename_function=None,
                       color_node_labels=False,
                       label_colors=None,
                       tree_orientation=0,
                       min_order_fraction=0.1,
                       show_root_name=False,
                       chain=None,
                       linked_alignment=None,
                       alignment_fontsize=11,
                       alignment_height=50,
                       alignment_width=50,
                       compact_alignment=False,
                       scale_factor=1,
                       linewidth=1,
                       show_scale=False):
     """Style an ete2 tree and render it to ``fig``.

     Args:
         tree: Newick input handed to ``ete2.Tree`` / ``ete2.PhyloTree``.
         fig: Output path handed to ``render``.
         colors: Mapping of order -> branch colour when ``orders`` is
             given, otherwise of node name -> branch colour.
         orders: Optional mapping of leaf name -> order.  For each node
             the smallest order whose share of the node's leaves is at
             least ``min_order_fraction`` selects the colour.
         root_name: Name of the node used as outgroup.
         scale: Optional value for ``TreeStyle.scale`` (cast to int).
         branch_vert_margin: Optional value for
             ``TreeStyle.branch_vertical_margin`` (cast to float).
         fontsize: Font size of the name labels.
         show_names: ``True`` labels every sequence of ``self.pairs`` for
             the selected chain, ``False`` labels nothing; any other value
             is used as the collection of node names to label.
         name_field: Key read from ``p.heavy`` / ``p.light`` to obtain the
             sequence names when ``show_names`` is ``True``.
         rename_function: Optional callable applied to a node name to
             produce the label text.
         color_node_labels: When true, labels are coloured according to
             ``label_colors``; otherwise they are black.
         label_colors: ``None`` (use the branch colour), a dict of
             name -> colour, or a list/tuple of names that receive the
             branch colour.
         tree_orientation: Value for ``TreeStyle.orientation``.
         min_order_fraction: See ``orders``.
         show_root_name: When ``True`` the root is labelled as well.
         chain: ``'heavy'`` selects ``p.heavy``; anything else selects
             ``p.light``.
         linked_alignment: Optional FASTA alignment linked to the tree.
         alignment_fontsize, alignment_height, alignment_width,
         scale_factor: Stored as node features when an alignment is linked.
         compact_alignment: Accepted but not used by this method.
         linewidth: Branch line width.
         show_scale: Value for ``TreeStyle.show_scale``.
     """
     default_color = '#000000'

     if show_names is True:
         if chain == 'heavy':
             show_names = [
                 p.heavy[name_field] for p in self.pairs
                 if p.heavy is not None
             ]
         else:
             show_names = [
                 p.light[name_field] for p in self.pairs
                 if p.light is not None
             ]
     elif show_names is False:
         show_names = []
     if show_root_name is True:
         # Build a new list instead of appending in place, so that a list
         # supplied by the caller is never modified (and tuples work too).
         show_names = list(show_names) + [root_name]

     if linked_alignment is not None:
         t = ete2.PhyloTree(tree,
                            alignment=linked_alignment,
                            alg_format='fasta')
         # Swap in the custom sequence item used when drawing alignments.
         ete2.faces.SequenceItem = MySequenceItem
     else:
         t = ete2.Tree(tree)
     t.set_outgroup(t & root_name)

     # style the nodes
     for node in t.traverse():
         if orders is not None:
             leaves = node.get_leaf_names()
             order_count = Counter(orders[l] for l in leaves)
             # Fall back to the default colour when no order reaches the
             # requested fraction; previously ``color`` was then unbound
             # on the first node or silently reused from the previous one.
             color = default_color
             for order in sorted(order_count):
                 fraction = float(order_count[order]) / len(leaves)
                 if fraction >= min_order_fraction:
                     color = colors[order]
                     break
         else:
             color = colors.get(node.name, default_color)

         if linked_alignment is not None:
             node.add_feature('aln_fontsize', alignment_fontsize)
             node.add_feature('aln_height', alignment_height)
             node.add_feature('aln_width', alignment_width)
             node.add_feature('fontsize', fontsize)
             node.add_feature('format', 'seq')
             node.add_feature('scale_factor', scale_factor)

         style = ete2.NodeStyle()
         style['size'] = 0
         style['vt_line_width'] = float(linewidth)
         style['hz_line_width'] = float(linewidth)
         style['vt_line_color'] = color
         style['hz_line_color'] = color
         style['vt_line_type'] = 0
         style['hz_line_type'] = 0

         if node.name in show_names:
             node_color = default_color
             if color_node_labels:
                 if label_colors is None:
                     node_color = color
                 elif isinstance(label_colors, dict):
                     node_color = label_colors.get(node.name, default_color)
                 elif isinstance(label_colors, (list, tuple)):
                     if node.name in label_colors:
                         node_color = color
             if rename_function is None:
                 node_name = node.name
             else:
                 node_name = rename_function(node.name)
             tf = ete2.TextFace(node_name,
                                fsize=fontsize,
                                fgcolor=node_color)
             node.add_face(tf, column=0)
         node.set_style(style)

     # Give the root a zero-length branch.
     t.dist = 0
     ts = ete2.TreeStyle()
     if linked_alignment is not None:
         ts.layout_fn = self._phyloalignment_layout_function
     ts.orientation = tree_orientation
     ts.show_leaf_name = False
     if scale is not None:
         ts.scale = int(scale)
     if branch_vert_margin is not None:
         ts.branch_vertical_margin = float(branch_vert_margin)
     ts.show_scale = show_scale
     # ladderize
     t.ladderize()
     # render the tree
     t.render(fig, tree_style=ts)
Esempio n. 6
0
def tree_draw(tree_file,
              tree_name=None,
              order_vector_file=None,
              cell_colors_file=None,
              clustering_colors_file=None,
              clustering_sizes_file=None,
              intermediate_node_sizes_file=None,
              intermediate_node_labels_file=None,
              leaf_labels_file=None,
              legend_file=None,
              duplicate_file=None,
              tree_scale='linear',
              tree_rotation=True,
              font_size=7,
              font_legend=7,
              node_size=3,
              scale_rate=None,
              distance_factor=1,
              y_scale=False):
    """Build a styled ete2 tree and its TreeStyle from a Newick source.

    Args:
        tree_file: Newick input handed to ``ete2.Tree`` (format 1).
        tree_name: Optional title placed above the tree.
        order_vector_file: Optional input for ``utils.get_leaf_order``;
            children of a node are swapped when its first leaf is ordered
            after its last one.
        cell_colors_file: Optional input for ``utils.get_cells_colors``
            (node name -> foreground colour).
        clustering_colors_file: Optional input for ``color_clustering``.
        clustering_sizes_file: Optional input for ``size_clustering``.
        intermediate_node_sizes_file: Optional input for
            ``utils.get_bootsrtap_size`` (node name -> node size).
        intermediate_node_labels_file: Accepted but not used here.
        leaf_labels_file: Optional input for ``utils.get_cells_labels``
            (node name -> label text); replaces the default leaf names.
        legend_file: Optional input for ``utils.get_legend``
            (legend text -> colour).
        duplicate_file: Optional input for ``utils.get_dup_labels``
            (node name -> colour of the '*' tag).
        tree_scale: ``'log'`` or ``'linear'`` branch-length transform.
        tree_rotation: Rotate the drawing by 90 degrees when true.
        font_size: Font size of the replacement leaf labels.
        font_legend: Font size of the legend text.
        node_size: Size of leaf nodes.
        scale_rate: Factor applied to every transformed branch length;
            derived from the largest branch length when falsy.
        distance_factor: Multiplier applied to root distances before the
            logarithm in ``'log'`` mode.
        y_scale: Fill in the ``ts.y_axis`` scale type and length when true.

    Returns:
        The tuple ``(t, ts)``: the styled tree and its tree style.
    """
    t = ete2.Tree(newick=tree_file, format=1)
    ts = ete2.TreeStyle()
    if tree_rotation:
        ts.rotation = 90
    ts.show_leaf_name = True
    ts.show_scale = False
    ts.scale = 1
    if tree_name:
        ts.title.add_face(ete2.TextFace(tree_name, fsize=20), column=0)

    # Per-node bookkeeping, keyed by node name.  Nodes sharing a name
    # therefore share one entry.
    styles = {}
    max_dist = 0

    # initialize all nodes and branches
    for n in t.traverse():
        styles[n.name] = dict()
        styles[n.name]['style'] = ete2.NodeStyle()
        styles[n.name]['style']['fgcolor'] = 'black'
        max_dist = max(max_dist, n.dist)

    # calculate the scale for the tree (log, linear and right size)
    if tree_scale == 'log':
        max_dist = 0
    root = t.get_tree_root()
    # NOTE(review): stock ete2 returns a (node, distance) tuple from
    # get_farthest_leaf(), which has no ``.dist``.  ``ts.y_axis`` is not
    # part of stock ete2 either, so this presumably targets a customised
    # build -- confirm before changing.
    last_leaf = root.get_farthest_leaf()
    ts.y_axis['scale_min_value'] = root.dist
    ts.y_axis['scale_max_value'] = last_leaf.dist

    for n in t.traverse():
        if tree_scale == 'log':
            if n == root:
                styles[n.name]['dist'] = 0
            else:
                # Branch length = log of the distance to the root minus
                # the transformed lengths already given to the ancestors.
                father_path = 0
                for ancestor in n.get_ancestors():
                    father_path += styles[ancestor.name]['dist']

                dist = math.log10(n.get_distance(root) * distance_factor +
                                  1) - father_path
                if dist < 0:
                    dist = 0
                styles[n.name]['dist'] = dist
                max_dist = max(max_dist, dist)

        elif tree_scale == 'linear':
            if max_dist > 1:
                styles[n.name]['dist'] = round(n.dist / max_dist)
            else:
                styles[n.name]['dist'] = n.dist

    # leaf styles and update distance
    if not scale_rate:
        # A tree whose branch lengths are all zero has nothing to scale;
        # use the floor value instead of dividing by zero.
        if max_dist:
            scale_rate = max(1000, round(1 / max_dist))
        else:
            scale_rate = 1000
    for n in t.traverse():
        if 'dist' in styles[n.name]:
            n.dist = styles[n.name]['dist'] * scale_rate
        if not n.is_leaf():
            styles[n.name]['style']["size"] = 0
        else:
            styles[n.name]['style']["size"] = node_size

    # add bootstrap values to the branches (size of the node)
    if intermediate_node_sizes_file:
        bootsrtap_sizes = utils.get_bootsrtap_size(
            intermediate_node_sizes_file)
        for branch, size in bootsrtap_sizes.iteritems():
            styles[branch]['style']["size"] = size
            styles[branch]['style']['fgcolor'] = 'black'

    # add colors to the leafs
    # Always defined, so that the leaf-label step below also works when
    # labels are requested without a colour file (previously a NameError).
    cells_colors = {}
    if cell_colors_file:
        cells_colors = utils.get_cells_colors(cell_colors_file)
        for name, color in cells_colors.iteritems():
            styles[name]['style']['fgcolor'] = color

    # reorder the tree by pre-proses if possible
    if order_vector_file:
        leaf_order = utils.get_leaf_order(order_vector_file)
        for n in t.traverse('postorder'):
            if n.get_descendants():
                # a: first leaf in postorder; b: last node in preorder.
                a = ''
                for leaf in n.get_descendants(strategy='postorder'):
                    if leaf.is_leaf():
                        if not a:
                            a = leaf
                b = n.get_descendants(strategy='preorder')[-1]

                if a.is_leaf() and b.is_leaf():
                    if leaf_order[a.name] > leaf_order[b.name]:
                        # Unpacking requires exactly two children here.
                        left, right = n.children
                        n.children = [right, left]

    # add width to branches
    if clustering_sizes_file:
        t, styles = size_clustering(t, styles, clustering_sizes_file)

    # add colors to branches
    if clustering_colors_file:
        t, ts, styles = color_clustering(t, ts, styles, clustering_colors_file)

    # add new leaf labels
    if leaf_labels_file:
        cells_labels = utils.get_cells_labels(leaf_labels_file)
        ts.show_leaf_name = False
        for name, label in cells_labels.iteritems():
            nodes = t.search_nodes(name=name)
            assert len(nodes) == 1
            node = nodes[0]
            if name in cells_colors:
                name_face = ete2.faces.TextFace(cells_labels[name],
                                                fsize=font_size,
                                                fgcolor=cells_colors[name])
            else:
                name_face = ete2.faces.TextFace(cells_labels[name],
                                                fsize=font_size)

            name_face.margin_left = 3
            node.add_face(name_face, column=0)

    # add duplicate tags to nodes
    if duplicate_file:
        dup_labels = utils.get_dup_labels(duplicate_file)
        for name, color in dup_labels.iteritems():
            node = node_check(name, t)
            if not node:
                continue
            dup_face = ete2.faces.TextFace('*', fsize=10, fgcolor=color)
            dup_face.margin_left = 5
            node.add_face(dup_face, column=1)

    # add legend to the tree
    if legend_file:
        legend = utils.get_legend(legend_file)
        for mark in legend.keys():
            ts.legend.add_face(ete2.faces.CircleFace(2, legend[mark]),
                               column=0)
            legend_txt = ete2.faces.TextFace(mark, fsize=font_legend)
            legend_txt.margin_left = 5
            ts.legend.add_face(legend_txt, column=1)
        ts.legend_position = 4

    # add y-scale to the picture
    if y_scale:

        ts.y_axis['scale_type'] = tree_scale
        ts.y_axis['scale_length'] = last_leaf.dist - root.dist

    # set all the styles
    for n in t.traverse():
        if n.name == 'IDroot':
            n.dist = 0
            n.delete()
        if n.is_root():
            n.dist = 0
            n.delete()
        n.set_style(styles[n.name]['style'])
    # Mark the root with a small white circle outlined in black.
    root = ete2.faces.CircleFace(2, 'white')
    root.border.width = 1
    root.border.color = 'black'
    t.add_face(root, column=0, position='float')

    # t.render("%%inline", tree_style=ts)
    return t, ts
def main():
    """Run the ortholog-cluster-to-phylogeny pipeline from the command line.

    Steps, in order:
      1. Parse the command line and read the per-species gene list.
      2. Concatenate the peptide, nucleotide and pairwise-ortholog files.
      3. Run MCL on the orthologs unless ortholog_markov_clusters.txt
         already exists, then keep clusters with exactly one gene from
         every species and write each as GeneSet<N>.nucf.
      4. Call process_geneset for every gene set and concatenate the
         successful alignments into CONCAT_align_{pep,nuc}.fasta together
         with their partition files.
      5. Build a tree with runFastTreeMP, root it on the requested
         outgroup(s) and write CONCAT_align_nuc.nwk / Phylogeny_rooted.pdf.
      6. Write the PartitionFinder config files and convert both FASTA
         alignments to PHYLIP.

    Exits with status 1 when called without arguments, 2 on a
    command-line error, and with an error message when MCL fails.
    """

    class MyParser(argparse.ArgumentParser):
        """ArgumentParser that prints the full help after an error."""

        def error(self, message):
            sys.stderr.write('[ERROR]: error: %s\n' % message)
            self.print_help()
            sys.exit(2)

    def log(message, tag="[LOG:]     ", lead=""):
        """Write one tagged, timestamped message to stdout."""
        sys.stdout.write(lead + tag + TIMESTAMP(time.time()) + message)

    out_tag = "[OUTPUTS:] "

    # parse command line
    argparser = MyParser()
    argparser.usage = '------------\n%(prog)s -l [FILE] -g [PATH] -p [PATH] -n [PATH] -t [THREADS] --noFilter/--Filter'
    argparser.description = 'Accepts MultiMSOAR2 output and all_vs_all blast to classify genes into summarized orthogroups and supergroups and gene duplications and birth.'
    argparser.epilog = '--------------'
    argparser.add_argument(
        "-l",
        required=True,
        metavar='FILE',
        dest='genelist',
        help=
        "File with list of all genes in each species - format is 1 line per species"
    )
    argparser.add_argument(
        "-g",
        required=True,
        metavar='PATH',
        dest='ORGPATH',
        help="Path to all pairwise ortholog files {WKDIR/MultiMSOAR_inputs")
    argparser.add_argument("-p",
                           required=True,
                           metavar='PATH',
                           dest='PEPPATH',
                           help="Path to all peptide files {WKDIR")
    argparser.add_argument("-n",
                           required=True,
                           metavar='PATH',
                           dest='NUCPATH',
                           help="Path to all nucleotide files {WKDIR")
    # type=int rejects a non-numeric thread count at parse time instead of
    # failing later inside the pipeline.
    argparser.add_argument("-t",
                           default=1,
                           type=int,
                           metavar='NUM',
                           dest='threads',
                           help="Number of threads to run on")
    argparser.add_argument(
        "--noFilter",
        action='store_false',
        default=False,
        dest='filter',
        help="Set to switch off dynamic filtering of ortholog groups")
    # Help text used to say "switch off", which is what --noFilter does.
    argparser.add_argument(
        "--Filter",
        action='store_true',
        default=False,
        dest='filter',
        help="Set to switch on dynamic filtering of ortholog groups")
    # NOTE(review): this help text looks copy-pasted from --noFilter and
    # args.PF is not read in this function -- confirm the intended meaning.
    argparser.add_argument(
        "--PF-prep",
        action='store_true',
        default=False,
        dest='PF',
        help="Set to switch off dynamic filtering of ortholog groups")
    argparser.add_argument(
        "--outgroup",
        required=True,
        metavar='PATH',
        dest='outgroup',
        help=
        "Comma separated list of outgroups . eg. SP1 if 1 species\nOR\nSP1,SP2 for more than 1 outgroup"
    )

    if len(sys.argv) == 1:
        argparser.print_help()
        sys.exit(1)
    args = argparser.parse_args()
    threads = int(args.threads)

    PEPFILES = glob.glob(args.PEPPATH + "/*.pep")
    NUCFILES = glob.glob(args.NUCPATH + "/*.nuc")
    ORGFILES = glob.glob(args.ORGPATH + "/S*_S?*")

    ########################################################
    # Parse the list of genes in each species
    with open(args.genelist) as genelist_handle:
        genelist_lines = genelist_handle.readlines()

    # ODIR is not assigned in this function; presumably a module-level name.
    print(ODIR)

    spp = []
    gene_dict = {}  #DICT{GENE:SPP}

    for raw_line in genelist_lines:
        # Strip the line terminator first: otherwise the last gene of every
        # species keeps a trailing newline and can never match the gene
        # names read from the (stripped) MCL cluster lines below.
        fields = raw_line.rstrip("\r\n").split("\t")
        species = fields[0]
        if not species:
            # Blank line: not a species record.
            continue
        spp.append(species)
        for gene_rec in fields[1:]:
            if gene_rec:
                gene_dict[gene_rec] = species

    sys.stdout.write("#\ntotal number of species catalogued:" + str(len(spp)))
    sys.stdout.write("#\nTotal genes in gene_dict =" + str(len(gene_dict)) +
                     "\n")
    NUMSPP = len(spp)

    # Protein list.  The returned value is not needed here; the call is
    # kept because it presumably writes all_prots.fa -- TODO confirm.
    concat_files(PEPFILES, "all_prots.fa", type="fasta")

    # Nucleotide list
    NUCS = concat_files(NUCFILES, "all_nucs.fa", type="fasta")

    ########################################################
    # Create the MCL clusters
    OGF = concat_files(ORGFILES, 'orthologs_cat.txt', type="orthologs")

    log(": Completed concatenating %d pairwise orthologs. Proceeding to run MCL\n"
        % OGF)
    command = "mcl orthologs_cat.txt --abc -te %d -I 2.0 -o ortholog_markov_clusters.txt" % threads
    if os.path.isfile('ortholog_markov_clusters.txt'):
        log(": MCL was run earlier\n")
    else:
        retcode_MCL = runCMD(command)
        if retcode_MCL:
            sys.stderr.write('[ERROR]: MCL did not return 0')
            sys.exit('Something went wrong while running MCL')
        else:
            log(": Completed MCL run - ortholog_markov_clusters.txt\n")

    ########################################################
    # Parse the MCL clusters
    sys.stdout.write("Opening MCL file: ortholog_markov_clusters.txt\n")
    ORX = 0  # clusters with exactly one gene from every species
    PMX = 0  # clusters with as many genes as there are species
    if not args.filter:
        log(": dynamic ortholog length distribution filter switched OFF. Only filtering by species participation\n",
            tag="[WARNING:] ")
    else:
        log(": dynamic ortholog length distribution filter switched ON.\n")

    with open("ortholog_markov_clusters.txt") as MCL_FILE:
        for line in MCL_FILE:
            listor = line.strip().split("\t")
            if len(listor) != NUMSPP:
                continue

            # Map species -> gene; two genes of one species collapse into
            # a single key, which is how duplicates are detected below.
            Dlistor = {}
            for gene in listor:
                Dlistor[gene_dict[gene]] = gene
            PMX += 1
            if len(Dlistor) != NUMSPP:
                continue

            ORX += 1
            # Failed = candidates that did not pass (PMX also counts the
            # passing ones, so it cannot be reported as "failed" itself).
            log(":%d clusters passed length filters - and %d failed" %
                (ORX, PMX - ORX),
                lead="\r")
            sys.stdout.flush()

            with open("GeneSet" + str(ORX) + ".nucf", 'w') as OUTN:
                for sx in spp:
                    gx = Dlistor[sx]
                    OUTN.write('>' + sx + " " + gx + "\n" + NUCS[gx] + "\n")

    sys.stdout.write("\n")
    # "%sd" in the original printed a stray "d" after the count.
    log(": Number of markov clusters passing species number filter - %d | %d FAILED \n"
        % (ORX, PMX - ORX))

    ########################################################
    # Align every one of the [ORX] number of alignments and pick the
    # successes
    log(":Starting processing of successful clusters\n")
    CODON_ALN_NUC = {}
    CODON_ALN_PEP = {}
    SUCCESSES = 0
    FAILS = 0
    sys.stdout.write("\n")
    #serial implementation
    for IDX in range(1, ORX + 1):
        status = process_geneset(IDX, CODON_ALN_NUC, CODON_ALN_PEP, threads)
        if status == "failed":
            FAILS += 1
        elif status == "success":
            SUCCESSES += 1
        sys.stdout.write(
            "\r[CODON ALIGNMENT]: %s clusters successful || %s failed " %
            (SUCCESSES, FAILS))
        sys.stdout.flush()

    sys.stdout.write("\n")
    log(":Finished processing all clusters. %s clusters produced successful alignments and %s failed \n"
        % (SUCCESSES, FAILS))
    log(":Concatenating nucleotide and protein alignments for successful clusters\n"
        )

    ########################################################
    # Combine all successful alignments
    successful_sets = set(CODON_ALN_NUC.keys())

    CAT_P = {}
    CAT_N = {}

    partitions = []
    partitionsN = []
    NOS = 0
    PARTBREAK = 1
    PARTBREAKN = 1
    for sp in spp:
        CAT_P[sp] = ''
        CAT_N[sp] = ''
    for sets in successful_sets:
        # Tag used to read "[[LOG:]" (doubled bracket).
        log(": processing ID %s" % sets, lead="\r")
        sys.stdout.flush()
        DICP = CODON_ALN_PEP[sets]
        DICN = CODON_ALN_NUC[sets]
        for sp in spp:
            CAT_P[sp] += DICP[sp]
            CAT_N[sp] += DICN[sp]
            lenp = len(CAT_P[sp])
            lenN = len(CAT_N[sp])
        NOS = NOS + 1
        partitions.append("Gene%d = %d-%d;" % (NOS, PARTBREAK, lenp))
        # One nucleotide partition per codon position, e.g.
        #   Gene1_pos1 = 1-789\3;
        #   Gene1_pos2 = 2-789\3;
        #   Gene1_pos3 = 3-789\3;
        #   Gene2_pos1 = 790-1449\3;
        for cod in [1, 2, 3]:
            PBN = PARTBREAKN + cod - 1
            partitionsN.append("Gene%d_pos%d = %d-%d\\3;" %
                               (NOS, cod, PBN, lenN))
        PARTBREAK = lenp + 1
        PARTBREAKN = lenN + 1

    sys.stdout.write("\n")

    with open("CONCAT_align_pep.fasta", 'w') as OUTP, \
            open("CONCAT_align_nuc.fasta", 'w') as OUTN:
        for sx in spp:
            OUTP.write('>' + sx + "\n" + CAT_P[sx] + "\n")
            OUTN.write('>' + sx + "\n" + CAT_N[sx] + "\n")

    log(":Writing partitions file for PartitionFinder2 by Rob Lanfear's group\n"
        )

    with open("CONCAT_align_pep.partitions", "w") as OUTPART:
        OUTPART.write('\n'.join(partitions))
    with open("CONCAT_align_nuc.partitions", "w") as OUTPARTN:
        OUTPARTN.write('\n'.join(partitionsN))

    log(":Concatenation completed for all combined fasta\n", lead="\n")

    log(":Concatenated peptide sequence for all successful alignments: -> CONCAT_align_pep.fasta\n",
        tag=out_tag)
    log(":Concatenated nucleotide sequence for all successful alignments: -> CONCAT_align_nuc.fasta\n",
        tag=out_tag)
    log(":Partitions for CONCAT_align_pep.fasta: -> CONCAT_align_pep.partitions\n",
        tag=out_tag)
    log(":Partitions for CONCAT_align_nuc.fasta: -> CONCAT_align_nuc.partitions\n",
        tag=out_tag)

    ########################################################
    # run FastTreeMP max cores to create a ML phylogeny topology..
    log(":Running FastTreeMP on multiple cores using CONCAT_align_nuc.fasta\n")
    retcode = runFastTreeMP("CONCAT_align_nuc.fasta", "CONCAT_align_nuc.tree")
    if retcode != 0:
        log(":FastTreeMP Failed... try command: FastTreeMP -wag -nosupport -bionj CONCAT_align_pep.fasta > CONCAT_align_pep.tree\n"
            )
    else:
        log(":Completed running FastTreeMP on multiple cores using CONCAT_align_nuc.fasta\n"
            )

    log(":Raw ML newick tree based on successful alignments in CONCAT_align_nuc.fasta : -> CONCAT_align_nuc.tree\n",
        tag=out_tag)

    ########################################################
    # Create the newick topology of the tree and set outgroups
    log(":Rooting input tree based on outgroups\n")
    with open("CONCAT_align_nuc.tree") as tree_handle:
        # Only the first line of the tree file is parsed.
        t = tree_handle.readlines()[0]
    tree = ete2.Tree(t)

    outgroup = args.outgroup.split(',')
    if len(outgroup) == 1:
        tree.set_outgroup(outgroup[0])
    else:
        # Several outgroup species: root on their common ancestor.
        anc = tree.get_common_ancestor(outgroup)
        tree.set_outgroup(anc)

    ts = ete2.TreeStyle()
    ts.show_leaf_name = True
    ts.show_branch_length = True
    ts.show_branch_support = True

    tree.write(format=9, outfile="CONCAT_align_nuc.nwk")
    tree.render("Phylogeny_rooted.pdf", tree_style=ts, dpi=300)
    log(": Finished rooting phylogeny\n")
    log(":Rooted ML tree for phylogeny : -> CONCAT_align_nuc.nwk\n",
        tag=out_tag)
    log(":Rooted ML tree for phylogeny : -> Phylogeny_rooted.pdf\n",
        tag=out_tag)

    ########################################################
    # Create the phylip file and also PartitionFinder config files for
    # protein and nucleotide
    log(":Preparing PartitionFinder input\n")

    # The "[schemes]" header now starts on its own line; it used to be
    # glued to the end of the last data-block line.
    with open("CONCAT_align_pep.partitionfinder.cfg", "w") as OUTPART:
        OUTPART.write(
            "alignment = CONCAT_align_pep.phy\nuser_tree_topology = CONCAT_align_nuc.nwk;\nbranchlengths = linked;\nmodels = LG+G, LG+G+F;\nmodel_selection = AICc;\n[data_blocks]\n"
        )
        OUTPART.write('\n'.join(partitions))
        OUTPART.write("\n[schemes]\nsearch=greedy;")
    # NOTE(review): the nucleotide config lists the same LG models as the
    # peptide config -- confirm these are intended for nucleotide data.
    with open("CONCAT_align_nuc.partitionfinder.cfg", "w") as OUTPARTN:
        OUTPARTN.write(
            "alignment = CONCAT_align_nuc.phy\nuser_tree_topology = CONCAT_align_nuc.nwk;\nbranchlengths = linked;\nmodels = LG+G, LG+G+F;\nmodel_selection = AICc;\n[data_blocks]\n"
        )
        OUTPARTN.write('\n'.join(partitionsN))
        OUTPARTN.write("\n[schemes]\nsearch=rcluster;")

    for kind, label in (("pep", "PEPTIDE"), ("nuc", "NUCLEOTIDE")):
        cmd = 'bash convertFasta2Phylip.sh CONCAT_align_%s.fasta > CONCAT_align_%s.phy' % (
            kind, kind)
        retcode = runCMD(cmd)
        if retcode != 0:
            log(":Conversion of FASTA to phylip failed\n")
        else:
            log(":Conversion of FASTA to phylip succeeded for %s\n" % label)

    log(":Input config file for PartitionFinder : -> CONCAT_align_pep.partitionfinder.cfg\n",
        tag=out_tag)
    log(":Input config file for PartitionFinder : -> CONCAT_align_nuc.partitionfinder.cfg\n",
        tag=out_tag)
    log(":Input sequence file for PartitionFinder : -> CONCAT_align_pep.phy\n",
        tag=out_tag)
    log(":Input sequence file for PartitionFinder : -> CONCAT_align_nuc.phy\n",
        tag=out_tag)

    log(": COMPLETEEEEE! \n")
Esempio n. 8
0
        raise Exception('Tree with equal depth leaf nodes required')
    layers = [set(leaves)]
    while next(iter(layers[-1])).up is not tree:
        layers.append(list(set([node.up for node in layers[-1]])))
    layers = layers[::-1]
    for l, layer in enumerate(layers):
        for k, node in enumerate(layer):
            node.layer = l
            node.layer_index = k
    return layers


def change_tree_node_size(tree, size):
    """Set the ``'size'`` entry of ``img_style`` on every node of *tree*.

    Every node yielded by ``tree.traverse()`` is updated in place; nothing
    is returned.
    """
    for visited in tree.traverse():
        # Subscript assignment is kept on purpose so that any validation the
        # style object performs on item assignment still runs.
        visited.img_style['size'] = size


# Script section: build two equal-depth variants of the taxonomy tree and
# open each one in the ETE viewer with a shared tree style.
ts = ete2.TreeStyle()
# 'c' selects ETE's circular layout.
ts.mode = 'c'
#ts.show_leaf_name = False
#ts.layout_fn = named_internal_node_layout
# No fixed scale is set here; the scale-level option below is set to 'full'.
ts.scale = None
ts.optimal_scale_level = 'full'
# `taxonomy` and the builder/transform helpers are defined elsewhere in
# the file.
tree = build_tree_from_dict(taxonomy)
# Variant 1: presumably top-down equalisation (name suffix "_td") -- confirm
# against make_equal_depth_tree.
eq_tree_td = make_equal_depth_tree(tree)
change_tree_node_size(eq_tree_td, 10)
# Variant 2: bottom-up equalisation of the same source tree.
# NOTE(review): if make_equal_depth_tree mutates `tree` in place, this call
# sees the already-modified tree -- confirm.
eq_tree_bu = make_equal_depth_tree_bottom_up(tree)
change_tree_node_size(eq_tree_bu, 10)
#layers = get_equal_depth_tree_layers(eq_tree)
# Display both variants, one after the other, with the same style.
eq_tree_td.show(tree_style=ts)
eq_tree_bu.show(tree_style=ts)
Esempio n. 9
0
def make_figure(tree,
                timepoints,
                delimiter,
                scale,
                branch_vert_margin,
                fontsize,
                show_name,
                tree_orientation,
                show_scale=False):
    """Render a tree, coloured by timepoint, to a PDF file.

    The tree is rooted on the node named ``"root"``, every branch is
    coloured according to the earliest timepoint found among the leaves
    below it, selected node types are labelled, and the result is
    ladderized and rendered.

    Args:
        tree: Newick input handed to ``ete2.Tree``. The output path is this
            string with ``'_tree.nw'`` replaced by ``'_tree.pdf'``, so it is
            presumably a file path ending in ``_tree.nw`` (verify against
            the caller). If the substring is absent the output path equals
            the input string.
        timepoints: Iterable of objects exposing ``name``, ``order`` and
            ``color`` attributes. It is consumed in a single pass, so a
            generator is acceptable.
        delimiter: Separator passed to ``get_earliest_leaf`` and used to
            drop the first field of 'mab'/'input' node names in labels.
        scale: If truthy, converted with ``int`` and used as the tree
            style's ``scale``.
        branch_vert_margin: If truthy, converted with ``float`` and used as
            the tree style's ``branch_vertical_margin``.
        fontsize: Font size applied to the name labels.
        show_name: Which node types to label: ``'none'``, ``'all'``,
            ``'no-root'``, a single node-type string, or a collection of
            node-type strings.
        tree_orientation: Value assigned to the tree style's
            ``orientation``.
        show_scale: Whether the scale bar is drawn. Defaults to ``False``.

    Returns:
        None. The figure is written to disk as a side effect.
    """
    fig = tree.replace('_tree.nw', '_tree.pdf')

    # Build both lookups in one pass so one-shot iterables work too.
    orders = {}
    colors = {}
    for tp in timepoints:
        orders[tp.name] = tp.order
        colors[tp.name] = tp.color

    # settings for name showing: normalise show_name to a list of node types
    if show_name == 'none':
        show_name = []
    elif show_name == 'all':
        show_name = ['mab', 'root', 'input']
    elif show_name == 'no-root':
        show_name = ['input', 'mab']
    elif isinstance(show_name, (str, unicode)):
        show_name = [show_name]

    # make the tree
    t = ete2.Tree(tree)
    t.set_outgroup(t & "root")

    # style the nodes based on timepoint
    for node in t.traverse():
        earliest = get_earliest_leaf(node.get_leaf_names(), orders, delimiter)
        color = colors[earliest]
        node_type = get_node_type(node.name)
        style = ete2.NodeStyle()
        style['size'] = 0
        style['vt_line_width'] = 1.0
        style['hz_line_width'] = 1.0
        style['vt_line_color'] = color
        style['hz_line_color'] = color
        style['vt_line_type'] = 0
        style['hz_line_type'] = 0
        if node_type in show_name:
            if node_type in ['mab', 'input']:
                # Drop the first delimiter-separated field from the label.
                name = ' ' + delimiter.join(node.name.split(delimiter)[1:])
            else:
                name = ' ' + node.name
            tf = ete2.TextFace(name)
            tf.fsize = fontsize
            node.add_face(tf, column=0)
            style['fgcolor'] = '#000000'
        node.set_style(style)

    # style the full tree
    t.dist = 0
    ts = ete2.TreeStyle()
    ts.orientation = tree_orientation
    ts.show_leaf_name = False
    if scale:
        ts.scale = int(scale)
    if branch_vert_margin:
        ts.branch_vertical_margin = float(branch_vert_margin)
    # Honour the caller's choice; this was previously hard-coded to False,
    # which silently ignored the show_scale argument.
    ts.show_scale = show_scale

    # ladderize
    t.ladderize()
    # render the tree
    t.render(fig, tree_style=ts)
Esempio n. 10
0
]
# Script section: prune the starting tree down to the selected species,
# hide the node markers, then write a PDF and open the interactive viewer.
# `xistaltspeciestree` is defined earlier in the file.
specieslist = xistaltspeciestree
#starttree = "ensembl_amniota23.tree"
starttree = "hg38_100way.tree"
outpdf = "hsXIST_alt1_tree.pdf"

tree = ete.Tree(starttree)
# Keep only the listed species; branch lengths are not preserved.
tree.prune(specieslist, preserve_branch_length=False)

# Give every node a style with size 0 so no node marker is drawn.
for n in tree.traverse():
    style = ete.NodeStyle()
    #style['hz_line_width'] = 1
    #style['vt_line_width'] = 1
    style['size'] = 0
    n.set_style(style)
# 'r' selects ETE's rectangular layout; leaf names and the scale bar are
# turned off.
ts = ete.TreeStyle()
ts.mode = 'r'
ts.show_leaf_name = False
ts.show_scale = False
# Write the figure with h=200 (in render()'s default units), then show the
# same tree in the viewer.
tree.render(outpdf, tree_style=ts, h=200)
tree.show(tree_style=ts)
"""
grep -f <(grep ">" hg38_multiz100_RPL8_group1_mlocarna.fa | \
cut -c2-) ~/Documents/chang/psoralen/covariation/hg38_100way.name | \
cut -f5 | tr '\n' ',' | sed 's/,/", "/g'

multiz100tree.prune(rpl8specieslist, preserve_branch_length = False)

for n in multiz100tree.traverse():
    style = ete.NodeStyle()
    #style['hz_line_width'] = 1