def treelegendtext(cluster, color):
    """Return a legend ``TextFace`` showing ``cluster`` padded with one space per side.

    The face uses a bold 30-point font, has ``hz_align`` set to False and
    uses ``color`` as its background colour.

    NOTE: a second function with the same name is defined right after this
    one in the file and replaces it when the module is loaded.
    """
    legend_face = TextFace(" %s " % cluster)
    legend_face.background.color = color
    legend_face.fstyle = 'Bold'
    legend_face.fsize = 30
    legend_face.hz_align = False
    return legend_face
def treelegendtext(whattoprint, color):
    """Build the text face used for one legend entry.

    ``whattoprint`` is formatted with ``%s`` and surrounded by single spaces;
    the resulting face is bold, 30 points, not horizontally aligned
    (``hz_align`` is False) and drawn on a ``color`` background.
    """
    caption = " %s " % whattoprint
    face = TextFace(caption)
    face.fsize = 30
    face.fstyle = 'Bold'
    face.hz_align = False
    face.background.color = color
    return face
def make_tree(treefile, image_file, clone_info):
    """Render the tree stored in ``treefile`` to ``image_file`` with a week legend.

    Every node name has single quotes removed and '.' replaced by ','.  The
    part of the cleaned name before the first space is looked up in
    ``clone_info``; when present, the node is passed to ``style_node`` with

    * the colour ``colour_list[int(clone_info[name][0]) - 1]`` (1-based index)
    * the size ``int(int(clone_info[name][1]) / 10) + 5``

    Internal nodes whose name is not 'NoName' also get their name drawn above
    the branch.

    :param treefile: tree input handed to ``Tree(..., format=1)``.
    :param image_file: output path handed to ``Tree.render``.
    :param clone_info: mapping from node name to an indexable whose first two
        items are convertible with ``int`` (colour index, size value).
    """
    colour_list = ['MidnightBlue', 'RoyalBlue', 'LightSkyBlue', 'Aquamarine',
                   'SpringGreen', 'GreenYellow', 'Gold', 'DarkOrange']
    # Only these five time points get a legend entry; they pair with the first
    # five colours.
    weeks = ['6', '14', '53', '92', '144']

    t = Tree(treefile, format=1)

    ts = TreeStyle()
    for colour, week in zip(colour_list, weeks):
        ts.legend.add_face(CircleFace(20, colour), column=0)
        ts.legend.add_face(TextFace('week' + week), column=1)
    ts.legend_position = 2
    ts.show_leaf_name = True
    ts.branch_vertical_margin = 15
    ts.rotation = 90

    for node in t.traverse():
        node.name = node.name.replace("'", "").replace(".", ",")
        name = node.name.split(' ')[0]
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(name)
        if name in clone_info:
            info = clone_info[name]
            style_node(node, colour_list[int(info[0]) - 1],
                       int(int(info[1]) / 10) + 5)
        if not node.is_leaf() and node.name != 'NoName':
            f = TextFace(node.name)
            f.margin_top = 2.5
            f.margin_bottom = 2.5
            f.margin_right = 2.5
            f.margin_left = 2.5
            node.add_face(f, column=0, position="branch-top")
    t.render(image_file, tree_style=ts)
Beispiel #4
0
    def render_annotate(newick_path, output_path):
        """Render the annotated tree, showing internal node names.

        The output_path should end in .PNG, .PDF or .SVG: this will determine
        the format.  To aid in testing, if output_path is None, the tree is
        shown rather than rendered.

        Node names have single quotes removed and '+' replaced by ','.  Every
        internal node whose name is not "NoName" gets its name drawn above its
        branch.

        :param newick_path: tree input handed to ``Tree(..., format=1)``.
        :param output_path: destination image path, or None to open the viewer.
        """
        tree = Tree(newick_path, format=1)

        ts = TreeStyle()
        ts.show_leaf_name = True
        ts.branch_vertical_margin = 15

        for node in tree.traverse():
            node.name = node.name.replace("'", "").replace("+", ",")
            if not node.is_leaf() and node.name != "NoName":
                f = TextFace(node.name)
                f.margin_top = 5
                f.margin_bottom = 5
                f.margin_right = 10
                f.margin_left = 10
                node.add_face(f, column=0, position="branch-top")

        if output_path is None:
            tree.show(tree_style=ts)
        else:
            # Pass the style explicitly; without it the rendered file ignored
            # the settings above while the interactive view honoured them.
            tree.render(output_path, tree_style=ts)
Beispiel #5
0
def draw_tree(tree, file):
    """Root ``tree`` at its midpoint where possible and render it to ``file``.

    Steps, in order:

    1. Try to re-root on the midpoint outgroup; a failure is ignored and the
       tree keeps its current rooting.
    2. Set the root branch length to 0 and call ``add_sig(tree)``.
    3. Render with an image legend (``~/Perl/Modules/TreeLegend.png``) and a
       title made of ``file`` up to its first '.'.

    :param tree: ETE tree to draw; it is re-rooted and modified in place.
    :param file: output path; also the source of the title text.
    """
    root = tree.get_midpoint_outgroup()
    try:
        tree.set_outgroup(root)
    except Exception:
        # Best effort: keep the existing rooting when midpoint rooting is not
        # possible.  Unlike a bare ``except`` this lets KeyboardInterrupt and
        # SystemExit propagate.
        pass
    root = tree.get_tree_root()
    root.dist = 0
    add_sig(tree)

    ts = TreeStyle()
    ts.branch_vertical_margin = 1
    ts.show_leaf_name = False

    leg_file = path.join(path.expanduser('~'), 'Perl', 'Modules', 'TreeLegend.png')
    leg_face = ImgFace(img_file=leg_file)
    leg_face.margin_left, leg_face.margin_top = 5, 5
    ts.legend.add_face(leg_face, column=1)
    ts.legend_position = 1

    title_face = TextFace(text=file.split('.')[0])
    title_face.margin_left, title_face.margin_top = 10, 5
    ts.title.add_face(title_face, column=1)
    (ts.margin_left, ts.margin_right) = (5, 5)
    tree.render(file, tree_style=ts, w=6000, units='mm')
Beispiel #6
0
def rotation_layout(node):
    """Layout function that decorates leaves with a randomly rotated name.

    For a leaf, three faces are attached at position "branch-right": the text
    "third" in column 8, "second" in column 2, and the node name in column 0.
    The name face is rotated by a random integer in 0..360 and gets outer and
    inner borders of width 1.  Non-leaf nodes are left untouched.
    """
    if not node.is_leaf():
        return

    name_face = TextFace(node.name, tight_text=True)
    name_face.rotation = randint(0, 360)

    for caption, col in (("third", 8), ("second", 2)):
        add_face_to_node(TextFace(caption), node, column=col, position="branch-right")
    add_face_to_node(name_face, node, column=0, position="branch-right")

    name_face.border.width = 1
    name_face.inner_border.width = 1
Beispiel #7
0
def drawTree(nwfile, outfile):
    """Render the tree read from ``nwfile`` to ``outfile`` with a fixed title.

    The tree style hides the default leaf names, uses ``my_layout`` as the
    layout function, sets ``orientation`` to 1 and a vertical branch margin of
    12.75, and carries the title "Phylogenetic Tree" (18 pt, white foreground,
    top margin 15) in title column 1.
    """
    from ete2 import Tree, TreeStyle, TextFace

    style = TreeStyle()
    style.show_leaf_name = False
    style.layout_fn = my_layout
    style.branch_vertical_margin = 12.75
    style.orientation = 1

    heading = TextFace("Phylogenetic Tree", fsize=18, fgcolor="white")
    heading.margin_top = 15
    style.title.add_face(heading, column=1)

    Tree(nwfile).render(outfile, tree_style=style)
Beispiel #8
0
 def ly_tax_labels(node):
     """Layout function adding one aligned label per tracked clade to a leaf.

     Starting at column ``LABEL_START_COL``, every name in ``TRACKED_CLADES``
     that appears in the leaf's ``named_lineage`` attribute (if the leaf has
     one) gets a white 10 pt label on the background ``lin2color[name]``.
     Columns from the next free one up to ``len(TRACKED_CLADES) - 1`` are
     then filled with empty faces.  Non-leaf nodes are ignored.
     """
     if not node.is_leaf():
         return

     c = LABEL_START_COL
     has_lineage = hasattr(node, "named_lineage")
     for tname in TRACKED_CLADES:
         if has_lineage and tname in node.named_lineage:
             linF = TextFace(tname, fsize=10, fgcolor='white')
             linF.margin_left = 3
             linF.margin_right = 2
             linF.background.color = lin2color[tname]
             add_face_to_node(linF, node, c, position='aligned')
             c += 1

     # Pad the remaining columns with empty faces.
     for col in range(c, len(TRACKED_CLADES)):
         add_face_to_node(TextFace('', fsize=10, fgcolor='slategrey'), node, col, position='aligned')
Beispiel #9
0
def prettifyTree(ete_tree, leaf_font_size = 32, branch_support_size = 20, show_bootstraps = True, title=None, ts = None):
    ''' Apply a standard look to an ETE tree and its tree style.

    - Every leaf gets an aligned Times text face with its name at
      leaf_font_size (which also lines up any other aligned annotations).
    - If show_bootstraps is true, every internal node gets a Times text face
      with its support value at branch_support_size, above the branch.
    - The root branch length is set to 0.
    - The built-in leaf-name and branch-support displays are switched off,
      since the faces above replace them.
    - If title is given, the style's title is cleared and replaced by a
      52 pt text face with hz_align set to True.

    A new TreeStyle is created when ts is None; otherwise the passed one is
    modified in place.  Returns the tuple (ete_tree, ts).
    '''
    for node in ete_tree.traverse():
        if node.is_leaf():
            text, size, where = node.name, leaf_font_size, "aligned"
        elif show_bootstraps:
            text, size, where = node._support, branch_support_size, "branch-top"
        else:
            continue
        node.add_face(faces.TextFace(text, ftype="Times", fsize=size), 0, position=where)

    # Work around the long root branch drawn by this ETE version.
    ete_tree.dist = 0

    if ts is None:
        ts = TreeStyle()

    # Setting ts.optimal_scale_level = "full" would remove the dashed branch
    # extensions, but was left disabled because of how the result looks.

    ts.show_leaf_name = False
    ts.show_branch_support = False

    if title is not None:
        ts.title.clear()
        title_face = TextFace(title)
        title_face.hz_align = True
        title_face.fsize = 52
        ts.title.add_face(title_face, 0)

    return ete_tree, ts
Beispiel #10
0
def layout(node):
    """Layout function colouring branches and labelling leaves.

    All nodes: node size 0, horizontal and vertical line width 2.

    Leaves: ``node.orig_name`` is split on "__"; the second field (with
    underscores turned into spaces) is shown as an italic aligned species
    name in column 0 and the stripped first field as a grey code in column 1
    at "branch-right".  Leaf branches are green.

    Internal nodes: the branch colour is chosen from ``float(node.B)``:
    >= 90 green, >= 70 blue, >= 50 darkblue, >= 30 pink, >= 10 red,
    anything else yellow.
    """
    node.img_style["size"] = 0
    for width_key in ('hz_line_width', 'vt_line_width'):
        node.img_style[width_key] = 2

    if node.is_leaf():
        # orig_name is "<code>__<species_name>"
        parts = node.orig_name.split("__")
        species = parts[1].replace('_', ' ')
        species_code = "%s" % parts[0].strip()

        species_face = TextFace(species, fsize=12, fgcolor='#444', fstyle='italic')
        add_face_to_node(species_face, node, column=0, position='aligned')

        code_face = TextFace(species_code, fsize=12, fgcolor='grey')
        code_face.margin_left = 4
        code_face.margin_right = 4
        add_face_to_node(code_face, node, column=1, position='branch-right')

        line_color = "green"
    else:
        support = float(node.B)
        line_color = "yellow"  # used when no threshold is reached
        thresholds = ((90, "green"), (70, "blue"), (50, "darkblue"),
                      (30, "pink"), (10, "red"))
        for lower_bound, shade in thresholds:
            if support >= lower_bound:
                line_color = shade
                break

    node.img_style['hz_line_color'] = line_color
    node.img_style['vt_line_color'] = line_color
def add_faces(cur, field, leaf, label_info, colours, bg_colour, outfile):
    """Attach one coloured text face per entry of ``label_info`` to ``leaf``.

    The first label gets a trailing ':'; every later label except the last
    gets a trailing ','.  When ``outfile`` contains '.svg', each label is
    additionally padded with spaces and uses a left margin of 20 instead of 5.

    Faces go to position "branch-right".  The column is ``x - y + 1`` where
    ``y`` grows by 3 whenever ``x`` is a multiple of 3 greater than 1, so
    columns run 1, 2, 3 and then repeat 1, 2, 3 for each following group of
    three labels.

    NOTE: ``label_info`` is modified in place (punctuation and padding are
    appended to its items), exactly as before.

    :param cur: unused here.
    :param field: unused here.
    :param leaf: node receiving the faces via ``add_face``.
    :param label_info: mutable list of label strings.
    :param colours: foreground colours, indexed like ``label_info``.
    :param bg_colour: background colour applied to every face.
    :param outfile: output file name, only inspected for '.svg'.
    """
    is_svg = '.svg' in outfile
    last_index = len(label_info) - 1
    y = 0
    for x, text in enumerate(label_info):
        if x == 0:
            text += ':'
        elif x < last_index:
            text += ','
        if is_svg:
            # One space, plus one more per five characters.  Floor division
            # keeps the count an int on Python 3 as well (``/`` would give a
            # float there and ``' ' * float`` raises TypeError).
            text += ' ' * (1 + len(text) // 5)
        label_info[x] = text

        label = TextFace(text)
        label.margin_left = 20 if is_svg else 5
        label.fgcolor = colours[x]
        label.background.color = bg_colour
        if x > 1 and x % 3 == 0:
            y += 3
        leaf.add_face(label, column=x - y + 1, position="branch-right")
	def render_tree_image(self, filename):
		"""Render this object's tree to ``filename``.

		The tree is parsed from ``self.newick_string()`` followed by ';'.
		Every descendant node gets a grey ("#333") node marker of size 15 and
		a red label above its branch that alternates "0", "1", "0", ... in
		``iter_descendants`` order.  Node names are drawn in blue by the
		layout function.  The image is rotated by 90 degrees, has no scale
		bar, and is rendered with a width of 2000.
		"""
		def my_layout(node):
			# Draw the node name in blue to the right of its branch.
			blue_name = AttrFace("name", fsize=10, fgcolor = "#0000FF")
			faces.add_face_to_node(blue_name, node, column=0, position="branch-right")

		tree = Tree("%s;" % self.newick_string(), format = 1)

		for index, descendant in enumerate(tree.iter_descendants()):
			# Even positions are labelled "0", odd positions "1".
			bit_face = TextFace(str(index % 2))
			bit_face.fgcolor = "#FF0000"

			marker = NodeStyle()
			marker['size'] = 15
			marker['fgcolor'] = "#333"
			descendant.set_style(marker)
			descendant.add_face(bit_face, column = 0, position = "branch-top")

		style = TreeStyle()
		style.rotation = 90
		style.show_leaf_name = False
		style.layout_fn = my_layout
		style.margin_left = 0
		style.branch_vertical_margin = 50
		style.show_scale = False
		tree.render(filename, tree_style = style, w = 2000)
Beispiel #13
0
def MODZ_ALL(DIST_DIR, SCORE, TREE, OUT_DIR, OUTGROUPS):
    """Flag outlier taxa per gene from pairwise distances and draw rooted trees.

    Pipeline:

    1. Read every ``<DIST_DIR>/RAxML_distances.*`` file (gene = second
       dot-separated part of the file name) and divide each distance by the
       median distance of its file.
    2. Read every ``<DIST_DIR>/*.phy`` file and record, per gene, the
       sequence id of each species (names look like ``species___seqid``).
    3. Per species pair, divide the ratios by 0.1 * their median across genes
       and take the natural log.
    4. Per gene and species, sum the normalised values over all pairs that
       contain the species, divide by the number of possible species pairs
       and multiply by 1000.  Species whose value exceeds ``float(SCORE)``
       are appended to ``<OUT_DIR>/outlier_taxa.<gene>.txt`` and their
       sequence id to ``<OUT_DIR>/<species>_seqids.txt``.
    5. Per gene, load ``<TREE>/RAxML_result.<gene>.constrained.tre``, root it
       on the first of Outgroup1/2/3 that has taxa in the tree (midpoint
       rooting otherwise), mark flagged leaves and render to
       ``<OUT_DIR>/<gene>.tre.pdf``.

    The module-level names ``nstyle``, ``RED``, ``ts`` and ``MAD`` are used
    but not defined in this function.

    :param DIST_DIR: directory holding the distance and ``.phy`` files.
    :param SCORE: threshold, converted with ``float``.
    :param TREE: directory holding the per-gene tree files.
    :param OUT_DIR: directory receiving text outputs and PDFs.
    :param OUTGROUPS: path of a text file whose lines start with
        ``Outgroup1``, ``Outgroup2`` or ``Outgroup3``; tokens from the third
        one onwards are taken as taxon names.
    """
    #######################################
    #        Load outgroup definitions
    #######################################
    # Read up front inside a ``with`` so the handle is closed (it used to
    # stay open for the whole run).  Blank lines are skipped instead of
    # raising IndexError.
    OUTGROUP_TAXA = {"Outgroup1": [], "Outgroup2": [], "Outgroup3": []}
    with open(OUTGROUPS, 'r') as OUTGROUP_FILE:
        for LINE in OUTGROUP_FILE:
            FIELDS = LINE.split()
            if FIELDS and FIELDS[0] in OUTGROUP_TAXA:
                OUTGROUP_TAXA[FIELDS[0]].extend(FIELDS[2:])
    OUTGROUP_PRIORITY = (OUTGROUP_TAXA["Outgroup1"],
                         OUTGROUP_TAXA["Outgroup2"],
                         OUTGROUP_TAXA["Outgroup3"])

    #######################################
    #        Load all distances into a data matrix
    #######################################
    t0 = time()
    print('************************** Loading data from distance files.')
    DATA = {}
    for DIST_FILE in glob('%s/RAxML_distances.*' % DIST_DIR):
        GENE = os.path.basename(DIST_FILE).split('.')[1]
        PAIRS = []
        with open(DIST_FILE, 'r') as INPUT:
            for LINE in INPUT:
                # Each line is "<name1> <name2> \t <distance>": split on
                # space-tab-space, the last item is the distance.
                FIELDS = LINE.split(' \t ')
                DATUM = float(FIELDS[-1].strip('\n'))
                # Sort the two names so a pair always yields the same key,
                # then drop the sequence id after the triple underscore.
                NAMES = sorted(FIELDS[0].split(' '))
                KEY = "___".join([NAMES[0].split("___")[0],
                                  NAMES[1].split("___")[0]])
                PAIRS.append((KEY, DATUM))
        JUST_MEDIAN = np.median([PAIR[1] for PAIR in PAIRS])
        for KEY, DATUM in PAIRS:
            DATA.setdefault(KEY, {})[GENE] = DATUM / JUST_MEDIAN
    t1 = time()
    print('Loading took %f seconds' % (t1 - t0))

    #######################################
    #        LOAD SEQ IDS
    #######################################
    SEQ_ARR = {}
    for SEQ_FILE in glob('%s/*.phy' % DIST_DIR):
        GENE = os.path.basename(SEQ_FILE).split('.')[0]
        # Mode was 'r ' (trailing space), which Python 3 rejects.
        with open(SEQ_FILE, 'r') as SEQ_DATA:
            for LINE in SEQ_DATA:
                if LINE and LINE[0].isalpha():
                    NAME_PARTS = LINE.split('___')
                    SPECIES = NAME_PARTS[0]
                    SEQ_ID = NAME_PARTS[1].split(' ')[0]
                    SEQ_ARR.setdefault(GENE, {})[SPECIES] = SEQ_ID

    #######################################
    #        Generate Modified Z-Scores For Each Distance
    #######################################
    # Dividing by 0.1 * median centres every distribution on 10, so taking
    # the log does not push values below zero.
    t0 = time()
    print("************************** Calculating Z-Scores")
    DATA_NORMALIZED = copy.deepcopy(DATA)
    for GENES_DATA in DATA_NORMALIZED.values():
        DENOM = 0.1 * np.median(list(GENES_DATA.values()))
        for GENE, DATUM in list(GENES_DATA.items()):
            GENES_DATA[GENE] = np.log(DATUM / DENOM)

    # NOTE(review): DATA_MODZ is computed but never read afterwards; the
    # per-gene table below is built from DATA_NORMALIZED.  Kept unchanged;
    # confirm which one was intended.
    DATA_MODZ = copy.deepcopy(DATA_NORMALIZED)
    for GENES_DATA in DATA_MODZ.values():
        VALUES_LIST = list(GENES_DATA.values())
        MEDIAN = np.median(VALUES_LIST)
        MAD_GENES_DATA = MAD(VALUES_LIST)
        for GENE, DATUM in list(GENES_DATA.items()):
            if MAD_GENES_DATA == 0:
                if DATUM >= 2.31:
                    GENES_DATA[GENE] = DATUM + 1.2
            else:
                GENES_DATA[GENE] = (0.6745 * (DATUM - MEDIAN)) / MAD_GENES_DATA
    t1 = time()
    print('Calculating took %f seconds' % (t1 - t0))

    #######################################
    #        Collect values by Gene
    #######################################
    # GENE_DATA[gene][species][species_pair] = normalised value; each pair is
    # filed under both of its species.
    GENE_DATA = {}
    for SPECIES___SPECIES, GENES_DATA in DATA_NORMALIZED.items():
        SPECIES_1 = SPECIES___SPECIES.split('___')[0]
        SPECIES_2 = SPECIES___SPECIES.split('___')[1]
        for GENE, DATUM in GENES_DATA.items():
            BY_SPECIES = GENE_DATA.setdefault(GENE, {})
            BY_SPECIES.setdefault(SPECIES_1, {})[SPECIES___SPECIES] = DATUM
            BY_SPECIES.setdefault(SPECIES_2, {})[SPECIES___SPECIES] = DATUM

    #######################################
    #        COUNT OUTLIERS AND WRITE FILES
    #######################################
    t0 = time()
    print("************************** Writing Output")
    for GENE, VALUES in GENE_DATA.items():
        BADSPECIES = {}
        TOTAL_SPECIES = len(VALUES)
        # n * (n - 1) is always even, so floor division is exact.
        TOTAL_COMPARISONS = ((TOTAL_SPECIES - 1) * TOTAL_SPECIES) // 2
        for SPECIES, DISTANCES in VALUES.items():
            SPECIES_COUNT = float(sum(float(DATUM) for DATUM in DISTANCES.values()))
            RATIO = (SPECIES_COUNT / float(TOTAL_COMPARISONS)) * 1000
            if RATIO > float(SCORE):
                BADSPECIES[SPECIES] = RATIO
        for TAXON in BADSPECIES:
            with open('%s/outlier_taxa.%s.txt' % (OUT_DIR, GENE),
                      'a') as OUT_OUT:
                OUT_OUT.write("%s\n" % TAXON)
            with open('%s/%s_seqids.txt' % (OUT_DIR, TAXON), 'a') as OUT_IDS:
                TAXON_SEQ = SEQ_ARR[GENE][TAXON]
                OUT_IDS.write("%s\n" % TAXON_SEQ)

        #######################################
        #        GENERATE TREES (still inside the per-gene loop)
        #######################################
        # Map each species in the tree to its sequence id.
        TREE_LIST = {}
        T = Tree("%s/RAxML_result.%s.constrained.tre" % (TREE, GENE))
        for LEAF in T:
            LEAF_PARTS = LEAF.name.split("___")
            TREE_LIST[LEAF_PARTS[0]] = LEAF_PARTS[1]

        # Use the first outgroup (1, then 2, then 3) with taxa in this tree.
        NEW_OUTGROUP = []
        for CANDIDATES in OUTGROUP_PRIORITY:
            for SPECIES, SEQID in TREE_LIST.items():
                if SPECIES in CANDIDATES:
                    NEW_OUTGROUP.append("___".join([SPECIES, SEQID]))
            if NEW_OUTGROUP:
                break

        if len(NEW_OUTGROUP) > 1:
            ANCESTOR = T.get_common_ancestor(NEW_OUTGROUP)
            T.set_outgroup(ANCESTOR)
        elif len(NEW_OUTGROUP) == 1:
            T.set_outgroup(NEW_OUTGROUP[0])
        else:
            print(
                "%s: No outgroup taxa present. Rooting at midpoint instead. This may break a monophyletic group."
                % GENE)
            R = T.get_midpoint_outgroup()
            T.set_outgroup(R)

        # Apply the default style everywhere, then highlight flagged leaves
        # and print their score next to them.
        for CLADE in T.traverse():
            CLADE.set_style(nstyle)
        for LEAF in T:
            SPECIES = LEAF.name.split('___')[0]
            if SPECIES in BADSPECIES:
                LEAF.img_style = RED
                LEAF.add_face(TextFace("\t%.2f" % BADSPECIES[SPECIES]),
                              column=1,
                              position="branch-right")
        T.render('%s/%s.tre.pdf' % (OUT_DIR, GENE), tree_style=ts)
    t1 = time()
    print('Writing took %f seconds' % (t1 - t0))
Beispiel #14
0
def showTree(delimitation, scale = 500, render = False, fout = "", form = "svg", show_support = False):
	"""Draw the tree of a delimitation result, highlighting its active nodes.

	delimitation: species_setting class; its ``root`` and ``active_nodes``
	attributes are used.

	All nodes are first reset: faces cleared, blue ("#0000aa") lines of width
	2, node size 0.  Each node in ``delimitation.active_nodes`` then gets a
	red vertical / blue horizontal line style, and all of its descendants an
	all-red one.  With ``show_support`` set, every node having a ``bs``
	attribute gets that value drawn above its branch ("0" for exactly 0.0,
	two decimals otherwise).

	scale is the number of pixels per branch length unit.  When ``render`` is
	true the tree is written to ``fout + "." + form``; otherwise it is shown
	interactively.
	"""
	def _line_style(fg, vertical, horizontal):
		# NodeStyle with width-2 lines of line type 0 and no node marker.
		style = NodeStyle()
		style["fgcolor"] = fg
		style["vt_line_color"] = vertical
		style["hz_line_color"] = horizontal
		style["vt_line_width"] = 2
		style["hz_line_width"] = 2
		style["vt_line_type"] = 0
		style["hz_line_type"] = 0
		style["size"] = 0
		return style

	tree = delimitation.root
	base_style = _line_style("#000000", "#0000aa", "#0000aa")

	# Reset every node to the base look and drop faces from earlier calls.
	tree.clear_face()
	for descendant in tree.get_descendants():
		descendant.set_style(base_style)
		descendant.img_style["size"] = 0
		descendant.clear_face()

	tree.set_style(base_style)
	tree.img_style["size"] = 0

	active_style = _line_style("#000000", "#ff0000", "#0000aa")
	inside_style = _line_style("#0f0f0f", "#ff0000", "#ff0000")

	for active in delimitation.active_nodes:
		active.set_style(active_style)
		active.img_style["size"] = 0
		for member in active.get_descendants():
			member.set_style(inside_style)
			member.img_style["size"] = 0

	if show_support:
		for current in delimitation.root.traverse(strategy='preorder'):
			if not hasattr(current, "bs"):
				continue
			if current.bs == 0.0:
				support_text = "0"
			else:
				support_text = "{0:.2f}".format(current.bs)
			current.add_face(TextFace(support_text, fsize = 8), column=0, position = "branch-top")

	ts = TreeStyle()
	# scale: pixels per branch length unit
	ts.scale = scale
	ts.branch_vertical_margin = 7
	if not render:
		tree.show(tree_style=ts)
	else:
		tree.render(fout+"."+form, tree_style=ts)
def draw_tree_regions(clusterrunid, t, ts, cur, greyout=3):
    '''
    Draw the neighborhoods around each of the genes in a gene tree given the cluster and run IDs and the tree (t)

    clusterrunid is the run ID to use to identify homologous clusters and ts is the TreeStyle object associated with the
    ETE tree t

    cur is a SQLite cursor object for the database

    The arrows are grayed out if less than "greyout" genes appear in a given cluster.

    Returns the tuple (t, ts).  If no gene of the tree is found in the database, both are
    returned right after the leading-dash clean-up, without neighborhood images or a legend.
    '''
    from collections import Counter

    # DEPRECATED
    t, tblastnadded = removeLeadingDashes(t)

    unsanitized = [unsanitizeGeneId(genename) for genename in t.get_leaf_names()]

    # Create a list of SeqFeature objects for the neighbors of each gene in the tree
    # If passed a TBLASTN hit it will create seq objects for every gene surrounding the TBLASTN hit and
    # for the TBLASTN hit itself.
    #
    # Nothing is added if we can't find that ID in the database or the ID is badly formatted.
    seqfeatures = {}
    for genename in unsanitized:
        sys.stderr.write("Getting gene neighborhoods for gene %s...\n" %(genename) )
        features_for_genename = makeSeqFeaturesForGeneNeighbors(genename, clusterrunid, cur)
        if len(features_for_genename) > 0:
            seqfeatures[genename] = features_for_genename
            continue
        # Try TBLASTN and if that doesn't work, just give up on this gene.
        try:
            seqfeatures[genename] = makeSeqObjectsForTblastnNeighbors(genename, clusterrunid, cur)
        except ValueError:
            sys.stderr.write("WARNING: Unable to find entries for gene or TBLASTN hit %s in database\n" %(genename) )

    # Don't bother trying the rest if nothing matches at all.
    if not seqfeatures:
        sys.stderr.write("WARNING: No genes in input tree had entries in the database so no neighborhoods will be drawn\n")
        return t, ts

    # Get a list of clusters containing these genes
    allclusters = []
    for gene in seqfeatures:
        for feature in seqfeatures[gene]:
            allclusters.append(feature.qualifiers["cluster_id"])

    uniqueclusters = set(allclusters)
    # Count each cluster once instead of calling list.count() per unique cluster.
    clustercounts = Counter(allclusters)

    # Get clusters that have enough members to bother trying to color them (as determined by
    # the greyout keyword)
    multipleclusters = [c for c in uniqueclusters if clustercounts[c] >= greyout]

    # Don't die if nothing has enough clusters...
    if len(multipleclusters) > 0:
        getcolor = colormap(multipleclusters)
    else:
        getcolor = {}

    # also add in grey (0.5,0.5,0.5 in RGB) for all others
    singleclusters = [c for c in uniqueclusters if clustercounts[c] < greyout]
    getcolor.update([(sc, (0.5,0.5,0.5)) for sc in singleclusters])

    # generate the region images for any leaf that has them, and map onto the tree
    # we will want to know the max width to make the figures
    widths = []
    for genelocs in seqfeatures.values():
        start, end = regionlength(genelocs)
        widths.append(abs(end - start))
    maxwidth = max(widths)

    for leaf in t.iter_leaves():
        newname = unsanitizeGeneId(leaf.name)
        # Not all genes necessarily are in the database and we don't want to crash if that happens.
        # Instead, just don't print a neighborhood for them.
        if newname not in seqfeatures:
            continue
        genelocs = seqfeatures[newname]
        sys.stderr.write("Making region drawing for gene ID %s...\n" %(newname))
        imgfileloc = make_region_drawing(genelocs, getcolor, newname, maxwidth)
        imageFace = faces.ImgFace(imgfileloc)
        leaf.add_face(imageFace, column=2, position = 'aligned')
        if newname in tblastnadded:
            leaf.add_face(TextFace("TBlastN added", fsize=30), column=3, position = 'aligned')

    # add legend for clusters
    ts = treelegend(ts, getcolor, greyout)

    return t, ts
    applyNodeStyle(tE5B, "root", LINEWIDTH, "White", POINTSIZE, "Black")
    applyNodeStyle(tE5B, " Bacteria", LINEWIDTH, "Silver", POINTSIZE, "Black")
    applyNodeStyle(tE5B, " Eukaryota", LINEWIDTH, "Gainsboro", POINTSIZE,
                   "Black")
    applyNodeStyle(tE5B, " Viruses", LINEWIDTH, "DarkGrey", POINTSIZE, "Black")

    applyNodeStyle(tE5C, "root", LINEWIDTH, "White", POINTSIZE, "Black")
    applyNodeStyle(tE5C, " Bacteria", LINEWIDTH, "Silver", POINTSIZE, "Black")
    applyNodeStyle(tE5C, " Eukaryota", LINEWIDTH, "Gainsboro", POINTSIZE,
                   "Black")
    applyNodeStyle(tE5C, " Viruses", LINEWIDTH, "DarkGrey", POINTSIZE, "Black")

    FONTSIZE = 24
    for leaf in tB50square.iter_leaves():
        T = TextFace(' ' + leaf.name, fsize=(FONTSIZE), fgcolor='Black')
        leaf.add_face(T, 0, position="aligned")

    #square_style.show_branch_length = True;
    #tB50square.show(tree_style = square_style)
    tB50 = copy.deepcopy(tB50square)

    for leaf in tE5A.iter_leaves():
        T = TextFace(' ' + leaf.name, fsize=(FONTSIZE), fgcolor='Black')
        leaf.add_face(T, 0, position="aligned")

    for leaf in tE5B.iter_leaves():
        T = TextFace(' ' + leaf.name, fsize=(FONTSIZE), fgcolor='Black')
        leaf.add_face(T, 0, position="aligned")

    for leaf in tE5C.iter_leaves():
def _add_count_labels(tree, count, columns, control_color, infect_color):
    """Add control-count, infect-count and name faces to every leaf of ``tree``.

    ``columns`` is ``(control_column, infect_column, name_column)``.  A space
    is appended to each count (and put around the name) so neighbouring
    labels do not touch.  Raises KeyError when a leaf name is missing from
    ``count``.
    """
    control_col, infect_col, name_col = columns
    for n in tree.iter_leaves():
        (control, infect) = count[n.name]
        T = TextFace(str(control) + ' ', fsize=FONTSIZE, fgcolor=control_color)
        n.add_face(T, control_col, position="aligned")
        T = TextFace(str(infect) + ' ', fsize=FONTSIZE, fgcolor=infect_color)
        n.add_face(T, infect_col, position="aligned")
        T = TextFace(" " + n.name + " ", fsize=(FONTSIZE + 2),
                     fgcolor='Black')
        n.add_face(T, name_col, position="aligned")


def makeGraphsToFile(t, filenameStem, outputpath, count):
    """Render four circular PNG versions of ``t`` with per-leaf count labels.

    Files written (names are ``outputpath + filenameStem + suffix``):

    * ``_color_v1.png`` - colour styles, columns: control, infect, name
    * ``_color_v2.png`` - colour styles, columns: name, control, infect
    * ``_grey_v1.png``  - grey styles,   columns: name, control, infect
    * ``_grey_v2.png``  - grey styles,   columns: control, infect, name

    NOTE(review): the v1/v2 column orders are swapped between the colour and
    the grey pair.  This is kept exactly as it was; confirm whether it is
    intended.

    The tree passed in is styled and labelled in place for the first colour
    rendering; the grey renderings start from a deep copy taken before any
    change.

    :param t: ETE tree to draw.
    :param filenameStem: middle part of every output file name.
    :param outputpath: prefix of every output file name (plain string
        concatenation, so include a trailing separator if needed).
    :param count: mapping leaf name -> (control, infect).  It must contain
        every leaf name, otherwise KeyError is raised (the original author
        recorded hitting this in the greyscale pass).
    """
    t_back = copy.deepcopy(t)
    counts_first = (0, 1, 2)
    name_first = (1, 2, 0)

    ### COLOR
    #All Nodes
    applyNodeStyle(t, "root", LINEWIDTH, "White", POINTSIZE, "Black")
    #Other Kingdoms
    applyNodeStyle(t, "uk_Prokaryota", LINEWIDTH, "lightgreen", POINTSIZE,
                   "Black")
    applyNodeStyle(t, "k_Fungi", LINEWIDTH, "wheat", POINTSIZE, "Black")
    #Stramenopiles
    applyNodeStyle(t, "o_Peronosporales", LINEWIDTH, "goldenrod", POINTSIZE,
                   "Black")
    applyNodeStyle(t, "o_Saprolegniales", LINEWIDTH, "goldenrod", POINTSIZE,
                   "Black")

    # Copy after styling but before labelling, for the alternate layout.
    t2 = copy.deepcopy(t)

    _add_count_labels(t, count, counts_first, 'MediumBlue', 'FireBrick')

    circular_style = TreeStyle()
    circular_style.mode = "c"  # draw tree in circular mode
    circular_style.scale = 31  #length of 1 level transition in tree
    circular_style.show_scale = False
    circular_style.show_leaf_name = False
    t.render(outputpath + filenameStem + "_color_v1.png",
             tree_style=circular_style,
             w=WIDTH,
             dpi=DPI)

    ### COLOR -- Alternate ordering of text labels, using copied t2
    _add_count_labels(t2, count, name_first, 'MediumBlue', 'FireBrick')
    t2.render(outputpath + filenameStem + "_color_v2.png",
              tree_style=circular_style,
              w=WIDTH,
              dpi=DPI)

    ### GREYSCALE -- start again from the untouched copy
    t = copy.deepcopy(t_back)
    #All Nodes ("uk_Prokaryota" keeps the all-nodes style here)
    applyNodeStyle(t, "root", LINEWIDTH, "White", POINTSIZE, "Black")
    #Other Kingdoms
    applyNodeStyle(t, "k_Fungi", LINEWIDTH, "Silver", POINTSIZE, "Black")
    applyNodeStyle(t, "f_Retroviridae", LINEWIDTH, "GainsBoro", POINTSIZE,
                   "Black")
    #Stramenopiles
    applyNodeStyle(t, "o_Peronosporales", LINEWIDTH, "DarkGrey", POINTSIZE,
                   "Black")
    applyNodeStyle(t, "o_Saprolegniales", LINEWIDTH, "DarkGrey", POINTSIZE,
                   "Black")

    t2 = copy.deepcopy(t)

    _add_count_labels(t, count, name_first, 'Black', 'DimGray')
    t.render(outputpath + filenameStem + "_grey_v1.png",
             tree_style=circular_style,
             w=WIDTH,
             dpi=DPI)

    ### GREY -- ALTERNATE ordering of text labels, using copied t2
    _add_count_labels(t2, count, counts_first, 'Black', 'DimGray')
    t2.render(outputpath + filenameStem + "_grey_v2.png",
              tree_style=circular_style,
              w=WIDTH,
              dpi=DPI)
Beispiel #18
0
def plot_blast_result(tree_file,
                      blast_result_file_list,
                      id2description,
                      id2mlst,
                      check_overlap,
                      ordered_queries,
                      fasta_file2accession,
                      id_cutoff=80,
                      reference_accession='-',
                      accession2hit_filter=False,
                      show_identity_values=True):
    '''
    Draw a phylogeny side by side with the conservation of virulence factors.

    Staph. aureus PVL project (with Laure Jaton). Needs the MLST results, the
    whole set of tblastn results (virulence factors vs chromosomes) and a
    mapping between genome accessions and the names to display in the
    phylogeny (patient "molis" identifiers are replaced by "CHUV n").

    The tree is midpoint-rooted and ladderized, one aligned column is added
    per retained query, and the figure is written to "profile.svg" in the
    current directory.

    Relies on the module-level names ``Tree``, ``TreeStyle``, ``TextFace``,
    ``NodeStyle``, ``rgb2hex``, ``m_blue`` and ``m_green``.

    :param tree_file: newick phylogeny whose leaf names are keys of
        fasta_file2accession and id2mlst
    :param blast_result_file_list: tblastn results, virulence factors vs
        chromosome (best blast hit only)
    :param id2description: accession -> description shown in the tree
        (e.g. "S aureus Newman")
    :param id2mlst: tree leaf name -> S. aureus ST type
    :param check_overlap: forwarded unchanged to
        blast_utils.remove_blast_redundancy
    :param ordered_queries: query names in the column order to draw; queries
        without any hit above the cutoff on the tree leaves are dropped
    :param fasta_file2accession: tree leaf name -> accession used as key of
        the blast results
    :param id_cutoff: identity threshold (anything accepted by float())
    :param reference_accession: leaf name of the reference taxon; its cells
        always use the blue scale
    :param accession2hit_filter: optional mapping accession -> container of
        queries; matching cells use the green scale instead of the blue one
    :param show_identity_values: if true write the identity in every cell,
        otherwise draw small colored cells for hits only
    :return: None
    '''
    import blast_utils
    blast2data, queries = blast_utils.remove_blast_redundancy(
        blast_result_file_list, check_overlap)

    # Convert once so that every comparison below uses the same numeric
    # threshold, even when the cutoff is given as a string.
    cutoff = float(id_cutoff)

    # Count, per query, the genomes with a hit strictly above the cutoff and
    # discard the hits that are not above it.
    queries_count = {}
    for query in queries:
        queries_count[query] = 0
        for one_blast in blast2data:
            if query in blast2data[one_blast]:
                if float(blast2data[one_blast][query][0]) > cutoff:
                    queries_count[query] += 1
                else:
                    del blast2data[one_blast][query]

    print(queries_count)
    for query in queries:
        print("Hit counts: %s\t%s" % (query, queries_count[query]))
    # Drop the queries without any hit. The list is rebuilt in one step
    # (in place) rather than popped from while it is being iterated, which
    # used to skip the element following every removed one.
    queries[:] = [query for query in queries if queries_count[query] != 0]

    print('delete columns with no matches ok')

    t1 = Tree(tree_file)
    tss = TreeStyle()
    # Root on the midpoint node
    midpoint = t1.get_midpoint_outgroup()
    t1.set_outgroup(midpoint)
    t1.ladderize()

    # Keep only the requested queries having at least one hit above the
    # cutoff among the leaves of this tree.
    ordered_queries_filtered = []
    for query in ordered_queries:
        hit_count = 0
        for lf2 in t1.iter_leaves():
            try:
                accession = fasta_file2accession[lf2.name]
                tmpidentity = blast2data[accession][query][0]
                if float(tmpidentity) > cutoff:
                    hit_count += 1
            except (KeyError, IndexError, TypeError, ValueError):
                # unknown leaf, no hit for this query, or unusable value
                continue
        if hit_count > 0:
            ordered_queries_filtered.append(query)

    head = True
    print('drawing tree')
    print('n initial queries: %s n kept: %s' % (len(ordered_queries),
                                                len(ordered_queries_filtered)))
    for lf in t1.iter_leaves():
        lf.branch_vertical_margin = 0
        accession = fasta_file2accession[lf.name]
        for col, value in enumerate(ordered_queries_filtered):

            if head:
                # First leaf only: put the rotated gene name in the header
                # of the aligned columns.
                if show_identity_values:
                    header = TextFace(' %s ' % str(value))
                    margin = 2
                else:
                    header = TextFace(' %s ' % str(value), fsize=6)
                    margin = 0
                header.margin_top = margin
                header.margin_right = margin
                header.margin_left = margin
                header.margin_bottom = margin
                header.rotation = 270
                header.vt_align = 2
                header.hz_align = 2
                header.inner_background.color = "white"
                header.opacity = 1.
                tss.aligned_header.add_face(header, col)

            # Only the data lookup is guarded: a missing or unusable hit
            # gives a white cell, while errors in the color computation are
            # no longer silently hidden.
            try:
                identity_value = blast2data[accession][value][0]
                identity = float(identity_value)
            except (KeyError, IndexError, TypeError, ValueError):
                identity_value = 0
                color = "white"
            else:
                # Green scale only for a non-reference taxon whose hit is
                # listed in the filter; blue scale in every other case.
                use_green = False
                if lf.name != reference_accession and accession2hit_filter:
                    if accession in accession2hit_filter:
                        use_green = value in accession2hit_filter[accession]
                if use_green:
                    color = rgb2hex(m_green.to_rgba(identity))
                else:
                    color = rgb2hex(m_blue.to_rgba(identity))

            if show_identity_values:
                if float(identity_value) >= cutoff:
                    if str(identity_value) in ('100.00', '100.0'):
                        identity_value = '100'
                        n = TextFace("%s   " % identity_value)
                    else:
                        n = TextFace("%.2f" % round(float(identity_value), 2))
                    # white text stays readable on the darkest backgrounds
                    if float(identity_value) > 95:
                        n.fgcolor = "white"
                else:
                    identity_value = '-'
                    n = TextFace(' %s ' % str(identity_value))
                n.opacity = 1.
                n.margin_top = 2
                n.margin_right = 2
                n.margin_left = 2
                n.margin_bottom = 2
                n.inner_background.color = color
                lf.add_face(n, col, position="aligned")
            else:
                if float(identity_value) >= cutoff:
                    # don't show identity values: colored blank cell only
                    n = TextFace('  ')
                    n.margin_top = 0
                    n.margin_right = 0
                    n.margin_left = 0
                    n.margin_bottom = 0
                    n.inner_background.color = color
                    lf.add_face(n, col, position="aligned")

        # Replace the leaf name by "<description> (<ST type>)", falling back
        # to the raw leaf name when no description is available.
        try:
            accession = fasta_file2accession[lf.name]
            lf.name = ' %s (%s)' % (id2description[accession],
                                    id2mlst[lf.name])
        except KeyError:
            print('-------- %s' % (id2description,))
            lf.name = ' %s (%s)' % (lf.name, id2mlst[lf.name])
        head = False

    # Draw a blue dot on the nodes with support below 0.9; every other node
    # gets no visible symbol (size 0).
    for node in t1.traverse():
        nstyle = NodeStyle()
        if node.support < 0.9:
            nstyle["fgcolor"] = "blue"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        node.set_style(nstyle)

    print('rendering tree')
    t1.render("profile.svg", dpi=1000, h=400, tree_style=tss)
Beispiel #19
0
def main(argv):
    """Colour the leaves of a tree and add expression labels from a database.

    Command line options (parsed from ``argv``):
      -t / --tree  tree file to read (required)
      -o / --out   output file; defaults to the tree file name with ".nwk"
                   replaced by ".pdf"
      -d / --db    database name (default "test")
      -h           print the usage line and exit

    Leaves whose name contains KRAUS, MOEL, UNC or WILD get a fixed colour.
    Any other leaf is looked up in the Species/Sequences tables, renamed to
    "<genus initial>_<species>_<seqid>" and coloured by species; leaves with
    no database row are left unlabelled. Up to four expression labels
    (text from ``normalized``, background from ``vsd`` via ``get_colour``)
    are then added next to each leaf and the tree is passed to ``draw_tree``.

    Relies on the module-level names ``getopt``, ``sys``, ``mdb``, ``Tree``,
    ``TextFace``, ``get_colour`` and ``draw_tree``.
    """
    treefilename = ''
    outfilename = ''
    database = 'test'
    usage = 'ColourTree.py -t <treefile> -o <outfile>'
    try:
        opts, args = getopt.getopt(argv, "ht:o:d:", ["tree=", "out=", "db="])
        if not opts:
            raise getopt.GetoptError('no opts')
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print(usage)
            sys.exit()
        elif opt in ("-t", "--tree"):
            treefilename = arg
        elif opt in ("-o", "--out"):
            outfilename = arg
        elif opt in ("-d", "--db"):
            database = arg

    # A tree file is mandatory: fail with the usage message instead of
    # handing an empty path to the tree parser.
    if not treefilename:
        print(usage)
        sys.exit(2)

    tree = Tree(treefilename)

    if not outfilename:
        outfilename = treefilename.replace(".nwk", ".pdf")
        if outfilename == treefilename:
            # No ".nwk" in the name: never reuse the input path as output.
            outfilename = treefilename + ".pdf"
    # NOTE(review): connects as root with an empty password on localhost.
    con = mdb.connect('localhost', 'root', '', database)
    with con:
        cur = con.cursor()
        for leaf in tree:
            name = leaf.name
            if 'KRAUS' in name:
                color = 'Green'
            elif 'MOEL' in name:
                color = 'Red'
            elif 'UNC' in name:
                color = 'Orange'
            elif 'WILD' in name:
                color = 'MediumBlue'
            else:
                # Query parameters are passed as a sequence, as expected by
                # DB-API cursors.
                cur.execute("Select Species.Genus, Species.Species FROM Species, Sequences WHERE Species.abbreviation = Sequences.species AND Sequences.seqid = %s", (name,))
                try:
                    (genus, species) = cur.fetchone()
                    leaf.name = '_'.join((genus[0], species, leaf.name))
                except TypeError as e:
                    # fetchone() returned None: unknown sequence, skip leaf
                    print(e)
                    continue
                if 'kraussiana' in leaf.name:
                    color = 'LightGreen'
                elif 'willdenowii' in leaf.name:
                    color = 'SteelBlue'
                else:
                    color = 'Black'
            label = TextFace(leaf.name, fgcolor=color, fsize=16)
            leaf.add_face(label, column=0, position="branch-right")
            leaf.add_face(TextFace(' '), column=1, position="branch-right")
            # The orfs table is keyed by the name without its last two
            # underscore-separated fields.
            name = '_'.join(name.split('_')[:-2])
            try:
                cur.execute("SELECT vsd.leaf1, vsd.leaf2, vsd.leaf3, vsd.leaf4 FROM vsd, orfs WHERE vsd.gene_id = orfs.gene_id AND orfs.seqid = %s", (name,))
                vsd = cur.fetchone()
                cur.execute("SELECT normalized.leaf1, normalized.leaf2, normalized.leaf3, normalized.leaf4 FROM normalized, orfs WHERE normalized.gene_id = orfs.gene_id AND orfs.seqid = %s", (name,))
                normalized = cur.fetchone()
                for x in range(4):
                    if vsd[x] == 'none':
                        continue
                    expression_label = TextFace(' %s ' % normalized[x], fsize=16)
                    expression_label.background.color = get_colour(vsd[x])
                    expression_label.border.width = 1
                    expression_label.margin_left = 1
                    expression_label.margin_right = 1
                    expression_label.margin_top = 2
                    expression_label.margin_bottom = 1
                    # columns 0 and 1 hold the name label and a spacer
                    leaf.add_face(expression_label, column=x + 2, position="branch-right")
            except TypeError:
                # no expression row for this leaf (fetchone() gave None)
                continue

        draw_tree(tree, outfilename)
Beispiel #20
0
def taxo_msa(outfile='taxo_msa.svg',
             taxids=[],
             annotation='',
             msa=[],
             title='',
             width=2000):
    """
    Render an MSA next to the NCBI taxonomy tree of its sequences.

    outfile    - image file handed to the tree renderer
    taxids     - list of taxids in the same order as seqs in msa
    annotation - string drawn on the extra 'annotation' leaf; when empty a
                 blank string as long as the first msa sequence is used
    msa        - indexable collection of records exposing .seq and .id
    title      - text placed in the figure title
    width      - width passed to the renderer
    """
    taxonomy = NCBITaxa()
    taxids = [int(tid) for tid in taxids]

    t = taxonomy.get_topology(taxids, intermediate_nodes=False)
    # extra leaf that carries the annotation line
    annotation_leaf = t.add_child(name='annotation')
    annotation_leaf.add_feature('sci_name', 'annotation')
    t.sort_descendants(attr='sci_name')

    def layout(node):
        # Scientific name on top of the branch for the major ranks
        if getattr(node, "rank", None) and node.rank in ('order', 'class',
                                                         'phylum', 'kingdom'):
            node.add_face(AttrFace("sci_name", fsize=7, fgcolor="indianred"),
                          column=0,
                          position="branch-top")
        if not node.is_leaf():
            return

        node.add_face(AttrFace("sci_name", fsize=9, fgcolor="steelblue"),
                      column=0,
                      position="branch-right")

        if node.name == 'annotation':
            # annotation row: the annotation string and a column caption
            row = annotation if annotation else ' ' * len(msa[0].seq)
            caption = ' ' + 'SEQ_ID'
        else:
            # sequence row: leaf names are taxids, which index into msa
            record = msa[taxids.index(int(node.name))]
            row = str(record.seq)
            caption = ' ' + record.id

        motif = SeqMotifFace(
            row, [[0, len(row), "seq", 10, 10, None, None, None]],
            scale_factor=1)
        add_face_to_node(motif, node, 0, position="aligned")
        add_face_to_node(TextFace(caption),
                         node,
                         column=1,
                         position="aligned")

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.show_leaf_name = False
    ts.title.add_face(TextFace(title, fsize=20), column=0)
    t.render(outfile, w=width, dpi=300, tree_style=ts)
Beispiel #21
0
def main():
    """Build and visualize an alignment of canonical H2A / H2A.X histones.

    Steps, in order:
      1. read the sequence table (int_data/seqs_rs.csv) and the pickled
         sequence dictionary (int_data/fasta_dict.p);
      2. filter by histone variant, taxonomy, sequence support and keep one
         sequence per genus;
      3. relabel the sequences and align them with muscle_aln;
      4. write the alignment (fasta, html), trim it to the histone core,
         render it next to the taxonomy tree (example_motifs_H2A.svg) and
         plot several conservation profiles.

    Relies on names defined outside this function (not visible here), among
    them: pd, pickle, ncbi, cluster_seq_support, group_taxids, muscle_aln,
    AlignIO, aln2html, trim_hist_aln_to_core, add_consensus, cons_prof,
    plot_prof4seq, log and the ETE face/tree classes.
    """
    title = ''
    #1. Getting data
    ########################################################
    ########################################################
    # df=pd.read_csv('int_data/seqs_rs_redef.csv') #Histone types info #Does not really seem that we need to redefine variants based on best score.
    df = pd.read_csv('int_data/seqs_rs.csv')  #Histone types info
    # NOTE(review): the file object opened here is never explicitly closed.
    fasta_dict = pickle.load(open("int_data/fasta_dict.p", "rb"))  #Sequences

    #2. Filtering - filter initial dataset by type, variant and other parameters
    ########################################################
    ########################################################

    #2.1. Narrow by variant/type
    ########################################################
    title += 'H2A'
    # f_df=df[(df['hist_var']=='canonical_H4')]
    # f_df['hist_var']='canonical_H4'
    # Keep canonical H2A and H2A.X rows whose 'partial' and 'non_st_aa'
    # flags are both False.
    f_df = df[(
        (df['hist_var'] == 'canonical_H2A') | (df['hist_var'] == 'H2A.X'))
              & (df['partial'] == False) & (df['non_st_aa'] == False)]
    # f_df=df[((df['hist_var']=='H2A.Z'))&(df['partial']==False)&(df['non_st_aa']==False)]

    # f_df=df[(df['hist_type']=='H2A')]

    print "Number of seqs after narrowing by hist type/var:", len(f_df)

    #2.2. Filter by list of taxonomy clades - restrict sequences to certain taxonomic clades
    #########################################################
    title += ' across cellular organisms'
    # parent_nodes=[9443] #131567 - cellular organisms, 7215 4930 Drosophila and yeast, 9443 - primates
    parent_nodes = [
        131567
    ]  #131567 - cellular organisms, 7215 4930 Drosophila and yeast, 9443 - primates
    #33682 - euglenozoa
    #6656 - arthropods
    # 4751 - fungi
    #5782 - dictostelium
    #This is akin manual removal of bad species
    del_nodes = [5782, 5690]

    print "Selecting taxonomic subset for taxids: ", parent_nodes
    print "while removing taxonomic subset for taxids: ", del_nodes

    # Allowed taxids = parent nodes and all their descendants, minus the
    # deleted nodes and all their descendants.
    taxids = set(parent_nodes)
    for i in parent_nodes:
        taxids.update(ncbi.get_descendant_taxa(i, intermediate_nodes=True))
    for i in del_nodes:
        taxids = taxids.difference(set([i]))
        taxids = taxids.difference(
            set(ncbi.get_descendant_taxa(i, intermediate_nodes=True)))

    f_df = f_df[f_df['taxid'].isin(taxids)]
    print "Number of seq after taxonomic subset: ", len(f_df)

    #2.3.0 Marking number of identical sequence within each species and subspecies.
    #This will simplify further analysis of sequence filtering on similarity
    #We know that all refseqs are duplicated for instance.
    ################################################
    ident = dict()
    # NOTE(review): new_gis is not used in this section; it is re-created
    # before its only real use in step 2.4.
    new_gis = list()
    tids = set(list(f_df['taxid']))
    for i in tids:
        # print i.name, i.sci_name
        temp_df = f_df[(f_df['taxid'] == i)]
        gis = list(temp_df['gi'])  #this is to limit exec time
        # print gis
        if (len(gis) > 1):
            res = cluster_seq_support({gi: fasta_dict[str(gi)]
                                       for gi in gis},
                                      ident_thresh=1.00)
            ident.update(res)
        else:
            # a single sequence for this taxid: support of 1 by definition
            ident.update({gis[0]: 1})

    # NOTE(review): f_df is the result of boolean filtering; assigning a
    # column here may trigger pandas' SettingWithCopyWarning - confirm.
    f_df['ident'] = [ident.get(k, 1) for k in f_df['gi']]
    #where ident - number of identical sequnces for current sepecies/subspecies.
    print "Identity of sequence inside each taxid determined"

    #2.3.1. Calculate number of similar seqs for every seq in tax group
    #########################################################
    # Use powerful method, to get rid of random errors is to identify identical sequences
    # if a sequence is supported by two or more entires - this is good.
    # Here we add a degen column to our data set - showing how many similar sequences are found
    # for a given sequence in its taxonomic clade (genus currently)
    # NOTE(review): the loop below iterates over nodes of rank 'family',
    # not genus as stated in the comment above.

    #We will traverse the species tree by species, genus or family, and determine degeneracy level
    degen = dict()
    new_gis = list()
    tids = list(f_df['taxid'])
    t = ncbi.get_topology(tids, intermediate_nodes=True)
    for i in t.search_nodes(rank='family'):
        # print i.name, i.sci_name
        # collect the taxids of every node below (and including) this one
        nodeset = list()
        for k in i.traverse():
            nodeset.append(int(k.name))
        temp_df = f_df[(f_df['taxid'].isin(nodeset))]
        gis = list(temp_df['gi'])  #this is to limit exec time
        # print gis
        res = cluster_seq_support({gi: fasta_dict[str(gi)]
                                   for gi in gis},
                                  ident_thresh=1.00)
        degen.update(res)

    # print degen
    # sequences not covered by any 'family' node default to a support of 1
    f_df['degen'] = [degen.get(k, 1) for k in f_df['gi']]

    #2.3.2. Remove seqs that do not have support outside their species
    # if they are not curated or RefSeq NP.
    ###########################################################

    # NOTE(review): DataFrame.sort only exists in old pandas releases
    # (sort_values is the later replacement) - confirm the pandas version.
    f_df = f_df.sort(
        ['RefSeq', 'degen'], ascending=False
    )  # so that RefSeq record get priority on removing duplicates
    f_df = f_df[(f_df['degen'] > f_df['ident']) | (f_df['curated'] == True) |
                (f_df['RefSeq'] == 2)]
    print "After removing mined seqs with no support in neighboring species: ", len(
        f_df)

    #2.3.3. Shuffle sequnces, so that upon further selection, RefSeq and high degeneracy get priority
    ###########################################################
    #RefSeq and degenerate sequence get priority
    # title+=' 1ptax'
    f_df = f_df.sort(
        ['RefSeq', 'degen'], ascending=False
    )  # so that RefSeq record get priority on removing duplicates
    # print f_df[0:10]
    # f_df=f_df.drop_duplicates(['taxid','hist_var'])

    #2.4 Take one best representative per specific taxonomic rank (e.g. genus)
    ############################################################
    pruningrank = 'genus'
    print "Pruning taxonomy by ", pruningrank

    title += ' , one seq. per %s' % pruningrank
    #Common ranks: superorder-order-suborder-infraorder-parvorder-superfamily-family-subfamily-genus-species-subspecies
    seqtaxids = list(f_df['taxid'])  #old list
    grouped_taxids = group_taxids(seqtaxids, rank=pruningrank)
    # print seqtaxids
    # print grouped_taxids
    #Now we need to take best representative
    #refseq NP, curated, or the one with largest degeneracy
    new_gis = list()
    for tids in grouped_taxids:
        t_df = f_df[f_df['taxid'].isin(tids)]
        #try take curated first
        if (len(t_df[t_df['curated'] == True]) > 0):
            new_gis.append(t_df.loc[t_df.curated == True, 'gi'].values[0])
            continue
        #try take NP records nest
        #RefSeq 2 means NP, 1 means XP
        if (len(t_df[t_df['RefSeq'] == 2]) > 0):
            new_gis.append(t_df.loc[t_df.RefSeq == 2, 'gi'].values[0])
            continue
        # take best degenerate otherwise
        else:
            t_df = t_df.sort(['degen', 'RefSeq'], ascending=False)
            new_gis.append(t_df['gi'].iloc[0])

    f_df = f_df[f_df['gi'].isin(new_gis)]

    print "After pruning taxonomy we have: ", len(f_df)

    #2.5. Check seq for sanity - needs to be checked!
    ##############################################
    # title+=' seqQC '

    # print "Checkig sequence quality"
    # newgis=list()
    # for i,row in f_df.iterrows():
    #     gi=row['gi']
    #     seq=fasta_dict[str(gi)].seq
    #     hist_type=row['hist_type']
    #     hist_var=row['hist_var']
    #     if(check_hist_length(seq,hist_type,hist_var,5)&check_hist_core_length(seq,hist_type,5)):
    #         newgis.append(gi)
    # f_df=f_df[f_df['gi'].isin(newgis)] #remake the dataframe
    # print len(f_df)

    #3. Make a list of seq with good ids and descriptions
    ##############################################

    # Keep only the sequences whose gi survived the filtering above.
    # NOTE(review): list(f_df['gi']) is rebuilt for every key tested here.
    f_fasta_dict = {
        key: value
        for (key, value) in fasta_dict.iteritems()
        if int(key) in list(f_df['gi'])
    }
    print len(f_fasta_dict)
    taxid2name = ncbi.get_taxid_translator(list(f_df['taxid']))
    #Relabel sequences gi=> type and organism
    # description becomes "<hist_var> <taxon name>"; the id stays the gi key
    f_fasta_dict = {
        key: SeqRecord(
            id=key,
            description=f_df.loc[f_df.gi == int(key), 'hist_var'].values[0] +
            ' ' + taxid2name[f_df.loc[f_df.gi == int(key), 'taxid'].values[0]],
            seq=value.seq)
        for (key, value) in f_fasta_dict.iteritems()
    }
    #with arbitrary index
    # f_fasta_dict_rel={key: SeqRecord(id=str(index), description=f_hist_df.loc[f_hist_df.gi==key,'hist_var'].values[0]+' '+taxid2names[f_hist_df.loc[f_hist_df.gi==key,'taxid'].values[0]],seq=f_fasta_dict[key].seq) for (index,key) in enumerate(f_fasta_dict) }
    # exit()

    #4. Make MSA
    #################
    #Here we construct MSA
    msa = muscle_aln(f_fasta_dict.values(), gapopen=float(-20))
    AlignIO.write(msa, "int_data/example_msa.fasta", "fasta")

    # Second output: the same alignment preceded by an 'annotation' record
    # whose spaces are replaced by gap characters.
    msa_annot = MultipleSeqAlignment([
        SeqRecord(Seq(''.join(get_hist_ss_in_aln_as_string(msa)).replace(
            ' ', '-')),
                  id='annotation',
                  name='')
    ])
    msa_annot.extend(msa)
    AlignIO.write(msa_annot, "int_data/example_msa_annot.fasta", "fasta")

    # Copy the descriptions onto the aligned records (shortening
    # 'canonical' to 'ca') and order the alignment by description.
    for i in range(len(msa)):
        gi = msa[i].id
        msa[i].description = f_fasta_dict[gi].description.replace(
            'canonical', 'ca')
    msa.sort(key=lambda x: x.description)

    #5. Visualize MSA############
    aln2html(msa,
             'example_h2a.html',
             features=get_hist_ss_in_aln_for_html(msa, 'H2A', 0),
             title="canonical H2A alignment",
             description=True,
             field1w=10,
             field2w=35)

    #6. Trim alignment - this is optional
    #6.1. Trim gaps
    # title+=' gaptrim'
    # msa_tr=trim_aln_gaps(msa,threshold=0.8)

    #6.2. Trim to histone core sequence
    msa_tr = trim_hist_aln_to_core(msa)
    # msa_tr=msa
    # print get_hist_ss_in_aln_for_shade(msa_tr,below=True)

    # exit()

    #7. Vizualize MSA with ete2.##########
    # NOTE(review): if two kept sequences share a taxid, the later gi
    # replaces the earlier one in this mapping.
    taxid2gi = {
        f_df.loc[f_df.gi == int(gi), 'taxid'].values[0]: gi
        for gi in list(f_df['gi'])
    }
    gi2variant = {
        gi: f_df.loc[f_df.gi == int(gi), 'hist_var'].values[0]
        for gi in list(f_df['gi'])
    }

    # record id -> trimmed sequence; looked up below with str(gi)
    msa_dict = {i.id: i.seq for i in msa_tr}
    t = ncbi.get_topology(list(f_df['taxid']), intermediate_nodes=False)
    # extra leaf used to draw the annotation line and the column captions
    a = t.add_child(name='annotation')
    a.add_feature('sci_name', 'annotation')
    t.sort_descendants(attr='sci_name')
    ts = TreeStyle()

    def layout(node):
        # Layout callback: decides which faces are attached to each node.
        # print node.rank
        # print node.sci_name
        if getattr(node, "rank", None):
            if (node.rank in ['order', 'class', 'phylum', 'kingdom']):
                rank_face = AttrFace("sci_name", fsize=7, fgcolor="indianred")
                node.add_face(rank_face, column=0, position="branch-top")
        if node.is_leaf():
            sciname_face = AttrFace("sci_name", fsize=9, fgcolor="steelblue")
            node.add_face(sciname_face, column=0, position="branch-right")
        if node.is_leaf() and not node.name == 'annotation':
            # sequence rows: leaf names are taxids
            s = str(msa_dict[str(taxid2gi[int(node.name)])])
            seqFace = SeqMotifFace(
                s, [[0, len(s), "seq", 10, 10, None, None, None]],
                scale_factor=1)
            add_face_to_node(seqFace, node, 0, position="aligned")
            gi = taxid2gi[int(node.name)]
            add_face_to_node(TextFace(' ' + str(gi) + ' '),
                             node,
                             column=1,
                             position="aligned")
            add_face_to_node(TextFace('      ' + str(int(node.name)) + ' '),
                             node,
                             column=2,
                             position="aligned")
            add_face_to_node(TextFace('      ' + str(gi2variant[gi]) + ' '),
                             node,
                             column=3,
                             position="aligned")

        if node.is_leaf() and node.name == 'annotation':
            # annotation row: annotation string plus the column captions
            s = get_hist_ss_in_aln_as_string(msa_tr)
            seqFace = SeqMotifFace(
                s, [[0, len(s), "seq", 10, 10, None, None, None]],
                scale_factor=1)
            add_face_to_node(seqFace, node, 0, position="aligned")
            add_face_to_node(TextFace(' ' + 'NCBI_GI' + ' '),
                             node,
                             column=1,
                             position="aligned")
            add_face_to_node(TextFace('       ' + 'NCBI_TAXID' + ' '),
                             node,
                             column=2,
                             position="aligned")
            add_face_to_node(TextFace('       ' + 'Variant' + '       '),
                             node,
                             column=3,
                             position="aligned")

    ts.layout_fn = layout
    ts.show_leaf_name = False
    ts.title.add_face(TextFace(title, fsize=20), column=0)
    t.render("example_motifs_H2A.svg", w=6000, dpi=300, tree_style=ts)

    #10. Conservation############
    #############################
    features = get_hist_ss_in_aln_for_shade(msa_tr, below=True)
    # the [-2:-1] slice keeps only the second-to-last item of the result
    cn = add_consensus(msa_tr, threshold=0.5)[-2:-1]
    # Below are three methods that we find useful.
    # plot_prof4seq('cons_sofp_psic',map(float,cons_prof(msa_tr,f=2,c=2)),cn,features,axis='conservation')
    # each profile value is shifted by log(20) before plotting
    plot_prof4seq('example_cons_ent_unw',
                  map(lambda x: log(20) + x,
                      map(float, cons_prof(msa_tr, f=0, c=0))),
                  cn,
                  features,
                  axis='conservation')
    plot_prof4seq('example_cons_ent_unw_norm',
                  map(lambda x: log(20) + x,
                      map(float, cons_prof(msa_tr, f=0, c=0, norm="T"))),
                  cn,
                  features,
                  axis='conservation')

    # plot_prof4seq('cons_sofp_unw',map(float,cons_prof(msa_tr,f=0,c=2)),cn,features,axis='conservation')
    plot_prof4seq('example_cons_sofp_unw_renorm1',
                  map(float, cons_prof(msa_tr, f=0, c=2, m=1)),
                  cn,
                  features,
                  axis='conservation')
    plot_prof4seq('example_cons_sofp_unw',
                  map(float, cons_prof(msa_tr, f=0, c=2, m=0)),
                  cn,
                  features,
                  axis='conservation')
    plot_prof4seq('example_cons_sofp_psic_renorm1',
                  map(float, cons_prof(msa_tr, f=2, c=2, m=1)),
                  cn,
                  features,
                  axis='conservation')
    # NOTE(review): neither n nor nstyle is assigned anywhere in this
    # function; this line looks like a leftover from another script and
    # would fail here unless both names exist at module level - confirm.
    n.set_style(nstyle)

# Create an independent node style for each leaf, which
# specifies the colour given in the locations.csv file
# (grey when the sample or its hospital colour is unknown), then label
# the leaf with whichever of its location / sampling period is available.
for n in t.get_leaves():
    name = n.get_leaf_names()[0]
    print(name)
    nstyle = NodeStyle()
    try:
        nstyle["fgcolor"] = colours[samples[name]['Hospitals']]
    except KeyError:
        nstyle["fgcolor"] = "grey"
    nstyle["size"] = 10

    n.set_style(nstyle)

    # One text column per optional field. Only a missing sample or field is
    # tolerated (same rule as for the colour above); any other error is a
    # real problem and is no longer silently swallowed.
    for column, field in enumerate(('Locations', 'SamplingPeriod')):
        try:
            label_text = samples[name][field]
        except KeyError:
            continue
        n.add_face(TextFace(label_text),
                   column=column,
                   position="branch-right")
t.render(file_name=sys.argv[3], tree_style=ts)
Beispiel #23
0
def main():
	args = parse_args()
	
	if args.data:
		print "\nReading tree from " + args.tree + " and data matrix from " + args.data
		tree = ClusterTree(args.tree, text_array=args.data)
	else:
		print "\nReading tree from " + args.tree
		tree = Tree(args.tree)	
		
	if args.midpoint:
		R = tree.get_midpoint_outgroup()
		tree.set_outgroup(R)
		print "- Applying midpoint rooting"
	elif args.outgroup:
		tree.set_outgroup( tree&args.outgroup )
		print "- Rooting using outgroup " + args.outgroup
		
	if not args.no_ladderize:
		tree.ladderize()
		print "- Ladderizing tree"

	table, column_list, column_values = readtable(args, tree.get_leaf_names())
	
	labels = []
	if args.labels:
		print "\nThese labels will be printed next to each strain:"
		for label in args.labels:
			if label in column_list:
				labels.append(label)
				print " " + label
			else:
				print "WARNING: specified label " + label + " was not found in the columns of the info file provided, " + args.info

	# set node styles
	# start by setting all to no shapes, black labels
	for n in tree.traverse():
		nstyle = NodeStyle()
		nstyle["fgcolor"] = "black"
		nstyle["size"] = 0
		n.set_style(nstyle)
	
	# add colour tags next to nodes
	if args.colour_tags:
		colour_tags = []
		print "\nThese columns will be used to generate colour tags:"
		for label in args.colour_tags:
			if label in column_list:
				colour_tags.append(label)
				print " " + label
			else:
				print "\tWARNING: specified label for colour tagging, " + label + ", was not found in the columns of the info file provided, " + args.info
				
		for i in range(0,len(colour_tags)):
			label = colour_tags[i]
			colour_dict = getColourPalette(column_values[label],args,label)
			
			print "- Adding colour tag for " + label
			
			for node in tree.get_leaves():
				this_face = Face()
				this_face.margin_left = args.padding
				node.add_face(this_face, column=0, position = "aligned")
				
				if node.name in table:
					this_label = table[node.name][label]
					this_colour = colour_dict[this_label]
				else:
					this_colour = "white"
				this_face = Face()
				this_face.background.color = this_colour
				this_face.margin_right = args.margin_right
				this_face.margin_left = args.margin_left
				this_face.margin_top = args.margin_top
				this_face.margin_bottom = args.margin_bottom
				this_face.border.width = args.border_width
				this_face.border.color="white"
				node.add_face(this_face, column=i+1, position = "aligned")
		print
	else:
		colour_tags = []
	
	# add labels as columns
	for i in range(0,len(labels)):
		
		label = labels[i]
		
		print "- Adding label " + label
		if label == args.colour_nodes_by:
			print "  also colouring nodes by these values"
		
		colour_dict = getColourPalette(column_values[label],args,label)
		
		for node in tree.get_leaves():
			if node.name in table:
				this_label = table[node.name][label]
				this_colour = colour_dict[this_label]
			else:
				this_label = ""
				this_colour = "black"

			this_face = TextFace(text=this_label, fsize = args.font_size)
			if args.tags:
				this_face.background.color = this_colour
			elif label == args.colour_nodes_by:
				this_face.fgcolor = this_colour
			this_face.margin_right = args.padding
			if i == 0:
				this_face.margin_left = args.padding
			node.add_face(this_face, column=i+len(colour_tags)+1, position = "aligned")
			
			# set leaves to coloured circles
			node.img_style["size"] = args.node_size
			if label == args.colour_nodes_by:
				node.img_style["fgcolor"] = this_colour
					
	if args.colour_branches_by or args.colour_backgrounds_by or args.branch_support_colour:
		if args.colour_branches_by:
			print "- Colouring branches by label " + args.colour_branches_by
			colour_dict_br = getColourPalette(column_values[args.colour_branches_by],args,args.colour_branches_by)
		if args.colour_backgrounds_by:
			print "- Colouring node backgrounds by label " + args.colour_backgrounds_by
			colour_dict_bg = getColourPalette(column_values[args.colour_backgrounds_by],args,args.colour_backgrounds_by)
		if args.branch_support_colour:
			print "- Colouring branches by support values"
			# colours extracted from R using rgb( colorRamp(c("white","red", "black"))(seq(0, 1, length = 100)), max = 255)
			# support_colours = {0.0:"#FFFFFF",0.01:"#FFFFFF", 0.02:"#FFF9F9", 0.03:"#FFF4F4", 0.04:"#FFEFEF", 0.05:"#FFEAEA", 0.06:"#FFE5E5", 0.07:"#FFE0E0", 0.08:"#FFDADA", 0.09:"#FFD5D5", 0.1:"#FFD0D0", 0.11:"#FFCBCB", 0.12:"#FFC6C6", 0.13:"#FFC1C1", 0.14:"#FFBCBC", 0.15:"#FFB6B6", 0.16:"#FFB1B1", 0.17:"#FFACAC", 0.18:"#FFA7A7", 0.19:"#FFA2A2", 0.2:"#FF9D9D", 0.21:"#FF9797", 0.22:"#FF9292", 0.23:"#FF8D8D", 0.24:"#FF8888", 0.25:"#FF8383", 0.26:"#FF7E7E", 0.27:"#FF7979", 0.28:"#FF7373", 0.29:"#FF6E6E", 0.3:"#FF6969", 0.31:"#FF6464", 0.32:"#FF5F5F", 0.33:"#FF5A5A", 0.34:"#FF5454", 0.35:"#FF4F4F", 0.36:"#FF4A4A", 0.37:"#FF4545", 0.38:"#FF4040", 0.39:"#FF3B3B", 0.4:"#FF3636", 0.41:"#FF3030", 0.42:"#FF2B2B", 0.43:"#FF2626", 0.44:"#FF2121", 0.45:"#FF1C1C", 0.46:"#FF1717", 0.47:"#FF1212", 0.48:"#FF0C0C", 0.49:"#FF0707", 0.5:"#FF0202", 0.51:"#FC0000", 0.52:"#F70000", 0.53:"#F20000", 0.54:"#EC0000", 0.55:"#E70000", 0.56:"#E20000", 0.57:"#DD0000", 0.58:"#D80000", 0.59:"#D30000", 0.6:"#CE0000", 0.61:"#C80000", 0.62:"#C30000", 0.63:"#BE0000", 0.64:"#B90000", 0.65:"#B40000", 0.66:"#AF0000", 0.67:"#A90000", 0.68:"#A40000", 0.69:"#9F0000", 0.7:"#9A0000", 0.71:"#950000", 0.72:"#900000", 0.73:"#8B0000", 0.74:"#850000", 0.75:"#800000", 0.76:"#7B0000", 0.77:"#760000", 0.78:"#710000", 0.79:"#6C0000", 0.8:"#670000", 0.81:"#610000", 0.82:"#5C0000", 0.83:"#570000", 0.84:"#520000", 0.85:"#4D0000", 0.86:"#480000", 0.87:"#420000", 0.88:"#3D0000", 0.89:"#380000", 0.9:"#330000", 0.91:"#2E0000", 0.92:"#290000", 0.93:"#240000", 0.94:"#1E0000", 0.95:"#190000", 0.96:"#140000", 0.97:"#0F0000", 0.98:"#0A0000", 0.99:"#050000", 1:"#000000"}
			# rgb( colorRamp(c("red", "black"))(seq(0, 1, length = 100)), max = 255))
			support_colours = {}
			
			if args.branch_support_cutoff:
				for i in range(0,args.branch_support_cutoff):
					support_colours[i] = "#FF0000"
				for i in range(args.branch_support_cutoff,101):
					support_colours[i] = "#000000"
			else:
				if args.branch_support_percent:
					support_colours = {0:"#FF0000",1:"#FF0000",2:"#FC0000",3:"#F90000",4:"#F70000",5:"#F40000",6:"#F20000",7:"#EF0000",8:"#EC0000",9:"#EA0000",10:"#E70000",11:"#E50000",12:"#E20000",13:"#E00000",14:"#DD0000",15:"#DA0000",16:"#D80000",17:"#D50000",18:"#D30000",19:"#D00000",20:"#CE0000",21:"#CB0000",22:"#C80000",23:"#C60000",24:"#C30000",25:"#C10000",26:"#BE0000",27:"#BC0000",28:"#B90000",29:"#B60000",30:"#B40000",31:"#B10000",32:"#AF0000",33:"#AC0000",34:"#AA0000",35:"#A70000",36:"#A40000",37:"#A20000",38:"#9F0000",39:"#9D0000",40:"#9A0000",41:"#970000",42:"#950000",43:"#920000",44:"#900000",45:"#8D0000",46:"#8B0000",47:"#880000",48:"#850000",49:"#830000",50:"#800000",51:"#7E0000",52:"#7B0000",53:"#790000",54:"#760000",55:"#730000",56:"#710000",57:"#6E0000",58:"#6C0000",59:"#690000",60:"#670000",61:"#640000",62:"#610000",63:"#5F0000",64:"#5C0000",65:"#5A0000",66:"#570000",67:"#540000",68:"#520000",69:"#4F0000",70:"#4D0000",71:"#4A0000",72:"#480000",73:"#450000",74:"#420000",75:"#400000",76:"#3D0000",77:"#3B0000",78:"#380000",79:"#360000",80:"#330000",81:"#300000",82:"#2E0000",83:"#2B0000",84:"#290000",85:"#260000",86:"#240000",87:"#210000",88:"#1E0000",89:"#1C0000",90:"#190000",91:"#170000",92:"#140000",93:"#120000",94:"#0F0000",95:"#0C0000",96:"#0A0000",97:"#070000",98:"#050000",99:"#020000",100:"#000000"}
				else:
					support_colours = {0.0:"#FF0000", 0.01:"#FF0000", 0.02:"#FC0000", 0.03:"#F90000", 0.04:"#F70000", 0.05:"#F40000", 0.06:"#F20000", 0.07:"#EF0000", 0.08:"#EC0000", 0.09:"#EA0000", 0.1:"#E70000", 0.11:"#E50000", 0.12:"#E20000", 0.13:"#E00000", 0.14:"#DD0000", 0.15:"#DA0000", 0.16:"#D80000", 0.17:"#D50000", 0.18:"#D30000", 0.19:"#D00000", 0.2:"#CE0000", 0.21:"#CB0000", 0.22:"#C80000", 0.23:"#C60000", 0.24:"#C30000", 0.25:"#C10000", 0.26:"#BE0000", 0.27:"#BC0000", 0.28:"#B90000", 0.29:"#B60000", 0.3:"#B40000", 0.31:"#B10000", 0.32:"#AF0000", 0.33:"#AC0000", 0.34:"#AA0000", 0.35:"#A70000", 0.36:"#A40000", 0.37:"#A20000", 0.38:"#9F0000", 0.39:"#9D0000", 0.4:"#9A0000", 0.41:"#970000", 0.42:"#950000", 0.43:"#920000", 0.44:"#900000", 0.45:"#8D0000", 0.46:"#8B0000", 0.47:"#880000", 0.48:"#850000", 0.49:"#830000", 0.5:"#800000", 0.51:"#7E0000", 0.52:"#7B0000", 0.53:"#790000", 0.54:"#760000", 0.55:"#730000", 0.56:"#710000", 0.57:"#6E0000", 0.58:"#6C0000", 0.59:"#690000", 0.6:"#670000", 0.61:"#640000", 0.62:"#610000", 0.63:"#5F0000", 0.64:"#5C0000", 0.65:"#5A0000", 0.66:"#570000", 0.67:"#540000", 0.68:"#520000", 0.69:"#4F0000", 0.7:"#4D0000", 0.71:"#4A0000", 0.72:"#480000", 0.73:"#450000", 0.74:"#420000", 0.75:"#400000", 0.76:"#3D0000", 0.77:"#3B0000", 0.78:"#380000", 0.79:"#360000", 0.8:"#330000", 0.81:"#300000", 0.82:"#2E0000", 0.83:"#2B0000", 0.84:"#290000", 0.85:"#260000", 0.86:"#240000", 0.87:"#210000", 0.88:"#1E0000", 0.89:"#1C0000", 0.9:"#190000", 0.91:"#170000", 0.92:"#140000", 0.93:"#120000", 0.94:"#0F0000", 0.95:"#0C0000", 0.96:"#0A0000", 0.97:"#070000", 0.98:"#050000", 0.99:"#020000", 1.0:"#000000"}
		for node in tree.traverse():			
			nstyle = NodeStyle()
			nstyle["size"] = 0
			if node.name in table:
				#print "Colouring individual " + node.name
				if args.colour_branches_by:
					nstyle["vt_line_color"] = colour_dict_br[table[node.name][args.colour_branches_by]] # set branch colour
					nstyle["hz_line_color"] = colour_dict_br[table[node.name][args.colour_branches_by]]
				if args.colour_backgrounds_by:
					if args.colour_branches_by in table[node.name]:
						if table[node.name][args.colour_branches_by] != "none":
							nstyle["bgcolor"] = colour_dict_bg[table[node.name][args.colour_backgrounds_by]] # set background colour
				node.set_style(nstyle)
			else:
				# internal node
				descendants = node.get_leaves()
				descendant_labels_br = []
				descendant_labels_bg = []
				for d in descendants:
					if args.colour_branches_by:
						if d.name in table:
							this_label_br = table[d.name][args.colour_branches_by]
							if this_label_br not in descendant_labels_br:
								descendant_labels_br.append(this_label_br)
						elif "none" not in descendant_labels_br:
							descendant_labels_br.append("none")
					if args.colour_backgrounds_by:
						if d.name in table:
							this_label_bg = table[d.name][args.colour_backgrounds_by]
							if this_label_bg not in descendant_labels_bg:
								descendant_labels_bg.append(this_label_bg)
						elif "none" not in descendant_labels_bg:
							descendant_labels_bg.append("none")
#				nstyle = NodeStyle()
#				nstyle["size"] = 0
				if len(descendant_labels_br) == 1 and descendant_labels_br[0] != "none":
					this_colour = colour_dict_br[descendant_labels_br[0]]
					nstyle["vt_line_color"] = this_colour # set branch colour
					nstyle["hz_line_color"] = this_colour
				elif args.branch_support_colour and not node.is_leaf():
					if int(node.support) in support_colours:
						nstyle["vt_line_color"] = support_colours[int(node.support)] # take colour from support value
						nstyle["hz_line_color"] = support_colours[int(node.support)]
					else:
						print "  WARNING support values don't make sense. Note scale is assumed to be 0-1 unless using the --branch_support_percent flag."
				if len(descendant_labels_bg) == 1 and descendant_labels_bg[0] != "none":
					this_colour = colour_dict_bg[descendant_labels_bg[0]]
					nstyle["bgcolor"] = this_colour # set background colour
				node.set_style(nstyle)
					
	if args.colour_nodes_by:
		if args.colour_nodes_by not in labels:
			print "- Colouring nodes by label " + args.colour_nodes_by
			colour_dict = getColourPalette(column_values[args.colour_nodes_by],args,args.colour_nodes_by)
			for node in tree.get_leaves():
				if node.name in table:
					this_label = table[node.name][args.colour_nodes_by]
					this_colour = colour_dict[this_label]
					if this_colour != "None":
						node.img_style["fgcolor"] = this_colour
						node.img_style["size"] = args.node_size
				
			
			
	# set tree style
	ts = TreeStyle()
	
	if args.show_leaf_names:
		ts.show_leaf_name = True
	else:
		ts.show_leaf_name = False

	if args.length_scale:
		ts.scale = args.length_scale
		
	if args.branch_padding:
		ts.branch_vertical_margin = args.branch_padding
		
	if args.branch_support_print:
		ts.show_branch_support = True
		
	if args.fan:
		ts.mode = "c"
		print "\nPrinting circular tree (--fan)"
	else:
		print "\nPrinting rectangular tree, to switch to circular use --fan"
		
	if args.title:
		title = TextFace(args.title, fsize=20)
		title.margin_left = 20
		title.margin_top = 20
		ts.title.add_face(title, column=1)
		
	if args.no_guiding_lines:
		ts.draw_guiding_lines = False
		
	if args.data:
		print "\nPrinting data matrix as " + args.data_type + " with range (" +  str(args.mindata) + "->" +  str(args.maxdata) + ";" +  str(args.centervalue) + "), height " + str(args.data_height) + ", width " + str(args.data_width)
		profileFace  = ProfileFace(min_v=args.mindata, max_v=args.maxdata, center_v=args.centervalue, width=args.data_width, height=args.data_height, style=args.data_type)
		def mylayout(node):
			if node.is_leaf():
				add_face_to_node(profileFace, node, 0, aligned=True)
		ts.layout_fn = mylayout
		
	# set root branch length to zero
	tree.dist=0
	
	# render tree
	tree.render(args.output, w=args.width, dpi=300, units="mm", tree_style=ts)
	
	print "\n FINISHED! Tree plot printed to file " + args.output
	print
	
	if args.print_colour_dict:
		print colour_dict
		if args.colour_branches_by:
			print colour_dict_br
		if args.colour_backgrounds_by:
			print colour_dict_bg
	
	if args.interactive:
		print "\nEntering interactive mode..."
		tree.show(tree_style=ts)
Beispiel #24
0
def draw_tree(tree, conf, outfile):
    try:
        from ete2 import (add_face_to_node, AttrFace, TextFace, TreeStyle, RectFace, CircleFace,
                             SequenceFace, random_color, SeqMotifFace)
    except ImportError as e:
        print e
        return
  
    def ly_basic(node):
        if node.is_leaf():
            node.img_style['size'] = 0
        else:
            node.img_style['size'] = 0
            node.img_style['shape'] = 'square'
            if len(MIXED_RES) > 1 and hasattr(node, "tree_seqtype"):
                if node.tree_seqtype == "nt":
                    node.img_style["bgcolor"] = "#CFE6CA"
                    ntF = TextFace("nt", fsize=6, fgcolor='#444', ftype='Helvetica')
                    add_face_to_node(ntF, node, 10, position="branch-bottom")
            if len(NPR_TREES) > 1 and hasattr(node, "tree_type"):
                node.img_style['size'] = 4
                node.img_style['fgcolor'] = "steelblue"

        node.img_style['hz_line_width'] = 1
        node.img_style['vt_line_width'] = 1
                    
    def ly_leaf_names(node):
        if node.is_leaf():
            spF = TextFace(node.species, fsize=10, fgcolor='#444444', fstyle='italic', ftype='Helvetica')
            add_face_to_node(spF, node, column=0, position='branch-right')
            if hasattr(node, 'genename'):
                geneF = TextFace(" (%s)" %node.genename, fsize=8, fgcolor='#777777', ftype='Helvetica')
                add_face_to_node(geneF, node, column=1, position='branch-right')

    def ly_supports(node):
        if not node.is_leaf() and node.up:
            supFace = TextFace("%0.2g" %(node.support), fsize=7, fgcolor='indianred')
            add_face_to_node(supFace, node, column=0, position='branch-top')
                
    def ly_tax_labels(node):
        if node.is_leaf():
            c = LABEL_START_COL
            largest = 0
            for tname in TRACKED_CLADES:
                if hasattr(node, "named_lineage") and tname in node.named_lineage:
                    linF = TextFace(tname, fsize=10, fgcolor='white')
                    linF.margin_left = 3
                    linF.margin_right = 2
                    linF.background.color = lin2color[tname]
                    add_face_to_node(linF, node, c, position='aligned')
                    c += 1
            
            for n in xrange(c, len(TRACKED_CLADES)):
                add_face_to_node(TextFace('', fsize=10, fgcolor='slategrey'), node, c, position='aligned')
                c+=1

    def ly_full_alg(node):
        pass

    def ly_block_alg(node):
        if node.is_leaf():
            if 'sequence' in node.features:
                seqFace = SeqMotifFace(node.sequence, [])
                # [10, 100, "[]", None, 10, "black", "rgradient:blue", "arial|8|white|domain Name"],
                motifs = []
                last_lt = None
                for c, lt in enumerate(node.sequence):
                    if lt != '-':
                        if last_lt is None:
                            last_lt = c
                        if c+1 == len(node.sequence):
                            start, end = last_lt, c
                            motifs.append([start, end, "()", 0, 12, "slategrey", "slategrey", None])
                            last_lt = None
                    elif lt == '-':
                        if last_lt is not None:
                            start, end = last_lt, c-1
                            motifs.append([start, end, "()", 0, 12, "grey", "slategrey", None])
                            last_lt = None

                seqFace = SeqMotifFace(node.sequence, motifs,
                                       intermotif_format="line",
                                       seqtail_format="line", scale_factor=ALG_SCALE)
                add_face_to_node(seqFace, node, ALG_START_COL, aligned=True)

                
    TRACKED_CLADES = ["Eukaryota", "Viridiplantae",  "Fungi",
                      "Alveolata", "Metazoa", "Stramenopiles", "Rhodophyta",
                      "Amoebozoa", "Crypthophyta", "Bacteria",
                      "Alphaproteobacteria", "Betaproteobacteria", "Cyanobacteria",
                      "Gammaproteobacteria",]
    
    # ["Opisthokonta",  "Apicomplexa"]
    
    colors = random_color(num=len(TRACKED_CLADES), s=0.45)
    lin2color = dict([(ln, colors[i]) for i, ln in enumerate(TRACKED_CLADES)])

    NAME_FACE = AttrFace('name', fsize=10, fgcolor='#444444')
        
    LABEL_START_COL = 10
    ALG_START_COL = 40
    ts = TreeStyle()
    ts.draw_aligned_faces_as_table = False
    ts.draw_guiding_lines = False
    ts.show_leaf_name = False
    ts.show_branch_support = False
    ts.scale = 160

    ts.layout_fn = [ly_basic, ly_leaf_names, ly_supports, ly_tax_labels]

    MIXED_RES = set()
    MAX_SEQ_LEN = 0
    NPR_TREES = []
    for n in tree.traverse():
        if hasattr(n, "tree_seqtype"):
            MIXED_RES.add(n.tree_seqtype)
        if hasattr(n, "tree_type"):
            NPR_TREES.append(n.tree_type)
        seq = getattr(n, "sequence", "")
        MAX_SEQ_LEN = max(len(seq), MAX_SEQ_LEN) 

    if MAX_SEQ_LEN:
        ALG_SCALE = min(1, 1000./MAX_SEQ_LEN)
        ts.layout_fn.append(ly_block_alg)
        
    if len(NPR_TREES) > 1:
        rF = RectFace(4, 4, "steelblue", "steelblue")
        rF.margin_right = 10
        rF.margin_left = 10
        ts.legend.add_face(rF, 0)
        ts.legend.add_face(TextFace(" NPR node"), 1)
        ts.legend_position = 3

    if len(MIXED_RES) > 1:
        rF = RectFace(20, 20, "#CFE6CA", "#CFE6CA")
        rF.margin_right = 10
        rF.margin_left = 10
        ts.legend.add_face(rF, 0)
        ts.legend.add_face(TextFace(" Nucleotide based alignment"), 1)
        ts.legend_position = 3
 

    try:
        tree.set_species_naming_function(spname)
        annotate_tree_with_ncbi(tree)
        a = tree.search_nodes(species='Dictyostelium discoideum')[0]
        b = tree.search_nodes(species='Chondrus crispus')[0]
        #out = tree.get_common_ancestor([a, b])
        #out = tree.search_nodes(species='Haemophilus parahaemolyticus')[0].up
        tree.set_outgroup(out)    
        tree.swap_children()
    except Exception:
        pass
    
    tree.render(outfile, tree_style=ts, w=170, units='mm', dpi=150)
    tree.render(outfile+'.svg', tree_style=ts, w=170, units='mm', dpi=150)
    tree.render(outfile+'.pdf', tree_style=ts, w=170, units='mm', dpi=150)
Beispiel #25
0
def main(argv):
	input_file=''
	title='Title'
	label_internal_nodes = False
	label_leaves = False
	out_file=''
	width=750
	out_file_xml=''
	try:
		opts, args = getopt.getopt(argv,"h:i:t:lno:w:x:",["Help=","InputFile=","Title=","LabelLeaves=", "LabelInternalNodes=","OutFile=","Width=","OutFileXML="])
	except getopt.GetoptError:
		print 'Unknown option, call using: ./PlotTree.py -i <InputCAMIFile> -t <Title> -l <LabelLeavesFlag> -n <LabelInternalNodesFlag> -o <OutFile.png> -x <Outfile.xml> -w <Width>'
		sys.exit(2)
	for opt, arg in opts:
		if opt == '-h':
			print './PlotTree.py -i <InputCAMIFile> -t <Title> -l <LabelLeavesFlag> -n <LabelInternalNodesFlag> -o <OutFile> -x <OutFile.xml> -w <Width>'
			sys.exit(2)
		elif opt in ("-i", "--InputFile"):
			input_file = arg
		elif opt in ("-t", "--Title"):
			title = arg
		elif opt in ("-l", "--LabelLeaves"):
			label_leaves = True
		elif opt in ("-n","--LabelInternalNodes"):
			label_internal_nodes = True
		elif opt in ("-o", "--OutFile"):
			out_file = arg
		elif opt in ("-w", "--Width"):
			width = int(arg)
		elif opt in ("-x", "--OutFileXML"):
			out_file_xml = arg
			
	schema_names = COLOR_SCHEMES.keys()
	
	#Read the common kmer profile
	ckm_tax_paths = []
	ckm_name_to_perc = dict()
	fid = open(input_file,'r')
	file = fid.readlines()
	fid.close()
	
	#Put placeholders in for missing names like: "||" -> "|NA1|"
	file_noblank = list()
	i=0
	for line in file:
		while "||" in line:
			line = line.replace("||","|NONAME|",1)
			i = i+1
		file_noblank.append(line)
	
	#Get the names and weights
	for line in file_noblank:
		if line[0]!='#' and line[0]!='@' and line[0]!='\n': #Don't parse comments or blank lines
			temp = line.split()[3] #Get the names
			ckm_tax_paths.append(temp)
			ckm_name_to_perc[temp.split("|")[-1]] = line.split()[-1] #Get the weights
	
	#Create the tree
	t=Tree()
	names_to_nodes = dict()
	for i in range(0,len(ckm_tax_paths)):
		split_tax_path = ckm_tax_paths[i].split("|")
		if len(split_tax_path)==1: #If len==1, then it's a superkingdom
			names_to_nodes[split_tax_path[0]] = t.add_child(name=split_tax_path[0]) #connect directly to tree
		else:
			if split_tax_path[-2] in names_to_nodes: #If the parent is already in the tree, add to tree
				names_to_nodes[split_tax_path[-1]] = names_to_nodes[split_tax_path[-2]].add_child(name=split_tax_path[-1])
			else: #Otherwise iterate up until we have something that is in the tree
				j=2
				while split_tax_path[-j]=="NONAME":
					j = j + 1
				#This skips over the NONAMES
				names_to_nodes[split_tax_path[-1]] = names_to_nodes[split_tax_path[-j]].add_child(name=split_tax_path[-1])
	
	#Show the tree
	#print t.get_ascii(show_internal=True)
	
	#scheme = random.sample(schema_names, 1)[0] #'set2' is nice, 
	scheme = 'set2'

	def layout(node):
		if node.name in ckm_name_to_perc:
			ckm_perc = float(ckm_name_to_perc[node.name])
		else:
			ckm_perc = 0
		F = CircleFace(radius=3.14*math.sqrt(ckm_perc), color="RoyalBlue", style="sphere")
		F.border.width = None
		F.opacity = 0.6
		faces.add_face_to_node(F,node, 0, position="branch-right")
		if label_internal_nodes:
			faces.add_face_to_node(TextFace(node.name, fsize=7),node, 0, position="branch-top")
	
	ts = TreeStyle()
	ts.layout_fn = layout
	ts.mode = "r"
	ts.show_leaf_name = label_leaves
	ts.min_leaf_separation = 50
	ts.title.add_face(TextFace(title, fsize=20), column=0)
	
	#Export the tree to a png image
	t.render(out_file, w=width, units="mm", tree_style=ts)

    #Export the xml file
	project = Phyloxml()
	phylo = phyloxml.PhyloxmlTree(newick=t.write(format=0, features=[]))
	phylo.phyloxml_phylogeny.set_name(title)
	project.add_phylogeny(phylo)
	project.export(open(out_file_xml,'w'))
Beispiel #26
0
    def layout(node):
        """Layout function: decorate `node` with taxonomy names and a motif view.

        - Nodes whose `rank` is order/class/phylum/kingdom get their
          `sci_name` above the branch.
        - Every leaf gets its `sci_name` to the right of the branch.
        - Every leaf not named 'annotation' additionally gets an aligned
          SeqMotifFace built from the features of its sequence record,
          followed by the record id in the next aligned column.  The record
          is located via the position of int(node.name) in `taxids`, which
          is used as an index into `seqreclist`.

        Reads `seqreclist`, `taxids`, `get_color` and `annotation` from the
        enclosing scope.
        """
        # print node.rank
        # print node.sci_name
        if getattr(node, "rank", None):
            if (node.rank in ['order', 'class', 'phylum', 'kingdom']):
                rank_face = AttrFace("sci_name", fsize=7, fgcolor="indianred")
                node.add_face(rank_face, column=0, position="branch-top")
        if node.is_leaf():
            sciname_face = AttrFace("sci_name", fsize=9, fgcolor="steelblue")
            node.add_face(sciname_face, column=0, position="branch-right")
        if node.is_leaf() and not node.name == 'annotation':
            # Here we add faces; the motif list is assembled for SeqMotifFace.
            seq = str(seqreclist[taxids.index(int(node.name))].seq)
            # Each motif is an 8-item list; presumably
            # [start, end, shape, width, height, fgcolor, bgcolor, text]
            # as expected by SeqMotifFace -- TODO confirm against the ETE docs.
            motifs = []  #[[0,len(seq), "seq", 10, 10, None, None, None]]
            for f in seqreclist[taxids.index(int(node.name))].features:
                if f.type == 'domain':
                    # Domains are drawn with the "[]" shape and labelled with
                    # their name; colour comes from the 'color' qualifier,
                    # falling back to get_color(name).
                    motifs.append([
                        f.location.start, f.location.end, "[]", None, 10,
                        "blue",
                        f.qualifiers.get('color',
                                         get_color(
                                             f.qualifiers['name'])).lower(),
                        "arial|8|black|%s" % f.qualifiers['name']
                    ])
                if f.type == 'motif':
                    # Motifs are drawn as "seq" entries.  A new motif must not
                    # be drawn over "seq" entries that were added earlier, so
                    # overlaps with those are detected first.
                    s = f.location.start
                    e = f.location.end
                    flag = True  # stays True while no overlap has been found
                    overlappedm = []  # earlier "seq" motifs intersecting [s, e]
                    for m in motifs:
                        if m[2] == 'seq' and m[0] < e and m[
                                1] > s:  # we have an overlap; the preceding motif always stays on top
                            flag = False
                            overlappedm.append(m)
                    if not flag:  # one or more overlaps: emit only the uncovered pieces
                        # Scan every position of the new motif and cut it into
                        # pieces [ts, te] that lie outside the earlier motifs.
                        sflag = False  # a piece has been started at ts
                        eflag = False  # the started piece has been ended at te
                        for x in range(s, e + 1):
                            if not sflag:  # check whether a piece can start here
                                overlap = False
                                for m in overlappedm:
                                    if x >= m[0] and x < m[1]:
                                        overlap = True
                                if not overlap:
                                    ts = x
                                    sflag = True

                            # Check whether it is time to end the piece: at the
                            # start of an earlier motif or at the last position.
                            if sflag and not eflag:
                                overlap = False
                                for m in overlappedm:
                                    if x == m[0]:
                                        overlap = True
                                if overlap or x == e:
                                    te = x
                                    eflag = True

                            # A complete piece: record it and look for the next.
                            if sflag and eflag:
                                motifs.append([
                                    ts, te, "seq", 10, 10, "black",
                                    f.qualifiers.get(
                                        'color', get_color(
                                            f.qualifiers['name'])).lower(),
                                    None
                                ])
                                sflag = False
                                eflag = False
                    if flag:
                        # No overlap: add the motif over its full extent.
                        motifs.append([
                            f.location.start, f.location.end, "seq", 10, 10,
                            "black",
                            f.qualifiers.get(
                                'color',
                                get_color(f.qualifiers['name'])).lower(), None
                        ])
            seqFace = SeqMotifFace(seq,
                                   motifs,
                                   scale_factor=1,
                                   seq_format="[]")
            seqFace.overlaping_motif_opacity = 1.0
            # seqFace.fg=aafgcolors
            # seqFace.bg=aabgcolors_gray

            add_face_to_node(seqFace, node, 0, position="aligned")
            # gi=taxid2gi[int(node.name)]
            # Record id, padded with spaces, in the next aligned column.
            add_face_to_node(
                TextFace(' ' + seqreclist[taxids.index(int(node.name))].id +
                         '         '),
                node,
                column=1,
                position="aligned")
            # add_face_to_node(TextFace('      '+str(int(node.name))+' '),node,column=2, position = "aligned")
            # add_face_to_node(TextFace('      '+str(gi2variant[gi])+' '),node,column=3, position = "aligned")

        # We currently disable annotation
        # The 'annotation' leaf only computes the string `s` (the supplied
        # annotation, or blanks as wide as the longest sequence); nothing in
        # the visible code uses it afterwards.
        if node.is_leaf() and node.name == 'annotation':
            if (annotation):
                s = annotation
                # get_hist_ss_in_aln_as_string(msa_tr)
            else:
                s = ' ' * max(map(lambda x: len(x.seq), seqreclist))
Beispiel #27
0
def run(args):
    """Print or draw every tree produced by ``args.src_tree_iterator``.

    In text mode (``args.text_mode``) each tree is printed as ASCII art and
    nothing else happens.  Otherwise each tree is decorated with the faces
    described by ``args.face`` (plus the ones implied by ``args.as_ncbi``,
    ``args.alg``, ``args.heatmap`` and ``args.bubbles``) and is either
    rendered to ``t<index>.<args.image>`` or shown interactively.

    Attributes read from ``args``: ``text_mode``, ``src_tree_iterator``,
    ``raxml``, ``show_internal_names``, ``show_attributes``, ``face``,
    ``mode``, ``tree_width``, ``as_ncbi``, ``alg``, ``alg_type``,
    ``alg_format``, ``heatmap``, ``bubbles``, ``branch_separation``,
    ``show_support``, ``show_branch_length``, ``force_topology``,
    ``height``, ``width``, ``image`` and ``size_units``.

    Side effects: rebinds the module-level ``FACES`` list and normalises
    falsy ``args.height`` / ``args.width`` to ``None``.

    :param args: parsed command line options (see the list above).
    :return: None
    """
    # Standard-library imports come first.  ``re`` must be bound before the
    # text-mode branch: a function-level ``import re`` makes ``re`` a local
    # name for the whole function, so using it before the import statement
    # raised UnboundLocalError when --raxml was combined with text mode.
    import random
    import re
    import colorsys
    from collections import defaultdict

    global FACES

    # RAxML writes supports as ``:length[support]``; rewrite them as NHX tags.
    raxml_support = re.compile(r":(\d+\.\d+)\[(\d+)\]")
    auto_color_spec = re.compile(r"auto\(([^)]*)\)")

    def tree_source(tfile):
        """Return what the tree constructor should be given for ``tfile``."""
        if not args.raxml:
            return tfile
        with open(tfile) as handle:
            return raxml_support.sub(r":\1[&&NHX:support=\2]", handle.read())

    if args.text_mode:
        # Only ``Tree`` is imported here; the drawing classes are imported
        # further down, after this branch has returned.
        from ete2 import Tree
        for tfile in args.src_tree_iterator:
            t = Tree(tree_source(tfile))
            # Parenthesised single-argument print behaves the same whether
            # ``print`` is a statement or a function.
            print(t.get_ascii(show_internal=args.show_internal_names,
                              attributes=args.show_attributes))
        return

    from ete2 import (PhyloTree, TextFace, RectFace, faces, TreeStyle,
                      random_color)

    FACES = parse_faces(args.face) if args.face else []

    # VISUALIZATION
    ts = TreeStyle()
    ts.mode = args.mode
    ts.show_leaf_name = True
    ts.tree_width = args.tree_width

    # An explicit face for the node name replaces the default leaf label.
    if any(f["value"] == "@name" for f in FACES):
        ts.show_leaf_name = False

    if args.as_ncbi:
        ts.show_leaf_name = False
        FACES.extend(parse_faces(
            ['value:@sci_name, size:10, fstyle:italic',
             'value:@taxid, color:grey, size:6, format:" - %s"',
             'value:@sci_name, color:steelblue, size:7, pos:b-top, nodetype:internal',
             'value:@rank, color:indianred, size:6, pos:b-bottom, nodetype:internal',
             ]))

    if args.alg:
        FACES.extend(parse_faces(
            ['value:@sequence, size:10, pos:aligned, ftype:%s' % args.alg_type]
        ))

    if args.heatmap:
        FACES.extend(parse_faces(
            ['value:@name, size:10, pos:aligned, ftype:heatmap']
        ))

    if args.bubbles:
        for bubble in args.bubbles:
            FACES.extend(parse_faces(
                ['value:@%s, pos:float, ftype:bubble, opacity:0.4' % bubble,
                 ]))

    ts.branch_vertical_margin = args.branch_separation
    if args.show_support:
        ts.show_branch_support = True
    if args.show_branch_length:
        ts.show_branch_length = True
    if args.force_topology:
        ts.force_topology = True
    ts.layout_fn = lambda x: None

    # ---- heatmap support -------------------------------------------------
    DEFAULT_COLOR_SATURATION = 0.3
    BASE_LIGHTNESS = 0.7

    def gradient_color(value, max_value, hue=0.1):
        """Return a hex colour that gets darker as ``value`` nears ``max_value``."""
        lightness = 1 - (value * BASE_LIGHTNESS) / max_value
        rgb = colorsys.hls_to_rgb(hue, lightness, DEFAULT_COLOR_SATURATION)
        return '#%02x%02x%02x' % tuple(int(c * 255) for c in rgb)

    def read_heatmap(path):
        """Parse a tab-separated matrix; return (rows, min_value, max_value).

        Lines starting with '#' and blank lines are ignored.  Empty cells
        become ``None`` and are left out of the min/max computation.
        """
        rows = {}
        lowest = highest = None
        with open(path) as handle:
            for line in handle:
                if line.startswith('#') or not line.strip():
                    continue
                fields = line.split('\t')
                name = fields[0].strip()
                values = [float(x) if x.strip() else None for x in fields[1:]]
                present = [v for v in values if v is not None]
                if present:
                    if highest is None or max(present) > highest:
                        highest = max(present)
                    if lowest is None or min(present) < lowest:
                        lowest = min(present)
                rows[name] = values
        return rows, lowest, highest

    heatmap_center_value = 0
    heatmap_color_center = "white"
    heatmap_color_up = 0.3
    heatmap_color_down = 0.7
    heatmap_color_missing = "black"

    # Defaults keep a stray ``ftype:heatmap`` face harmless when no heatmap
    # file was given: no rows means no cells are drawn.
    heatmap_data = {}
    heatmap_max_value = 0

    if args.heatmap:
        # The file does not depend on the tree, so it is read only once.
        heatmap_data, min_value, max_value = read_heatmap(args.heatmap)
        if max_value is not None:
            heatmap_max_value = abs(heatmap_center_value - max_value)
            heatmap_min_value = abs(heatmap_center_value - min_value)
            if heatmap_center_value <= min_value:
                heatmap_max_value = heatmap_min_value + heatmap_max_value
            else:
                heatmap_max_value = max(heatmap_min_value, heatmap_max_value)

    def resolve_color(spec, face, node, f2color, f2last_seed):
        """Expand an ``auto(attr)`` colour spec into a concrete colour.

        Any other spec (including an empty one) is returned unchanged.
        Nodes sharing the same value of the target attribute share a colour.
        """
        if not spec:
            return spec
        auto_m = auto_color_spec.search(spec)
        if not auto_m:
            return spec
        # ``auto()`` without an attribute falls back to the face's own value.
        color_keyattr = (auto_m.group(1).strip() or face["value"]).lstrip('@')
        color_bin = getattr(node, color_keyattr, None)
        last_seed = f2last_seed.setdefault(color_keyattr, random.random())
        seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
        f2last_seed[color_keyattr] = seed
        return f2color.setdefault(color_bin, random_color(h=seed))

    # scale the tree
    if not args.height:
        args.height = None
    if not args.width:
        args.width = None

    for tindex, tfile in enumerate(args.src_tree_iterator):
        t = PhyloTree(tree_source(tfile))

        if args.alg:
            t.link_to_alignment(args.alg, alg_format=args.alg_format)

        f2color = {}
        f2last_seed = {}
        for node in t.traverse():
            node.img_style['size'] = 0
            if len(node.children) == 1:
                node.img_style['size'] = 2
                node.img_style['shape'] = "square"
                node.img_style['fgcolor'] = "steelblue"

            # next free column for each face position on this node
            ftype_pos = defaultdict(int)

            for f in FACES:
                nodetype = f['nodetype']
                applies = (nodetype == 'any' or
                           (nodetype == 'leaf' and node.is_leaf()) or
                           (nodetype == 'internal' and not node.is_leaf()))
                # skip faces meant for another node type or filtered out
                if not applies or not node_matcher(node, f["filters"]):
                    continue

                if f["value"].startswith("@"):
                    fvalue = getattr(node, f["value"][1:], None)
                else:
                    fvalue = f["value"]

                # only generate a face if the node's attribute has content
                if fvalue is None:
                    continue

                fsize = f["size"]
                fcolor = resolve_color(f['color'], f, node,
                                       f2color, f2last_seed)
                fbgcolor = resolve_color(f["bgcolor"], f, node,
                                         f2color, f2last_seed)

                # Reset for every face so an unrecognised ftype cannot
                # re-add the face built for a previous entry.
                F = None
                ftype = f["ftype"]

                if ftype == "text":
                    if f.get("format", None):
                        fvalue = f["format"] % fvalue
                    F = TextFace(fvalue,
                                 fsize=fsize,
                                 fgcolor=fcolor or "black",
                                 fstyle=f.get('fstyle', None))

                elif ftype == "fullseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="seq",
                                           seqtail_format="seq",
                                           height=fsize)
                elif ftype == "compactseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="compactseq",
                                           seqtail_format="compactseq",
                                           height=fsize)
                elif ftype == "blockseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="blockseq",
                                           seqtail_format="blockseq",
                                           height=fsize,
                                           fgcolor=fcolor or "slategrey",
                                           bgcolor=fbgcolor or "slategrey",
                                           scale_factor=1.0)
                    # colour already applied to the blocks themselves
                    fbgcolor = None
                elif ftype == "bubble":
                    try:
                        v = float(fvalue)
                    except ValueError:
                        rad = fsize
                    else:
                        rad = fsize * v
                    F = faces.CircleFace(radius=rad, style="sphere",
                                         color=fcolor or "steelblue")

                elif ftype == "heatmap":
                    if not f['column']:
                        col = ftype_pos[f["pos"]]
                    else:
                        col = f["column"]

                    # one coloured square per matrix column of this node
                    for i, value in enumerate(heatmap_data.get(node.name, [])):
                        ftype_pos[f["pos"]] += 1

                        if value is None:
                            color = heatmap_color_missing
                        elif value > heatmap_center_value:
                            color = gradient_color(
                                abs(heatmap_center_value - value),
                                heatmap_max_value, hue=heatmap_color_up)
                        elif value < heatmap_center_value:
                            color = gradient_color(
                                abs(heatmap_center_value - value),
                                heatmap_max_value, hue=heatmap_color_down)
                        else:
                            color = heatmap_color_center
                        node.add_face(RectFace(20, 20, color, color),
                                      position="aligned", column=col + i)

                # "profile", "barchart" and "piechart" are accepted but not
                # drawn: F stays None for them.

                # Add the Face
                if F:
                    F.opacity = f['opacity'] or 1.0

                    # Set face general attributes
                    if fbgcolor:
                        F.background.color = fbgcolor

                    if not f['column']:
                        col = ftype_pos[f["pos"]]
                        ftype_pos[f["pos"]] += 1
                    else:
                        col = f["column"]
                    node.add_face(F, column=col, position=f["pos"])

        if args.image:
            t.render("t%d.%s" % (tindex, args.image),
                     tree_style=ts, w=args.width, h=args.height,
                     units=args.size_units)
        else:
            t.show(None, tree_style=ts)
Beispiel #28
0
def plot_blast_result(tree_file, blast_result_file_list, id2description,
                      id2mlst):
    '''
    Staph aureus PVL project with Laure Jaton.
    Draw a phylogeny side by side with the conservation of virulence factors.
    Requires MLST results, the full set of tblastn results (virulence factors
    vs chromosomes) and a mapping between genome accessions and the names to
    display in the phylogeny. Likewise, the patients' molis identifiers are
    replaced by CHUV n.

    Writes the figure to "saureus_tree.svg" and the relabelled tree to
    "new_tree.nw" in the current directory.

    :param tree_file: newick phylogeny whose identifiers match all the dictionaries used
    :param blast_result_file_list: tblastn results, virulence factors vs chromosome (best blast only)
    :param id2description: genome identifier used in the tree and matching description (i.e S aureus Newman)
    :param id2mlst: tree identifier 2 S. aureus ST type
    :return:
    '''

    # blast2data[subject][query] = [identity, start, end]
    # NOTE(review): columns 0/1/2/8/9 look like BLAST tabular output
    # (query, subject, % identity, subject start, subject end) - confirm.
    blast2data = {}
    queries = []
    for one_blast_file in blast_result_file_list:
        with open(one_blast_file, 'r') as f:
            for line in f:
                if not line.strip():
                    # a blank (e.g. trailing) line carries no hit
                    continue
                fields = line.split('\t')
                query, subject = fields[0], fields[1]
                blast2data.setdefault(subject, {})[query] = [
                    float(fields[2]),
                    int(fields[8]),
                    int(fields[9])
                ]
                if query not in queries:
                    queries.append(query)
    print(blast2data)
    print(queries)

    # When two genes hit overlapping locations on the same genome, keep only
    # the hit with the higher identity.
    for one_blast in list(blast2data):
        hits = blast2data[one_blast]
        # Iterate over snapshots: entries are deleted from ``hits`` below.
        for ref_gene in list(hits):
            for query_gene in list(hits):
                if ref_gene == query_gene:
                    continue
                if ref_gene not in hits or query_gene not in hits:
                    # one of the two was removed by an earlier comparison
                    print('colocation already resolved: %s %s' %
                          (query_gene, ref_gene))
                    continue
                ref_hit = hits[ref_gene]
                query_hit = hits[query_gene]
                # debugging trace for one specific genome/gene pair
                if one_blast == 'NC_002745' and ref_gene == 'selm':
                    print('target: %s %s' % (ref_gene, ref_hit))
                    print('%s %s' % (query_gene, query_hit))

                # check if position is overlapping (either hit starting
                # inside the other one)
                overlap = False
                ref_low, ref_high = sorted(ref_hit[1:3])
                if ref_low <= query_hit[1] <= ref_high:
                    print('Overlaping locations!')
                    print('%s %s %s' % (one_blast, ref_gene, ref_hit))
                    print('%s %s %s' % (one_blast, query_gene, query_hit))
                    overlap = True
                query_low, query_high = sorted(query_hit[1:3])
                if query_low <= ref_hit[1] <= query_high:
                    print('Overlapping locations!')
                    print('%s %s %s' % (one_blast, ref_gene, ref_hit))
                    print('%s %s %s' % (one_blast, query_gene, query_hit))
                    overlap = True
                if not overlap:
                    continue

                if ref_hit[0] > query_hit[0]:
                    del hits[query_gene]
                    print('removing %s' % query_gene)
                else:
                    del hits[ref_gene]
                    print('removing %s' % ref_gene)
                    # the reference is gone: nothing left to compare it with
                    break

    # Genes can be dropped from ``queries`` here to restrict the columns.

    t1 = Tree(tree_file)
    # Calculate the midpoint node and set it as tree outgroup
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)
    t1.ladderize()

    def add_cell(leaf, column, text, margin, background, fgcolor=None):
        """Append one text cell to the aligned column ``column`` of ``leaf``."""
        cell = TextFace(' %s ' % text)
        cell.margin_top = margin
        cell.margin_right = margin
        cell.margin_left = margin
        cell.margin_bottom = margin
        cell.inner_background.color = background
        if fgcolor is not None:
            cell.fgcolor = fgcolor
        cell.opacity = 1.
        leaf.add_face(cell, column, position="aligned")

    for lf in t1.iter_leaves():
        lf.branch_vertical_margin = 0

        for col, value in enumerate(queries):

            if lf.name == "1505183575":
                # first row, print gene names
                add_cell(lf, col, str(value), 4, "white")

            try:
                identity_value = blast2data[lf.name][value][0]
                is_chuv = 'CHUV' in id2description[lf.name]
            except KeyError:
                # no hit for this gene, or leaf without a description
                add_cell(lf, col, '-', 2, "white")
                continue

            if float(identity_value) > 70:
                if str(identity_value) in ('100.00', '100.0'):
                    label = '100'
                else:
                    label = str(round(float(identity_value), 1))
                # CHUV leaves get wider cells and their own colour scale.
                # NOTE(review): ``m``, ``m2`` and ``rgb2hex`` are not defined
                # in this function; they are expected at module level.
                if is_chuv:
                    margin, mapper = 4, m
                else:
                    margin, mapper = 2, m2
                background = rgb2hex(mapper.to_rgba(float(label)))
                # white text on the darkest cells
                fgcolor = "white" if float(label) > 92 else None
                add_cell(lf, col, label, margin, background, fgcolor)
            else:
                # identity too low to be reported
                add_cell(lf, col, '-', 2, "white")

        try:
            lf.name = ' %s (%s)' % (id2description[lf.name], id2mlst[lf.name])
        except KeyError:
            # Leaf missing from one of the mappings: fall back to the raw
            # identifier.  The name stays a plain string (the previous code
            # stored a TextFace object here, which is not a usable node name
            # for rendering or for the newick output) and a missing ST type
            # no longer raises a second KeyError inside the handler.
            lf.name = ' %s (%s)' % (lf.name, id2mlst.get(lf.name, '-'))

    t1.render("saureus_tree.svg", dpi=800, h=400)
    t1.write(format=0, outfile="new_tree.nw")
Beispiel #29
0
def main():
	args = parse_args()
	
	if args.data:
		print "\nReading tree from " + args.tree + " and data matrix from " + args.data
		tree = ClusterTree(args.tree, text_array=args.data)
	else:
		print "\nReading tree from " + args.tree
		tree = Tree(args.tree)	
		
	if args.midpoint:
		R = tree.get_midpoint_outgroup()
		tree.set_outgroup(R)
		print "- Applying midpoint rooting"
	elif args.outgroup:
		tree.set_outgroup( tree&args.outgroup )
		print "- Rooting using outgroup " + args.outgroup
		
	if not args.no_ladderize:
		tree.ladderize()
		print "- Ladderizing tree"

	table, column_list, column_values = readtable(args, tree.get_leaf_names())
	
	labels = []
	if args.labels:
		print "\nThese labels will be printed next to each strain:"
		for label in args.labels:
			if label in column_list:
				labels.append(label)
				print " " + label
			else:
				print "WARNING: specified label " + label + " was not found in the columns of the info file provided, " + args.info

	# set node styles
	# start by setting all to no shapes, black labels
	for n in tree.traverse():
		nstyle = NodeStyle()
		nstyle["fgcolor"] = "black"
		nstyle["size"] = 0
		n.set_style(nstyle)
	
	# add colour tags next to nodes
	if args.colour_tags:
		colour_tags = []
		print "\nThese columns will be used to generate colour tags:"
		for label in args.colour_tags:
			if label in column_list:
				colour_tags.append(label)
				print " " + label
			else:
				print "\tWARNING: specified label for colour tagging, " + label + ", was not found in the columns of the info file provided, " + args.info
				
		for i in range(0,len(colour_tags)):
			label = colour_tags[i]
			colour_dict = getColourPalette(column_values[label],args,label)
			
			print "- Adding colour tag for " + label
			
			for node in tree.get_leaves():
				this_face = Face()
				this_face.margin_left = args.padding
				node.add_face(this_face, column=0, position = "aligned")
				
				if node.name in table:
					this_label = table[node.name][label]
					this_colour = colour_dict[this_label]
				else:
					this_colour = "white"
				this_face = Face()
				this_face.background.color = this_colour
				this_face.margin_right = args.margin_right
				this_face.margin_left = args.margin_left
				this_face.margin_top = args.margin_top
				this_face.margin_bottom = args.margin_bottom
				this_face.border.width = args.border_width
				this_face.border.color="white"
				node.add_face(this_face, column=i+1, position = "aligned")
		print
	else:
		colour_tags = []
	
	# add labels as columns
	for i in range(0,len(labels)):
		
		label = labels[i]
		
		print "- Adding label " + label
		if label == args.colour_nodes_by:
			print "  also colouring nodes by these values"
		
		colour_dict = getColourPalette(column_values[label],args,label)
		
		for node in tree.get_leaves():
			if node.name in table:
				this_label = table[node.name][label]
				this_colour = colour_dict[this_label]
			else:
				this_label = ""
				this_colour = "black"

			this_face = TextFace(text=this_label, fsize = args.font_size)
			if args.tags:
				this_face.background.color = this_colour
			elif label == args.colour_nodes_by:
				this_face.fgcolor = this_colour
			this_face.margin_right = args.padding
			if i == 0:
				this_face.margin_left = args.padding
			node.add_face(this_face, column=i+len(colour_tags)+1, position = "aligned")
			
			# set leaves to coloured circles
			node.img_style["size"] = args.node_size
			if label == args.colour_nodes_by:
				node.img_style["fgcolor"] = this_colour
					
	if args.colour_branches_by or args.colour_backgrounds_by or args.branch_support_colour:
		if args.colour_branches_by:
			print "- Colouring branches by label " + args.colour_branches_by
			colour_dict_br = getColourPalette(column_values[args.colour_branches_by],args,args.colour_branches_by)
		if args.colour_backgrounds_by:
			print "- Colouring node backgrounds by label " + args.colour_backgrounds_by
			colour_dict_bg = getColourPalette(column_values[args.colour_backgrounds_by],args,args.colour_backgrounds_by)
		if args.branch_support_colour:
			print "- Colouring branches by support values"
			# colours extracted from R using rgb( colorRamp(c("white","red", "black"))(seq(0, 1, length = 100)), max = 255)
			# support_colours = {0.0:"#FFFFFF",0.01:"#FFFFFF", 0.02:"#FFF9F9", 0.03:"#FFF4F4", 0.04:"#FFEFEF", 0.05:"#FFEAEA", 0.06:"#FFE5E5", 0.07:"#FFE0E0", 0.08:"#FFDADA", 0.09:"#FFD5D5", 0.1:"#FFD0D0", 0.11:"#FFCBCB", 0.12:"#FFC6C6", 0.13:"#FFC1C1", 0.14:"#FFBCBC", 0.15:"#FFB6B6", 0.16:"#FFB1B1", 0.17:"#FFACAC", 0.18:"#FFA7A7", 0.19:"#FFA2A2", 0.2:"#FF9D9D", 0.21:"#FF9797", 0.22:"#FF9292", 0.23:"#FF8D8D", 0.24:"#FF8888", 0.25:"#FF8383", 0.26:"#FF7E7E", 0.27:"#FF7979", 0.28:"#FF7373", 0.29:"#FF6E6E", 0.3:"#FF6969", 0.31:"#FF6464", 0.32:"#FF5F5F", 0.33:"#FF5A5A", 0.34:"#FF5454", 0.35:"#FF4F4F", 0.36:"#FF4A4A", 0.37:"#FF4545", 0.38:"#FF4040", 0.39:"#FF3B3B", 0.4:"#FF3636", 0.41:"#FF3030", 0.42:"#FF2B2B", 0.43:"#FF2626", 0.44:"#FF2121", 0.45:"#FF1C1C", 0.46:"#FF1717", 0.47:"#FF1212", 0.48:"#FF0C0C", 0.49:"#FF0707", 0.5:"#FF0202", 0.51:"#FC0000", 0.52:"#F70000", 0.53:"#F20000", 0.54:"#EC0000", 0.55:"#E70000", 0.56:"#E20000", 0.57:"#DD0000", 0.58:"#D80000", 0.59:"#D30000", 0.6:"#CE0000", 0.61:"#C80000", 0.62:"#C30000", 0.63:"#BE0000", 0.64:"#B90000", 0.65:"#B40000", 0.66:"#AF0000", 0.67:"#A90000", 0.68:"#A40000", 0.69:"#9F0000", 0.7:"#9A0000", 0.71:"#950000", 0.72:"#900000", 0.73:"#8B0000", 0.74:"#850000", 0.75:"#800000", 0.76:"#7B0000", 0.77:"#760000", 0.78:"#710000", 0.79:"#6C0000", 0.8:"#670000", 0.81:"#610000", 0.82:"#5C0000", 0.83:"#570000", 0.84:"#520000", 0.85:"#4D0000", 0.86:"#480000", 0.87:"#420000", 0.88:"#3D0000", 0.89:"#380000", 0.9:"#330000", 0.91:"#2E0000", 0.92:"#290000", 0.93:"#240000", 0.94:"#1E0000", 0.95:"#190000", 0.96:"#140000", 0.97:"#0F0000", 0.98:"#0A0000", 0.99:"#050000", 1:"#000000"}
			# rgb( colorRamp(c("red", "black"))(seq(0, 1, length = 100)), max = 255))
			support_colours = {}
			
			if args.branch_support_cutoff:
				for i in range(0,args.branch_support_cutoff):
					support_colours[i] = "#FF0000"
				for i in range(args.branch_support_cutoff,101):
					support_colours[i] = "#000000"
			else:
				if args.branch_support_percent:
					support_colours = {0:"#FF0000",1:"#FF0000",2:"#FC0000",3:"#F90000",4:"#F70000",5:"#F40000",6:"#F20000",7:"#EF0000",8:"#EC0000",9:"#EA0000",10:"#E70000",11:"#E50000",12:"#E20000",13:"#E00000",14:"#DD0000",15:"#DA0000",16:"#D80000",17:"#D50000",18:"#D30000",19:"#D00000",20:"#CE0000",21:"#CB0000",22:"#C80000",23:"#C60000",24:"#C30000",25:"#C10000",26:"#BE0000",27:"#BC0000",28:"#B90000",29:"#B60000",30:"#B40000",31:"#B10000",32:"#AF0000",33:"#AC0000",34:"#AA0000",35:"#A70000",36:"#A40000",37:"#A20000",38:"#9F0000",39:"#9D0000",40:"#9A0000",41:"#970000",42:"#950000",43:"#920000",44:"#900000",45:"#8D0000",46:"#8B0000",47:"#880000",48:"#850000",49:"#830000",50:"#800000",51:"#7E0000",52:"#7B0000",53:"#790000",54:"#760000",55:"#730000",56:"#710000",57:"#6E0000",58:"#6C0000",59:"#690000",60:"#670000",61:"#640000",62:"#610000",63:"#5F0000",64:"#5C0000",65:"#5A0000",66:"#570000",67:"#540000",68:"#520000",69:"#4F0000",70:"#4D0000",71:"#4A0000",72:"#480000",73:"#450000",74:"#420000",75:"#400000",76:"#3D0000",77:"#3B0000",78:"#380000",79:"#360000",80:"#330000",81:"#300000",82:"#2E0000",83:"#2B0000",84:"#290000",85:"#260000",86:"#240000",87:"#210000",88:"#1E0000",89:"#1C0000",90:"#190000",91:"#170000",92:"#140000",93:"#120000",94:"#0F0000",95:"#0C0000",96:"#0A0000",97:"#070000",98:"#050000",99:"#020000",100:"#000000"}
				else:
					support_colours = {0.0:"#FF0000", 0.01:"#FF0000", 0.02:"#FC0000", 0.03:"#F90000", 0.04:"#F70000", 0.05:"#F40000", 0.06:"#F20000", 0.07:"#EF0000", 0.08:"#EC0000", 0.09:"#EA0000", 0.1:"#E70000", 0.11:"#E50000", 0.12:"#E20000", 0.13:"#E00000", 0.14:"#DD0000", 0.15:"#DA0000", 0.16:"#D80000", 0.17:"#D50000", 0.18:"#D30000", 0.19:"#D00000", 0.2:"#CE0000", 0.21:"#CB0000", 0.22:"#C80000", 0.23:"#C60000", 0.24:"#C30000", 0.25:"#C10000", 0.26:"#BE0000", 0.27:"#BC0000", 0.28:"#B90000", 0.29:"#B60000", 0.3:"#B40000", 0.31:"#B10000", 0.32:"#AF0000", 0.33:"#AC0000", 0.34:"#AA0000", 0.35:"#A70000", 0.36:"#A40000", 0.37:"#A20000", 0.38:"#9F0000", 0.39:"#9D0000", 0.4:"#9A0000", 0.41:"#970000", 0.42:"#950000", 0.43:"#920000", 0.44:"#900000", 0.45:"#8D0000", 0.46:"#8B0000", 0.47:"#880000", 0.48:"#850000", 0.49:"#830000", 0.5:"#800000", 0.51:"#7E0000", 0.52:"#7B0000", 0.53:"#790000", 0.54:"#760000", 0.55:"#730000", 0.56:"#710000", 0.57:"#6E0000", 0.58:"#6C0000", 0.59:"#690000", 0.6:"#670000", 0.61:"#640000", 0.62:"#610000", 0.63:"#5F0000", 0.64:"#5C0000", 0.65:"#5A0000", 0.66:"#570000", 0.67:"#540000", 0.68:"#520000", 0.69:"#4F0000", 0.7:"#4D0000", 0.71:"#4A0000", 0.72:"#480000", 0.73:"#450000", 0.74:"#420000", 0.75:"#400000", 0.76:"#3D0000", 0.77:"#3B0000", 0.78:"#380000", 0.79:"#360000", 0.8:"#330000", 0.81:"#300000", 0.82:"#2E0000", 0.83:"#2B0000", 0.84:"#290000", 0.85:"#260000", 0.86:"#240000", 0.87:"#210000", 0.88:"#1E0000", 0.89:"#1C0000", 0.9:"#190000", 0.91:"#170000", 0.92:"#140000", 0.93:"#120000", 0.94:"#0F0000", 0.95:"#0C0000", 0.96:"#0A0000", 0.97:"#070000", 0.98:"#050000", 0.99:"#020000", 1.0:"#000000"}
		for node in tree.traverse():			
			nstyle = NodeStyle()
			nstyle["size"] = 0
			if node.name in table:
				#print "Colouring individual " + node.name
				if args.colour_branches_by:
					nstyle["vt_line_color"] = colour_dict_br[table[node.name][args.colour_branches_by]] # set branch colour
					nstyle["hz_line_color"] = colour_dict_br[table[node.name][args.colour_branches_by]]
				if args.colour_backgrounds_by:
					if args.colour_branches_by in table[node.name]:
						if table[node.name][args.colour_branches_by] != "none":
							nstyle["bgcolor"] = colour_dict_bg[table[node.name][args.colour_backgrounds_by]] # set background colour
				node.set_style(nstyle)
			else:
				# internal node
				descendants = node.get_leaves()
				descendant_labels_br = []
				descendant_labels_bg = []
				for d in descendants:
					if args.colour_branches_by:
						if d.name in table:
							this_label_br = table[d.name][args.colour_branches_by]
							if this_label_br not in descendant_labels_br:
								descendant_labels_br.append(this_label_br)
						elif "none" not in descendant_labels_br:
							descendant_labels_br.append("none")
					if args.colour_backgrounds_by:
						if d.name in table:
							this_label_bg = table[d.name][args.colour_backgrounds_by]
							if this_label_bg not in descendant_labels_bg:
								descendant_labels_bg.append(this_label_bg)
						elif "none" not in descendant_labels_bg:
							descendant_labels_bg.append("none")
#				nstyle = NodeStyle()
#				nstyle["size"] = 0
				if len(descendant_labels_br) == 1 and descendant_labels_br[0] != "none":
					this_colour = colour_dict_br[descendant_labels_br[0]]
					nstyle["vt_line_color"] = this_colour # set branch colour
					nstyle["hz_line_color"] = this_colour
				elif args.branch_support_colour and not node.is_leaf():
					if int(node.support) in support_colours:
						nstyle["vt_line_color"] = support_colours[int(node.support)] # take colour from support value
						nstyle["hz_line_color"] = support_colours[int(node.support)]
					else:
						print "  WARNING support values don't make sense. Note scale is assumed to be 0-1 unless using the --branch_support_percent flag."
				if len(descendant_labels_bg) == 1 and descendant_labels_bg[0] != "none":
					this_colour = colour_dict_bg[descendant_labels_bg[0]]
					nstyle["bgcolor"] = this_colour # set background colour
				node.set_style(nstyle)
					
	if args.colour_nodes_by:
		if args.colour_nodes_by not in labels:
			print "- Colouring nodes by label " + args.colour_nodes_by
			colour_dict = getColourPalette(column_values[args.colour_nodes_by],args,args.colour_nodes_by)
			for node in tree.get_leaves():
				if node.name in table:
					this_label = table[node.name][args.colour_nodes_by]
					this_colour = colour_dict[this_label]
					if this_colour != "None":
						node.img_style["fgcolor"] = this_colour
						node.img_style["size"] = args.node_size
				

	for node in tree.traverse():
		node.img_style["hz_line_width"] = args.branch_thickness
		node.img_style["vt_line_width"] = args.branch_thickness
			
	# set tree style
	ts = TreeStyle()
	
	if args.show_leaf_names:
		ts.show_leaf_name = True
	else:
		ts.show_leaf_name = False

	if args.length_scale:
		ts.scale = args.length_scale
		
	if args.branch_padding:
		ts.branch_vertical_margin = args.branch_padding
		
	if args.branch_support_print:
		ts.show_branch_support = True
		
	if args.fan:
		ts.mode = "c"
		print "\nPrinting circular tree (--fan)"
	else:
		print "\nPrinting rectangular tree, to switch to circular use --fan"
		
	if args.title:
		title = TextFace(args.title, fsize=20)
		title.margin_left = 20
		title.margin_top = 20
		ts.title.add_face(title, column=1)
		
	if args.no_guiding_lines:
		ts.draw_guiding_lines = False
		
	if args.data:
		print "\nPrinting data matrix as " + args.data_type + " with range (" +  str(args.mindata) + "->" +  str(args.maxdata) + ";" +  str(args.centervalue) + "), height " + str(args.data_height) + ", width " + str(args.data_width)
		profileFace  = ProfileFace(min_v=args.mindata, max_v=args.maxdata, center_v=args.centervalue, width=args.data_width, height=args.data_height, style=args.data_type)
		def mylayout(node):
			if node.is_leaf():
				add_face_to_node(profileFace, node, 0, aligned=True)
		ts.layout_fn = mylayout
		
	# set root branch length to zero
	tree.dist=0
	
	# render tree
	tree.render(args.output, w=args.width, dpi=300, units="mm", tree_style=ts)
	
	print "\n FINISHED! Tree plot printed to file " + args.output
	print
	
	if args.print_colour_dict:
		print colour_dict
		if args.colour_branches_by:
			print colour_dict_br
		if args.colour_backgrounds_by:
			print colour_dict_bg
	
	if args.interactive:
		print "\nEntering interactive mode..."
		tree.show(tree_style=ts)
Beispiel #30
0
def MakePlot(x, org_names, ckm30, ckm50, outgroup, outfile, outfilexml, sum_x):
    """Build a neighbor-joining tree of the organisms and plot abundances on it.

    A distance matrix is derived from the two (diagonal-normalized) ckm
    matrices, a neighbor-joining tree is built from it, "hypothetical" nodes
    are inserted along the branches leading to each organism, and a circle
    whose radius scales with sqrt(relative abundance) is drawn on every
    organism leaf and every inserted hypothetical node. The figure is rendered
    to ``outfile`` and the tree is exported as PhyloXML to ``outfilexml``.

    Parameters:
        x: flat sequence of abundances. Entry ``k`` belongs to organism
            ``org_names[k]``; entry ``k + (c + 1) * len(org_names)`` belongs to
            the hypothetical node of organism ``k`` at the ``c``-th cutoff.
            It is normalized by its sum before use.
        org_names: list of organism names. NOTE: duplicates are renamed
            *in place* (``name_1``, ``name_2``, ...), so the caller's list is
            modified.
        ckm30, ckm50: square matrices indexed as ``m[i, j]`` with a non-zero
            diagonal (presumably numpy arrays of k-mer similarities for
            k=30 and k=50 -- confirm against the caller).
        outgroup: name of the leaf to use as outgroup; a warning is printed
            and no outgroup is set when it is not among the names.
        outfile: path of the rendered tree image.
        outfilexml: path of the PhyloXML export.
        sum_x: value shown (first 8 characters of its string form) in the
            legend as the total absolute abundance.

    Raises:
        ValueError: from the internal ``insert_node`` helper when a node would
            have to be inserted further along a branch than the branch is long.
    """

    #Make sure names are unique
    #names is an alias of org_names on purpose: every later lookup by
    #org_names must see the de-duplicated names used as tree leaf names.
    names = org_names
    for name in names:
        if names.count(name) > 1:
            #Don't change the last one, just to make sure we don't conflict with the outgroup
            for suffix in range(1, names.count(name)):
                names[names.index(name)] = name + "_" + str(suffix)

    #Normalize the x vector (sum computed once; a real list is needed because
    #x is sliced and indexed below)
    total = sum(x)
    x = [y / total for y in x]
    ckm30_norm = np.multiply(ckm30, 1 / np.diag(ckm30))
    ckm50_norm = np.multiply(ckm50, 1 / np.diag(ckm50))
    num_rows = ckm30_norm.shape[0]
    #Lower-triangular distance matrix (row i holds columns 0..i): the average
    #over both matrices of one minus the symmetrized similarity.
    matrix = list()
    for i in range(num_rows):
        matrix.append([
            .5 * (1 - .5 * ckm30_norm[i, j] - .5 * ckm30_norm[j, i]) + .5 *
            (1 - .5 * ckm50_norm[i, j] - .5 * ckm50_norm[j, i])
            for j in range(i + 1)
        ])

    #Make the list of distances (ave of the two ckm matrices)
    ckm_ave_train = .5 * ckm30_norm + .5 * ckm50_norm
    ckm_ave_train_dist = dict()
    for i in range(len(org_names)):
        ckm_ave_train_dist[org_names[i]] = [
            .5 * ckm_ave_train[i, j] + .5 * ckm_ave_train[j, i]
            for j in range(len(org_names))
        ]

    #Construct the tree. Note I could use RapidNJ here, but a few tests have shown that the trees that RapidNJ creates are rubbish.
    dm = _DistanceMatrix(names, matrix)
    constructor = DistanceTreeConstructor()
    tree = constructor.nj(dm)
    t = Tree(tree.format('newick'), format=1)

    #Now I will put internal nodes in a certain phylogenetic distance between the root and a given node.
    #Function to insert a node at a given distance
    def insert_node(t, name_to_insert, insert_above, dist_along):
        """Insert a node named name_to_insert on the branch above insert_above,
        dist_along away from that branch's parent end."""
        insert_at_node = t.search_nodes(name=insert_above)[0]
        parent = (t & insert_above).up
        orig_branch_length = t.get_distance(insert_at_node, parent)
        if orig_branch_length < dist_along:
            raise ValueError(
                "error: dist_along larger than orig_branch_length in PlotPackage.py"
            )
        removed_node = insert_at_node.detach()
        removed_node.dist = orig_branch_length - dist_along
        added_node = parent.add_child(name=name_to_insert, dist=dist_along)
        added_node.add_child(removed_node)

    #Function to insert a node some % along a branch, taking into account the ckm distances and nodes already created in the NJ tree (and what distance their descendants are from everyone else)
    def insert_hyp_node(t, leaf_name, percent, ckm_ave_train_dist, org_names):
        """Insert the hypothetical node leaf_name + "_" + str(percent) on the
        path between the root and leaf_name."""
        dists = [abs(y - percent) for y in ckm_ave_train_dist[leaf_name]]
        #Collecting every organism within 0.05 of the given percent is
        #currently disabled, so this list stays empty and only the single
        #closest organism is used.
        nearby_indicies = list()
        nearby_names = list()
        #If there are no nearby indicies, add the closest organism to the given percent
        if not nearby_indicies:
            nearby_names.append(org_names[dists.index(min(dists))])
        else:
            #Index org_names by the stored organism index, not by the position in nearby_indicies
            for idx in nearby_indicies:
                nearby_names.append(org_names[idx])
        mean_dist = np.mean([
            ckm_ave_train_dist[leaf_name][org_names.index(y)]
            for y in nearby_names
        ])
        nearby_names.append(leaf_name)
        LCA = t.get_common_ancestor(nearby_names)
        LCA_to_leaf_dist = t.get_distance(LCA, leaf_name)
        #divide the dist to the right/left of the LCA node by the number of percentage points in there
        if LCA.name == t.name:
            percent_dist = percent * LCA_to_leaf_dist
            if mean_dist <= percent:
                child_node = (t & leaf_name)
            else:
                child_node = (
                    t & nearby_names[0]
                )  #This means "go up from root" in the direction of the nearest guy
            ancestor_node = (t & child_node.name).up
        elif mean_dist <= percent:
            percent_dist = t.get_distance(LCA) + abs(percent - mean_dist) * (
                LCA_to_leaf_dist) / (1 - mean_dist)
            child_node = (t & leaf_name)
            ancestor_node = (t & child_node.name).up
        else:
            percent_dist = t.get_distance(LCA) - abs(percent - mean_dist) * (
                t.get_distance(LCA)) / (mean_dist)
            child_node = (t & leaf_name)
            ancestor_node = (t & child_node.name).up
        #Walk towards the root until the branch that contains percent_dist is found
        while t.get_distance(t.name, ancestor_node) > percent_dist:
            child_node = ancestor_node
            ancestor_node = (t & child_node.name).up
        insert_node(t, leaf_name + "_" + str(percent), child_node.name,
                    percent_dist - t.get_distance(t.name, ancestor_node))

    #Set outgroup
    if outgroup in names:
        t.set_outgroup(t & outgroup)
    else:
        print("WARNING: the chosen outgroup " + outgroup +
              " is not in the given taxonomy: ")
        print(names)
        print(
            "Proceeding without setting an outgroup. This may cause results to be uninterpretable."
        )

    #Insert hypothetical nodes
    hyp_node_names = dict()
    cutoffs = [.9, .8, .7, .6, .5, .4, .3, .2, .1]
    #Map the nominal cutoffs through a fixed cubic before using them as positions
    cutoffs = [
        -.5141 * (val**3) + 1.0932 * (val**2) + 0.3824 * val for val in cutoffs
    ]
    for i in range(len(org_names)):
        #xi[0] is the organism itself, xi[j] its abundance at cutoff j - 1
        xi = x[i:len(x):len(org_names)]
        for j in range(1, len(cutoffs) + 1):
            if xi[j] > 0:
                insert_hyp_node(t, org_names[i], cutoffs[j - 1],
                                ckm_ave_train_dist, org_names)
                hyp_node_names[org_names[i] + "_" + str(cutoffs[j - 1])] = [
                    org_names[i], cutoffs[j - 1], j - 1
                ]  #in case there are "_" in the file names

    size_factor = 250
    font_size = 55

    #Now put the bubbles on the nodes
    def layout(node):
        """Style the branches of node and draw its abundance bubble, if any."""
        node_style = NodeStyle()
        node_style["hz_line_width"] = 10
        node_style["vt_line_width"] = 10
        node.set_style(node_style)
        if node.is_leaf():
            if node.name in org_names:
                #make reconstructed bubble
                size = x[org_names.index(node.name)]
                F = CircleFace(radius=size_factor * math.sqrt(size),
                               color="RoyalBlue",
                               style="sphere")
                F.border.width = None
                F.opacity = 0.6
                faces.add_face_to_node(F, node, 0, position="branch-right")
                #Denote that this was a training organism
                nameFace = AttrFace("name", fsize=font_size, fgcolor='black')
                faces.add_face_to_node(nameFace,
                                       node,
                                       0,
                                       position="branch-right")
        elif node.name in hyp_node_names:  #Otherwise it's a hypothetical node, just use recon x
            node_base_name = hyp_node_names[node.name][0]
            if node_base_name in org_names:
                idx = hyp_node_names[node.name][2]
                size = x[org_names.index(node_base_name) +
                         (idx + 1) * len(org_names)]
                F = CircleFace(radius=size_factor * math.sqrt(size),
                               color="RoyalBlue",
                               style="sphere")
                F.border.width = None
                F.opacity = 0.6
                faces.add_face_to_node(F, node, 0, position="branch-right")
                #The names of the hypothetical nodes are deliberately not printed

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.mode = "r"
    ts.scale = 2 * 1000
    ts.show_leaf_name = False
    ts.min_leaf_separation = 50
    F = CircleFace(radius=.87 * size_factor, color="RoyalBlue", style="sphere")
    F.border.width = None
    F.opacity = 0.6
    ts.legend.add_face(F, 0)
    ts.legend.add_face(
        TextFace("  Inferred relative abundance",
                 fsize=1.5 * font_size,
                 fgcolor="Blue"), 1)
    ts.legend.add_face(
        TextFace("  Total absolute abundance depicted " + str(sum_x)[0:8],
                 fsize=1.5 * font_size,
                 fgcolor="Black"), 1)
    ts.legend_position = 4
    t.render(outfile, w=550, units="mm", tree_style=ts)

    #Render the XML file
    project = Phyloxml()
    phylo = phyloxml.PhyloxmlTree(newick=t.write(format=0, features=[]))
    project.add_phylogeny(phylo)
    #Close the output file deterministically instead of leaking the handle
    with open(outfilexml, 'w') as xml_handle:
        project.export(xml_handle)
Beispiel #31
0
 def ly_supports(node):
     """Layout function that prints the support value above a node's branch.

     Leaves and nodes without a parent (``node.up`` falsy) are left untouched;
     every other node gets its ``support`` formatted with ``%0.2g`` as a small
     'indianred' text face in the 'branch-top' position.
     """
     if node.is_leaf() or not node.up:
         return
     support_label = TextFace("%0.2g" %(node.support), fsize=7, fgcolor='indianred')
     add_face_to_node(support_label, node, column=0, position='branch-top')
Beispiel #32
0
                cons = get_consensus(multi, cognateGuideTree, recon_alg="sankoff_parsimony", gaps=True, classes=False, rep_weights = rep_weights, local = "gap")

                cognateParsimony = 0.0
                
                #aggregate the parsimony value
                for i in range(len(cognateGuideTree.reconstructed)):
                    cognateParsimony += min(cognateGuideTree.sankoffTable[i].values())
                    
                familyParsimony += cognateParsimony
                    
                print("Reconstructed proto-" + familyName + " word for concept " + str(conceptID - 3) + ":\t" + cons + "\twith average parsimony " + str(cognateParsimony / len(cognateLangs)))
                 
                #PRINT OUT RECONSTRUCTION STEPS IN A TREE VISUALIZATION
                if graphicalOutput:
                    outputTree = Tree()
                    outputTree.add_face(TextFace(str("".join(cognateGuideTree.reconstructed))), column=0, position = "branch-right")
                    def copyChildrenIntoOutput(treeNode, outputTreeNode):
                        # Recursively mirror the subtree below treeNode under outputTreeNode.
                        # Tips become named leaves ("<reconstruction> (<cognate name>)");
                        # inner nodes get their reconstruction as a branch-right text face
                        # and are then descended into.
                        for subNode in treeNode.Children:
                            mirrored = outputTreeNode.add_child()
                            if not subNode.isTip():
                                innerLabel = TextFace(str("".join(subNode.reconstructed)))
                                mirrored.add_face(innerLabel, column=0, position = "branch-right")
                                copyChildrenIntoOutput(subNode, mirrored)
                                continue
                            tipWord = str("".join(subNode.reconstructed))
                            mirrored.name = tipWord + " (" + cognateNameTable[int(subNode.Name)] + ")"
                    copyChildrenIntoOutput(cognateGuideTree,outputTree)
                    outputTree.render("output/" + phylName + "/" + familyName + "/" + str(conceptID - 3) + "." + cons.replace("-","") +".png")
                
                #print("\nDetermining and counting sound changes at the edges of the guide tree, and cascading them to the supertrees:")
                for node in cognateGuideTree.postorder():
                    if not hasattr(node, "recon_changes"):
                        node.recon_changes = {}
Beispiel #33
0
        spaciators.add(len(column_header) - 1)

    header2column = dict([(name, i) for i, name in enumerate(column_header)])

    ts = TreeStyle()
    ts.mode = 'r'
    ts.draw_guiding_lines = False
    ts.show_leaf_name = False
    ts.force_topology = False
    ts.layout_fn = layout
    ts.tree_width = 800
    ts.draw_aligned_faces_as_table = True

    for i, name in enumerate(column_header):
        if name:
            headerF = TextFace(str(name), fgcolor=column_color[i], fsize=40)
            headerF.rotation = -85
        else:
            headerF = RectFace(300, 5, "white", "white")
        ts.aligned_header.add_face(headerF, i)

    #tree_files = sys.argv[1:]
    for treefile in args.tree_files:
        output = treefile + '.png'
        print 'rendering', output
        try:
            t = Tree(open(treefile).read().replace('|', ','))
        except Exception, e:
            print e, treefile
        else:
            t.set_outgroup(t.get_midpoint_outgroup())