Exemple #1
0
    def ly_block_alg(node):
        """Attach an aligned, block-style alignment face to a leaf node.

        Only leaves that carry a ``sequence`` feature are decorated; any
        other node is left untouched.  Every maximal run of non-gap
        characters (anything other than ``'-'``) becomes one ``"()"`` motif
        spanning the run, and the face is added in column ``ALG_START_COL``
        of the aligned area.
        """
        if not node.is_leaf() or 'sequence' not in node.features:
            return

        sequence = node.sequence
        last_index = len(sequence) - 1

        # Motif entries follow the layout shown in the ETE example:
        # [10, 100, "[]", None, 10, "black", "rgradient:blue", "arial|8|white|domain Name"]
        motifs = []
        run_start = None  # index where the current non-gap run began
        for pos, residue in enumerate(sequence):
            if residue == '-':
                # A gap closes the run that was open, if any.
                if run_start is not None:
                    motifs.append([
                        run_start, pos - 1, "()", 0, 12, "grey", "slategrey",
                        None
                    ])
                    run_start = None
                continue

            if run_start is None:
                run_start = pos
            if pos == last_index:
                # The sequence ends inside a run: close it here.
                # NOTE(review): this branch uses "slategrey" as the first
                # colour while runs closed by a gap use "grey" -- kept as in
                # the original; confirm whether the difference is intended.
                motifs.append([
                    run_start, pos, "()", 0, 12, "slategrey", "slategrey",
                    None
                ])
                run_start = None

        seqFace = SeqMotifFace(sequence,
                               motifs,
                               intermotif_format="line",
                               seqtail_format="line",
                               scale_factor=ALG_SCALE)
        add_face_to_node(seqFace, node, ALG_START_COL, aligned=True)
Exemple #2
0
def get_example_tree():
        """Build a fixed nine-leaf demo tree with one SeqMotifFace per leaf.

        Each leaf (A..I) receives a face built with a different combination
        of SeqMotifFace options, all placed in column 0 of the aligned area.
        The sequence and motif lists (``seq``, ``simple_motifs``,
        ``mixed_motifs``, ``box_motifs``) are read from the enclosing scope.

        Returns the decorated tree.
        """
        t = Tree("( (A, B, C, D, E, F, G), H, I);")

        # (leaf name, positional args, keyword args) for each face, in the
        # order the faces are created and attached.
        face_specs = [
            ("A", (seq,), {"gapcolor": "red"}),
            ("B", (seq,), {"seq_format": "line", "gap_format": "blank"}),
            ("C", (seq,), {"seq_format": "line"}),
            ("D", (seq,), {"seq_format": "()"}),
            ("E", (seq,), {"motifs": simple_motifs, "seq_format": "-"}),
            ("F", (), {"seq": None, "motifs": simple_motifs,
                       "gap_format": "blank"}),
            ("G", (seq,), {"motifs": mixed_motifs, "seq_format": "-"}),
            ("H", (), {"seq": None, "motifs": box_motifs,
                       "gap_format": "line"}),
            ("I", (seq[30:60],), {"seq_format": "seq"}),
        ]

        for leaf_name, face_args, face_kwargs in face_specs:
            seqFace = SeqMotifFace(*face_args, **face_kwargs)
            (t & leaf_name).add_face(seqFace, 0, "aligned")

        return t
Exemple #3
0
 def add_domains_to_tree(self, t):
     '''
     Display domains without a sequence / MSA.

     For every leaf of ``t`` the domains registered under the leaf's name in
     ``self.domain_dict`` are drawn as fixed-size boxes: the n-th domain
     occupies positions ``n * 55`` to ``n * 55 + 50``, so the annotated
     start/stop coordinates are deliberately not used here.  Leaves without
     annotated domains get no face.

     The colour of a domain is ``COLORS[i]`` where ``i`` is the position of
     the domain name in ``self.domains`` ('gray' when ``COLORS`` is too
     short).  A domain name missing from ``self.domains`` raises ValueError.
     '''
     for leaf in t:
         domains = self.domain_dict.get(leaf.name, [])
         motifs = []
         for dom_n, (domname, _start, _stop) in enumerate(domains):
             color_n = self.domains.index(domname)
             try:
                 dom_color = COLORS[color_n]
             except IndexError:
                 dom_color = 'gray'
             slot_start = dom_n * 55
             motifs.append([
                 slot_start, slot_start + 50, '()', None, 10, None,
                 'rgradient:{}'.format(dom_color),
                 'arial|6|black|{}'.format(domname)
             ])
         if motifs:
             domface = SeqMotifFace(None, motifs=motifs, gap_format='line')
             # Attach to the leaf we are iterating over instead of searching
             # the tree again by name (a full search per leaf, which would
             # also pick the first match if a name occurred more than once).
             leaf.add_face(domface, column=0, position='aligned')
def generate_neighbour_face(node_ns, ns_colors, block, offsets):
    """Assemble a single SeqMotifFace laying out neighbour motifs slot by slot.

    ``offsets`` is walked pairwise (``offsets[i]``, ``offsets[i + 1]``)
    together with ``node_ns``.  An empty entry of ``node_ns`` yields one
    blank motif covering the slot.  A non-empty entry is unpacked as
    ``(nbr1, nbr2, copies, orientations)`` and produces, via
    ``get_neighbour_motifs``: the motifs of ``nbr1`` at the slot start, one
    group per orientation named ``<block>h`` ('+') or ``<block>t``
    (otherwise) at 50-unit steps, the motifs of ``nbr2`` after the copies,
    and a blank filler up to the next offset.  Finally every inner offset
    gets a grey "[]" marker 10 units before it, followed by a blank.

    ``offsets`` must support ``.tolist()`` and slicing.
    """

    def blank(start, end):
        # Spacer motif covering [start, end].
        return [start, end, "blank", None, 10, None, None, None]

    layout = [blank(0, 0)]
    slot_ends = offsets.tolist()[1:]

    for slot_start, slot_end, neighbours in zip(offsets, slot_ends, node_ns):
        if len(neighbours) == 0:
            layout.append(blank(slot_start, slot_end))
            continue

        nbr1, nbr2, copies, orientations = neighbours

        layout.extend(get_neighbour_motifs(nbr1, ns_colors, slot_start))

        for idx, orientation in enumerate(orientations):
            suffix = 'h' if orientation == '+' else 't'
            layout.extend(
                get_neighbour_motifs(f'{block}' + suffix, ns_colors,
                                     slot_start + (idx + 1) * 50))

        after_copies = slot_start + (copies + 1) * 50
        layout.extend(
            get_neighbour_motifs(nbr2, ns_colors, after_copies, True))
        layout.append(blank(after_copies + 40, slot_end))

    for boundary in offsets[1:-1]:
        tick = boundary - 10
        layout.append([tick, tick, "[]", None, 10, 'grey', 'grey', None])
        layout.append(blank(tick, boundary))

    return SeqMotifFace('', motifs=layout)
Exemple #5
0
	def get_example_tree_2():
		"""Build the tree described by ``nwTree`` and decorate every leaf.

		Each leaf receives an aligned SeqMotifFace (column 0) built from the
		shared ``seq`` and the motif list stored under the leaf's name in
		``motifDict_2``; a leaf name missing from that mapping raises
		KeyError.  Returns the decorated tree.
		"""
		t = Tree(nwTree)
		# Walk the parsed leaves rather than re-deriving leaf names from the
		# raw Newick text: the text-based split broke on whitespace and on
		# leaves written without a ':branch_length' suffix, and needed a
		# full tree search per leaf to find the node again.
		for leaf in t:
			seqFace2 = SeqMotifFace(seq, motifs=motifDict_2[leaf.name], seq_format="-", gap_format="blank")
			leaf.add_face(seqFace2, 0, "aligned")
		return t
Exemple #6
0
    def add_msa_with_domains_to_tree(self, t):
        '''
        Iterate over all leaves of the tree and add the sequence, domain
        and intron position visualizations.

        For each leaf the gapped sequence is read from
        ``self.msa_fasta_dict`` (KeyError if the leaf name is missing).
        Domains from ``self.domain_dict`` become '()' motifs at their
        annotated start/stop.  If the instance has a ``cds_length_dict``,
        every CDS boundary except the last is mapped to gapped coordinates
        with ``self.correct_borders_for_gaps`` and drawn as a black '[]'
        marker; leaves without an entry only trigger a printed warning.

        Raises Exception when ``correct_borders_for_gaps`` raises KeyError
        (reported as the MSA protein being shorter than the GFF one).
        '''
        for leaf in t:
            gene_id = leaf.name
            gapped_seq = self.msa_fasta_dict[gene_id]

            # If no domains are annotated the loop does not iterate and no
            # domain motifs are added.
            motifs = []
            for domname, start, stop in self.domain_dict.get(gene_id, []):
                color_n = self.domains.index(domname)
                try:
                    dom_color = COLORS[color_n]
                except IndexError:
                    dom_color = 'gray'
                motifs.append([
                    start, stop, '()', None, 10, None,
                    'rgradient:{}'.format(dom_color),
                    'arial|6|black|{}'.format(domname)
                ])  # domain markers

            if hasattr(self, 'cds_length_dict'):
                if gene_id not in self.cds_length_dict:
                    print('Warning: No GFF entry found for {}'.format(gene_id))
                else:
                    current_pos = 1
                    # The end of the last CDS is the end of the protein, not
                    # an intron position, hence [:-1].
                    for cds_len in self.cds_length_dict[gene_id][:-1]:
                        current_pos += cds_len
                        try:
                            gapped_pos, __ = self.correct_borders_for_gaps(
                                gapped_seq, int(round(current_pos)), 0)
                        except KeyError:
                            raise Exception(
                                'The protein sequence of {} is shorter '
                                'in the MSA than in the GFF!'.format(gene_id))

                        motifs.append([
                            (gapped_pos - 1), (gapped_pos + 1), '[]', None, 10,
                            None, 'black', None
                        ])  # black line that marks the intron positions

            seqface = SeqMotifFace(gapped_seq,
                                   gapcolor='gray',
                                   motifs=motifs,
                                   seq_format=self.seq_style,
                                   gap_format=self.gap_style,
                                   scale_factor=self.scale_factor)
            # Attach directly to the leaf being visited; looking it up again
            # by name costs a tree search per leaf and would select the
            # first match if a name occurred more than once.
            leaf.add_face(seqface, column=0, position='aligned')
Exemple #7
0
def _tree_visual(typeListFilter, motifData, hmmPath, finalPath, cachePath):
    """Render the cached phylogeny with one motif face per type to a PDF.

    The tree is loaded from ``<cachePath>/sequence.fasta.phy``.  For every
    index ``i`` below ``len(typeListFilter)`` a SeqMotifFace is built from
    ``motifData[i]`` over a placeholder sequence of ``10 + 60 * n`` 'A'
    characters (``n`` = number of motifs) and attached to the node named
    ``'Type <i+1> [<typeList[i][1]>]'``.  The result is written to
    ``<finalPath>/phylogeneticTree.pdf``.  ``hmmPath`` is not used.
    """
    tree = Tree(os.path.join(cachePath, 'sequence.fasta.phy'))
    style = TreeStyle()

    for idx in range(len(typeListFilter)):
        type_motifs = motifData[idx]
        placeholder_seq = (10 + 60 * len(type_motifs)) * 'A'
        face = SeqMotifFace(placeholder_seq,
                            motifs=type_motifs,
                            seq_format="-")
        # NOTE(review): ``typeList`` is not a parameter of this function; it
        # is resolved from an enclosing scope.  Confirm it is not meant to
        # be ``typeListFilter``.
        node_label = ('Type ' + str(idx + 1) + ' [' + str(typeList[idx][1])
                      + ']')
        (tree & node_label).add_face(face, 0, "aligned")

    output_pdf = os.path.join(finalPath, 'phylogeneticTree.pdf')
    tree.render(output_pdf, tree_style=style)
Exemple #8
0
def _create_tree(tree, fasta, out, color):
    """Render ``tree`` with each leaf's sequence drawn next to it.

    Args:
        tree: tree source passed to ``Tree``.
        fasta: FASTA source passed to ``SeqGroup``; sequences are looked up
            by leaf name.
        out: output target passed to ``Tree.render``.
        color: colour-file argument passed to ``_parse_color_file``; the
            result is used as a mapping from leaf name to background colour.

    Every leaf gets exactly one aligned SeqMotifFace (column 0).  Leaves
    whose name appears in the colour mapping also get that background.
    """
    seqs = SeqGroup(fasta, format="fasta")
    t = Tree(tree)
    colors = _parse_color_file(color)

    # Visit each leaf once.  Looking leaves up by name for every leaf name
    # was quadratic and attached duplicate faces when several leaves shared
    # a name.
    for leaf in t.get_leaves():
        name = leaf.name
        seqFace = SeqMotifFace(seqs.get_seq(name), seq_format="()")
        if name in colors:
            ns = NodeStyle()
            ns['bgcolor'] = colors[name]
            leaf.set_style(ns)
        leaf.add_face(seqFace, 0, 'aligned')
    t.render(out)
def layout(node):
    """Layout function: support values on internal nodes, annotations on leaves.

    Internal nodes get their ``support`` value as grey text on top of the
    branch.  Leaf names are expected to look like ``<taxid>.<seq_name>``
    (IndexError if there is no '.').  Each leaf receives:

    * column 1: the sequence name,
    * column 2: the predicted name from ``predict_name`` ('--' if the leaf
      has no entry),
    * column 3 (aligned): its alignment, with gaps left blank.

    Side effect: ``node.name`` of a leaf is replaced by the scientific name
    returned by ``ncbi.get_taxid_translator`` for its taxid.
    NOTE(review): because the name is overwritten, running this layout a
    second time on the same leaf will no longer find the
    ``<taxid>.<seq_name>`` pattern -- confirm the tree is rendered only once.
    """
    node.img_style["size"] = 0

    # Add bootstrap to non-leaf nodes.
    if not node.is_leaf():
        boostFace = faces.TextFace(node.support, fgcolor="grey", fsize=34)
        add_face_to_node(boostFace, node, column=0, position="branch-top")
        return

    # Get taxid and sequence name.
    name_parts = node.name.split('.')
    taxid = name_parts[0]
    seq_name = name_parts[1]

    # Add predicted name (eggnog-mapper).  Only the lookup is guarded: a
    # failure while attaching the face must not be mistaken for a missing
    # prediction and result in a second face being added.
    try:
        pred_name = predict_name[node.name]
    except KeyError:
        predNameFace = faces.TextFace('--', fgcolor="salmon", fsize=34)
    else:
        predNameFace = faces.TextFace(pred_name, fgcolor="salmon", fsize=34)
        predNameFace.margin_right = 20
        predNameFace.margin_left = 20
    add_face_to_node(predNameFace, node, column=2, position="branch-right")

    # Add sequence name.
    seqNameFace = faces.TextFace(seq_name, fgcolor="grey", fsize=34)
    add_face_to_node(seqNameFace, node, column=1, position="branch-right")

    # Replace the leaf name by the scientific name of its taxid.
    sp_name = ncbi.get_taxid_translator([taxid])
    node.name = sp_name[int(taxid)]

    # Add alignment.
    seqFace = SeqMotifFace(node.sequence, gap_format="blank")
    add_face_to_node(seqFace, node, column=3, position="aligned")
Exemple #10
0
def custom_layout(node):
    """Layout function decorating leaves and internal nodes differently.

    Leaves: the node name in gray (column 0, branch-right) and, when
    ``node.sequence`` is truthy, the alignment with blank gaps (column 1,
    aligned).

    Internal nodes: drawn as a size-3 square; a truthy name is shown on top
    of the branch and a truthy support value below it.
    """
    if not node.is_leaf():
        # --- internal node ---
        node.img_style['size'] = 3
        node.img_style['shape'] = 'square'

        # Label the node with its name, if it has one.
        if node.name:
            label = TextFace(node.name, fgcolor='brown', fsize=10)
            label.margin_bottom = 1
            add_face_to_node(label, node, column=0, position='branch-top')

        # Show the support value (e.g. bootstrap), if present and non-zero.
        if node.support:
            support_label = TextFace(node.support,
                                     fgcolor='indianred',
                                     fsize=8)
            support_label.margin_bottom = 1
            add_face_to_node(support_label,
                             node,
                             column=0,
                             position='branch-bottom')
        return

    # --- leaf ---
    leaf_label = TextFace(node.name, fgcolor='gray', fsize=11)
    add_face_to_node(leaf_label, node, column=0, position='branch-right')

    # If the leaf carries an aligned sequence, draw it.
    if node.sequence:
        alignment_face = SeqMotifFace(node.sequence, gap_format="blank")
        add_face_to_node(alignment_face, node, column=1, position="aligned")
    def design_tree(self, node_scores="CALCULATE", plot_threshold=0):
        """Decorate ``self.processed_tree`` with sequence columns and scores.

        Args:
            node_scores: ``"CALCULATE"`` (default) to call
                ``self.calculate_nodes()``; otherwise a sequence of scores
                indexed by traversal order, used to build a fresh PhyloTree
                from ``self.tree_in`` / ``self.align_in`` which is stored in
                ``self.processed_tree``.
            plot_threshold: internal nodes whose ``node_score`` is greater
                than this value get the score shown on top of their branch.

        Every leaf gets one aligned face per entry of
        ``self.position_matrix`` showing ``node.sequence[position]``.

        On any error a message is written to stderr and the process exits
        with status 1.
        """
        try:
            if node_scores == "CALCULATE":
                self.calculate_nodes()
            else:
                # NOTE: the original author flagged doubts about this option.
                tree = PhyloTree(self.tree_in,
                                 alignment=self.align_in,
                                 alg_format="fasta")
                for node_number, node in enumerate(tree.traverse()):
                    node.add_feature("node_number", node_number)
                    node.add_feature("node_score", node_scores[node_number])
                self.processed_tree = tree

            for node in self.processed_tree.traverse():
                if node.is_leaf():
                    for draw_position, position in enumerate(
                            self.position_matrix):
                        seqFace = SeqMotifFace(node.sequence[position],
                                               seq_format="seq")
                        # The node is already at hand; no need to search the
                        # tree again by name.
                        node.add_face(seqFace, draw_position, "aligned")
                elif node.node_score > plot_threshold:
                    score_face = TextFace(node.node_score)
                    node.add_face(score_face, 0, "branch-top")

        except Exception as err:
            # Catch Exception rather than everything, so Ctrl-C and explicit
            # exits are not swallowed, and report what actually went wrong.
            sys.stderr.write("Error at designing tree.\n")
            sys.stderr.write("Cause: {}: {}\n".format(
                type(err).__name__, err))
            sys.exit(1)

        return
Exemple #12
0
def get_example_tree():
    """Build a gene tree annotated with the alignment columns that vary.

    The tree is linked to the alignment ``alg`` from the enclosing scope.
    Leaves show their full sequence in the aligned area.  For each internal
    node, ``mutation_columns`` is called with the sequences of the leaves
    below it; each returned sub-sequence is drawn under the branch and each
    returned column index is drawn above it.  The aligned header is filled
    with one numbered cell per alignment column.

    Returns:
        (t, ts): the decorated PhyloTree and the TreeStyle to render it with.
    """
    gene_tree_nw = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'
    t = PhyloTree(gene_tree_nw)
    ts = TreeStyle()
    # disable default PhyloTree Layout
    ts.layout_fn = lambda x: True

    t.link_to_alignment(alg)
    node2content = t.get_cached_content()
    # Length of the alignment, taken from the leaf sequences.  Recorded
    # explicitly instead of relying on a list-comprehension variable leaking
    # into the function scope, which only happens on Python 2.
    alg_length = 0
    for node in t.traverse():
        node.img_style["size"] = 0

        if node.is_leaf():
            f = SeqMotifFace(node.sequence, seq_format="seq", width=6)
            node.add_face(f, column=0, position="aligned")
            alg_length = len(node.sequence)
            continue

        leaves = node2content[node]
        # get columns with different aa
        subseqs, relevant_columns = mutation_columns(
            [leaf.sequence for leaf in leaves])
        for seq in subseqs:
            f = SeqMotifFace(seq, seq_format="seq", width=10, height=8)
            f.margin_top = 2
            f.margin_right = 6
            node.add_face(f, column=0, position="branch-bottom")
        for j, col in enumerate(relevant_columns):
            col_f = RectFace(10, 10, fgcolor=None, bgcolor=None,
                             label={"text":str(col), "fonttype":"Courier", "color":"black", "fontsize":6})
            node.add_face(col_f, column=j, position="branch-top")
            col_f.margin_bottom = 2

    ts.draw_aligned_faces_as_table = False
    # range() works on both Python 2 and 3 (xrange is Python 2 only).
    for colnum in range(alg_length):
        col_f = RectFace(10, 10, fgcolor=None, bgcolor=None,
                         label={"text":str(colnum), "fonttype":"Courier", "color":"black", "fontsize":6})
        ts.aligned_header.add_face(col_f, column=colnum)
    return t, ts
# Keyword groups used to colour environment genes.  A later group overrides
# an earlier one when a gene name matches several of them.
_TNP_KEYWORDS = ('iscr', 'transpos', 'tnp', 'insertion', '-like')
_INT_KEYWORDS = ('inti', 'integrase', 'xerc', 'xerd')
_MOBILE_KEYWORDS = ('secretion', 'mobiliza', 'moba', 'mobb', 'mobc', 'mobl',
                    'plasmid', 'relaxase', 'conjugation', 'type iv')
_RES_KEYWORDS = ('lactam', 'aminoglyco', 'fluoroquinolo', 'tetracyclin',
                 'macrolid', 'carbapenem')


def _env_gene_color(env_name):
    """Return the colour for an environment gene, chosen from its name."""
    # Checked from highest to lowest priority.  'hypothetical' is matched
    # case-sensitively against the raw name; the keyword groups are matched
    # against the lower-cased name.
    if 'hypothetical' in env_name:
        return 'grey'
    lowered = env_name.lower()
    for keywords, color in ((_RES_KEYWORDS, 'red'),
                            (_MOBILE_KEYWORDS, 'green'),
                            (_INT_KEYWORDS, 'yellow'),
                            (_TNP_KEYWORDS, 'violet')):
        if any(keyword in lowered for keyword in keywords):
            return color
    return 'orange'


def _gene_motifs(entry):
    """Build the motif list for one annotated gene and its environment genes."""
    # Sort such that the greater coordinate is the end.
    gene_start, gene_end = entry['start'], entry['stop']
    if gene_start > gene_end:
        gene_start, gene_end = gene_end, gene_start

    gene_motif = [gene_start, gene_end, '()', 2, 10, 'red', 'red',
                  'arial|10|black|' + str(entry['name'])]
    # Arrow showing the orientation of the annotated gene.
    if not str(entry['frame']).startswith('-'):
        ori_motif = [gene_end, gene_end + 10, '>', 2, 10, 'red', 'red', None]
    else:
        ori_motif = [gene_start - 10, gene_start, '<', 2, 10, 'red', 'red',
                     None]
    motifs = [gene_motif, ori_motif]

    gene_lo, gene_hi = int(gene_start), int(gene_end)
    for env in entry['env_genes'].values():
        color = _env_gene_color(env['env_name'])
        env_start, env_stop = env['env_start'], env['env_stop']

        # Number of integer positions shared by [gene_lo, gene_hi) and
        # [env_start, env_stop); zero if either interval is empty.
        overlap = max(0, min(gene_hi, int(env_stop))
                      - max(gene_lo, int(env_start)))
        overlap_perc = float(overlap) / int(entry['length']) * 100

        # An environment gene that covers more than 70% of the annotated
        # gene's length is taken to be the annotated gene itself.
        if overlap_perc <= 70.0:
            motifs.append([env_start, env_stop, '()', 2, 10, color, color,
                           'arial|10|black|' + str(env['env_name'])])
            # Additional motif to show gene orientation.
            if env['env_strand'] == '+':
                motifs.append([env_stop, env_stop + 10, '>', 2, 10,
                               color, color, None])
            else:
                motifs.append([env_start - 10, env_start, '<', 2, 10,
                               color, color, None])
    return motifs


def visualize_phylogeny(gene_dict, context_file):
    """Load the gene phylogeny and annotate it with genetic-context motifs.

    The tree is read from ``context_file[0]`` with '.fna' replaced by
    '.unique.tree'.  Leaf names are expected to look like
    ``<prefix>__<gene id>`` where the gene id is a key of ``gene_dict``.

    Each leaf receives the features ``organism``, ``assembly`` and
    ``pident`` (plus ``cluster_size`` when ``args.compressed`` is set), a
    motif face showing the annotated gene and its environment genes, a box
    with the percent identity and, in compressed mode, a box with the
    cluster size.  Leaves whose organism contains 'plasmid' get a
    DarkSeaGreen background, all others AntiqueWhite.

    Side effect: a ``'motifs'`` entry is stored in every value of
    ``gene_dict``.

    Returns the annotated tree.
    """
    # Read in tree and assign additional information to each leaf
    t = Tree(context_file[0].replace('.fna', '.unique.tree'))

    for node in t.traverse():
        if node.is_leaf():
            gene_id = node.name.split('__')[1]
            node.add_features(organism=gene_dict[gene_id]['organism'])
            node.add_features(assembly=gene_dict[gene_id]['assembly'])
            node.add_features(pident=gene_dict[gene_id]['perc_id'])
            if args.compressed == True:
                node.add_features(
                    cluster_size=gene_dict[gene_id]['cluster_size'])

    print('decorating the tree...')
    # The motifs depend only on gene_dict, so build them once instead of
    # once per leaf.
    for entry in gene_dict.values():
        entry['motifs'] = _gene_motifs(entry)

    # Set node style
    nst_plasmid = NodeStyle()
    nst_plasmid['bgcolor'] = 'DarkSeaGreen'
    nst_other = NodeStyle()
    # Set the background on the style object; rebinding the name to the
    # colour string would throw the NodeStyle away.
    nst_other['bgcolor'] = 'AntiqueWhite'

    # Now annotate the tree with the motifs
    for node in t.traverse():
        if not node.is_leaf():
            continue

        if 'plasmid' in node.organism:
            node.set_style(nst_plasmid)
        else:
            node.set_style(nst_other)

        seqFace = SeqMotifFace(
            seq=None,
            motifs=gene_dict[node.name.split('__')[1]]['motifs'],
            seq_format='blank',
            gap_format='line')
        node.add_face(seqFace, 1, 'aligned')

        # Create box showing gene percent id
        similarity = TextFace(node.pident, fsize=8)
        similarity.margin_top = 2
        similarity.margin_bottom = 2
        similarity.margin_left = 2
        similarity.margin_right = 2

        # Set box background color based on pident
        if node.pident <= 90.0:
            similarity.background.color = 'DarkGoldenrod'
        elif 90.0 < node.pident <= 95.0:
            similarity.background.color = 'ForestGreen'
        elif 95.0 <= node.pident:
            similarity.background.color = 'YellowGreen'

        node.add_face(similarity, column=2, position='aligned')

        # Create box showing cluster size
        if args.compressed == True:
            clust_box = TextFace(node.cluster_size, fsize=8)
            clust_box.margin_top = 2
            clust_box.margin_bottom = 2
            clust_box.margin_left = 2
            clust_box.margin_right = 2

            node.add_face(clust_box, column=3, position='aligned')

    # Return the annotated tree
    return t
Exemple #14
0
def layout(node):
    """Layout function: draw the shared sequence/motif face next to leaves.

    Internal nodes are left untouched.  ``seq`` and ``motifs`` are read
    from the enclosing scope.
    """
    if not node.is_leaf():
        return
    motif_face = SeqMotifFace(seq, motifs, scale_factor=1)
    add_face_to_node(motif_face, node, 0, position="aligned")
Exemple #15
0
def render_alignment(t, seqs):
    """Attach each sequence of ``seqs`` to the matching node of ``t``.

    Args:
        t: tree whose nodes are looked up by name with ``t & name``.
        seqs: mapping from FASTA header to sequence.  A leading '>' on the
            header is dropped before the lookup.

    Each sequence is drawn as an aligned SeqMotifFace in column 0.
    """
    for genome, seq in seqs.items():
        # Strip the FASTA '>' marker only when it is actually there, so
        # headers that were already cleaned do not lose their first letter.
        node_name = genome[1:] if genome.startswith('>') else genome
        seqFace = SeqMotifFace(seq, seq_format='seq')
        (t & node_name).add_face(seqFace, 0, "aligned")
Exemple #16
0
    def my_layout(node):
        """Layout function: name, sequence and background style of a node.

        Every node shows its name in aligned column 0.  Leaves additionally
        show their sequence in aligned column 1, framed by a black box when
        ``node.T`` or ``node.C`` equals ``"True"``.

        The node style is then chosen as follows.  With ``det_tool`` set,
        only the ``T`` / ``C`` flags matter.  Otherwise the node id
        (``int(node.ND)``) is checked against the estimated transition and
        convergent-model node lists of ``g_tree.conv_events``, combined
        with the ``T`` / ``C`` flags, before falling back to the ``Cz``
        styles and finally to the default style.
        """
        ## Sequence name
        name_face = TextFace(node.name, tight_text=True)
        add_face_to_node(name_face, node, column=0, position="aligned")

        ## Sequence motif
        if node.is_leaf():
            box_color = "black"
            leaf_motifs = []
            if node.T == "True" or node.C == "True":
                leaf_motifs.append([
                    0,
                    len(node.sequence), "[]", 10, 12, box_color, box_color,
                    None
                ])
            leaf_motifs.append(
                [0, len(node.sequence), "seq", 10, 10, None, None, None])

            seq_face = SeqMotifFace(seq=node.sequence,
                                    seqtype='aa',
                                    seq_format='seq',
                                    fgcolor=box_color,
                                    motifs=leaf_motifs)
            seq_face.overlaping_motif_opacity = 1
            add_face_to_node(seq_face, node, column=1, position='aligned')

        ## Nodes style
        if det_tool:
            if node.T == "True":
                node.set_style(nstyle_T_sim)
                add_t(node)
            elif node.C == "True":
                node.set_style(nstyle_C_sim)
            else:
                node.set_style(nstyle)
        else:
            # if not det_tool no background
            node_id = int(node.ND)
            events = g_tree.conv_events
            in_transitions = node_id in events.nodesWithTransitions_est
            if node.T == "True":
                node.set_style(
                    nstyle_T_sim_est if in_transitions else nstyle_T_sim)
                add_t(node)
            elif in_transitions:
                node.set_style(nstyle_T_est)
            else:
                in_convergent = (node_id
                                 in events.nodesWithConvergentModel_est)
                if node.C == "True":
                    node.set_style(
                        nstyle_C_sim_est if in_convergent else nstyle_C_sim)
                elif in_convergent:
                    node.set_style(nstyle_C_est)
                elif cz_nodes_s and node.Cz != "False":
                    node.set_style(cz_nodes_s[int(node.Cz)])
                    if node_id == int(cz_nodes[int(node.Cz)][0]):
                        add_t(node)
                else:
                    node.set_style(nstyle)

        if int(node.ND) == sim_root_ND and not det_tool:
            add_sim_root(node)
Exemple #17
0
def get_example_tree(File):
    adres=os.getcwd()
    file_out_supliment = open(adres+"/out_spliment/"+File, 'w')
    node_file = open(adres+"/node/"+File, 'w')
    # Create a random tree and add to each leaf a random set of motifs
    # from the original set
    #t = Tree("( (A, B, C, D, E, F, G), H, I);")
    #Считываем все домены
    domain_all_legend={}
    file_all_domen=os.listdir(adres+"/for_pic/1_tree_nwk/") 
    file_all_domen.remove(".DS_Store")
    file_all_domen.sort()
    i=0
    for file_domain in file_all_domen:
        file_open_domain = open(adres+"/for_pic/3_domain/"+file_domain, 'r')
        for line in file_open_domain:
            line_=line.split("\t")
            try:           
                if not (line_[2] in domain_all_legend):
                    domain_all_legend.setdefault(line_[2],dic_domain_pic_pic[i])
                    i+=1
                if i>len(dic_domain_pic_pic):
                    i=0
            except:
                a=0


    mem=""
    file_open = open(adres+"/for_pic/1_tree_nwk/"+File, 'r')
    for line in file_open:
        mem=mem+line
    tt = Tree(mem, format=0)   


    style = NodeStyle()   
    style["fgcolor"] = "#000000"
    style["size"] = 0
    style["vt_line_color"] = "#000000"
    style["hz_line_color"] = "#000000"
    style["vt_line_width"] = 4
    style["hz_line_width"] = 4
    style["vt_line_type"] = 8 # 0 solid, 1 dashed, 2 dotted
    style["hz_line_type"] = 8
    for node in tt.traverse("levelorder"):
        node.img_style = style
        if (len(node.name))>1:
            node_file.write(node.name+"\n")
        children1=node.children
        for element in children1:
            element.img_style = style
            
                
    for node in tt.traverse("preorder"):
        node.img_style = style
        children1=node.children
        for element in children1:
        	element.img_style = style
    node_file.close

    #вывести дерево с цветами
    #print (tt.get_ascii(attributes=["name", "color"], show_internal=False))
    #поиск предка
    ancestor1=""
    i=0
    for element in ancestor_grop:
        if i==0:
            for node in tt.traverse("postorder"):
                if i==0:
                    node_name=str(node.name)
                    if (node_name.startswith(element)) and  not((node_name.startswith("PPE"))):
                        ancestor1=str(node.name)
                        i=1
                        break
                else:
                    break
        else:
            break
    if not (ancestor1==""):
        tt.set_outgroup(ancestor1)
        #tt.render(adres+"/out/"+File[:-3]+"_2.png", tree_style=circular_style)
        print(str(ancestor1)+" - предок")
        file_out_supliment.write(str(ancestor1)+"\t"+" - предполагаемый корень"+"\n")
    else:
        print("Не нашел предка")



    file_out_supliment.write("\n\n\n Выявленные клады\n")
    #добавляем цвета к кладам
    for leaf in tt:
        i=0
        node_name=str(leaf.name)
        for clad in all_clad:
            collor=collor_list[i]
            i+=1
            for element in clad:
                if (node_name.startswith(element)):
                    leaf.add_features(color=collor)
                    #print(leaf)
    #print(tt)
    #забираем монофилитические цвета
    #print (tt.get_ascii(attributes=["name", "color"], show_internal=False))
    ii=-1
    for clad in all_clad:
        ii+=1
        collor=collor_list[ii]
        for monophyletic_tree in tt.get_monophyletic(values=[collor], target_attr="color"):
            i=[]
            name_node_mono_color=[]
            for leaf in monophyletic_tree:
                i.append(leaf)
                name_node_mono_color.append(leaf.name)
            if len(i)>1:
                n1 = tt.get_common_ancestor(i)
                nst1 = NodeStyle()
                nst1["bgcolor"] = collor
                nst1["fgcolor"] = "#000000"
                nst1["size"] = 0
                nst1["vt_line_color"] = "#000000"
                nst1["hz_line_color"] = "#000000"
                nst1["vt_line_width"] = 4
                nst1["hz_line_width"] = 4
                nst1["vt_line_type"] = 8 # 0 solid, 1 dashed, 2 dotted
                nst1["hz_line_type"] = 8
                n1.set_style(nst1)

                for element in name_node_mono_color:
                    file_out_supliment.write(str(element)+"\t"+" - "+collor+"\n")
                file_out_supliment.write("\n")

   
    file_out_supliment.write("\n\n\n Легенда доменного состава\n")
    #добавляем разметку по доменам
    dic_seq={}
    dic_domain={}
    dic_domain_pic={}
    i=0
    list_legend_domain3=[]
    for node in tt.traverse("postorder"):
        #длины белков
        fasta_sequences=SeqIO.parse(open(adres+"/for_pic/2_MSA/"+File), "fasta")
        for element in fasta_sequences:
            if str(element.id)==str(node.name):
                dic_seq.setdefault(str(node.name),str(element.seq))
        #доменный состав
        a=[]
        file_domain = open(adres+"/for_pic/3_domain/"+File, 'r')
        for line in file_domain:
            line_=line.split("\t")
            if line_[0]==str(node.name):
                if not (line_[2]  in list_legend_domain3):
                    list_legend_domain3.append(line_[2])



                if not (line_[2] in dic_domain_pic):
                    dic_domain_pic.setdefault(line_[2],dic_domain_pic_pic[i])
                    i+=1
                    #print(dic_domain_pic[line_[2]])
                    #print(i)

                    a1=[int(line_[3]),int(line_[4]), "()", None, 15, "black", domain_all_legend[line_[2]], "arial|9|black|"+line_[2]]
                    a.append(a1)
                    dic_domain.setdefault(str(node.name),a)
                    file_out_supliment.write(line_[2]+"\t"+domain_all_legend[line_[2]]+"\n")
                else:
                    a1=[int(line_[3]),int(line_[4]), "()", None, 15, "black", domain_all_legend[line_[2]], "arial|9|black|"+line_[2]]
                    a.append(a1)
                    dic_domain.setdefault(str(node.name),a)

    for element in dic_domain:
        #print(str(element)+" "+ str(dic_domain[element]))
        try:
            seqFace = SeqMotifFace(seq=dic_seq[element], motifs=dic_domain[element], seq_format="line")
            (tt & element).add_face(seqFace, 0, "aligned")
        except:
            seqFace = SeqMotifFace(seq=dic_seq[element],  seq_format="line", gapcolor="red")
            (tt & element).add_face(seqFace, 0, "aligned")
            print("except")

    #Рисуем легенду
    circular_style = TreeStyle()
    circular_style.show_leaf_name = False
    circular_style.show_branch_length = True
    circular_style.show_branch_support = True
    circular_style.scale = 75
    circular_style.tree_width = 50
    file_domain.close
    file_domain = open(adres+"/for_pic/3_domain/"+File, 'r')
    list_legend_domain={}
    list_legend_domain2=[]
    #считали список доменов
    i=0
    for line in file_domain:
        line_=line.split("\t")
        try:
            if  not(line_[2] in list_legend_domain2):
                #print(line_[2])
                list_legend_domain2.append(line_[2])
                list_legend_domain.setdefault("a"+str(i),line_[2])
                i+=1
        except:
            print("не понял что это за домен")
    i=0
    #считываем легенду доменов 
    file_domain_legend2={}
    file_domain_legend = open(adres+"/domain_legend.txt", 'r')
    for line in file_domain_legend:
        line_=line.split("\t")
        aaa=line_[1].replace(" ","_")
        aaa=aaa.replace("(","_")
        aaa=aaa.replace(")","_")
        aaa=aaa.replace(",","_")
        aaa=aaa.replace(":","_")
        aaa=aaa.replace(".","_")
        file_domain_legend2.setdefault(line_[0],aaa.replace("\n",""))
    #N = AttrFace("name", fsize=12)
    #faces.add_face_to_node(N, node, 1, position="branch-right")


    #рисуем домены
    ww=""
    for element in file_domain_legend2:
        ww=ww+","+file_domain_legend2[element]
    ww="("+ww[1:]+");"
    tree_domen_all=Tree(ww)
    for element in file_domain_legend2:
        try:
            element2=domain_all_legend[element]
            a1=[10,90, "()", None, 15, "black", domain_all_legend[element], "arial|9|black|"+element]
            i+=1
            a=[]
            a.append(a1)
            seqFace = SeqMotifFace(seq=seq_seq, motifs=a, seq_format="line")
            #node_node="a"+str(i)
            node_node=file_domain_legend2[element]
            try:
                (tree_domen_all & node_node).add_face(seqFace, 0, "aligned")
            except:
                q=1
                print("не нашел узел")
        except:
                q=1
    circular_style.layout_fn = layout
    tree_domen_all.render(adres+"/out_legend_all.png", tree_style=circular_style)


    file_domain_out = open(adres+"/123123123.txt", 'w')
    w=""
    for element in list_legend_domain3:
        w=w+","+file_domain_legend2[element]
    w="("+w[1:]+");"
    tree_domen=Tree(w)

    for element in list_legend_domain3:
        file_domain_out.write(element+"\n")
        a1=[10,90, "()", None, 15, "black", domain_all_legend[element], "arial|9|black|"+element]
        i+=1
        a=[]
        a.append(a1)
        try:
            seqFace = SeqMotifFace(seq=seq_seq, motifs=a, seq_format="line")
            #node_node="a"+str(i)
            node_node=file_domain_legend2[element]
            (tree_domen & node_node).add_face(seqFace, 0, "aligned")
        except:
            #print("Закончились узлы легенды")
            k=0
    circular_style.layout_fn = layout
    tree_domen.render(adres+"/out_legend/"+File[:-4]+".png", tree_style=circular_style)


    #удаленние части узлов 
    for node in tt.traverse("postorder"):
        try:                
            seqFace = SeqMotifFace(seq=dic_seq[str(node.name)], motifs=dic_domain[str(node.name)], seq_format="line")
            (tt & node.name).add_face(seqFace, 0, "aligned")
            a=0
            if len(node.name)<2:
                a=1
            for element_save in save_node:
                if (node.name).startswith(element_save):
                    a=1
            for element_dell in dell_node:
                if (node.name).startswith(element_dell):
                    a=0
            if a==0:
                node.delete()
        except:
            if len(node.name)>0:
                seqFace = SeqMotifFace(seq=dic_seq[str(node.name)],  seq_format="line", gapcolor="red")
                (tt & node.name).add_face(seqFace, 0, "aligned")
                node.delete()
                d0=0
    #удаленние части узлов ЗАВЕРШЕНО
    #особые точки
    node_color=[]
    file_node_color = open(adres+"/for_pic/4_color_node/out_list_gene2.txt", 'r')
    for line in file_node_color:
        node_color.append(line.replace("\n",""))
    for node in tt.traverse("postorder"):
        if node.name in node_color:
            style = NodeStyle()   
            style["fgcolor"] = "Red"
            style["size"] = 9
            style["vt_line_color"] = "#000000"
            style["hz_line_color"] = "#000000"
            style["vt_line_width"] = 4
            style["hz_line_width"] = 4
            style["vt_line_type"] = 8 # 0 solid, 1 dashed, 2 dotted
            style["hz_line_type"] = 8
            node.set_style(style)
    file_out_supliment.close
    #забираем монофилитические цвета
    #print (tt.get_ascii(attributes=["name", "color"], show_internal=False))
    ii=-1
    for clad in all_clad:
        ii+=1
        collor=collor_list[ii]
        for monophyletic_tree in tt.get_monophyletic(values=[collor], target_attr="color"):
            i=[]
            name_node_mono_color=[]
            for leaf in monophyletic_tree:
                i.append(leaf)
                name_node_mono_color.append(leaf.name)
            if len(i)>1:
                n1 = tt.get_common_ancestor(i)
                nst1 = NodeStyle()
                nst1["bgcolor"] = collor
                nst1["fgcolor"] = "#000000"
                nst1["size"] = 0
                nst1["vt_line_color"] = "#000000"
                nst1["hz_line_color"] = "#000000"
                nst1["vt_line_width"] = 4
                nst1["hz_line_width"] = 4
                nst1["vt_line_type"] = 8 # 0 solid, 1 dashed, 2 dotted
                nst1["hz_line_type"] = 8
                n1.set_style(nst1)

                for element in name_node_mono_color:
                    file_out_supliment.write(str(element)+"\t"+" - "+collor+"\n")
                file_out_supliment.write("\n")

    return tt
Exemple #18
0
def layout(node):
    """Decorate one tree node with the faces used when rendering the tree.

    Internal nodes get their support value drawn on top of the branch.
    Leaves, whose names are expected to look like ``"<taxid>.<seq_name>"``,
    get (in this order):

    * column 2: the predicted name looked up in ``predict_name`` (``'--'``
      when the leaf has no entry),
    * column 1: the sequence name,
    * column 3 (aligned): a ``SeqMotifFace`` built from ``node.sequence``,
    * column 0: the species name, coloured according to the first matching
      clade in the leaf's NCBI lineage.

    Side effect: ``node.name`` is overwritten with the species name returned
    by ``ncbi.get_taxid_translator``.
    NOTE(review): because the name is overwritten, running this layout a
    second time on the same leaf would no longer see ``"<taxid>.<seq_name>"``
    - confirm the layout is only applied once per node.

    :param node: tree node being laid out.
    :raises IndexError: if a leaf name does not contain a ``'.'``.
    """
    node.img_style["size"] = 0

    if not node.is_leaf():
        boostFace = faces.TextFace(node.support, fgcolor="grey", fsize=34)
        add_face_to_node(boostFace, node, column=0, position="branch-top")
        return

    name_parts = node.name.split('.')
    taxid = name_parts[0]
    seq_name = name_parts[1]

    # Predicted name (eggnog 4.5). Only a missing entry falls back to the
    # '--' placeholder; any other error is allowed to surface instead of
    # being silently swallowed.
    try:
        pred_name = predict_name[node.name]
    except KeyError:
        predNameFace = faces.TextFace('--', fgcolor="salmon", fsize=34)
    else:
        predNameFace = faces.TextFace(pred_name, fgcolor="salmon", fsize=34)
        predNameFace.margin_right = 20
        predNameFace.margin_left = 20
    add_face_to_node(predNameFace, node, column=2, position="branch-right")

    seqNameFace = faces.TextFace(seq_name, fgcolor="grey", fsize=34)
    add_face_to_node(seqNameFace, node, column=1, position="branch-right")

    # Replace the leaf name by the species name of its taxid.
    sp_name = ncbi.get_taxid_translator([taxid])
    node.name = sp_name[int(taxid)]

    seqFace = SeqMotifFace(node.sequence, gap_format="blank")
    add_face_to_node(seqFace, node, column=3, position="aligned")

    lin = ncbi.get_lineage(taxid)

    # Ordered (clade taxid, colour) pairs; the first clade found in the
    # lineage wins. Metazoa (33208) colouring is currently disabled.
    clade_colors = (
        (6073, "red"),      # cnidaria
        (10197, "orange"),  # ctenophora
        (33213, "blue"),    # bilateria
        (6040, "green"),    # porifera
    )
    name_color = "black"
    for clade_taxid, color in clade_colors:
        if clade_taxid in lin:
            name_color = color
            break

    N = AttrFace("name", fsize=34, fgcolor=name_color)
    N.margin_left = 20
    N.margin_right = 20
    faces.add_face_to_node(N, node, column=0)
Exemple #19
0
            seq = seq.translate(None, string.ascii_lowercase) # keep only CDS
            iesmotif = [[1, len(seq), "line", 2, 5, None, None, None]]
            for homIES in gfhomIES[geneFamily]:
                (begin, end, ies, iesId, beginMSA, endMSA) = charMat[(geneFamily, homIES, geneId)]
                if ies == '?':
                    if beginMSA == 'NA':
                        iesmotif.append([int(begin), int(end),"()", 10, 10, "red", "black", "arial|8|black|?"])
                    else:
                        iesmotif.append([int(begin), int(end),"()", 10, 10, "red", "black", "arial|8|black|?"])
                elif ies == '1':
                    iesmotif.append([int(beginMSA), int(endMSA),"[]", 10, 10, "black", "red", "arial|8|black|" + iesId])
                elif ies == '0':
                    iesmotif.append([int(begin), int(end), "[]", 10, 10, "silver", "silver", None])
                else:
                    quit(1)
            seqFace = SeqMotifFace(seq = seq, motifs = iesmotif, gap_format = "blank", seq_format = "line")
            leaf.add_face(seqFace, 0, "aligned")
        drawTree(outputFile)


    """
t.show()

# Draw trees.

pp = pprint.PrettyPrinter(indent=8)

# SPECIATION TREE #
###################
wgd1 = Tree('((P_caudatum:1[&&NHX:Ev=S:S=3:ND=3],(((P_sexaurelia:1[&&NHX:Ev=S:S=7:ND=7],P_sonneborni:1[&&NHX:Ev=S:S=8:ND=8]):1[&&NHX:Ev=S:S=5:ND=5],(((P_pentaurelia:1[&&NHX:Ev=S:S=13:ND=13],P_primaurelia:1[&&NHX:Ev=S:S=14:ND=14]):1[&&NHX:Ev=S:S=11:ND=11],(P_biaurelia:1[&&NHX:Ev=S:S=15:ND=15],(P_octaurelia:1[&&NHX:Ev=S:S=17:ND=17],P_tetraurelia:1[&&NHX:Ev=S:S=18:ND=18]):1[&&NHX:Ev=S:S=16:ND=16]):1[&&NHX:Ev=S:S=12:ND=12]):1[&&NHX:Ev=S:S=9:ND=9],P_tredecaurelia:1[&&NHX:Ev=S:S=10:ND=10]):1[&&NHX:Ev=S:S=6:ND=6]):1[&&NHX:Ev=S:S=4:ND=4],((P_sexaurelia:1[&&NHX:Ev=S:S=7:ND=7],P_sonneborni:1[&&NHX:Ev=S:S=8:ND=8]):1[&&NHX:Ev=S:S=5:ND=5],(((P_pentaurelia:1[&&NHX:Ev=S:S=13:ND=13],P_primaurelia:1[&&NHX:Ev=S:S=14:ND=14]):1[&&NHX:Ev=S:S=11:ND=11],(P_biaurelia:1[&&NHX:Ev=S:S=15:ND=15],(P_octaurelia:1[&&NHX:Ev=S:S=17:ND=17],P_tetraurelia:1[&&NHX:Ev=S:S=18:ND=18]):1[&&NHX:Ev=S:S=16:ND=16]):1[&&NHX:Ev=S:S=12:ND=12]):1[&&NHX:Ev=S:S=9:ND=9],P_tredecaurelia:1[&&NHX:Ev=S:S=10:ND=10]):1[&&NHX:Ev=S:S=6:ND=6]):1[&&NHX:Ev=S:S=4:ND=4]):1[&&NHX:Ev=D:S=4:ND=4]):1[&&NHX:Ev=S:S=1:ND=1],T_thermophila:1[&&NHX:Ev=S:S=2:ND=2])[&&NHX:Ev=S:S=0:ND=0];')
Exemple #20
0
def combine_features(data, dsizes, tree, taxid2sp, prot2taxid, taxa_to_merge):
    """Create an ete3 tree with domain features from jackhmmer.

    The function works in four passes:

    1. collect, per species and protein, the list of motifs to draw and the
       proteins sharing each domain architecture;
    2. merge the taxonomic groups listed in ``taxa_to_merge``;
    3. rename the tree nodes from taxid to species name;
    4. hang below every species leaf one child per domain architecture,
       decorated with a ``SeqMotifFace``.

    The inputs are modified in place: the ``"QPos"`` entry is deleted from
    every ``data[tremolo_dom]``, ``taxid2sp`` is rewritten/pruned, and
    ``tree`` is edited.

    :param data: dict keyed by domain id; each value holds a ``"QPos"``
        ``(start, stop)`` pair plus one entry per protein. A protein entry
        may have a ``"Tpos"`` list and has a ``"Hit"`` dict whose values
        carry a ``"Tali"`` ``(start, stop)`` pair.
    :param dsizes: dict mapping protein id to its length (used to size the
        drawn sequence).
    :param tree: tree whose nodes are named by taxid.
    :param taxid2sp: dict mapping taxid to species name.
    :param prot2taxid: dict mapping protein id to taxid.
    :param taxa_to_merge: iterable of ``"taxid,taxa"`` strings.
    :return: the same ``tree`` object, after modification.
    """
    # motif example from http://etetoolkit.org/docs/latest/tutorial/tutorial_drawing.html#phylogenetic-trees-and-sequence-domains
    #simple_motifs = [
    ## seq.start, seq.end, shape, width, height, fgcolor, bgcolor
    #[10, 60, "[]", None, 10, "black", "rgradient:blue", "arial|8|white|long text clipped long text clipped"],
    #[120, 150, "o", None, 10, "blue", "pink", None],
    #[200, 300, "()", None, 10, "blue", "red", "arial|8|white|hello"],
    #]

    # add domain matches
    # and count the number of sequences by species and domain architecture
    #dsize = dict()
    # motifs: species -> protein -> list of motif descriptions
    motifs = dict()
    # dcnt_sp: species -> domain architecture string -> list of proteins
    dcnt_sp = dict()
    for tremolo_dom in data:
        # NOTE(review): the query positions are unpacked but not used below.
        tremolo_dom_start, tremolo_dom_stop = data[tremolo_dom]["QPos"]
        # Remove "QPos" so that the remaining keys are only protein ids.
        del data[tremolo_dom]["QPos"]
        for prot in data[tremolo_dom]:
            taxid = prot2taxid[prot]
            sp = taxid2sp[taxid]
            # Strip the characters "(", ")", "," and ";" from the species
            # name and store the cleaned name back.
            sp = sp.replace("(", "").replace(")",
                                             "").replace(",",
                                                         "").replace(";", "")
            taxid2sp[taxid] = sp

            # Work on a copy of the annotated domains ("Tpos" may be absent).
            full_domains = data[tremolo_dom][prot].get("Tpos", list())[:]

            target_domains = filter_domain_arch(full_domains)

            # The domain architecture is the ";"-joined list of domain names
            # in sorted (start, stop, name) order.
            dom_arch = list()
            ordered_domains = sorted(target_domains)
            for start, stop, dom in ordered_domains:
                dom_arch.append(dom)
            dom_arch = ";".join(dom_arch)

            if sp not in dcnt_sp:
                dcnt_sp[sp] = dict()
            if dom_arch not in dcnt_sp[sp]:
                dcnt_sp[sp][dom_arch] = list()
            dcnt_sp[sp][dom_arch].append(prot)

            # add domains (rectangles; start is shifted by one)
            for start, stop, dom in ordered_domains:
                color = get_domain_color(dom)
                motifs.setdefault(sp, dict())\
                        .setdefault(prot, list())\
                        .append([start+1, stop, "[]", None, 10, "black", color, "arial|1|black|{}".format(dom)])
                #motifs[sp][prot].sort()
            # add the hits of the current domain (red ellipses)
            for hitnb in data[tremolo_dom][prot]["Hit"]:
                start, stop = data[tremolo_dom][prot]["Hit"][hitnb]["Tali"]
                motifs.setdefault(sp, dict())\
                        .setdefault(prot, list())\
                        .append([start, stop, "o", None, 10, "black", "red", "arial|1|black|HCAdom {}-{}".format(tremolo_dom, hitnb)])

    #print(motifs)

    # merge taxonomic groups
    for taxid_taxa in taxa_to_merge:
        taxid, taxa = taxid_taxa.split(",")
        lnode = tree.search_nodes(name=taxid)
        if len(lnode) > 0:
            taxnode = lnode[0]
            # Remember the direct children so they can be detached once
            # their content has been merged into the group.
            children = [child.name for child in taxnode.children]
            leaves = list()
            for child in taxnode.traverse():
                if child.is_leaf():
                    leaves.append(child)
                    #print(taxid_taxa, child.name)
            taxid2sp[taxid] = taxa
            dcnt_sp[taxa] = dict()
            motifs[taxa] = dict()
            for leafnode in leaves:
                nodeid = leafnode.name
                nodesp = taxid2sp[nodeid]
                #print(taxid_taxa, nodeid, nodesp)
                # merge protein list
                # NOTE(review): raises KeyError if the leaf species has no
                # entry in dcnt_sp/motifs - confirm every leaf has proteins.
                for dom_arch in dcnt_sp[nodesp]:
                    for prot in dcnt_sp[nodesp][dom_arch]:
                        dcnt_sp[taxa].setdefault(dom_arch, list()).append(prot)

                # merge motif
                for prot in motifs[nodesp]:
                    for m in motifs[nodesp][prot]:
                        motifs[taxa].setdefault(prot, list()).append(m)
                # delete obsolete species
                del dcnt_sp[nodesp]
                del motifs[nodesp]
                del taxid2sp[nodeid]
            #for dom_arch in dcnt_sp[taxa]:
            #for prot in dcnt_sp[taxa][dom_arch]:
            #print(prot, dom_arch)
            # Detach the merged children; the group node stays in the tree.
            for child in children:
                node = tree.search_nodes(name=child)[0]
                node.detach()
            #print(taxnode.get_ascii(show_internal=True))
        else:
            print("Unable to find taxid {} in tree".format(taxid))
            print(lnode)

    # Rename the tree nodes from taxid to species (or merged group) name.
    for taxid in taxid2sp:
        #print(taxid)
        node = tree.search_nodes(name=taxid)
        if node != []:
            node[0].name = taxid2sp[taxid]
        else:
            print("Unable to find node for taxid {}".format(taxid))

    # expand taxonomic tree by the number of sequences in each taxa
    for node in tree:
        if node.is_leaf():
            if node.name in dcnt_sp:
                node_sp = node.name
                proteins = list()
                # features: displayed name -> (protein id, domain architecture)
                features = dict()
                for dom_arch in dcnt_sp[node_sp]:
                    sizes_and_proteins = list()
                    for prot in dcnt_sp[node_sp][dom_arch]:
                        # Displayed name is "<species> | <second '|' field of
                        # the protein id>", with a "[+N]" suffix when N other
                        # proteins share the same architecture.
                        new_name = node_sp + " | " + prot.split("|")[1]
                        if len(dcnt_sp[node_sp][dom_arch]) > 1:
                            new_name += " [+{}]".format(
                                len(dcnt_sp[node_sp][dom_arch]) - 1)
                        sizes_and_proteins.append(
                            (dsizes[prot], new_name, dom_arch, prot))
                    # Keep only the largest protein of each architecture.
                    sizes_and_proteins.sort(reverse=True)
                    sizes, names, dom_archs, prots = zip(*sizes_and_proteins)
                    # ":" is replaced by a space before the name is put into
                    # the newick string built below.
                    proteins.append(names[0].replace(":", " "))
                    features[names[0].replace(":",
                                              " ")] = (prots[0], dom_archs[0])
                subtree = ete3.PhyloTree("({});".format(", ".join(proteins)))
                node.add_child(subtree)
                for new_node in subtree:
                    prot, dom_arch = features[new_node.name]
                    # Placeholder sequence of the protein's length; it is
                    # drawn as a line with the motifs on top.
                    seq = "G" * dsizes[prot]
                    m = motifs[node_sp][prot]
                    seqFace = SeqMotifFace(seq, seq_format="line", motifs=m)
                    new_node.add_face(seqFace, 0, "aligned")
    #for dom in domain_color:
    #print(dom, domain_color[dom])
    return tree
Exemple #21
0
				#motif = [ LTRRTs[el][feat][0]//100, LTRRTs[el][feat][1]//100, "[]", None, 4, domainColor, domainColor, None ]
			Motifs.append(motif)

	#box_motifs = [
	#	# seq.start, seq.end, shape, width, height, fgcolor, bgcolor
	#	[0,  5, "[]", None, 10, "black", "rgradient:blue", "arial|8|white|10"],
	#	[10, 25, "[]", None, 10, "black", "rgradient:ref", "arial|8|white|10"],
	#	[30, 45, "[]", None, 10, "black", "rgradient:orange", "arial|8|white|20"],
	#	[50, 65, "[]", None, 10, "black", "rgradient:pink", "arial|8|white|20"],
	#	[70, 85, "[]", None, 10, "black", "rgradient:green", "arial|8|white|20"],
	#	[90, 105, "[]", None, 10, "black", "rgradient:brown", "arial|8|white|20"],
	#	[110, 125, "[]", None, 10, "black", "rgradient:yellow", "arial|8|white|20"],
	#]

	#seqFace = SeqMotifFace(seq=None, motifs=box_motifs, gap_format="line")
	seqFace = SeqMotifFace(seq=None, motifs=Motifs, gap_format="line")
	(t & node_name).add_face(seqFace, 0, position='aligned')




# Root the tree: either on the user-requested node, or automatically on the
# element with the highest corrected divergence.
if REROOT:
	t.set_outgroup( t & reroot_at )
elif greatest_div['element'] is not None:
	# 'element' is None when divergences are not obtained for a given
	# cluster/superfamily (e.g. DIRS); the tree is then left as it is.
	#
	# Remove the literal 'LTR_retrotransposon' prefix. str.lstrip() must not
	# be used for this: it strips any leading run of the *characters* in its
	# argument and can therefore also eat the beginning of the identifier.
	outgroup_name = greatest_div['element']
	ltr_prefix = 'LTR_retrotransposon'
	if outgroup_name.startswith(ltr_prefix):
		outgroup_name = outgroup_name[len(ltr_prefix):]
	t.set_outgroup( t & outgroup_name )

ts = TreeStyle()
Exemple #22
0
    def renderingTreeImage(self):
        """Draw the rooted tree with intron and domain tracks and save it.

        Steps performed:

        1. read the FASTA records from ``Input/ProteinInput`` and append
           their ids and sequences to ``self.input_protein_accession_number``
           and ``self.input_protein_sequence``;
        2. load the Newick tree from ``execs/tmp/rooted_tree.nwk``;
        3. derive an accession (``NP_...``/``XP_...``) from every leaf name;
        4. add to the leaves one aligned ``SeqMotifFace`` with intron
           markers and one with protein domains;
        5. build a legend with one entry per domain colour;
        6. show the tree and render it to
           ``CompletedTrees/<self.run_name>.pdf``.

        Reads ``self.exon_lengths``, ``self.msa_aligned_protein`` and
        ``self.run_name``; fills ``self.accession_dict_with_introns``.
        This is Python 2 code (print statements, ``dict.iteritems``).
        """

        path = os.path.join('Input', 'ProteinInput')

        seq_records = SeqIO.parse(path, 'fasta')

        for record in seq_records:
            self.input_protein_accession_number.append(record.id)
            self.input_protein_sequence.append(record.seq)

        with open(os.path.join('execs', 'tmp',
                               "rooted_tree.nwk")) as nwk_tree_handle:
            nwk_tree = nwk_tree_handle.read()
            t = Tree(nwk_tree)
            print(t)
            print '\n'

        # Tree style: title, leaf names and branch support are displayed.
        ts = TreeStyle()
        ts.title.add_face(TextFace(
            'PhyloEpsilon - Protein Ortholog Finding Tool by Bryan Dighera',
            fsize=16,
        ),
                          column=0)
        ts.allow_face_overlap = True
        ts.show_leaf_name = True
        ts.show_branch_support = True

        # Rebuild "<NP|XP>_<digits>" accessions from the leaf names, in leaf
        # order.
        leaf_names = []
        for leaf in t.get_leaf_names():

            np_xp_pattern = re.compile('N[P]|X[P]')
            # NOTE(review): the '.' is unescaped, so it matches any
            # character, not only a literal dot - confirm this is intended.
            digits_pattern = re.compile('\d+.\d')

            np_xp_search_obj = re.search(np_xp_pattern, leaf)
            digits_search_obj = re.search(digits_pattern, leaf)

            # NOTE(review): a leaf name matching neither pattern makes the
            # search return None and .group() raise AttributeError.
            np_xp_name = np_xp_search_obj.group()
            digits_name = digits_search_obj.group()
            final_accession = str(np_xp_name + '_' + digits_name)
            print final_accession
            leaf_names.append(final_accession)

        #print 'leaf names: ' + '%s' % leaf_names

        P = Protein()
        protein_domains, domain_colors, unrepeated_domains = P.Domains()
        print domain_colors

        # Map each input accession number to its exon data; both lists are
        # indexed by position.
        for i in range(len(leaf_names)):
            self.accession_dict_with_introns[
                self.input_protein_accession_number[i]] = self.exon_lengths[i]

        # i now counts the leaves that received an intron track.
        i = 0

        print 'protein accession number: ' + '%s' % self.input_protein_accession_number
        print 'Accession dict: ' + '%s' % self.accession_dict_with_introns + '\n'

        # Iterate over the accessions in leaf order to retrieve the introns
        # and build the intron track of each leaf.
        for accession_number in leaf_names:
            # Start with an empty white motif at position 0.
            intron_motifs = [[0, 0, "[]", None, 12, "White", "White", None]]

            # Only accessions present in the dictionary get an intron track.
            if accession_number in self.accession_dict_with_introns:
                print accession_number, self.accession_dict_with_introns[
                    accession_number]
                exon_list = self.accession_dict_with_introns[accession_number]
                print exon_list

                for exon_length in exon_list:
                    if str(exon_length) != 'NONE':

                        # Each location is a "<start>-<end>" string.
                        for location in exon_length:
                            split_exon_location = str(location).split('-')
                            # The end coordinate divided by 3 gives the
                            # position on the protein sequence.
                            protein_seq_exon_location = int(
                                math.floor(int(split_exon_location[1]) / 3))

                            # The phase is (end - start) modulo 3 and selects
                            # the marker colour: 0 grey, 1 black, 2 blue.
                            intron_phase = (int(split_exon_location[1]) -
                                            int(split_exon_location[0])) % 3

                            if intron_phase == 0:
                                intron_motifs.append([
                                    protein_seq_exon_location - 2,
                                    protein_seq_exon_location + 2, "[]", None,
                                    5, "Grey", "Grey", None
                                ])
                            elif intron_phase == 1:
                                intron_motifs.append([
                                    protein_seq_exon_location - 2,
                                    protein_seq_exon_location + 2, "[]", None,
                                    5, "Black", "Black", None
                                ])

                            elif intron_phase == 2:
                                intron_motifs.append([
                                    protein_seq_exon_location - 2,
                                    protein_seq_exon_location + 2, "[]", None,
                                    5, "Blue", "Blue", None
                                ])
                    else:
                        print 'NO INTRONS FOUND FOR RECORD'

                print str(intron_motifs) + '\n'
                # Leading and trailing gap characters are removed from the
                # aligned sequence.
                msa_protein_seq = self.msa_aligned_protein[i].strip('-')

                # Add the intron motifs to the i-th leaf of the tree.
                # NOTE(review): i only advances for accessions found in the
                # dictionary, so a missing accession would shift every later
                # track to the wrong leaf/sequence - confirm all are present.
                seqFace = SeqMotifFace(str(msa_protein_seq),
                                       gapcolor="black",
                                       seq_format='line',
                                       scale_factor=1,
                                       motifs=intron_motifs)
                (t & t.get_leaf_names()[i]).add_face(seqFace, 0, "aligned")

                i += 1

        # n is the index of the current leaf for the domain track.
        n = 0

        # Iterates through the accession numbers that correspond to the order of the leaves of the phylogenetic tree and compare to domain dict values
        # TODO: Add the legend and possibly give a number to each of the domains so they can be easily identified in the legend
        for accession_number in leaf_names:

            domain_motifs = [[0, 0, "[]", None, 12, "White", "White", None]]

            # protein_domains is iterated as a sequence of dicts; the first
            # key/value of a matching dict is used as accession/domain list.
            for domain in protein_domains:

                if accession_number in domain:

                    print 'leaf accession #: ' + '%s' % accession_number
                    print 'domains accession: ' + '%s' % domain.keys()[0]
                    print domain.values()[0]

                    # each_domain[0] is the domain name and each_domain[1]
                    # a "<start>:<end>" location string.
                    for each_domain in domain.values()[0]:

                        try:

                            domain_motif_color = domain_colors[each_domain[0]]
                            start_domain_loc = int(
                                each_domain[1].split(':')[0])

                            end_domain_loc = int(each_domain[1].split(':')[1])
                            # NOTE(review): domain_name is not used; the
                            # motif label below has an empty text.
                            domain_name = str(each_domain[0])

                            domain_motifs.append([
                                start_domain_loc, end_domain_loc, "<>", 20, 20,
                                'Black', domain_motif_color, 'arial|8|black|'
                            ])
                        except ValueError:
                            # The coordinates are not plain integers: take
                            # the first run of digits found in each part.

                            domain_motif_color = domain_colors[each_domain[0]]

                            start_pattern = re.compile('(?<!=\W)\d+')
                            start_pattern_search = re.search(
                                start_pattern,
                                str(each_domain[1].split(':')[0]))
                            start_domain_loc = int(
                                start_pattern_search.group())

                            end_pattern = re.compile('(?<!=\W)\d+')
                            end_pattern_search = re.search(
                                end_pattern, str(each_domain[1].split(':')[1]))
                            end_domain_loc = int(end_pattern_search.group())

                            domain_motifs.append([
                                start_domain_loc, end_domain_loc, "<>", 20, 20,
                                'Black', domain_motif_color, 'arial|8|black|'
                            ])

            print domain_motifs

            msa_protein_seq = self.msa_aligned_protein[n].strip('-')
            print msa_protein_seq
            print len(msa_protein_seq)
            print '*' * 100

            # Add the domain track to the n-th leaf.
            domainFace = SeqMotifFace(str(msa_protein_seq),
                                      gapcolor="black",
                                      seq_format='line',
                                      scale_factor=1,
                                      motifs=domain_motifs)
            (t & t.get_leaf_names()[n]).add_face(domainFace, 0, "aligned")

            n += 1

        # Create the legend: the domain name in column 0 and a box filled
        # with the domain colour in column 1.

        print protein_domains
        for single_unrepeat, colors in domain_colors.iteritems():

            ts.legend.add_face(TextFace(single_unrepeat), column=0)
            ts.legend.add_face(SeqMotifFace(
                "A" * 45, [[0, 80, "[]", None, 8, "Black", colors, None]]),
                               column=1)
            ts.legend_position = 1

        #name_of_run = nameOfRun()
        # Display the tree, then save it as a PDF named after the run.
        file_name = self.run_name
        t.show(tree_style=ts)
        t.render(os.path.join('CompletedTrees', file_name + '.pdf'),
                 tree_style=ts)
Exemple #23
0
# (genus prefix, colour) pairs used to colour the species label of a leaf.
# The prefixes do not overlap, so at most one of them matches a name.
_GENUS_LABEL_COLORS = (
    ("Homo", "red"),
    ("Spongilla", "green"),
    ("Sycon", "green"),
    ("Amphimedon", "green"),
    ("Oscarella", "green"),
    ("Gallus", "red"),
    ("Branchiostoma", "red"),
    ("Trichoplax", "orange"),
    ("Nematostella", "orange"),
    ("Hydra", "orange"),
    ("Drosophila", "blue"),
    ("Crassostrea", "blue"),
)

# (NCBI taxid, label, colour) triples; a label is added for every taxid that
# is found in the lineage of the leaf.
_LINEAGE_TAGS = (
    (7742, 'vertebrata', "red"),
    (6040, 'porifera', "green"),
    (6073, 'cnidario', "orange"),
)


def custom_layout(node):
    """Decorate one tree node with the faces used when rendering the tree.

    Leaf names are expected to look like ``"<species>|<gene>|<prot_info>"``
    where ``prot_info`` contains at least one ``'.'``. A leaf gets:

    * column 0: the species name, coloured by genus (black by default),
    * column 1: the protein id (text after the first ``'.'`` of
      ``prot_info``, truncated to 50 characters),
    * column 2: the gene name, taken from the table read from ``TABLEPATH``
      when ``prot_info`` is listed there, otherwise the second name field,
    * column 3 (aligned): a clade label for each matching lineage taxid,
    * column 4 (aligned): a ``SeqMotifFace`` built from ``node.sequence``.

    Internal nodes are drawn as small squares with their name above and
    their support below the branch.

    The module-level ``ncbi`` and ``orig_name`` objects are created on first
    use when they are falsy. The table is a tab-separated file; empty lines
    and lines starting with ``#`` are ignored, column 0 is the name used in
    the tree and column 2 the name to show.

    :param node: tree node being laid out.
    """
    global ncbi, NCBIPATH, orig_name, TABLEPATH
    if not ncbi:
        ncbi = NCBITaxa(NCBIPATH)

    if not orig_name:
        orig_name = {}
        with open(TABLEPATH) as tablefn:
            for line in tablefn:
                if line.strip() and not line.startswith("#"):
                    line_data = line.strip().split("\t")
                    orig_name[line_data[0]] = line_data[2]

    if not node.is_leaf():
        node.img_style['size'] = 3
        node.img_style['shape'] = 'square'

        if node.name:
            name_face = TextFace(node.name, fgcolor='grey', fsize=10)
            name_face.margin_bottom = 1
            add_face_to_node(name_face, node, column=0, position='branch-top')
        if node.support:
            support_face = TextFace(node.support, fgcolor='indianred', fsize=8)
            support_face.margin_bottom = 1
            add_face_to_node(support_face,
                             node,
                             column=0,
                             position='branch-bottom')
        return

    name_fields = node.name.split('|')
    node_name = name_fields[0]

    # Lineage of the species, used for the clade labels below.
    name2taxid = ncbi.get_name_translator([node_name])
    taxid = name2taxid[node_name]
    lin = ncbi.get_lineage(int(taxid[0]))

    prot_info = name_fields[2]
    prot_id = prot_info.split('.', 1)[1][:50]

    if prot_info in orig_name:
        gene_name = orig_name[prot_info]
    else:
        gene_name = name_fields[1]

    aligned_pname_face = TextFace(prot_id, fgcolor='grey', fsize=11)
    aligned_pname_face.margin_top = 0
    aligned_pname_face.margin_bottom = 0
    aligned_pname_face.margin_right = 20
    add_face_to_node(aligned_pname_face,
                     node,
                     column=1,
                     position='branch-right')

    aligned_gname_face = TextFace(gene_name, fgcolor='black', fsize=11)
    aligned_gname_face.margin_top = 0
    aligned_gname_face.margin_bottom = 0
    aligned_gname_face.margin_left = 5
    add_face_to_node(aligned_gname_face,
                     node,
                     column=2,
                     position='branch-right')

    seqFace = SeqMotifFace(node.sequence, gap_format="blank")
    seqFace.margin_left = 5
    add_face_to_node(seqFace, node, column=4, position="aligned")

    # Static face that handles the node name, coloured by genus.
    # NOTE(review): the first prefix was written "H**o", which can never
    # match a genus name and looks like a text-filter corruption of "Homo";
    # it has been restored to "Homo" - confirm.
    label_color = "black"
    for genus_prefix, color in _GENUS_LABEL_COLORS:
        if node_name.startswith(genus_prefix):
            label_color = color
            break
    N = TextFace(node_name, fsize=11, fgcolor=label_color)
    N.margin_right = 20
    faces.add_face_to_node(N, node, column=0, position='branch-right')

    for clade_taxid, clade_label, clade_color in _LINEAGE_TAGS:
        if clade_taxid in lin:
            N = TextFace(clade_label, fsize=11, fgcolor=clade_color)
            N.background.color = "Linen"
            add_face_to_node(N, node, column=3, position='aligned')
# (NCBI taxid, badge text, badge colour) for the clade badges drawn in aligned
# column 3. A leaf gets one badge per entry whose taxid is in its lineage.
_CLADE_BADGES = (
    (7742, 'vertebrata', 'red'),
    (6040, 'porifera', 'green'),
    (6073, 'cnidario', 'orange'),
    (33317, 'protostomia', 'blue'),
    (10197, 'Ctenophora', 'indigo'),
    # The original labelled 10226 'Ctenophora' as well (copy-paste slip);
    # NCBI taxid 10226 is Placozoa.
    (10226, 'Placozoa', 'sienna'),
    (6157, 'Platyhelminthes', 'olive'),
    (7735, 'Cephalochordata', 'skyblue'),
)

# (genus prefixes, colour) for the species label drawn at branch-right.
# No prefix is a prefix of another, so the grouping order is irrelevant.
_GENUS_COLOURS = (
    (("Homo", "Gallus", "Branchiostoma"), "red"),
    (("Spongilla", "Sycon", "Amphimedon", "Oscarella"), "green"),
    (("Trichoplax", "Nematostella", "Hydra"), "orange"),
    (("Drosophila", "Crassostrea"), "blue"),
)


def _add_aligned_text(node, text, colour, column):
    """Add an 11pt aligned text face with a 5px left margin in *column*."""
    face = TextFace(text, fgcolor=colour, fsize=11)
    face.margin_top = 0
    face.margin_bottom = 0
    face.margin_left = 5
    add_face_to_node(face, node, column=column, position='aligned')


def _lookup_lineage(ncbi, species):
    """Return the lineage taxids of *species*, or [] when it cannot be resolved."""
    taxids = ncbi.get_name_translator([species]).get(species)
    if not taxids:
        sys.stderr.write(
            "No NCBI taxid found for %r when creating custom layout.\n" % species)
        return []
    return ncbi.get_lineage(int(taxids[0])) or []


def _decorate_internal_node(node):
    """Draw an internal node as a small square with its name and support."""
    node.img_style['size'] = 3
    node.img_style['shape'] = 'square'

    if node.name:
        name_face = TextFace(node.name, fgcolor='grey', fsize=10)
        name_face.margin_bottom = 1
        add_face_to_node(name_face, node, column=0, position='branch-top')
    if node.support:
        support_face = TextFace(node.support, fgcolor='indianred', fsize=8)
        support_face.margin_bottom = 1
        add_face_to_node(support_face, node, column=0, position='branch-bottom')


def custom_layout(node):
    """Layout callback that decorates a single tree node with faces.

    Internal nodes are drawn as a size-3 square, with the node name above the
    branch and the support value below it (each only when truthy).

    Leaves are expected to be named ``species|sequence_name|taxid.info``:

    * column 0 (branch-right): species name, coloured by genus prefix
    * column 2 (aligned): sequence name
    * column 3 (aligned): one badge per clade in ``_CLADE_BADGES`` whose
      taxid is in the species' NCBI lineage
    * column 4 (aligned): ``info`` in grey when ``taxid`` is in the lineage,
      otherwise the whole third field in red
    * column 5 (aligned): the node's sequence, when it has one

    A leaf whose name is empty, has fewer than three ``|`` fields, or has a
    blank species field is reported on stderr and left undecorated. The name
    is validated before anything is drawn and before the taxonomy connection
    is opened, so such leaves get no faces at all.

    Returns None.
    """
    if not node.is_leaf():
        # Internal nodes never need the taxonomy connection.
        _decorate_internal_node(node)
        return

    total_name = node.name
    if not total_name:
        sys.stderr.write("Name of node is null or empty when creating custom layout.\n")
        return

    fields = total_name.split('|')
    if len(fields) < 3:
        sys.stderr.write(
            "Node name %r is not of the form 'species|sequence|taxid.info' "
            "when creating custom layout.\n" % total_name)
        return
    node_name, seq_name, other_info = fields[0], fields[1], fields[2]
    if not node_name.strip():
        sys.stderr.write("Node name is null or empty when creating custom layout.\n")
        return

    ncbi = connect_ncbitaxa()
    lin = _lookup_lineage(ncbi, node_name)

    _add_aligned_text(node, seq_name, 'brown', column=2)

    for taxid, label, colour in _CLADE_BADGES:
        if taxid in lin:
            badge = TextFace(label, fsize=11, fgcolor=colour)
            badge.margin_left = 5
            badge.background.color = "Linen"
            add_face_to_node(badge, node, column=3, position='aligned')

    # partition() tolerates a third field without a '.', where split() +
    # tuple unpacking raised ValueError.
    tax, _, seqs_info = other_info.partition('.')
    try:
        tax = int(tax)
    except ValueError:
        # Non-numeric prefix: it cannot match a lineage taxid, so the red
        # "mismatch" rendering below is used.
        pass

    if tax in lin:
        _add_aligned_text(node, seqs_info, 'grey', column=4)
    else:
        _add_aligned_text(node, other_info, 'red', column=4)

    sequence = getattr(node, 'sequence', None)
    if sequence is not None:
        seq_face = SeqMotifFace(sequence, gap_format="blank")
        add_face_to_node(seq_face, node, column=5, position="aligned")

    node.img_style['size'] = 0

    for prefixes, colour in _GENUS_COLOURS:
        # str.startswith accepts a tuple of alternatives.
        if node_name.startswith(prefixes):
            species_face = TextFace(node_name, fsize=11, fgcolor=colour)
            break
    else:
        species_face = TextFace(node_name, fgcolor='#333333', fsize=11)
        species_face.margin_top = 0
        species_face.margin_bottom = 0
    add_face_to_node(species_face, node, column=0, position='branch-right')

    return