Ejemplo n.º 1
0
def build_hmm_from_tree(base, tree_name, aln_name, msa_dir, hmm_dir):
    '''
    Create one alignment file and one HMM for every internal node of a tree.

    The tree is loaded together with its FASTA alignment.  Every internal
    node is tagged with an ``hmm`` feature ("node0", "node1", ... in
    traversal order), the sequences of the leaves below it are written to
    ``<msa_dir><node>.aln`` and ``build_hmmer3_hmm_from_alignment.py`` is
    run on that file.  Finally all ``<hmm_dir>*.hmm`` files are concatenated
    into ``<hmm_dir><base>_concat.hmm``.

    ``msa_dir`` and ``hmm_dir`` are used as plain string prefixes, so they
    must already end with a path separator.

    Returns the annotated PhyloTree.  ``check_call`` raises if the HMM
    builder exits with a non-zero status.
    '''
    pt = PhyloTree(tree_name, alignment=aln_name, alg_format="fasta")
    i_node = 0
    for node in pt.traverse():
        if node.is_leaf():
            continue

        # Annotate the internal node with the name of its HMM.
        node_name = 'node%s' % i_node
        node.add_features(hmm=node_name)
        i_node += 1

        # Write the FASTA-formatted alignment of all leaves under the node.
        msa_path = '%s%s.aln' % (msa_dir, node_name)
        records = []
        for leaf in node.iter_leaves():
            records.append(">%s" % leaf.name)
            records.append(str(leaf.sequence))
        # The context manager closes the handle even if the write fails.
        with open(msa_path, 'w') as msa:
            msa.write('\n'.join(records))

        # Build the HMM for the node.
        check_call(['python', 'build_hmmer3_hmm_from_alignment.py', '--name',
                    '%s%s' % (hmm_dir, node_name),
                    msa_path])

    # Concatenate HMMs into one file for hmmscan.
    # NOTE(review): this goes through the shell, so directory names with
    # spaces or shell metacharacters will break the command.
    os.system('cat %s*.hmm > %s%s_concat.hmm' %
              (hmm_dir, hmm_dir, base))
    return pt
Ejemplo n.º 2
0
def run(args):
    """Print the evolutionary events found in every source tree.

    For each newick in ``args.src_tree_iterator`` a PhyloTree is built and,
    when ``args.orthologs`` is set, the ``in_seqs`` and ``out_seqs`` of each
    event returned by ``get_descendant_evol_events`` are printed on one line.
    """
    from ete2 import Tree, PhyloTree
    for newick in args.src_tree_iterator:
        if args.orthologs is None:
            continue
        phylo = PhyloTree(newick)
        for event in phylo.get_descendant_evol_events():
            print("%s %s" % (event.in_seqs, event.out_seqs))
Ejemplo n.º 3
0
def run(args):
    """Report ``in_seqs``/``out_seqs`` for the evolutionary events of each tree.

    Trees come from ``args.src_tree_iterator``; nothing is printed for a
    tree when ``args.orthologs`` is None.
    """
    from ete2 import Tree, PhyloTree
    for source in args.src_tree_iterator:
        if args.orthologs is not None:
            events = PhyloTree(source).get_descendant_evol_events()
            for ev in events:
                print("%s %s" % (ev.in_seqs, ev.out_seqs))
Ejemplo n.º 4
0
Archivo: tree.py Proyecto: tchen65/NAPA
def load_tree_sequences(nwk_file, fasta_file):
    """Build a PhyloTree from a newick source and attach a FASTA alignment.

    The tree is parsed with ``format=1`` and then linked to ``fasta_file``
    so that its leaves carry their sequences.  Returns the linked tree.
    """
    phylo = PhyloTree(newick=nwk_file, format=1)
    phylo.link_to_alignment(alignment=fasta_file, alg_format='fasta')
    return phylo
Ejemplo n.º 5
0
def get_example_tree():
    """Return a reconciled example gene tree and a fresh TreeStyle.

    A hard-coded gene tree is reconciled against a hard-coded species tree;
    the reconciled tree is then linked to the alignment held in ``alg``
    (a name defined outside this function).
    """
    species_newick = "((((Hsa, Ptr), Mmu), (Mms, Cfa)), Dme);"
    gene_newick = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'

    gene_tree = PhyloTree(gene_newick)
    species_tree = PhyloTree(species_newick)

    # Tree reconciliation; only the reconciled tree is used afterwards.
    reconciled, _events = gene_tree.reconcile(species_tree)
    reconciled.link_to_alignment(alg)
    return reconciled, TreeStyle()
Ejemplo n.º 6
0
def get_example_tree():
    """Reconcile a sample gene tree with a sample species tree.

    Returns a ``(tree, style)`` pair: the reconciled tree, linked to the
    alignment stored in the externally defined ``alg``, and a default
    TreeStyle.
    """
    newicks = {
        "gene": '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));',
        "species": "((((Hsa, Ptr), Mmu), (Mms, Cfa)), Dme);",
    }
    genetree = PhyloTree(newicks["gene"])
    sptree = PhyloTree(newicks["species"])

    # reconcile() returns (tree, events); the events are not needed here.
    result = genetree.reconcile(sptree)
    recon = result[0]
    recon.link_to_alignment(alg)
    style = TreeStyle()
    return recon, style
Ejemplo n.º 7
0
def my_tree_loader(tree):
    """Load a tree for the WebTreeApplication object.

    The tree is first built without a species naming function.  If the first
    leaf then reports its species as "Unknown", ``extract_species_code`` is
    installed as the naming function.  Returns the PhyloTree.
    """
    loaded = PhyloTree(tree, sp_naming_function=None)

    # A single leaf is inspected to decide whether species info is present.
    first_leaf = loaded.get_leaves()[0]
    if first_leaf.species == "Unknown":
        loaded.set_species_naming_function(extract_species_code)

    return loaded
def load_tree(f_tree, f_align):
    """Read a tree (and optionally an alignment) from open file objects.

    ``f_tree`` is read in full and parsed as a PhyloTree; when ``f_align``
    is not None its content is linked to the tree as the alignment.
    Returns ``(tree, style)`` where ``style`` is a TreeStyle that shows leaf
    names, branch lengths and branch supports with a vertical margin of 10.
    """
    style = TreeStyle()
    display_options = (
        ("show_leaf_name", True),
        ("show_branch_length", True),
        ("show_branch_support", True),
        ("branch_vertical_margin", 10),
    )
    for option, value in display_options:
        setattr(style, option, value)

    phylo = PhyloTree(f_tree.read())
    if f_align is None:
        return phylo, style
    phylo.link_to_alignment(f_align.read())
    return phylo, style
def integrate_pwids_into_tree(tree, alignment):
    '''Load a tree with its FASTA alignment and annotate every node.

    Each node receives:
      * ``node_kerf_name``: "node" plus its zero-padded (3 digits) traversal index,
      * ``kerf_pass`` and ``sh_pass``: both initialised to False,
      * ``min_pwid``: 1.0 for leaves, otherwise the value returned by
        ``get_min_pwid_of_leaves`` for the leaves under the node.

    Returns the annotated PhyloTree.'''
    pt = PhyloTree(tree, alignment=alignment, alg_format="fasta")
    position = 0
    for current in pt.traverse():
        current.node_kerf_name = 'node%s' % str(position).zfill(3)
        # Flags consumed by the later kerf and sh functions.
        current.kerf_pass = False
        current.sh_pass = False
        if current.is_leaf():
            current.min_pwid = 1.0
        else:
            current.min_pwid = get_min_pwid_of_leaves(current.get_leaves())
        position += 1
    return pt
Ejemplo n.º 10
0
def main():
  """Print how many leaves each species has in a newick tree.

  The tree is read from the first line of the file named by ``sys.argv[1]``.
  Leaf species are derived with ``_get_spcode``.  One ``species<TAB>count``
  line is printed per species, most frequent first.
  """
  fn = sys.argv[1]
  # Close the input file deterministically instead of leaking the handle.
  with open(fn) as handle:
    nw = handle.readline()

  t = PhyloTree(nw)

  # set species naming function
  t.set_species_naming_function(_get_spcode)

  # Count leaves per species without using exceptions for control flow.
  species = {}
  for l in t.get_leaves():
    spCode = l.species
    species[spCode] = species.get(spCode, 0) + 1

  for spCode in sorted(species, key=lambda x: species[x], reverse=True):
    print('%s\t%s' % (spCode, species[spCode]))
Ejemplo n.º 11
0
  def test_01tree_annotation(self):
    """Check NCBI taxonomy annotation of a three-taxon tree.

    Leaves are named by taxid (9598, 9606, 10090).  After annotation the
    root, the parent of 9606 and the 9606 leaf itself are checked for
    scientific name, taxid, rank, named lineage and numeric lineage.
    """
    t = PhyloTree( "((9598, 9606), 10090);", sp_naming_function=lambda name: name)
    t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
    self.assertEqual(t.sci_name, 'Euarchontoglires')

    # Lineage from the root down to Homininae; the human lineage extends it.
    homininae_names = [
        u'root', u'cellular organisms', u'Eukaryota', u'Opisthokonta',
        u'Metazoa', u'Eumetazoa', u'Bilateria', u'Deuterostomia', u'Chordata',
        u'Craniata', u'Vertebrata', u'Gnathostomata', u'Teleostomi',
        u'Euteleostomi', u'Sarcopterygii', u'Dipnotetrapodomorpha',
        u'Tetrapoda', u'Amniota', u'Mammalia', u'Theria', u'Eutheria',
        u'Boreoeutheria', u'Euarchontoglires', u'Primates', u'Haplorrhini',
        u'Simiiformes', u'Catarrhini', u'Hominoidea', u'Hominidae',
        u'Homininae',
    ]
    homininae_taxids = [
        1, 131567, 2759, 33154, 33208, 6072, 33213, 33511, 7711, 89593, 7742,
        7776, 117570, 117571, 8287, 1338369, 32523, 32524, 40674, 32525, 9347,
        1437010, 314146, 9443, 376913, 314293, 9526, 314295, 9604, 207598,
    ]

    homi = (t&'9606').up
    self.assertEqual(homi.sci_name, 'Homininae')
    self.assertEqual(homi.taxid, 207598)
    self.assertEqual(homi.rank, 'subfamily')
    self.assertEqual(homi.named_lineage, homininae_names)
    self.assertEqual(homi.lineage, homininae_taxids)

    human = t&'9606'
    # Taxid 9606 is Homo sapiens (genus Homo, taxid 9605).
    self.assertEqual(human.sci_name, 'Homo sapiens')
    self.assertEqual(human.taxid, 9606)
    self.assertEqual(human.rank, 'species')
    self.assertEqual(human.named_lineage,
                     homininae_names + [u'Homo', u'Homo sapiens'])
    self.assertEqual(human.lineage, homininae_taxids + [9605, 9606])
Ejemplo n.º 12
0
def main():
    """Print how many leaves each species has in a newick tree.

    The tree is read from the first line of the file named by
    ``sys.argv[1]``.  Leaf species are derived with ``_get_spcode``.  One
    ``species<TAB>count`` line is printed per species, most frequent first.
    """
    fn = sys.argv[1]
    # Close the input file deterministically instead of leaking the handle.
    with open(fn) as handle:
        nw = handle.readline()

    t = PhyloTree(nw)

    #set species naming function
    t.set_species_naming_function(_get_spcode)

    # Count leaves per species; dict.get replaces the former bare
    # try/except, which would also have hidden unrelated errors.
    species = {}
    for l in t.get_leaves():
        spCode = l.species
        species[spCode] = species.get(spCode, 0) + 1

    for spCode in sorted(species, key=lambda x: species[x], reverse=True):
        print('%s\t%s' % (spCode, species[spCode]))
Ejemplo n.º 13
0
def phylomedump_tree_iterator( tarfn,verbose=0 ):
    """PhylomeDB all_trees.tar.gz dump treeobj generator.

    Walks the members of the tar archive ``tarfn`` (gzip-compressed when the
    name ends with ".gz").  A ``*.nw`` member is parsed into a PhyloTree and
    tagged with ``seedid`` and ``method`` taken from the first two
    dot-separated parts of its file name.  A following ``*.lk`` member
    supplies the likelihood; when it is non-zero and its seedid/method match
    the current tree, the tree (now carrying ``lk``) is yielded.

    Problems are reported on stderr and the offending entry is skipped.
    With ``verbose`` set, progress and a final summary are written to stderr.
    The archive is closed when the generator finishes or is closed.
    """
    #open tarfile
    mode = "r:gz" if tarfn.endswith(".gz") else "r"
    tar = tarfile.open(tarfn, mode)

    i = k = 0
    # Most recently parsed tree; None until the first .nw member is seen.
    t = None
    try:
        #process entries
        for m in tar:
            if not m.isfile():
                continue
            #load tree
            if m.name.endswith(".nw"):
                i += 1
                nw = _read_first_line(tar, m)
                t = PhyloTree(nw)
                ##add seedid and method info
                #Phy000CWA9_YEAST.JTT.nw --> Phy000CWA9_YEAST JTT
                seedid, method = os.path.basename(m.name).split(".")[:2]
                t.seedid = seedid
                t.method = method
            #or add lk, seedid, method and lk to treeobj
            elif m.name.endswith(".lk"):
                if t is None:
                    # A likelihood entry with no tree before it cannot be
                    # attached to anything; report and skip instead of
                    # failing on an unbound name.
                    sys.stderr.write( " Err: Likelihood entry without a preceding tree: %s\n" % m.name)
                    continue
                seedid, method, lk = _read_first_line(tar, m).split('\t')[:3]
                t.lk = float(lk)
                if not t.lk:
                    sys.stderr.write( " Err: Zero likelihood (%s) for: %s\n" % (t.lk, ", ".join((t.seedid, t.method))))
                    continue
                if seedid!=t.seedid or t.method != method:
                    sys.stderr.write( " Err: Seedid and/or method doesn't match: %s\n" % ", ".join((seedid, t.seedid, method, t.method)))
                    continue
                k += 1
                if verbose and not i%100:
                    sys.stderr.write( "  %6i\r" % i )

                yield t
    finally:
        tar.close()
    if verbose:
        sys.stderr.write( " %s out of %s trees succesfully parsed [memory: %s KB]\n" % (k, i, resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))


def _read_first_line(tar, member):
    """Return the first line of a tar member as a native ``str``."""
    line = tar.extractfile(member).readline()
    if not isinstance(line, str):
        # Python 3 hands back bytes; Python 2 already returns str.
        line = line.decode()
    return line
Ejemplo n.º 14
0
    def get_tree(self, protid, method, phylome_id):
        """ Return ``(tree, likelihood)`` for a protid, method and phylome.

        The first three characters of ``protid`` are used as the species
        code and the rest as the protein id.  When the query reports no
        rows, ``(None, None)`` is returned.
        """
        species_code = protid[:3]
        protein_number = protid[3:]
        # NOTE(review): the statement is assembled by string formatting;
        # callers must not pass untrusted values.
        query = 'SELECT newick,lk FROM %s WHERE phylome_id=%s AND species="%s" AND protid="%s" AND method ="%s"' % (
            self._trees_table, phylome_id, species_code, protein_number, method)

        if not self._SQL.execute(query):
            return None, None

        row = self._SQL.fetchone()
        newick = row[0]
        likelihood = float(row[1])
        return PhyloTree(newick), likelihood
Ejemplo n.º 15
0
def build_hmm_from_tree(tree_name, aln_name, msa_dir, hmm_dir):
    '''
    Create one alignment file and one HMM for every internal node of a tree.

    The tree is loaded together with its FASTA alignment.  Every internal
    node is tagged with an ``hmm`` feature ("node0", "node1", ... in
    traversal order), the sequences of the leaves below it are written to
    ``<msa_dir><node>.aln`` and ``build_hmmer3_hmm_from_alignment.py`` is
    run on that file.  Finally all ``<hmm_dir>*.hmm`` files are concatenated
    into ``<hmm_dir>concat.hmm``.

    ``msa_dir`` and ``hmm_dir`` are used as plain string prefixes, so they
    must already end with a path separator.

    Returns the annotated PhyloTree.  ``check_call`` raises if the HMM
    builder exits with a non-zero status.
    '''
    pt = PhyloTree(tree_name, alignment=aln_name, alg_format="fasta")
    i_node = 0
    for node in pt.traverse():
        if node.is_leaf():
            continue

        # Annotate the internal node with the name of its HMM.
        node_name = 'node%s' % i_node
        node.add_features(hmm=node_name)
        i_node += 1

        # Write the FASTA-formatted alignment of all leaves under the node.
        msa_path = '%s%s.aln' % (msa_dir, node_name)
        records = []
        for leaf in node.iter_leaves():
            records.append(">%s" % leaf.name)
            records.append(str(leaf.sequence))
        # The context manager closes the handle even if the write fails.
        with open(msa_path, 'w') as msa:
            msa.write('\n'.join(records))

        # Build the HMM for the node.
        check_call([
            'build_hmmer3_hmm_from_alignment.py', '--name',
            '%s%s' % (hmm_dir, node_name),
            msa_path,
        ])

    # Concatenate HMMs into one file for hmmscan.
    # NOTE(review): this goes through the shell, so directory names with
    # spaces or shell metacharacters will break the command.
    os.system('cat %s*.hmm > %sconcat.hmm' % (hmm_dir, hmm_dir))
    return pt
Ejemplo n.º 16
0
def get_topology(taxids, intermediate_nodes=False, rank_limit=None):
    """Build a tree connecting the lineages of the given taxids.

    For every taxid its lineage is fetched with ``get_sp_lineage`` and the
    ranks with ``get_ranks``.  One PhyloTree node is created per lineage
    element (shared between lineages), named after the element and given a
    ``rank`` feature ("?" when the rank is unknown).

    :param taxids: taxids whose lineages are merged.
    :param intermediate_nodes: when False, descendants of the root that have
        exactly one child and are not among ``taxids`` are deleted.
    :param rank_limit: when given, each lineage stops being linked once a
        node with this rank has been attached.

    Returns the node for lineage element 1, or its only child (detached)
    when it has exactly one child.
    """
    from ete2 import PhyloTree
    sp2track = {}
    elem2node = {}
    for sp in taxids:
        track = deque()
        lineage = get_sp_lineage(sp)
        id2rank = get_ranks(lineage)

        for elem in lineage:
            # Only build a new node when the element has not been seen yet,
            # instead of constructing a throwaway PhyloTree on every lookup.
            node = elem2node.get(elem)
            if node is None:
                node = elem2node[elem] = PhyloTree()
            node.name = str(elem)
            node.add_feature("rank", str(id2rank.get(int(elem), "?")))
            track.append(node)
        sp2track[sp] = track

    # Set view of the requested taxids for constant-time membership tests.
    requested = set(sp2track)

    # generate parent child relationships
    for track in sp2track.values():
        parent = None
        for elem in track:
            if parent and elem not in parent.children:
                parent.add_child(elem)
            if rank_limit and elem.rank == rank_limit:
                break
            parent = elem
    root = elem2node[1]

    # This fixes cases in which requested taxids are internal nodes
    #for x in set(sp2track) - set([n.name for n in root.iter_leaves()]):
    #    new_leaf = sp2track[x][-1].copy()
    #    for ch in new_leaf.get_children():
    #        ch.detach()
    #    sp2track[x][-1].add_child(new_leaf)

    #remove onechild-nodes
    if not intermediate_nodes:
        for n in root.get_descendants():
            if len(n.children) == 1 and int(n.name) not in requested:
                n.delete(prevent_nondicotomic=False)

    if len(root.children) == 1:
        return root.children[0].detach()
    else:
        return root
Ejemplo n.º 17
0
    def get_best_tree(self, protid, phylome_id):
        """ Return ``(model, likelihoods, tree)`` for the best ML tree.

        All trees stored for the protein are fetched; only rows with a
        negative likelihood are considered.  ``likelihoods`` maps each such
        method to its likelihood, and the winner is the row with the highest
        one.  ``model`` and ``tree`` are None when no row qualifies.
        """
        # NOTE(review): the statement is assembled by string formatting;
        # callers must not pass untrusted values.
        query = 'SELECT newick,method,lk FROM %s WHERE phylome_id=%s AND species="%s" and protid="%s";' \
            % (self._trees_table, phylome_id, protid[:3], protid[3:])
        self._SQL.execute(query)

        likelihoods = {}
        winner_model = winner_lk = winner_newick = None
        for newick, model, lk in self._SQL.fetchall():
            if not lk < 0:
                continue
            likelihoods[model] = lk
            if winner_lk is None or lk > winner_lk:
                winner_lk, winner_model, winner_newick = lk, model, newick

        best_tree = PhyloTree(winner_newick) if winner_newick else None
        return winner_model, likelihoods, best_tree
Ejemplo n.º 18
0
    def test_01tree_annotation(self):
        """Check NCBI taxonomy annotation of a three-taxon tree.

        Leaves are named by taxid (9598, 9606, 10090).  After annotation the
        root, the parent of 9606 and the 9606 leaf itself are checked for
        scientific name, taxid, rank, named lineage and numeric lineage.
        """
        t = PhyloTree("((9598, 9606), 10090);", sp_naming_function=lambda name: name)
        t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
        self.assertEqual(t.sci_name, "Euarchontoglires")

        # Lineage from the root down to Homininae; the human lineage is this
        # list plus the genus and species entries.
        homininae_names = [
            u"root",
            u"cellular organisms",
            u"Eukaryota",
            u"Opisthokonta",
            u"Metazoa",
            u"Eumetazoa",
            u"Bilateria",
            u"Deuterostomia",
            u"Chordata",
            u"Craniata",
            u"Vertebrata",
            u"Gnathostomata",
            u"Teleostomi",
            u"Euteleostomi",
            u"Sarcopterygii",
            u"Dipnotetrapodomorpha",
            u"Tetrapoda",
            u"Amniota",
            u"Mammalia",
            u"Theria",
            u"Eutheria",
            u"Boreoeutheria",
            u"Euarchontoglires",
            u"Primates",
            u"Haplorrhini",
            u"Simiiformes",
            u"Catarrhini",
            u"Hominoidea",
            u"Hominidae",
            u"Homininae",
        ]
        homininae_taxids = [
            1,
            131567,
            2759,
            33154,
            33208,
            6072,
            33213,
            33511,
            7711,
            89593,
            7742,
            7776,
            117570,
            117571,
            8287,
            1338369,
            32523,
            32524,
            40674,
            32525,
            9347,
            1437010,
            314146,
            9443,
            376913,
            314293,
            9526,
            314295,
            9604,
            207598,
        ]

        homi = (t & "9606").up
        self.assertEqual(homi.sci_name, "Homininae")
        self.assertEqual(homi.taxid, 207598)
        self.assertEqual(homi.rank, "subfamily")
        self.assertEqual(homi.named_lineage, homininae_names)
        self.assertEqual(homi.lineage, homininae_taxids)

        human = t & "9606"
        # Taxid 9606 is Homo sapiens (genus Homo, taxid 9605).
        self.assertEqual(human.sci_name, "Homo sapiens")
        self.assertEqual(human.taxid, 9606)
        self.assertEqual(human.rank, "species")
        self.assertEqual(
            human.named_lineage,
            homininae_names + [u"Homo", u"Homo sapiens"],
        )
        self.assertEqual(
            human.lineage,
            homininae_taxids + [9605, 9606],
        )
Ejemplo n.º 19
0
    parser.add_argument("--newick", dest="newick", 
                        action="store_true", default="",
                        help=("print the extended newick format for provided tree using"
                              " ASCII representation and all its evolutionary events"
                              " before orthoXML export"))
    
    
    args = parser.parse_args()
    newick = args.tree[0]

    SPECIES_NAME_POS = args.species_field
    SPECIES_NAME_DELIMITER = args.species_delimiter

    # load a phylomeDB Tree provided as a newick file in the command line
    t = PhyloTree(newick, sp_naming_function=extract_spname)

    if args.root:
        if len(args.root) > 1:
            outgroup = t.get_common_ancestor(args.root)
        else:
            outgroup = t & args.root[0]
        t.set_outgroup(outgroup)


    if not args.skip_ortholog_detection:
        # detect speciation and duplication events using the species overlap
        # algorithm used in phylomeDB
        t.get_descendant_evol_events()
        
    if args.ascii:
Ejemplo n.º 20
0
            pie = PieChartFace([changes[node.name][0], changes[node.name][1]],
                               changes[node.name][2], changes[node.name][2],
                               ["Green", "Red"])
            pie.opacity = 0.5
            #faces.add_face_to_node(name_face, node, column=0, position="branch-right")
            faces.add_face_to_node(pie, node, column=0, position="float")


# Rendering configuration: automatic leaf names are switched off and node
# drawing is delegated to the custom layout function `my_layout`.
ts = TreeStyle()
# Leaf names are not added automatically (show_leaf_name = False);
# the custom layout is used instead.
ts.show_leaf_name = False
ts.layout_fn = my_layout

# Tree read from an absolute, machine-specific path, parsed with format=1.
t = PhyloTree(
    '/Volumes/MP_HD/Pm_Ts_Tf_Pf_comparison/4_spec_gene_gain_loss/eurot_gene_gain_loss/node_assignments_tree_nos_only_dbl.nwk',
    format=1)

# 23 display labels.
# NOTE(review): presumably paired position-by-position with `nos` below
# (both lists have 23 entries) -- confirm against the code that uses them.
dataorder = [
    'A. fumigatus', 'N. fisheri', 'A. clavatus', 'A. terreus', 'A. flavus',
    'A. oryzae', 'A. niger', 'A. nidulans', 'P. decumbens', 'P. roquefortii',
    'P. chrysogenum', 'P. digitatum', 'T. stipitatus', 'P. funiculosum',
    'T. marneffei', 'T. flavus', 'A. dermatiditis', 'H. capsulatum',
    'P. brasiliensis', 'C. immitis', 'U. reesei', 'T. equinum', 'T. tonsurans'
]
# 23 numeric identifiers kept as strings.
nos = [
    "1", '2', '4', '6', '7', '8', '12', '14', '16', '17', '18', '19', '24',
    '25', '26', '27', '32', '33', '35', '37', '38', '40', '41'
]

branch_to_node = {
Ejemplo n.º 21
0
    def get_topology(self, taxids, intermediate_nodes=False, rank_limit=None, collapse_subspecies=False):
        """Given a list of taxid numbers, return the minimal pruned NCBI taxonomy tree
        containing all of them.

        :param False intermediate_nodes: If True, single child nodes
        representing the complete lineage of leaf nodes are kept. Otherwise, the
        tree is pruned to contain the first common ancestor of each group.

        :param None rank_limit: If valid NCBI rank name is provided, the tree is
        pruned at that given level. For instance, use rank="species" to get rid
        of sub-species or strain leaf nodes.

        :param False collapse_subspecies: If True, every node of rank
        "species" that has descendants gets all of them re-attached as direct
        children (renamed "<name>{<rank>}"), plus a copy of itself named
        "<name>{species}", and is flagged with a ``collapse_subspecies``
        feature.

        """
        from ete2 import PhyloTree
        sp2track = {}
        elem2node = {}
        for sp in taxids:
            track = deque()
            lineage = self.get_sp_lineage(sp)
            id2rank = self.get_ranks(lineage)

            for elem in lineage:
                # Only build a new node when the element has not been seen
                # yet, instead of constructing a throwaway PhyloTree on
                # every lookup.
                node = elem2node.get(elem)
                if node is None:
                    node = elem2node[elem] = PhyloTree()
                node.name = str(elem)
                node.add_feature("rank", str(id2rank.get(int(elem), "?")))
                track.append(node)
            sp2track[sp] = track

        # Set view of the requested taxids for constant-time membership tests.
        requested = set(sp2track)

        # generate parent child relationships
        for track in sp2track.values():
            parent = None
            for elem in track:
                if parent and elem not in parent.children:
                    parent.add_child(elem)
                if rank_limit and elem.rank == rank_limit:
                    break
                parent = elem
        root = elem2node[1]

        # This fixes cases in which requested taxids are internal nodes
        #for x in set(sp2track) - set([n.name for n in root.iter_leaves()]):
        #    new_leaf = sp2track[x][-1].copy()
        #    for ch in new_leaf.get_children():
        #        ch.detach()
        #    sp2track[x][-1].add_child(new_leaf)

        #remove onechild-nodes
        if not intermediate_nodes:
            for n in root.get_descendants():
                if len(n.children) == 1 and int(n.name) not in requested:
                    n.delete(prevent_nondicotomic=False)

        if collapse_subspecies:
            # The tree built here is `root`; its nodes carry `name` and
            # `rank` only.  The previous code referenced `t`, `all_taxids`
            # and `n.taxid`, none of which exist in this method, so this
            # branch always raised NameError.
            # NOTE(review): the old `all_taxids` filter could not be
            # recovered from this method; every species-rank node is
            # treated as collapsible -- confirm the intended filter.
            species_nodes = [n for n in root.traverse() if n.rank == "species"]
            for sp_node in species_nodes:
                bellow = sp_node.get_descendants()
                if bellow:
                    # creates a copy of the species node
                    connector = sp_node.__class__()
                    for f in sp_node.features:
                        connector.add_feature(f, getattr(sp_node, f))
                    connector.name = connector.name + "{species}"
                    for n in bellow:
                        n.detach()
                        n.name = n.name + "{%s}" %n.rank
                        sp_node.add_child(n)
                    sp_node.add_child(connector)
                    sp_node.add_feature("collapse_subspecies", "1")

        if len(root.children) == 1:
            return root.children[0].detach()
        else:
            return root
Ejemplo n.º 22
0
def run(args):
    """Annotate every source tree with features read from tab-separated files.

    For each newick in ``args.src_tree_iterator`` the tree is loaded (as a
    PhyloTree with NCBI taxonomy annotation when ``args.ncbi`` is set,
    otherwise as a plain Tree).  Each entry of ``args.feature`` is a list of
    ``key:value`` options: ``name`` (feature name), ``source`` (path of the
    annotation file), ``multiple`` and ``type`` (one of str, int, float,
    set, list).  Every non-empty, non-comment line of the source file holds
    ``node[,node...]<TAB>value``; a leading ``!`` on the first node name is
    stripped.  The value is cast and attached to the named node, or to the
    common ancestor when several names are given.  The tree is then dumped
    with all collected feature names.

    Raises ValueError for malformed or unknown options, unknown types, or
    when ``name`` or ``source`` is missing.
    """
    from ete2 import Tree, PhyloTree

    type2cast = {"str":str, "int":int, "float":float, "set":set, "list":list}

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update(["taxid", "name", "rank", "bgcolor", "sci_name",
                             "collapse_subspecies", "named_lineage", "lineage"])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            aname, asource, amultiple, acast = None, None, False, str
            for field in annotation:
                try:
                    key, value = [part.strip() for part in field.split(":")]
                except Exception:
                    raise ValueError("Invalid feature option [%s]" %field )

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # Parsed for completeness; not used further below.
                    amultiple = value
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" %field)
                else:
                    raise ValueError("Unknown feature option [%s]" %field)

            # Both options are needed below; previously the exception was
            # built but never raised, and only triggered when both were
            # missing.
            if not aname or not asource:
                raise ValueError('name and source are required when annotating a new feature [%s]'
                                 % annotation)

            features.add(aname)
            with open(asource, 'rU') as source:
                for line in source:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    nodenames, attr_value = [part.strip() for part in line.split('\t')]
                    nodenames = [name.strip() for name in nodenames.split(',')]
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            pass
                            # do something
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): an existing attribute is only warned
                    # about, not replaced, despite the message wording.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' %nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)
Ejemplo n.º 23
0
from ete2 import PhyloTree
# Loads an example tree from an inline newick string.
nw = """
((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),
(Ptr_002,(Hsa_002,Mmu_002))));
"""
t = PhyloTree(nw)
# Printing the tree shows its ASCII rendering:
print t
#                    /-Dme_001
#          /--------|
#         |          \-Dme_002
#         |
#         |                              /-Cfa_001
#         |                    /--------|
#---------|                   |          \-Mms_001
#         |          /--------|
#         |         |         |                    /-Hsa_001
#         |         |         |          /--------|
#         |         |          \--------|          \-Ptr_001
#          \--------|                   |
#                   |                    \-Mmu_001
#                   |
#                   |          /-Ptr_002
#                    \--------|
#                             |          /-Hsa_002
#                              \--------|
#                                        \-Mmu_002
#
# To obtain all the evolutionary events involving a given leaf node, the
# get_my_evol_events method is used; the leaf is first located by name.
matches = t.search_nodes(name="Hsa_001")
Ejemplo n.º 24
0
                               ["Green", "Red"])
            pie.opacity = 0.5
            #faces.add_face_to_node(name_face, node, column=0, position="branch-right")
            faces.add_face_to_node(pie, node, column=0, position="float")


# Rendering configuration: automatic leaf names are switched off and node
# drawing is delegated to the custom layout function `my_layout`.
ts = TreeStyle()
# Leaf names are not added automatically (show_leaf_name = False);
# the custom layout is used instead.
ts.show_leaf_name = False
ts.layout_fn = my_layout

# Earlier input tree, kept for reference:
#t = PhyloTree('/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree_nos.nwk', format=1)

# Tree read from an absolute, machine-specific path, parsed with format=1.
t = PhyloTree(
    '/Volumes/MP_HD/CI_GENOME_SEQ/CI_orthomcl_data/gain_loss_tree_frm_orthogroups/CI_badirate_branch_no_tree_no_names.nwk',
    format=1)

# Disabled label/number tables and branch-to-node mapping, kept for reference:
#dataorder = ['FRR2161','FRR3841','FRR4059','FRR3840','F4','BR2SD2','BR2','BR2SD1','G09043','G11702','G11203SD4','G11203SD3','G11203','G11203SD1','G09027SD2','G09027SD1','G09027','FRR3871','FRR3482','HR2','G11012']
#nos = ["1",'2','4','5','7','10','11','13','15','17','19','20','22','24','26','27','29','32','33','35','37']

#dataorder = ['FRR2161','FRR3841','FRR3840','FRR4059','F4','BR2SD2','BR2','BR2SD1','G09043','G11702','G11203SD4','G11203SD3','G11203','G11203SD1','G09027SD2','G09027SD1','G09027','FRR3871','FRR3482','HR2','G11012']
#nos = ["1",'2','4','5','7','10','11','13','15','17','19','20','22','24','26','27','29','32','33','35','37']

#branch_to_node = {24:25,39:38,41:43,33:35,5:5,31:0,18:22,14:16,40:42,42:41,35:36,36:32,27:30,15:2,26:29,12:15,29:26,21:19,11:4,32:34,6:11,17:20,22:17,16:18,13:3,34:33,43:37,3:6,7:13,8:14,37:39,10:10,44:31,30:24,20:21,2:8,1:7,38:40,28:28,4:9,25:27,19:23,23:1,9:12}
#print branch_to_node

# Table file opened for reading; the handle stays open for the code that
# follows.
inchanges = open(
    '/Volumes/MP_HD/CI_GENOME_SEQ/CI_orthomcl_data/gain_loss_tree_frm_orthogroups/CI_denovo_gene_gain_loss_table.txt',
    'r')
Ejemplo n.º 25
0
def run(args):
    """Print or draw every tree yielded by ``args.src_tree_iterator``.

    Text mode (``args.text_mode``): each tree is loaded as a plain ``Tree``
    and its ASCII drawing is printed; the function then returns.

    Otherwise each tree is loaded as a ``PhyloTree``, decorated with the
    faces described by ``args.face`` (plus the ones implied by
    ``args.as_ncbi``, ``args.alg``, ``args.heatmap`` and ``args.bubbles``)
    and finally written to ``t<index>.<args.image>`` when ``args.image`` is
    set, or displayed with ``t.show`` when it is not.

    When ``args.raxml`` is set, each tree file is read as text and
    ``:<length>[<support>]`` branch labels are rewritten as
    ``:<length>[&&NHX:support=<support>]`` before parsing.

    Side effects: rebinds the module-level ``FACES`` list and replaces falsy
    ``args.height`` / ``args.width`` with ``None``.

    :param args: option namespace.  Attributes read here: ``text_mode``,
        ``src_tree_iterator``, ``raxml``, ``show_internal_names``,
        ``show_attributes``, ``face``, ``mode``, ``tree_width``, ``as_ncbi``,
        ``alg``, ``alg_type``, ``alg_format``, ``heatmap``, ``bubbles``,
        ``branch_separation``, ``show_support``, ``show_branch_length``,
        ``force_topology``, ``height``, ``width``, ``image``, ``size_units``.
    :returns: ``None``.
    """
    # Standard-library imports come first.  ``import re`` used to sit below
    # the text-mode branch; because an import inside a function makes the name
    # local to the whole function, ``re.sub`` in text mode with ``--raxml``
    # raised UnboundLocalError.
    import random
    import re
    import colorsys
    from collections import defaultdict

    raxml_support_re = re.compile(r":(\d+\.\d+)\[(\d+)\]")
    auto_color_re = re.compile(r"auto\(([^)]*)\)")

    def read_newick(tfile):
        """Return what the tree constructor should receive for *tfile*."""
        if not args.raxml:
            return tfile
        # Close the file deterministically instead of leaking the handle.
        with open(tfile) as handle:
            return raxml_support_re.sub(r":\1[&&NHX:support=\2]", handle.read())

    if args.text_mode:
        # Only ``Tree`` is imported on this path, as in the original code.
        from ete2 import Tree
        for tfile in args.src_tree_iterator:
            t = Tree(read_newick(tfile))
            # Single-argument parenthesised print: same output on Python 2.
            print(t.get_ascii(show_internal=args.show_internal_names,
                              attributes=args.show_attributes))
        return

    from ete2 import (Tree, PhyloTree, TextFace, RectFace, faces, TreeStyle,
                      add_face_to_node, random_color)

    def resolve_color(spec, node, face, f2last_seed, f2color):
        """Expand an ``auto(<attr>)`` colour spec; return other specs as-is.

        ``auto()`` with an empty argument keys the colour on the face's own
        value attribute.  Nodes sharing the same attribute value share the
        same colour through *f2color*; *f2last_seed* keeps the last hue seed
        used per attribute so that successive colours differ.
        """
        if not spec:
            return spec
        auto_m = auto_color_re.search(spec)
        if not auto_m:
            return spec
        color_keyattr = (auto_m.group(1).strip() or face["value"]).lstrip('@')
        color_bin = getattr(node, color_keyattr, None)
        last_seed = f2last_seed.setdefault(color_keyattr, random.random())
        seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
        f2last_seed[color_keyattr] = seed
        return f2color.setdefault(color_bin, random_color(h=seed))

    global FACES

    if args.face:
        FACES = parse_faces(args.face)
    else:
        FACES = []

    # VISUALIZATION
    ts = TreeStyle()
    ts.mode = args.mode
    ts.show_leaf_name = True
    ts.tree_width = args.tree_width

    # A user face that already shows @name replaces the automatic leaf name.
    for f in FACES:
        if f["value"] == "@name":
            ts.show_leaf_name = False
            break

    if args.as_ncbi:
        ts.show_leaf_name = False
        FACES.extend(parse_faces(
            ['value:@sci_name, size:10, fstyle:italic',
             'value:@taxid, color:grey, size:6, format:" - %s"',
             'value:@sci_name, color:steelblue, size:7, pos:b-top, nodetype:internal',
             'value:@rank, color:indianred, size:6, pos:b-bottom, nodetype:internal',
         ]))

    if args.alg:
        FACES.extend(parse_faces(
            ['value:@sequence, size:10, pos:aligned, ftype:%s' %args.alg_type]
         ))

    if args.heatmap:
        FACES.extend(parse_faces(
            ['value:@name, size:10, pos:aligned, ftype:heatmap']
         ))

    if args.bubbles:
        for bubble in args.bubbles:
            FACES.extend(parse_faces(
                ['value:@%s, pos:float, ftype:bubble, opacity:0.4' %bubble,
             ]))

    ts.branch_vertical_margin = args.branch_separation
    if args.show_support:
        ts.show_branch_support = True
    if args.show_branch_length:
        ts.show_branch_length = True
    if args.force_topology:
        ts.force_topology = True
    # Faces are attached per node below, so the layout function is a no-op.
    ts.layout_fn = lambda x: None

    for tindex, tfile in enumerate(args.src_tree_iterator):
        t = PhyloTree(read_newick(tfile))

        if args.alg:
            t.link_to_alignment(args.alg, alg_format=args.alg_format)

        if args.heatmap:
            DEFAULT_COLOR_SATURATION = 0.3
            BASE_LIGHTNESS = 0.7
            def gradient_color(value, max_value, saturation=0.5, hue=0.1):
                def rgb2hex(rgb):
                    return '#%02x%02x%02x' % rgb
                def hls2hex(h, l, s):
                    return rgb2hex( tuple(map(lambda x: int(x*255), colorsys.hls_to_rgb(h, l, s))))

                lightness = 1 - (value * BASE_LIGHTNESS) / max_value
                return hls2hex(hue, lightness, DEFAULT_COLOR_SATURATION)

            # Tab-separated table: first column is the node name, the other
            # columns are numeric cells; an empty cell means "missing".
            heatmap_data = {}
            max_value, min_value = None, None
            with open(args.heatmap) as heatmap_file:
                for line in heatmap_file:
                    # '#COLNAMES' headers, other comments and blank lines
                    # carry no data.
                    if line.startswith('#') or not line.strip():
                        continue
                    fields = line.split('\t')
                    name = fields[0].strip()

                    # strip() so a blank last column (just the newline) is
                    # treated as missing instead of crashing float().
                    values = [float(x) if x.strip() else None
                              for x in fields[1:]]

                    # Missing cells must not take part in the range: on
                    # Python 2 min() would return None and break abs() below.
                    present = [v for v in values if v is not None]
                    if present:
                        maxv = max(present)
                        minv = min(present)
                        if max_value is None or maxv > max_value:
                            max_value = maxv
                        if min_value is None or minv < min_value:
                            min_value = minv
                    heatmap_data[name] = values

            heatmap_center_value = 0
            heatmap_color_center = "white"
            heatmap_color_up = 0.3
            heatmap_color_down = 0.7
            heatmap_color_missing = "black"

            if max_value is None:
                # No numeric cell at all: collapse the range onto the centre
                # so the arithmetic below stays valid.
                max_value = min_value = heatmap_center_value

            heatmap_max_value = abs(heatmap_center_value - max_value)
            heatmap_min_value = abs(heatmap_center_value - min_value)

            if heatmap_center_value <= min_value:
                heatmap_max_value = heatmap_min_value + heatmap_max_value
            else:
                heatmap_max_value = max(heatmap_min_value, heatmap_max_value)

        # scale the tree
        if not args.height:
            args.height = None
        if not args.width:
            args.width = None

        f2color = {}
        f2last_seed = {}
        for node in t.traverse():
            node.img_style['size'] = 0
            # Single-child nodes get a visible marker.
            if len(node.children) == 1:
                node.img_style['size'] = 2
                node.img_style['shape'] = "square"
                node.img_style['fgcolor'] = "steelblue"

            # Next free column per face position, for faces without an
            # explicit column.
            ftype_pos = defaultdict(int)

            for f in FACES:
                if not (f['nodetype'] == 'any' or
                        (f['nodetype'] == 'leaf' and node.is_leaf()) or
                        (f['nodetype'] == 'internal' and not node.is_leaf())):
                    continue

                # if node passes face filters
                if not node_matcher(node, f["filters"]):
                    continue

                if f["value"].startswith("@"):
                    fvalue = getattr(node, f["value"][1:], None)
                else:
                    fvalue = f["value"]

                # if node's attribute has content, generate face
                if fvalue is None:
                    continue

                fsize = f["size"]
                # Foreground first, then background: keeps the order in which
                # random numbers are consumed.
                fcolor = resolve_color(f['color'], node, f,
                                       f2last_seed, f2color)
                fbgcolor = resolve_color(f["bgcolor"], node, f,
                                         f2last_seed, f2color)

                # Reset for every face so an unhandled or not-yet-implemented
                # ftype ("profile", "barchart", "piechart") adds nothing,
                # instead of reusing the face built for the previous one.
                F = None

                if f["ftype"] == "text":
                    if f.get("format", None):
                        fvalue = f["format"] % fvalue

                    F = TextFace(fvalue,
                                 fsize = fsize,
                                 fgcolor = fcolor or "black",
                                 fstyle = f.get('fstyle', None))

                elif f["ftype"] == "fullseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="seq",
                                           seqtail_format="seq",
                                           height=fsize)
                elif f["ftype"] == "compactseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="compactseq",
                                           seqtail_format="compactseq",
                                           height=fsize)
                elif f["ftype"] == "blockseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="blockseq",
                                           seqtail_format="blockseq",
                                           height=fsize,
                                           fgcolor=fcolor or "slategrey",
                                           bgcolor=fbgcolor or "slategrey",
                                           scale_factor = 1.0)
                    # Already applied through bgcolor=; do not set it again
                    # on the face background below.
                    fbgcolor = None
                elif f["ftype"] == "bubble":
                    try:
                        v = float(fvalue)
                    except ValueError:
                        rad = fsize
                    else:
                        rad = fsize * v
                    F = faces.CircleFace(radius=rad, style="sphere",
                                         color=fcolor or "steelblue")

                elif f["ftype"] == "heatmap":
                    if not f['column']:
                        col = ftype_pos[f["pos"]]
                    else:
                        col = f["column"]

                    # One square per cell, added directly to the node; F
                    # stays None so nothing more is added below.
                    for i, value in enumerate(heatmap_data.get(node.name, [])):
                        ftype_pos[f["pos"]] += 1

                        if value is None:
                            color = heatmap_color_missing
                        elif value > heatmap_center_value:
                            color = gradient_color(abs(heatmap_center_value - value), heatmap_max_value, hue=heatmap_color_up)
                        elif value < heatmap_center_value:
                            color = gradient_color(abs(heatmap_center_value - value), heatmap_max_value, hue=heatmap_color_down)
                        else:
                            color = heatmap_color_center
                        node.add_face(RectFace(20, 20, color, color), position="aligned", column=col + i)

                # Add the Face
                if F:
                    F.opacity = f['opacity'] or 1.0

                    # Set face general attributes
                    if fbgcolor:
                        F.background.color = fbgcolor

                    if not f['column']:
                        col = ftype_pos[f["pos"]]
                        ftype_pos[f["pos"]] += 1
                    else:
                        col = f["column"]
                    node.add_face(F, column=col, position=f["pos"])

        if args.image:
            t.render("t%d.%s" %(tindex, args.image),
                     tree_style=ts, w=args.width, h=args.height, units=args.size_units)
        else:
            t.show(None, tree_style=ts)
Ejemplo n.º 26
0
    log=open(log_file, "w")
except:
    log=open("gephcort_run.log", "w")
# Logging start time
# Logging start time.  The clock is sampled once so every field of the stamp
# comes from the same instant (six separate calls could straddle a second,
# minute or day boundary).
# NOTE(review): the date fields are written as year-day-month (indices 0, 2, 1
# of the struct_time).  Kept unchanged in case log readers rely on it — confirm.
_start = time.localtime()
log.write("Start time: %d-%d-%d\t%d:%d:%d\n"
          % (_start[0], _start[2], _start[1], _start[3], _start[4], _start[5]))

# R names containing dots, mapped to the Python attribute names to use for them.
ape_objects={"delta.plot":"delta_plot", "dist.dna":"dist_dna", "dist.nodes":"dist_nodes", "node.depth":"node_depth", 
"node.depth.edgelength":"node_depth_edgelength","node.height":"node_height", "node.height.clado":"node_height_clado", 
"prop.part":"prop_part"}

ape=importr("ape", robject_translations = ape_objects)    # Required for phangorn
ph=importr("phangorn")    # Phylogenetic operations in R

print("All modules imported successfully")

t = PhyloTree(intree, alignment=seq, alg_format=seq_format)     # Main tree containing entire sequence
dtp = PhyloTree(intree)                 # Dummy tree for phenotype shuffling

print("Tree file read successfully")

# Phenotype file: one "<name><TAB><value>" record per line.  Each record is
# split once, blank lines are skipped (they used to raise IndexError) and the
# file is closed even if a malformed line raises.
phenlist=[]
with open(phen, "r") as phenfile:
    for line in phenfile:
        if not line.strip():
            continue
        columns = line.split("\t")
        phenlist.append([columns[0].strip(), columns[1].strip()])

phenotype={}    # Dictionary containing species names and their phenotype values

# Phenotype file should have two columns separated by tab containing taxa name
# in the first column and a numerical phenotype value in the second
#
Ejemplo n.º 27
0
        if re.match(r"^OrAeBC5", gene): para_list += 1
    if para_list > 0:
        return (True)
    else:
        return (False)


# get tree files from directory
tree_files = os.listdir(str(sys.argv[1]))
for tree in tree_files:
    if tree.startswith('RAxML_bipartitions.'):
        # get orthogroup id
        ortho = re.sub(r'\D', "", tree)
        # load newick tree
        #print(tree)
        t = PhyloTree(tree)
        #print(t)
        evts = file('%s' '/' '%s' '.temp' % (events_dir, tree), "w")
        ##########################################################################################
        # 			evolutionary events involving all taxa
        ##########################################################################################
        # Alternatively, you can scan the whole tree topology
        events = t.get_descendant_evol_events()
        # print its orthology and paralogy relationships
        for ev in events:
            if ev.etype == "S":
                evts.write(",".join(ev.in_seqs))
                evts.write("<===>")
                evts.write(",".join(ev.out_seqs))
                evts.write("\n")
            elif ev.etype == "D":
Ejemplo n.º 28
0
                 name2score[name] = "Fuzzy:%0.2f" %sim
                 
     for name in all_names:
         taxid = name2id.get(name, "???")
         realname = name2realname.get(name, name)
         score = name2score.get(name, "Exact:1.0")
         print "\t".join(map(str, [score, name, realname.capitalize(), taxid]))
         
 if args.taxid_file:
     all_taxids.extend(map(strip, open(args.taxid_file, "rU").read().split("\n")))
 if args.taxid:
     all_taxids.extend(args.taxid)
     
 reftree = None
 if args.reftree:
     reftree = PhyloTree(args.reftree)
     all_taxids.extend(list(set([n.name for n in reftree.iter_leaves()])))
             
 if all_taxids and args.info:
     log.info("Dumping %d taxid translations:" %len(all_taxids))
     all_taxids = set(all_taxids)
     all_taxids.discard("")
     translator = get_taxid_translator(all_taxids)
     for taxid, name in translator.iteritems():
         lineage = get_sp_lineage(taxid)
         named_lineage = ','.join(translate_to_names(lineage))
         lineage = ','.join(map(str, lineage))
         print "\t".join(map(str, [taxid, name, named_lineage, lineage ]))
     for notfound in all_taxids - set(str(k) for k in translator.iterkeys()):
         print >>sys.stderr, notfound, "NOT FOUND"
         
Ejemplo n.º 29
0
 MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH
"""
iphylip_txt = """
 4 76
      seqA   MAEIPDETIQ QFMALT---H NIAVQYLSEF GDLNEALNSY YASQTDDIKD RREEAHQFMA
      seqB   MAEIPDATIQ QFMALTNVSH NIAVQY--EF GDLNEALNSY YAYQTDDQKD RREEAHQFMA
      seqC   MAEIPDATIQ ---ALTNVSH NIAVQYLSEF GDLNEALNSY YASQTDDQPD RREEAHQFMA
      seqD   MAEAPDETIQ QFMALTNVSH NIAVQYLSEF GDLNEAL--- ---------- -REEAHQ---
             LTNVSHQFMA LTNVSH
             LTNVSH---- ------
             LTNVSH---- ------
             -------FMA LTNVSH
"""
# Load a tree and link it to an alignment. As usual, 'alignment' can
# be the path to a file or data in text format.
t = PhyloTree("(((seqA,seqB),seqC),seqD);", alignment=fasta_txt, alg_format="fasta")

#We can now access the sequence of every leaf node
print "These are the nodes and its sequences:"
for leaf in t.iter_leaves():
    print leaf.name, leaf.sequence
#seqD MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH
#seqC MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH
#seqA MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAH
#seqB MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH
#
# The associated alignment can be changed at any time
t.link_to_alignment(alignment=iphylip_txt, alg_format="iphylip")
# Let's check that sequences have changed
print "These are the nodes and its re-linked sequences:"
for leaf in t.iter_leaves():
Ejemplo n.º 30
0
    '''
    layout for CodemlTree
    '''
    if hasattr(node, "collapsed"):
        if node.collapsed == 1:
            node.img_style["draw_descendants"]= False
    if node.is_leaf():
        if hasattr (node, "sequence"):
            seqface =  MySequenceFace(node.sequence, "nt",
                                      fsize=10,
                                      col_w=11, interactive=True)
            faces.add_face_to_node(seqface, node, 1, aligned=True)

            
if __name__ == "__main__":
    # Demo: a three-leaf tree linked to an amino-acid alignment, with the
    # nucleotide sequence of every leaf attached as `nt_sequence`.
    aa_alignment = """
                           >Chimp
                           HARWLNEKLRCELRTLKKLGLDGYKAVSQYVKGRA
                           >Orangutan
                           DARWINEKLRCVSRTLKKLGLDGYKGVSQYVKGRP
                           >Human
                           DARWHNVKLRCELRTLKKLGLVGFKAVSQFVIRRA
                           """
    nt_sequences = {
        "Human": "GACGCACGGTGGCACAACGTAAAATTAAGATGTGAATTGAGAACTCTGAAAAAATTGGGACTGGTCGGCTTCAAGGCAGTAAGTCAATTCGTAATACGTCGTGCG",
        "Chimp": "CACGCCCGATGGCTCAACGAAAAGTTAAGATGCGAATTGAGAACTCTGAAAAAATTGGGACTGGACGGCTACAAGGCAGTAAGTCAGTACGTTAAAGGTCGTGCG",
        "Orangutan": "GATGCACGCTGGATCAACGAAAAGTTAAGATGCGTATCGAGAACTCTGAAAAAATTGGGACTGGACGGCTACAAGGGAGTAAGTCAATACGTTAAAGGTCGTCCG",
    }

    tree = PhyloTree('(Orangutan,Human,Chimp);')
    tree.link_to_alignment(aa_alignment)

    # `tree & name` looks the node up by name.
    for species, nt_seq in nt_sequences.items():
        (tree & species).nt_sequence = nt_seq
    tree.dist = 0
Ejemplo n.º 31
0
from ete2 import PhyloTree
# Creates a gene phylogeny with several duplication events at
# different levels. Note that we are using the default method for
# detecting the species code of leaves (the first three letters of the
# node name are considered the species code).
nw = """
((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_003),Ptr_001)
,Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002))));
"""
t = PhyloTree(nw)
# The trailing comma keeps the tree drawing on the same print statement's line.
print "Original tree:",
print t
#
#             /-Dme_001
#   /--------|
#  |          \-Dme_002
#  |
#  |                              /-Cfa_001
#  |                    /--------|
#  |                   |          \-Mms_001
#  |                   |
#--|                   |                                        /-Hsa_001
#  |                   |                              /--------|
#  |          /--------|                    /--------|          \-Hsa_003
#  |         |         |                   |         |
#  |         |         |          /--------|          \-Ptr_001
#  |         |         |         |         |
#  |         |         |         |          \-Mmu_001
#  |         |          \--------|
#   \--------|                   |                    /-Hsa_004
#            |                   |          /--------|
Ejemplo n.º 32
0
from ete2 import PhyloTree

# Loads a gene tree and its corresponding species tree. Note that
# species names in sptree are the first 3 letters of the leaf node
# names in genetree.
gene_tree_nw = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'
species_tree_nw = "((((Hsa, Ptr), Mmu), (Mms, Cfa)), Dme);"
genetree = PhyloTree(gene_tree_nw)
sptree = PhyloTree(species_tree_nw)
print genetree
#                    /-Dme_001
#          /--------|
#         |          \-Dme_002
#         |
#         |                              /-Cfa_001
#         |                    /--------|
#---------|                   |          \-Mms_001
#         |          /--------|
#         |         |         |                    /-Hsa_001
#         |         |         |          /--------|
#         |         |          \--------|          \-Ptr_001
#          \--------|                   |
#                   |                    \-Mmu_001
#                   |
#                   |          /-Ptr_002
#                    \--------|
#                             |          /-Hsa_002
#                              \--------|
#                                        \-Mmu_002
#
# Let's reconcile our genetree with the species tree
Ejemplo n.º 33
0
from ete2 import PhyloTree
# Reads a phylogenetic tree (using default species name encoding)
t = PhyloTree("(((Hsa_001,Ptr_001),(Cfa_001,Mms_001)),(Dme_001,Dme_002));")
#                              /-Hsa_001
#                    /--------|
#                   |          \-Ptr_001
#          /--------|
#         |         |          /-Cfa_001
#         |          \--------|
#---------|                    \-Mms_001
#         |
#         |          /-Dme_001
#          \--------|
#                    \-Dme_002
#
# Prints current leaf names and species codes
print "Deafult mode:"
for n in t.get_leaves():
    print "node:", n.name, "Species name:", n.species
# node: Dme_001 Species name: Dme
# node: Dme_002 Species name: Dme
# node: Hsa_001 Species name: Hsa
# node: Ptr_001 Species name: Ptr
# node: Cfa_001 Species name: Cfa
# node: Mms_001 Species name: Mms
#
# We can also use our own leaf name parsing function to obtain species
# names. All we need to do is create a python function that takes
# node's name as argument and return its corresponding species name.
def get_species_name(node_name_string):
    # Species code is the first part of leaf name (separated by an
# Align a FASTA file with ClustalW, read the resulting guide tree and show it
# together with the alignment.
from Bio import SeqIO
from Bio import motifs
from Bio import Phylo
from Bio.Align.Applications import ClustalwCommandline
from Bio.Cluster import distancematrix
from Bio.Cluster import clustercentroids

from ete2 import Tree
from ete2 import PhyloTree

# Input sequences and the tree file read back after ClustalW has run; each
# path is named once instead of being repeated as a literal.
FASTA_PATH = "./txt/cm_perm_sequence_27_social.fasta"
TREE_PATH = "./txt/cm_perm_sequence_27_social.dnd"

# Parse the FASTA file once and reuse the records for the report below
# (the file used to be parsed a second time just for printing).
records = list(SeqIO.parse(FASTA_PATH, "fasta"))
for seq_record in records:
    print(seq_record.id)
    print(repr(seq_record.seq))
    print(len(seq_record))

clustalx = '/Applications/PhylogeneticAnalysis/clustalw2'
cline = ClustalwCommandline(clustalx, infile=FASTA_PATH)
print(cline)
stdout, stderr = cline()

tree = Phylo.read(TREE_PATH, "newick")
tree.rooted = True
#Phylo.draw(tree)

t = PhyloTree(TREE_PATH)
t.link_to_alignment(alignment=FASTA_PATH, alg_format="fasta")
#from ete2 import ClusterTree
#t = ClusterTree('./txt/cm_perm_sequence_27_social.dnd')
t.show()
#t.show("heatmap")
#t.show("cluster_cbars")
#t.show("cluster_bars")
#t.show("cluster_lines")
### Add known sequences (adapters)
# Appended (mode 'a') to the existing temp file as extra ">name" / sequence
# record pairs.
with open(temp_file_name, 'a') as f:
    for name, seq in known_sequences.items():
        f.write(">"+str(name)+"\n")
        f.write(seq+"\n")

### Align
if verbose: print "aligning..."
# The alignment is written next to the temp file, with an ".afa" extension.
aln_file_name = os.path.splitext(temp_file_name)[0] + ".afa"
# align_muscle is defined outside this excerpt.
align_muscle(temp_file_name, aln_file_name, gapopen=-1000.0)

### Build tree
if verbose: print "building tree..."
# build_tree_FT is defined outside this excerpt; `aln` is not used below.
tree, aln = build_tree_FT(aln_file_name)
### Show in pretty format
pretty_tree = PhyloTree(str(tree), alignment=aln_file_name, alg_format="fasta")

pretty_tree.ladderize()

ts = TreeStyle()

pretty_tree.render(outfile, tree_style=ts)

### Clean up your mess
# NOTE(review): these removals are skipped if any step above raises, leaving
# both intermediate files behind.
os.remove(temp_file_name)
os.remove(aln_file_name)

### TODO
# highlight adapter rows
# root on adapter?
Ejemplo n.º 36
0
# Demo: split a gene tree into its speciation trees and print each of them.
import sys
from collections import defaultdict
from ete2 import PhyloTree

# Use the tree given on the command line, or fall back to a built-in example.
if len(sys.argv) > 1:
    t = PhyloTree(sys.argv[1])
else:
    # This empty tree is replaced by the example assigned a few lines below.
    t = PhyloTree()
    #t.populate(5000, reuse_names=True, names_library=map(lambda x: "%03d" %x, range(100)))

    #t.populate(5000, reuse_names=True, names_library=["aaa", "bbb", "ccc","dddd"])
    #t.set_species_naming_function(lambda x: x[:3])    
    #t = PhyloTree("((((Kla0008018:0.226825,(Kwa0003593:0.270871,(((((((Sce0006606:0.020101,(Smi0000169:0.045626,Sku0001100:0.091634)0.9:0.021336)0.473:0.004546,Spa0001368:0)0.806:0.040152,Sba0000063:0.059101)0.967:0.124536,Sca0004780:0.57162)0.36:0.045976,Cgl0005705:0.244154)0.94:0.080608,(((Spa0003632:0.005291,Sce0012358:0.019313)0.879:0.014349,Smi0005102:0.031246)0.028:0.000541,(Sba0002319:0.027948,Sku0001858:0.037758)0.873:0.023849)0.995:0.14497)0.859:0.056767,(Sca0004490:0.235469,Kpo0005032:0.313188)0.699:0.077825)0.807:0.085287)0.523:0.049374)0.606:0.167197,Ago0006484:0.438321)0.976:0.605273,Cal0012751:1.95721)0.975:0.332581,(Cal0010356:0.478947,((Ago0007434:1.13211,Kwa0002043:1.20443)0.282:0.216219,(Skl0001126:0.276168,Cgl0008719:0.5381)0.454:0.191735)0.934:0.438082)0.975:0.332581);")     
    #t = PhyloTree("((((((AAA1, AAA2),((BBB1,BBB2), AAA3)D1),(CCC1,CCC2)), AAA8)D2, (((AAA5, AAA6),((BBB5,BBB6), AAA4)D3),(CCC3,CCC4)))D4, D);", format=1)
    t = PhyloTree("((((((((AAA1, AAA2:0.111)a1,(((BBB1,ZZZ1)a2,MMM1)a3,AAA4)a4)a5, AAA3)a6,(AAA4, (AAA5, XXX1)a8)a9)a10,DDD)a11,DDD)a12,DDD)a13,DDD)root;", format=1)
    print t.get_ascii()
    
# Only `sp_trees` is used below; `ntrees` and `ndups` are unpacked but unused.
ntrees, ndups, sp_trees = t.get_speciation_trees(map_features=["dist"])

for sptree in sp_trees:
    print sptree.get_ascii(attributes=["dist"])

Ejemplo n.º 37
0
 def __init__(self):
     """Initialise ``taxoDB`` as an empty dict and ``tree`` as a new ``PhyloTree`` named "NoName"."""
     self.taxoDB = {}
     self.tree = PhyloTree()
     self.tree.name = "NoName"
Ejemplo n.º 38
0
 def test_ncbi_compare(self):
   # The naming function returns each node's name unchanged, so the numeric
   # leaf names are used directly as the species of each leaf.
   t = PhyloTree( "((9606, (9598, 9606)), 10090);", sp_naming_function=lambda x: x.name )
   # Annotate the tree using the database file at DATABASE_PATH.
   # NOTE(review): no assertion is visible in this excerpt — the test body may
   # continue beyond it.
   t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
Ejemplo n.º 39
0
#!/usr/bin/python
from __future__ import absolute_import

import sys
from ete2 import PhyloTree

if __name__ == "__main__":
    # Command line: <tree> <alignment> <output file>
    t = sys.argv[1]
    # Read but currently unused: the link_to_alignment call below is
    # commented out.
    s = sys.argv[2]
    out = sys.argv[3]

    pt = PhyloTree(t)
#    pt.link_to_alignment(alignment=s)
    pt.render(out)
Ejemplo n.º 40
0
    "delta.plot": "delta_plot",
    "dist.dna": "dist_dna",
    "dist.nodes": "dist_nodes",
    "node.depth": "node_depth",
    "node.depth.edgelength": "node_depth_edgelength",
    "node.height": "node_height",
    "node.height.clado": "node_height_clado",
    "prop.part": "prop_part"
}

ape = importr("ape", robject_translations=ape_objects)  # Required for phangorn
ph = importr("phangorn")  # Phylogenetic operations in R

print("All modules imported successfully")

t = PhyloTree(intree, alignment=seq,
              alg_format=seq_format)  # Main tree containing entire sequence
dtp = PhyloTree(intree)  # Dummy tree for phenotype shuffling

print("Tree file read successfully")

# Phenotype file: one "<name><TAB><value>" record per line.  Each record is
# split once, blank lines are skipped (they used to raise IndexError) and the
# file is closed even if a malformed line raises.
phenlist = []
with open(phen, "r") as phenfile:
    for line in phenfile:
        if not line.strip():
            continue
        columns = line.split("\t")
        phenlist.append([columns[0].strip(), columns[1].strip()])

phenotype = {
}  # Dictionary containing species names and their phenotype values

# Phenotype file should have two columns separated by tab containing taxa name
# in the first column and a numerical phenotype value in the second
Ejemplo n.º 41
0
ts = TreeStyle()
# ts.mode = "c"

MEGACC = "/usr/local/bin/megacc"

# Read the gene IDs up front so the ID file is closed before the long-running
# BLAST / alignment / tree steps start (the handle used to be left open).
with open(in_id, "r") as id_file:
    gene_ids = [line.strip('\n') for line in id_file]

# For every gene ID: BLAST it, align the hits, build a tree from every
# alignment whose file name contains the ID, and render the matching
# non-consensus trees to PDF.
for i in gene_ids:
    if not i:
        # A blank ID would match every file name in the `i in j` tests below.
        continue
    print(i)
    outf = "/".join(in_id.split('/')[:-1])+"/top_hits_pm1_madss/"+i+"_blastp_hits_"+in_eval+".fasta"
    no_hits = blast_gene(i,in_eval,indb,outf)
    print(no_hits)
    # Argument lists instead of shell command strings: paths containing
    # spaces, parentheses or other shell metacharacters reach megacc intact
    # and nothing is interpreted by a shell.
    subprocess.call([MEGACC, "-a", align_mao, "-o", align_dir, "-s", "-d", outf])
    sl(2)
    align_lis = glob.glob(align_dir + "/*.meg")
    alignpath = ''
    for j in align_lis:
        if i in j:
            subprocess.call([MEGACC, "-a", tree_mao, "-o", tree_dir, "-d", j])
    tree_ls = glob.glob(tree_dir + "/*.nwk")
    for j in tree_ls:
        if i in j and "consensus" not in j:
            t = PhyloTree(j, format=1)
            #t.show()
            # t = Phylo.read(j,"newick")
            # #t.ladderize()
            # #Phylo.draw(t)
            # Phylo.write(t,j.replace(".nwk",".xml"),"phyloxml")
            # Phylo.draw_graphviz(t,prog="neato")
            t.render(tree_dir+"/"+i+"_blastp_hits_"+in_eval+".pdf",tree_style=ts,dpi=200)
Ejemplo n.º 42
0
__author__ = 'mjohnpayne'

import sys
from ete2 import Tree, faces, AttrFace, TreeStyle, NodeStyle, PhyloTree, PieChartFace
import math

# infile = open('/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree.nwk','r')
# outfile = open('/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree_nos_only.nwk','w')
# infile = infile.read()

# Newick file holding the node-assignment tree.
tree_path = '/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree.nwk'
t = PhyloTree(tree_path, format=1)

ts = TreeStyle()

# Print the name of every node yielded by iterating the tree, and override
# the branch length of the one named "41".
for node in t:
    label = node.name
    print(label)
    if label == "41":
        node.dist = 5e-05

# Open the tree viewer with the default style.
t.show(tree_style=ts)
Ejemplo n.º 43
0
    def get_topology(self,
                     taxids,
                     intermediate_nodes=False,
                     rank_limit=None,
                     collapse_subspecies=False,
                     annotate=True):
        """Given a list of taxid numbers, return the minimal pruned NCBI taxonomy tree
        containing all of them.

        :param taxids: Iterable of taxids. Each one is passed to
        ``self.get_lineage`` and its lineage is added to the tree.

        :param False intermediate_nodes: If True, single child nodes
        representing the complete lineage of leaf nodes are kept. Otherwise, the
        tree is pruned to contain the first common ancestor of each group.

        :param None rank_limit: If valid NCBI rank name is provided, the tree is
        pruned at that given level. For instance, use rank_limit="species" to
        get rid of sub-species or strain leaf nodes.

        :param False collapse_subspecies: If True, any item under the species
        rank will be collapsed into the species upper node.

        :param True annotate: If True, the resulting tree is passed to
        ``self.annotate_tree`` before being returned.

        :returns: The root node of the pruned tree. When the taxonomy root
        (taxid 1) ends up with a single child, that child is detached and
        returned instead.

        :raises KeyError: If no lineage contained taxid 1 (for instance when
        ``taxids`` is empty), since the root is looked up under that key.

        """
        from ete2 import PhyloTree
        sp2track = {}
        elem2node = {}
        for sp in taxids:
            lineage = self.get_lineage(sp)
            id2rank = self.get_rank(lineage)

            track = []
            for elem in lineage:
                if elem in elem2node:
                    # Node shared with a previously processed lineage.
                    node = elem2node[elem]
                else:
                    node = elem2node[elem] = PhyloTree()
                    node.name = str(elem)
                    node.taxid = elem
                    node.add_feature("rank",
                                     str(id2rank.get(int(elem), "no rank")))
                track.append(node)
            sp2track[sp] = track

        # Generate parent-child relationships by walking each lineage in order.
        for track in sp2track.values():
            parent = None
            for elem in track:
                if parent and elem not in parent.children:
                    parent.add_child(elem)
                if rank_limit and elem.rank == rank_limit:
                    # Nothing below the requested rank is attached.
                    break
                parent = elem
        root = elem2node[1]

        # Remove single-child nodes unless they were explicitly requested.
        if not intermediate_nodes:
            # The keys of sp2track are exactly the requested taxids. Using a
            # set gives constant-time lookups and still works when `taxids`
            # was a one-shot iterator that the first loop already consumed.
            requested = set(sp2track)
            for n in root.get_descendants():
                if len(n.children) == 1 and int(n.name) not in requested:
                    n.delete(prevent_nondicotomic=False)

        if len(root.children) == 1:
            tree = root.children[0].detach()
        else:
            tree = root

        if collapse_subspecies:
            # Collect first, detach afterwards, so the tree is not modified
            # while it is being traversed.
            to_detach = []
            for node in tree.traverse():
                if node.rank == "species":
                    to_detach.extend(node.children)
            for n in to_detach:
                n.detach()
        if annotate:
            self.annotate_tree(tree)

        return tree
Ejemplo n.º 44
0
            realname = name2realname.get(name, name)
            score = name2score.get(name, "Exact:1.0")
            print "\t".join(
                map(str,
                    [score, name, realname.capitalize(), taxid]))

    if args.taxid_file:
        all_taxids.extend(
            map(strip,
                open(args.taxid_file, "rU").read().split("\n")))
    if args.taxid:
        all_taxids.extend(args.taxid)

    reftree = None
    if args.reftree:
        reftree = PhyloTree(args.reftree)
        all_taxids.extend(list(set([n.name for n in reftree.iter_leaves()])))

    if all_taxids and args.info:
        log.info("Dumping %d taxid translations:" % len(all_taxids))
        all_taxids = set(all_taxids)
        all_taxids.discard("")
        translator = get_taxid_translator(all_taxids)
        for taxid, name in translator.iteritems():
            lineage = get_sp_lineage(taxid)
            named_lineage = ','.join(translate_to_names(lineage))
            lineage = ','.join(map(str, lineage))
            print "\t".join(map(str, [taxid, name, named_lineage, lineage]))
        for notfound in all_taxids - set(
                str(k) for k in translator.iterkeys()):
            print >> sys.stderr, notfound, "NOT FOUND"
Ejemplo n.º 45
0
#!/usr/bin/python

####Python script that can take in a gene tree and detect paralogs and duplications; uses python module ete2
###linh c 6/5/2014

import sys

# Import ete2 module
from ete2 import PhyloTree

# Take in file input
file = sys.argv[1]
output = sys.argv[2]

# Load a tree structure from a newick file.
t = PhyloTree(file)

print t 

# Alternatively, you can scan the whole tree topology
events = t.get_descendant_evol_events()

# Open output file
fo = open(output, "wb+")

# Print its orthology and paralogy relationships
fo.write( 'Events detected from the root of the tree,' + file + '\n')
for ev in events:
    if ev.etype == "S":
        fo.write ('ORTHOLOGY RELATIONSHIP:' + ','.join(ev.in_seqs) + '<====>' + ','.join(ev.out_seqs) + '\n')
    elif ev.etype == "D":
Ejemplo n.º 46
0
    exit()

# create output directory
events_dir = '%s' '/events' % (str(sys.argv[1]))
if not os.path.exists(events_dir):
    os.makedirs(events_dir)

# get tree files from directory
tree_files = os.listdir(str(sys.argv[1]))
for tree in tree_files:
    if re.match(r"^\d+\.fna\.aln.+\.tree", tree):
        # get orthogroup id
        ortho = re.sub(r'\D', "", tree)
        # load newick tree
        #print(tree)
        t = PhyloTree(tree)
        #print(t)
        evts = file('%s' '/' '%s' '.temp' % (events_dir, tree), "w")
        ##########################################################################################
        # 			evolutionary events involving all taxa
        ##########################################################################################
        # Alternatively, you can scan the whole tree topology
        events = t.get_descendant_evol_events()
        # print its orthology and paralogy relationships
        for ev in events:
            if ev.etype == "S":
                evts.write(",".join(ev.in_seqs))
                evts.write("<===>")
                evts.write(",".join(ev.out_seqs))
                evts.write("\n")
            elif ev.etype == "D":
Ejemplo n.º 47
0
def run(args):
    """Annotate every source tree with features from tab-separated files and dump it.

    Attributes read from ``args``:

    * ``src_tree_iterator``: iterable of tree sources, each passed to the
      ``PhyloTree`` or ``Tree`` constructor.
    * ``ncbi``: if true, the tree is loaded as a ``PhyloTree`` and annotated
      through ``annotate_ncbi_taxa(args.taxid_attr)``.
    * ``feature``: iterable of annotations; each annotation is an iterable of
      ``"key:value"`` fields with keys ``name``, ``source``, ``multiple`` and
      ``type`` (one of ``str``, ``int``, ``float``, ``set``, ``list``).

    Each non-empty, non-comment line of an annotation's source file must be
    ``<node names, comma separated>TAB<value>``. A single name selects that
    node; several names select their common ancestor.

    :raises ValueError: on a malformed or unknown feature option, an unknown
        type, or an annotation lacking ``name`` or ``source``.
    """
    from ete2 import Tree, PhyloTree

    # Accepted values of the "type" option and the cast each one selects.
    type2cast = {
        "str": str,
        "int": int,
        "float": float,
        "set": set,
        "list": list
    }

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update([
                "taxid", "name", "rank", "bgcolor", "sci_name",
                "collapse_subspecies", "named_lineage", "lineage"
            ])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            aname, asource, amultiple, acast = None, None, False, str
            for field in annotation:
                try:
                    key, value = map(strip, field.split(":"))
                except Exception:
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # Accepted, but the value is not used further down yet.
                    amultiple = value
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both values are needed below: the name is the attribute to set
            # and the source is the file to read. Fail with a clear error
            # rather than a TypeError from open(None) or hasattr(node, None).
            if not aname or not asource:
                raise ValueError(
                    'name and source are required when annotating a new feature [%s]'
                    % annotation)

            features.add(aname)
            with open(asource, 'rU') as source_file:
                for line in source_file:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    nodenames, attr_value = map(strip, line.split('\t'))
                    nodenames = map(strip, nodenames.split(','))
                    # A leading '!' on the first name turns off relaxed
                    # grouping; the flag has no effect yet (see below).
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            pass
                            # do something
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): the warning says "Overwriting", but the
                    # existing value is kept and the new one is discarded.
                    # Confirm which behaviour is intended.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' %
                                    nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)