コード例 #1
0
def name_ancestors(timetreefile, to_table=False, ete3_algo=False, uniq=True):
    """Annotate a time tree with NCBI taxonomy and print the outcome.

    Leaf names are translated to NCBI taxids (underscores are read as
    spaces). Leaves that cannot be translated are logged and removed from
    the tree. Ancestors are then annotated, `matchrename_ncbitax` is applied
    and the result is printed to standard output.

    :param timetreefile: tree source handed to `PhyloTree` (format 1,
        quoted node names enabled).
    :param to_table: when true, print one ``oldname<TAB>sci_name`` line per
        internal node; otherwise print the tree as newick (format 1, root
        node included).
    :param ete3_algo: when true, annotate with `NCBITaxa.annotate_tree`
        using the ``taxid`` feature; otherwise use `myannotate`.
    :param uniq: forwarded unchanged to `matchrename_ncbitax`.
    :returns: None; all output goes through `print`.
    """
    logger.info('Loading data')
    ### /!\ quoted_node_names only from ete3 v3.1.1
    timetree = PhyloTree(timetreefile, format=1, quoted_node_names=True)
    ncbi = NCBITaxa()

    # NCBI names use spaces where the tree labels use underscores.
    spaced_names = [label.replace('_', ' ')
                    for label in timetree.get_leaf_names()]
    name2taxid = ncbi.get_name_translator(spaced_names)

    for leaf in timetree.get_leaves():
        lookup_key = leaf.name.replace('_', ' ')
        try:
            # Only the first taxid returned for a name is kept.
            leaf.add_feature('taxid', name2taxid[lookup_key][0])
        except KeyError:
            logger.warning('Species %r not found', leaf.name)
            leaf.delete(prevent_nondicotomic=True, preserve_branch_length=True)

    logger.info('Placing common ancestors')
    if not ete3_algo:
        myannotate(timetree, ncbi)
    else:
        ncbi.annotate_tree(timetree, 'taxid')
    matchrename_ncbitax(timetree, uniq)

    if to_table:
        for node in timetree.traverse():
            if node.is_leaf():
                continue
            old_label = node.oldname
            sci_label = getattr(node, 'sci_name', '')
            print(old_label + '\t' + sci_label)
        return

    print(timetree.write(format=1, format_root_node=True))
コード例 #2
0
def plot_taxids(taxids_list, tree_png, tree_nw, tax_db=None):
    """Draw the NCBI taxonomy topology of some taxids and save it as newick.

    :param taxids_list: taxids passed to `NCBITaxa.get_topology`.
    :param tree_png: output path given to `tree.render` for the image.
    :param tree_nw: output path for the newick dump (format 1).
    :param tax_db: optional taxonomy database file for `NCBITaxa`; when
        None, `NCBITaxa` is created with its defaults.
    :returns: None; the results are the two files written.
    """
    ncbi = NCBITaxa() if tax_db is None else NCBITaxa(dbfile=tax_db)

    tree = ncbi.get_topology(taxids_list)
    ts = TreeStyle()
    # `taxid_attr` must name the feature that holds the taxid number. In a
    # tree built by `get_topology` that is the node name, not `sci_name`
    # (which holds the scientific name and cannot be parsed as a taxid).
    ncbi.annotate_tree(tree, taxid_attr="name")
    ts.show_leaf_name = False
    ts.mode = "c"
    ts.layout_fn = layout
    tree.render(tree_png, tree_style=ts)
    tree.write(format=1, outfile=tree_nw)
コード例 #3
0
ファイル: phylotree.py プロジェクト: muppetjones/ete
    def annotate_ncbi_taxa(self,
                           taxid_attr='species',
                           tax2name=None,
                           tax2track=None,
                           tax2rank=None,
                           dbfile=None):
        """Annotate this tree with NCBI taxonomy information.

        An `NCBITaxa` handle is created (on *dbfile* when one is given) and
        the work is delegated to its `annotate_tree` method, whose result is
        returned unchanged.

        According to the upstream documentation, every descendant node
        (internal nodes included) gains these features:

        `Node.spname`: scientific species name as encoded in the NCBI
        taxonomy database

        `Node.named_lineage`: the NCBI lineage track using scientific names

        `Node.taxid`: NCBI taxid number

        `Node.lineage`: same as named_lineage but using taxid codes.

        For internal nodes the information refers to the first common
        lineage of the grouped species.
        NOTE(review): the exact feature names are assigned inside
        `NCBITaxa.annotate_tree`, not here -- confirm against it.

        :param taxid_attr: name of the node feature used to read the taxid
        number of each node (``'species'`` by default).

        :param tax2name: optional dictionary mapping taxid numbers to NCBI
        scientific names. Passing it avoids database queries when many
        trees share the same set of taxids.

        :param tax2track: optional dictionary mapping taxid numbers to NCBI
        lineage tracks (taxids). Same caching purpose as *tax2name*.

        :param tax2rank: optional dictionary mapping taxid numbers to NCBI
        rank names. Same caching purpose as *tax2name*.

        :param dbfile: if provided, this file is used as the local copy of
        the NCBI taxonomy database.

        :returns: whatever `NCBITaxa.annotate_tree` returns; documented
        upstream as tax2name, tax2lineage and tax2rank dictionaries.

        """
        # Collect the optional lookup tables so they are forwarded as one group.
        translators = {
            'tax2name': tax2name,
            'tax2track': tax2track,
            'tax2rank': tax2rank,
        }
        taxonomy = NCBITaxa(dbfile=dbfile)
        return taxonomy.annotate_tree(self, taxid_attr=taxid_attr,
                                      **translators)
コード例 #4
0
ファイル: phylotree.py プロジェクト: Ward9250/ete
    def annotate_ncbi_taxa(self, taxid_attr='species', tax2name=None, tax2track=None, tax2rank=None, dbfile=None):
        """Add NCBI taxonomy annotation to the nodes of this tree.

        This is a convenience wrapper: it builds an `NCBITaxa` object from
        *dbfile* and calls its `annotate_tree` method on this tree.

        The upstream documentation lists the following features as being
        added to all descendant nodes, internal nodes included (for internal
        nodes the values refer to the first common lineage of the grouped
        species):

        `Node.spname`: scientific species name from the NCBI taxonomy database

        `Node.named_lineage`: NCBI lineage track using scientific names

        `Node.taxid`: NCBI taxid number

        `Node.lineage`: same as named_lineage but using taxid codes.

        NOTE(review): these names come from the original documentation; the
        features are actually set by `NCBITaxa.annotate_tree` -- confirm there.

        :param taxid_attr: feature name from which each node's taxid number
        is read. Defaults to ``'species'``.

        :param tax2name: dictionary of taxid number -> NCBI scientific name.
        Optional; lets callers skip database queries when annotating many
        trees containing the same taxids.

        :param tax2track: dictionary of taxid number -> NCBI lineage track
        (taxids). Optional, for the same reason as *tax2name*.

        :param tax2rank: dictionary of taxid number -> NCBI rank name.
        Optional, for the same reason as *tax2name*.

        :param dbfile: file to use as a local copy of the NCBI taxonomy
        database, if provided.

        :returns: the value returned by `NCBITaxa.annotate_tree`; per the
        upstream documentation, the tax2name, tax2lineage and tax2rank
        dictionaries.

        """
        annotate = NCBITaxa(dbfile=dbfile).annotate_tree
        annotation = annotate(
            self,
            taxid_attr=taxid_attr,
            tax2name=tax2name,
            tax2track=tax2track,
            tax2rank=tax2rank,
        )
        return annotation
コード例 #5
0
    # Two input modes. With `newick`, a tree is loaded from args.newick and
    # `infile` is read as whitespace-separated columns: first column -> key,
    # second column -> value of species2taxid. Otherwise `infile` is read as
    # one stripped entry per line.
    # NOTE(review): `ncbi` is bound only in the `else` branch; unless it is
    # also defined earlier (outside this excerpt), the `newick` path fails
    # with NameError at the get_topology call below -- confirm.
    # NOTE(review): the files opened here are never explicitly closed.
    if newick:
        t = PhyloTree(args.newick)      
        species2taxid = dict([ line.split()[0], line.strip().split()[1] ] for line in open(infile))
        taxids = set(species2taxid.values())
    else:
        ncbi = NCBITaxa()
        taxids = set([ line.strip() for line in open(infile) ])


    # Optional colour table: first column is converted to int and used as
    # the key, second column is the associated value.
    if args.taxoncolors:
        taxon2color = dict([int(line.split()[0]), line.split()[1]] for line in open(args.taxoncolors))

    # Build the topology for the collected taxids, keeping intermediate nodes.
    tNCBI = ncbi.get_topology(taxids, intermediate_nodes=True)
    # Continue from the node named "2759" only (presumably the NCBI taxid of
    # Eukaryota -- TODO confirm). The [0] assumes at least one match.
    tNCBI = tNCBI.search_nodes(name="2759")[0]
    # Node names are used as the taxid source for the annotation.
    ncbi.annotate_tree(tNCBI, taxid_attr="name")
    # NOTE(review): this mapping is replaced by a fresh dict further down
    # before any visible use.
    tax2node = dict([node.taxid, node] for node in tNCBI.traverse())

    # Optionally drop every descendant that has exactly one child; if the
    # root itself ends up with a single child, descend to that child.
    if args.no_intermediate_nodes:
        for node in tNCBI.get_descendants():
            if len(node.children) == 1:
                node.delete()
        if len(tNCBI.children) == 1:
            tNCBI = tNCBI.children[0]
    
    # Rebuild the taxid -> node mapping on the (possibly pruned) tree and
    # attach a "bgcolor" feature to nodes whose taxid has a colour entry.
    tax2node = {}
    for node in tNCBI.traverse():
        tax2node[node.taxid] = node
        if args.taxoncolors:
            if node.taxid in taxon2color:
                node.add_feature("bgcolor", taxon2color[node.taxid])
コード例 #6
0
        species2taxid = dict(
            [line.split()[0], line.strip().split()[1]]
            for line in open(infile))
        taxids = set(species2taxid.values())
    else:
        ncbi = NCBITaxa()
        taxids = set([line.strip() for line in open(infile)])

    # Optional colour table read from args.taxoncolors: the first
    # whitespace-separated column becomes an int key, the second its value.
    # NOTE(review): the file opened here is never explicitly closed.
    if args.taxoncolors:
        taxon2color = dict(
            [int(line.split()[0]), line.split()[1]]
            for line in open(args.taxoncolors))

    # NOTE(review): `ncbi` is assigned only in the `else` branch just above;
    # if the other branch runs and nothing earlier defines it, this line
    # raises NameError -- confirm against the start of the function.
    tNCBI = ncbi.get_topology(taxids, intermediate_nodes=True)
    # Keep only the subtree at the node named "2759" (presumably the NCBI
    # taxid of Eukaryota -- TODO confirm); [0] assumes a match exists.
    tNCBI = tNCBI.search_nodes(name="2759")[0]
    # Annotate using each node's name as its taxid.
    ncbi.annotate_tree(tNCBI, taxid_attr="name")
    # NOTE(review): overwritten by the empty dict below before any visible use.
    tax2node = dict([node.taxid, node] for node in tNCBI.traverse())

    # When requested, delete descendants that have exactly one child, then
    # step down from the root if it is left with a single child.
    if args.no_intermediate_nodes:
        for node in tNCBI.get_descendants():
            if len(node.children) == 1:
                node.delete()
        if len(tNCBI.children) == 1:
            tNCBI = tNCBI.children[0]

    # Map taxid -> node for the final tree; nodes whose taxid appears in the
    # colour table receive a "bgcolor" feature.
    tax2node = {}
    for node in tNCBI.traverse():
        tax2node[node.taxid] = node
        if args.taxoncolors:
            if node.taxid in taxon2color:
                node.add_feature("bgcolor", taxon2color[node.taxid])