def name_ancestors(timetreefile, to_table=False, ete3_algo=False, uniq=True):
    """Label the ancestors of a time tree with NCBI taxonomy and print it.

    The tree is loaded from ``timetreefile`` (newick ``format=1`` with quoted
    node names). Leaf names are translated to NCBI taxids after replacing
    underscores with spaces; leaves whose name has no translation are logged
    with a warning and removed from the tree. Internal nodes are then
    annotated and ``matchrename_ncbitax`` is applied to the whole tree.

    :param timetreefile: tree source handed as-is to ``PhyloTree``.
    :param to_table: when false (default) print the tree as newick; when
        true print one ``oldname<TAB>sci_name`` row per internal node.
    :param ete3_algo: when true annotate with ``NCBITaxa.annotate_tree``
        (reading the ``taxid`` feature), otherwise with ``myannotate``.
    :param uniq: forwarded unchanged to ``matchrename_ncbitax``.
    :returns: None; the result is written to standard output.
    """
    logger.info('Loading data')
    # /!\ the `quoted_node_names` option only exists from ete3 v3.1.1.
    timetree = PhyloTree(timetreefile, format=1, quoted_node_names=True)
    ncbi = NCBITaxa()

    species_queries = [label.replace('_', ' ')
                       for label in timetree.get_leaf_names()]
    name2taxid = ncbi.get_name_translator(species_queries)

    for leaf in timetree.get_leaves():
        try:
            # The translator maps a name to a list of taxids; keep the first.
            taxid = name2taxid[leaf.name.replace('_', ' ')][0]
        except KeyError:
            logger.warning('Species %r not found', leaf.name)
            leaf.delete(prevent_nondicotomic=True, preserve_branch_length=True)
        else:
            leaf.add_feature('taxid', taxid)

    logger.info('Placing common ancestors')
    if ete3_algo:
        ncbi.annotate_tree(timetree, 'taxid')
    else:
        myannotate(timetree, ncbi)

    matchrename_ncbitax(timetree, uniq)

    if to_table:
        for node in timetree.traverse():
            if node.is_leaf():
                continue
            # `oldname` is not set in this function; presumably it is
            # attached by matchrename_ncbitax -- verify against that helper.
            print(node.oldname + '\t' + getattr(node, 'sci_name', ''))
    else:
        print(timetree.write(format=1, format_root_node=True))
def plot_taxids(taxids_list, tree_png, tree_nw, tax_db=None):
    """Draw the NCBI topology spanning ``taxids_list`` and export it.

    :param taxids_list: taxids passed to ``NCBITaxa.get_topology``.
    :param tree_png: output path given to ``tree.render``.
    :param tree_nw: output path for the newick (``format=1``) export.
    :param tax_db: optional taxonomy database file; when ``None`` the
        ``NCBITaxa`` default is used.
    :returns: None; two files are written as a side effect.
    """
    ncbi = NCBITaxa() if tax_db is None else NCBITaxa(dbfile=tax_db)

    tree = ncbi.get_topology(taxids_list)
    ts = TreeStyle()
    # NOTE(review): the taxid is read from the "sci_name" feature here,
    # which looks unusual for a taxid attribute -- confirm this is intended.
    ncbi.annotate_tree(tree, taxid_attr="sci_name")

    # Circular drawing; leaf labels are left to the custom `layout` function.
    ts.show_leaf_name = False
    ts.mode = "c"
    ts.layout_fn = layout

    tree.render(tree_png, tree_style=ts)
    tree.write(format=1, outfile=tree_nw)
def annotate_ncbi_taxa(self, taxid_attr='species', tax2name=None,
                       tax2track=None, tax2rank=None, dbfile=None):
    """Add NCBI taxonomy annotation to all descendant nodes.

    This is a thin wrapper: it opens an ``NCBITaxa`` handle on ``dbfile``
    and delegates to ``NCBITaxa.annotate_tree`` with this tree.

    Leaf nodes are expected to carry a feature (``species`` by default)
    encoding a valid taxid number. All descendant nodes, internal ones
    included, are documented as receiving these features:

    `Node.spname`: scientific species name as encoded in the NCBI
    taxonomy database

    `Node.named_lineage`: the NCBI lineage track using scientific names

    `Node.taxid`: NCBI taxid number

    `Node.lineage`: same as named_lineage but using taxid codes.

    NOTE(review): the features are set by ``NCBITaxa.annotate_tree``, which
    is not visible here -- confirm the exact feature names against it
    (in particular ``spname`` versus ``sci_name``).

    For internal nodes, NCBI information refers to the first common
    lineage of the grouped species.

    :param name taxid_attr: name of the feature used to read the taxid
      number associated with each node.

    :param None tax2name: optional dictionary mapping taxid numbers to
      NCBI scientific names. Supplying it avoids database queries when
      annotating many trees that share the same set of taxids.

    :param None tax2track: optional dictionary mapping taxid numbers to
      NCBI lineage tracks (taxids). Supplying it avoids database queries
      when annotating many trees that share the same set of taxids.

    :param None tax2rank: optional dictionary mapping taxid numbers to
      NCBI rank names. Supplying it avoids database queries when
      annotating many trees that share the same set of taxids.

    :param None dbfile: if provided, this file is used as the local copy
      of the NCBI taxonomy database.

    :returns: the value returned by ``NCBITaxa.annotate_tree``, documented
      as tax2name (taxid -> scientific name), tax2lineage (taxid -> NCBI
      lineage track) and tax2rank (taxid -> rank name).
    """
    taxonomy = NCBITaxa(dbfile=dbfile)
    # Caller-supplied lookup tables are forwarded untouched.
    cached_tables = dict(tax2name=tax2name,
                         tax2track=tax2track,
                         tax2rank=tax2rank)
    return taxonomy.annotate_tree(self, taxid_attr=taxid_attr, **cached_tables)
# Fragment: collect a set of taxids, build the NCBI topology for them, then
# index (and optionally colour) its nodes.
# NOTE(review): this code is flattened onto a single line, so the original
#   block nesting cannot be recovered from here -- confirm against the
#   upstream file before reformatting.
# Steps, in statement order:
#   * `if newick:` loads a PhyloTree from args.newick and reads `infile` as
#     whitespace-separated columns (column 1 -> column 2) into species2taxid;
#     `taxids` is the set of its values.
#   * `else:` creates an NCBITaxa handle and takes `taxids` from the stripped
#     lines of `infile`.
#   * args.taxoncolors, when set, is read as "<int key> <value>" per line.
#   * The topology is built with intermediate nodes, narrowed to the node
#     named "2759" (`[0]` raises IndexError if no such node exists;
#     presumably 2759 is a fixed NCBI taxid -- confirm), and annotated using
#     each node's `name` as the taxid.
#   * With args.no_intermediate_nodes, descendants that have exactly one
#     child are deleted and a single-child root is replaced by that child.
#   * tax2node maps node.taxid -> node; nodes whose taxid is in taxon2color
#     get a "bgcolor" feature.
# NOTE(review): `ncbi` is only assigned in the `else` branch as shown, yet it
#   is used afterwards -- presumably it is also defined earlier; verify.
# NOTE(review): the files are opened inline and never explicitly closed.
if newick: t = PhyloTree(args.newick) species2taxid = dict([ line.split()[0], line.strip().split()[1] ] for line in open(infile)) taxids = set(species2taxid.values()) else: ncbi = NCBITaxa() taxids = set([ line.strip() for line in open(infile) ]) if args.taxoncolors: taxon2color = dict([int(line.split()[0]), line.split()[1]] for line in open(args.taxoncolors)) tNCBI = ncbi.get_topology(taxids, intermediate_nodes=True) tNCBI = tNCBI.search_nodes(name="2759")[0] ncbi.annotate_tree(tNCBI, taxid_attr="name") tax2node = dict([node.taxid, node] for node in tNCBI.traverse()) if args.no_intermediate_nodes: for node in tNCBI.get_descendants(): if len(node.children) == 1: node.delete() if len(tNCBI.children) == 1: tNCBI = tNCBI.children[0] tax2node = {} for node in tNCBI.traverse(): tax2node[node.taxid] = node if args.taxoncolors: if node.taxid in taxon2color: node.add_feature("bgcolor", taxon2color[node.taxid])
# Fragment that begins inside the body of an `if` whose header is not
# visible here (an `else:` follows the first two statements).
# NOTE(review): flattened onto a single line, so the original nesting cannot
#   be recovered from this text -- confirm against the upstream file.
# In statement order: `infile` is read either as two whitespace-separated
# columns (species2taxid, whose values form `taxids`) or as one taxid per
# line; the NCBI topology for `taxids` is built with intermediate nodes,
# narrowed to the node named "2759" and annotated using `name` as the taxid;
# single-child nodes are removed when args.no_intermediate_nodes is set;
# tax2node maps node.taxid -> node and nodes found in taxon2color receive a
# "bgcolor" feature.
# NOTE(review): `ncbi` is only assigned in the `else` branch as shown but is
#   used afterwards; the inline `open(...)` handles are never closed.
species2taxid = dict( [line.split()[0], line.strip().split()[1]] for line in open(infile)) taxids = set(species2taxid.values()) else: ncbi = NCBITaxa() taxids = set([line.strip() for line in open(infile)]) if args.taxoncolors: taxon2color = dict( [int(line.split()[0]), line.split()[1]] for line in open(args.taxoncolors)) tNCBI = ncbi.get_topology(taxids, intermediate_nodes=True) tNCBI = tNCBI.search_nodes(name="2759")[0] ncbi.annotate_tree(tNCBI, taxid_attr="name") tax2node = dict([node.taxid, node] for node in tNCBI.traverse()) if args.no_intermediate_nodes: for node in tNCBI.get_descendants(): if len(node.children) == 1: node.delete() if len(tNCBI.children) == 1: tNCBI = tNCBI.children[0] tax2node = {} for node in tNCBI.traverse(): tax2node[node.taxid] = node if args.taxoncolors: if node.taxid in taxon2color: node.add_feature("bgcolor", taxon2color[node.taxid])