def add_taxon2(taxon):
    """Resolve *taxon* and store its ranked lineage in the local database.

    The name is looked up with ``Resolver``.  Every lineage entry with a
    non-empty rank is created as a ``Taxon`` whose parent is the previously
    handled entry, starting from a node named ``'root'`` (created if it does
    not exist yet).  When creation raises ``IntegrityError`` the existing
    ``Taxon`` with that name is fetched and used as the next parent instead.

    Args:
        taxon: Name to resolve.

    Returns:
        An error message string if the resolver call fails, otherwise
        ``None``.
    """
    try:
        resolver = Resolver(terms=[taxon])
        resolver.main()
    except Exception:
        # Deliberately broad: any resolver failure is reported as "not found".
        # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit still
        # propagate.
        return taxon + ' not found by Global Names Resolver.'

    # Debug output of the raw resolver state and the retrieved lineage.
    print(resolver.__dict__)
    taxon_list = resolver.retrieve('classification_path')[0]
    print(taxon_list)
    rank_list = resolver.retrieve('classification_path_ranks')[0]
    print(rank_list)

    try:
        node = Taxon.objects.get(name='root')
    except Taxon.DoesNotExist:
        node = Taxon.objects.create(name='root')

    # NOTE(review): assumes the lineage and rank lists are parallel and of
    # equal length -- confirm against the resolver's output.
    for name, rank in zip(taxon_list, rank_list):
        if rank == '':
            # Unranked lineage entries are skipped entirely.
            continue
        try:
            node = Taxon.objects.create(name=name, rank=rank, parent=node)
            print(name + ' added to local database.')
        except IntegrityError:
            # presumably a uniqueness constraint on the name; reuse the
            # stored row as the parent for the next level.
            node = Taxon.objects.get(name=name)
            print(name + ' not added to local database; already there.')
def add_taxon(taxon):
    """Add *taxon* and its lineage to the local database if it is missing.

    If a ``Taxon`` named *taxon* already exists nothing is changed.
    Otherwise the name is resolved with ``Resolver`` and each lineage entry
    is looked up by name; entries that do not exist are created with the
    previous lineage entry as parent (``None`` for the first entry).

    Args:
        taxon: Name to add.

    Returns:
        A message string if the taxon is already stored or the resolver
        call fails; otherwise the tuple ``(taxon_list, rank_list)`` that was
        retrieved from the resolver.
    """
    try:
        Taxon.objects.get(name=taxon)
    except Taxon.DoesNotExist:
        pass
    else:
        return taxon + ' is already in local database.'

    try:
        resolver = Resolver(terms=[taxon])
        resolver.main()
    except Exception:
        # Deliberately broad: any resolver failure is reported as "not found".
        # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit still
        # propagate.
        return taxon + ' not found by Global Names Resolver.'

    taxon_list = resolver.retrieve('classification_path')[0]
    rank_list = resolver.retrieve('classification_path_ranks')[0]

    # Track the parent explicitly.  The previous code indexed the result of
    # ``get()`` with ``[0]`` and never stored newly created nodes, so
    # ``parent=node`` could refer to an unbound or outdated node.
    parent = None
    # NOTE(review): assumes the lineage and rank lists are parallel and of
    # equal length -- confirm against the resolver's output.
    for name, rank in zip(taxon_list, rank_list):
        try:
            node = Taxon.objects.get(name=name)
        except Taxon.DoesNotExist:
            node = Taxon.objects.create(name=name, rank=rank, parent=parent)
        parent = node
    return (taxon_list, rank_list)
def run(wd=os.getcwd(), logger=logging.getLogger('')):
    """Run stage 1: resolve names and write the names dictionary and tree.

    Reads ``paradict.p`` and ``terms.p`` from ``<wd>/tempfiles``, resolves
    the terms against the "NCBI" datasource, builds the names dictionary,
    adds an outgroup and generates a taxonomic tree.  Results are written to
    ``<wd>/tempfiles`` (``namesdict.p``, ``allrankids.p``) and to
    ``<wd>/1_names`` (names dictionary and ``taxontree.tre`` in newick
    format).

    NOTE(review): the default for ``wd`` is evaluated once, when the function
    is defined, not on each call -- pass ``wd`` explicitly if the working
    directory may change.

    Args:
        wd: Working directory containing the ``tempfiles`` folder.
        logger: Logger used for progress messages and handed to the helpers.

    Raises:
        TooFewSpeciesError: If fewer than ``minspecies`` terms are given, or
            fewer than ``minspecies`` query names are held by the resolver
            after resolution.
    """
    # PRINT STAGE
    logger.info("Stage 1: Names resolution")

    # DIRS
    outdir = os.path.join(wd, '1_names')
    temp_dir = os.path.join(wd, 'tempfiles')
    if not os.path.isdir(outdir):
        os.mkdir(outdir)

    # INPUT
    # NOTE(review): pickle.load executes arbitrary code from the file; this
    # is only safe if these files are produced by the pipeline itself.
    with open(os.path.join(temp_dir, "paradict.p"), "rb") as file:
        paradict = pickle.load(file)
    with open(os.path.join(temp_dir, "terms.p"), "rb") as file:
        terms = pickle.load(file)

    # PARAMETERS
    outgroupid = paradict["outgroupid"]
    ntools.etools.Entrez.email = paradict["email"]
    minspecies = int(paradict["minspecies"])
    taxonomy = paradict["taxonomic_constraint"].split('-')
    ntools.logger = logger

    # PROCESS
    logger.info('Searching for taxids ....')
    logger.info('------TaxonNamesResolver:Start------')
    try:
        parentid = paradict["parentid"]
    except KeyError:
        # No parent id supplied: fall back to False instead of hiding every
        # possible error behind a bare except.
        parentid = False
    if len(terms) < minspecies:
        raise TooFewSpeciesError
    resolver = Resolver(terms=terms, datasource="NCBI", taxon_id=parentid,
                        logger=logger)
    resolver.main()
    if len(resolver.retrieve('query_name')) < minspecies:
        raise TooFewSpeciesError
    logger.info('------TaxonNamesResolver:End------')

    logger.info("Generating names dictionary ....")
    namesdict, allrankids, parentid = ntools.genNamesDict(
        resolver=resolver, parentid=parentid, logger=logger)

    logger.info("Finding an outgroup ....")
    namesdict = ntools.getOutgroup(namesdict=namesdict, parentid=parentid,
                                   outgroupid=outgroupid, logger=logger)
    # add outgroup ids to allrankids
    allrankids.extend(namesdict['outgroup']['txids'])

    logger.info('Generating taxonomic tree ....')
    taxontree = ntools.genTaxTree(resolver=resolver, namesdict=namesdict,
                                  taxonomy=taxonomy, logger=logger)

    # OUTPUT
    # remove temp TNR folder
    # NOTE(review): this path is relative to the current working directory,
    # not to ``wd`` -- confirm that is where the folder is created.
    shutil.rmtree("resolved_names")
    # write out changes to hidden pickled files
    with open(os.path.join(temp_dir, "namesdict.p"), "wb") as file:
        pickle.dump(namesdict, file)
    with open(os.path.join(temp_dir, "allrankids.p"), "wb") as file:
        pickle.dump(allrankids, file)
    # write namesdict as csv
    ntools.writeNamesDict(outdir, namesdict)
    # write taxon tree
    ntools.Phylo.write(taxontree, os.path.join(outdir, "taxontree.tre"),
                       "newick")

    # FINISH MESSAGE
    logger.info('Stage finished. Resolved [{0}] names including outgroup.'.
                format(len(namesdict.keys())))
# EXAMPLE NAMES terms = [ 'H**o sapiens', 'Gorilla gorilla', 'Pongo pongo', 'Macca mulatta', 'Mus musculus', 'Ailuropoda melanoleuca', 'Ailurus fulgens', 'Chlorotalpa tytonis', 'Arabidopsis thaliana', 'Bacillus subtilus' ] # RESOLVE # pass the terms, the datasource and the logger (optional) resolver = Resolver(terms=terms, datasource="NCBI", logger=logger) resolver.main() # resolve! # CREATE TAXDICT # extract the unique names for each term ('idents', query_name is best as it is # guaranteed to be unique) idents = resolver.retrieve('query_name') # extract the lists of names for all known parental taxonomic groups for each # term ('lineages', e.g. Homo, Primate, Mammalia) lineages = resolver.retrieve('classification_path') # for Taxonomic IDs instead of names, use: # lineages = resolver.retrieve('classification_path_ids') # extract the lists of corresponding rank names for 'lineages' ('ranks', e.g. # species, genus etc.) for each entity ranks = resolver.retrieve('classification_path_ranks') # optional extra data slots are also possible, for example a list of 1s and 0s # it could be anything, just as long as it's in the same order extra = [1, 1, 1, 0, 0, 1, 1, 0, 1, 0] # create a taxonomy specifying the names and order of 'ranks'. N.B. this is the # default and is based on NCBI's taxonomy. taxonomy = [ 'subspecies', 'species', 'subgenus', 'genus', 'tribe', 'subfamily',
from taxon_names_resolver import taxTree # EXAMPLE NAMES terms = ['H**o sapiens', 'Gorilla gorilla', 'Pongo pongo', 'Macca mulatta', 'Mus musculus', 'Ailuropoda melanoleuca', 'Ailurus fulgens', 'Chlorotalpa tytonis', 'Arabidopsis thaliana', 'Bacillus subtilus'] # RESOLVE # pass the terms, the datasource and the logger (optional) resolver = Resolver(terms=terms, datasource="NCBI", logger=logger) resolver.main() # resolve! # CREATE TAXDICT # extract the unique names for each term ('idents', query_name is best as it is # guaranteed to be unique) idents = resolver.retrieve('query_name') # extract the lists of names for all known parental taxonomic groups for each # term ('lineages', e.g. Homo, Primate, Mammalia) lineages = resolver.retrieve('classification_path') # for Taxonomic IDs instead of names, use: # lineages = resolver.retrieve('classification_path_ids') # extract the lists of corresponding rank names for 'lineages' ('ranks', e.g. # species, genus etc.) for each entity ranks = resolver.retrieve('classification_path_ranks') # optional extra data slots are also possible, for example a list of 1s and 0s # it could be anything, just as long as it's in the same order extra = [1, 1, 1, 0, 0, 1, 1, 0, 1, 0] # create a taxonomy specifying the names and order of 'ranks'. N.B. this is the # default and is based on NCBI's taxonomy. taxonomy = ['subspecies', 'species', 'subgenus', 'genus', 'tribe', 'subfamily', 'family', 'superfamily', 'parvorder', 'infraorder', 'suborder',