def genTaxTree(resolver, namesdict, logger, taxonomy=None, draw=False):
    """Return Phylo from TaxonNamesResolver class.

    Builds a Newick string from the resolver's classification data for the
    query names that are present in ``namesdict``, appends an ``outgroup``
    tip, and parses the result with ``Phylo.read``.

    Args:
        resolver: object whose ``retrieve(key)`` is called with
            'classification_path_ranks', 'query_name' and
            'classification_path'; the three results are indexed in parallel.
        namesdict: container of names (whitespace replaced by '_'); only
            membership is tested here.
        logger: object with a ``debug`` method; receives the list of
            unresolved names.
        taxonomy: optional sequence passed on to ``TaxDict``; when falsy a
            fixed value of 22 is used for the outgroup branch length.
        draw: if true, print an ASCII rendering of the tree.

    Returns:
        The tree object returned by ``Phylo.read``.
    """
    ranks = resolver.retrieve('classification_path_ranks')
    qnames = resolver.retrieve('query_name')
    lineages = resolver.retrieve('classification_path')
    # replace whitespace with '_' for taxon tree; the pattern is a raw string
    # because "\s" in a normal literal is an invalid escape sequence
    qnames = [re.sub(r"\s", "_", e) for e in qnames]
    resolved_names_bool = [e in namesdict for e in qnames]
    # keep only the entries whose query name was resolved (index-aligned with
    # qnames)
    ranks = [ranks[ei] for ei, e in enumerate(resolved_names_bool) if e]
    lineages = [lineages[ei] for ei, e in enumerate(resolved_names_bool) if e]
    # identify unresolved names
    unresolved_names = [name for name, ok in zip(qnames, resolved_names_bool)
                        if not ok]
    idents = [name for name, ok in zip(qnames, resolved_names_bool) if ok]
    # each name is preceded by a single space, exactly as the message was
    # built before
    statement = "Unresolved names: " + "".join(
        " " + each for each in unresolved_names)
    logger.debug(statement)
    # make taxdict
    taxdict = TaxDict(idents=idents, ranks=ranks, lineages=lineages,
                      taxonomy=taxonomy)
    # make treestring
    treestring = taxTree(taxdict)
    # outgroup branch length: len(taxonomy) + 1, or 22 when no taxonomy is
    # given (default_taxonomy + 1 in tnr)
    d = len(taxonomy) + 1 if taxonomy else 22
    # add outgroup; the last character of the tree string is dropped before
    # wrapping (presumably the ';' terminator - confirm against taxTree)
    treestring = '({0},outgroup:{1});'.format(treestring[:-1], float(d))
    tree = Phylo.read(StringIO(treestring), "newick")
    if draw:
        Phylo.draw_ascii(tree)
    return tree
# EXPLORE TAXDICT
# a dictionary for each ident with: 'lineage', 'taxref', 'ident', 'cident' and
# 'rank' (+ 'extra')
# NOTE(review): the key 'H**o sapiens' used below looks garbled (possibly
# 'Homo sapiens') - confirm against the names used to build taxdict before
# changing it.

# the lineage
taxdict['H**o sapiens']['lineage']  # N.B. not all lineages are named, ''

# the ident is the same format as lineage e.g. it could be an ID
taxdict['H**o sapiens']['ident']

# the 'cident' (Contextual Ident), the highest named taxonomic group unique to
# this ident among all other idents
taxdict['Arabidopsis thaliana']['cident']  # A. thaliana is the only plant

# the 'taxref', a holder of 'ident' and taxonomic position. Requires printing
# e.g. C. tytonis could only be resolved to the genus level (22/01/2015)
print(taxdict['Chlorotalpa tytonis']['taxref'])

# check the taxonomy
print(taxdict.taxonomy)

# check the hierarchy, a dictionary of taxrefs ranked and grouped in the form:
# {'rank':[([taxref1, taxref2, ....],'lineage1'),
#          ([taxref3, taxref4, ....],'lineage2'), ....]}
print(taxdict.hierarchy)

# we've also added an extra data slot
taxdict['H**o sapiens']['extra']

# CREATE TREE
# use the taxdict to create a Newick string
treestring = taxTree(taxdict)

# SAVE TREE
# the handle is named 'outfile' so the Python 2 builtin 'file' is not rebound
with open('example.tre', 'w') as outfile:
    outfile.write(treestring)