def get_desired_ranks(taxid, desired_ranks):
    """Return the lineage taxid found at each of the requested ranks.

    Args:
        taxid: Taxid whose lineage is looked up through ``NCBITaxa``.
        desired_ranks: Iterable of rank names (for example ``'family'``).

    Returns:
        A list parallel to ``desired_ranks``. Each entry is the taxid of the
        lineage member carrying that rank, or the string ``'0'`` when no
        member of the lineage has that rank.
    """
    ncbi = NCBITaxa()
    lineage = ncbi.get_lineage(taxid)
    # get_rank() is queried with the lineage itself; translating the ids to
    # names first would only cost an extra database round-trip.
    lineage2ranks = ncbi.get_rank(lineage)
    # Invert taxid -> rank into rank -> taxid. When several lineage members
    # share a rank, the one iterated last wins.
    ranks2lineage = dict(
        (rank, member) for (member, rank) in lineage2ranks.items())
    return [ranks2lineage.get(rank, '0') for rank in desired_ranks]
def annotate_ncbi_taxa(self, taxid_attr='species', tax2name=None,
                       tax2track=None, tax2rank=None, dbfile=None):
    """Add NCBI taxonomy annotation to all descendant nodes.

    Leaf nodes are expected to carry a feature (``species`` by default)
    holding a valid taxid number. All descendant nodes, internal ones
    included, are annotated with the following new features:

    `Node.spname`: scientific species name as encoded in the NCBI taxonomy
    database

    `Node.named_lineage`: the NCBI lineage track using scientific names

    `Node.taxid`: NCBI taxid number

    `Node.lineage`: same as named_lineage but using taxid codes.

    For internal nodes, the NCBI information refers to the first common
    lineage of the grouped species.

    :param name taxid_attr: name of the feature used to read the taxid
      number associated with each node.

    :param None tax2name: dictionary whose keys are taxid numbers and whose
      values are their NCBI scientific names. Optional; lets callers avoid
      database queries when annotating many trees that share the same set
      of taxids.

    :param None tax2track: dictionary whose keys are taxid numbers and whose
      values are their NCBI lineage tracks (taxids). Optional, same purpose
      as ``tax2name``.

    :param None tax2rank: dictionary whose keys are taxid numbers and whose
      values are their NCBI rank names. Optional, same purpose as
      ``tax2name``.

    :param None dbfile: if provided, this file is used as the local copy of
      the NCBI taxonomy database.

    :returns: tax2name (taxid numbers -> scientific name), tax2lineage
      (taxid numbers -> NCBI lineage track) and tax2rank (taxid numbers ->
      rank names).
    """
    taxonomy_db = NCBITaxa(dbfile=dbfile)
    annotation = taxonomy_db.annotate_tree(
        self,
        taxid_attr=taxid_attr,
        tax2name=tax2name,
        tax2track=tax2track,
        tax2rank=tax2rank)
    return annotation
def get_rank(taxid, rank_level):
    """Return the lineage member of ``taxid`` whose rank is ``rank_level``.

    The lineage of ``taxid`` is fetched through ``NCBITaxa`` and the rank of
    every member is looked up. The first member (in the iteration order of
    the rank mapping) whose rank equals ``rank_level`` is returned; when no
    member matches, the literal string ``"None"`` is returned.
    """
    taxonomy_db = NCBITaxa()
    rank_by_taxid = taxonomy_db.get_rank(taxonomy_db.get_lineage(taxid))
    matching = (member for member, level in rank_by_taxid.items()
                if level == rank_level)
    return next(matching, "None")
def get_family(taxid):
    """Return the lineage member of ``taxid`` that has rank ``'family'``.

    The lineage and the rank of each of its members are obtained from
    ``NCBITaxa``. When no member has rank ``'family'``, the literal string
    ``"None"`` is returned.
    """
    taxonomy_db = NCBITaxa()
    rank_by_taxid = taxonomy_db.get_rank(taxonomy_db.get_lineage(taxid))
    for member, level in rank_by_taxid.items():
        if level == 'family':
            return member
    return "None"
def get_rank(self, rank_level):
    """Return the member of this object's lineage ranked ``rank_level``.

    The lineage of ``self.tax_id`` is fetched through ``NCBITaxa`` together
    with the rank of each member. The first member whose rank equals
    ``rank_level`` is returned; the literal string ``"N/A"`` is returned
    when none matches.
    """
    taxonomy_db = NCBITaxa()
    own_lineage = taxonomy_db.get_lineage(self.tax_id)
    rank_by_taxid = taxonomy_db.get_rank(own_lineage)
    matching = (member for member, level in rank_by_taxid.items()
                if level == rank_level)
    return next(matching, "N/A")
def getNcbiTaxonomy():
    """Write the named NCBI lineage of each organism in ORGANISM_NAMES_LIST.

    Every organism name is translated to its taxid(s). For each taxid, one
    line is written to OUTPUT_FILE: the names of all lineage members joined
    by tabs. Names the translator could not resolve produce no output line.
    """
    ncbi = NCBITaxa()
    nameToTaxIdList = ncbi.get_name_translator(ORGANISM_NAMES_LIST)
    with open(OUTPUT_FILE, "w") as outputFile:
        for name in ORGANISM_NAMES_LIST:
            # The translator result only has entries for names it resolved,
            # so use .get() to skip the others instead of raising KeyError.
            for eachId in nameToTaxIdList.get(name, ()):
                lineage = ncbi.get_lineage(str(eachId))
                names = ncbi.get_taxid_translator(lineage)
                outputFile.write(
                    "\t".join([names[taxid] for taxid in lineage]) + "\n")
def parseVSearchOutputAgainstNCBI(vsearch_out, database, output_file,
                                  min_coverage, min_similarity):
    """Resolve vsearch matches to taxonomic names from the NCBI taxonomy.

    Reads a vsearch output file, keeps the rows that pass the thresholds,
    looks up the taxid of each matched id in the ``gi_taxid`` table of
    ``database`` and writes the row, extended with a
    ``phylum:class:order:family:genus:species`` string, to ``output_file``.
    Ids without an entry in ``gi_taxid`` are reported through
    ``printErrorMissingID``.

    :param vsearch_out: An output file from vsearch's usearch_global
                        program. Column 1 is used as the matched id, column
                        2 is compared with ``min_similarity`` and column 4
                        with ``min_coverage`` (0-based, whitespace split).
    :param database: Path of the SQLite database holding the ``gi_taxid``
                     table.
    :param output_file: Where to write the resulting file that maps sequence
                        ID to taxonomic name.
    :param min_coverage: The minimum coverage for an acceptable vsearch
                         match.
    :param min_similarity: The minimum similarity for an acceptable vsearch
                           match.
    """
    min_simm = float(min_similarity)
    min_coverage = float(min_coverage)
    ncbi = NCBITaxa()
    # The id comes straight from the input file, so it is passed as a bound
    # parameter rather than interpolated into the SQL text.
    query = "select taxid from gi_taxid where gi=?"

    def giKey(rawId):
        # Bind numeric ids as integers so the comparison stays numeric
        # whatever the declared type of the gi column is.
        try:
            return int(rawId)
        except ValueError:
            return rawId

    def getTaxFromId(taxId,
                     taxonomy=("species", "genus", 'family', 'order', 'class',
                               'phylum')):
        # Build "phylum:...:species" for taxId; ranks that do not occur in
        # the lineage stay empty.
        myTaxonomy = dict((a, "") for a in taxonomy)
        lineage = ncbi.get_lineage(int(taxId))
        # One rank query and one name query for the whole lineage instead of
        # two queries per lineage member.
        ranks = ncbi.get_rank(lineage)
        names = ncbi.get_taxid_translator(lineage)
        for lin in lineage:
            rank = ranks.get(lin)
            if rank in taxonomy:
                myTaxonomy[rank] = names[lin]
        return ":".join([myTaxonomy[x] for x in taxonomy[::-1]])

    conn = sqlite3.connect(database)
    try:
        c = conn.cursor()
        with open(output_file, 'w') as out, open(vsearch_out, 'r') as hits:
            for line in hits:
                data = line.split()
                # NOTE(review): a row is accepted when EITHER threshold is
                # exceeded; the parameter descriptions suggest both may have
                # been intended -- confirm before changing.
                if float(data[4]) > min_coverage or float(data[2]) > min_simm:
                    hit = c.execute(query, (giKey(data[1]),)).fetchone()
                    if hit:
                        taxonomy = getTaxFromId(hit[0])
                        data.append(taxonomy)
                        printVerbose("\t".join(data))
                        out.write("\t".join(data))
                        out.write("\n")
                    else:
                        printErrorMissingID(out, data[1])
    finally:
        # Release the database handle even when a row fails to parse.
        conn.close()
def __init__(self, data_path, workbench=None, genomes=None, taxDb=None):
    """Set up the data directory layout and the taxonomy handle.

    :param data_path: base directory; a ``metadata`` sub-directory is
        created inside it when it does not exist yet.
    :param workbench: stored unchanged on ``self.workbench``.
    :param genomes: list stored on ``self.genomes``. When omitted, each
        instance gets its own new empty list.
    :param taxDb: taxonomy database object to use; when falsy, a new
        ``NCBITaxa()`` is created.
    """
    self.data_path = data_path
    self.workbench = workbench
    self.metadata_path = pjoin(self.data_path, "metadata")
    if not os.path.exists(self.metadata_path):
        os.makedirs(self.metadata_path)
    self.metadata_file = pjoin(self.metadata_path, "metadata.csv")
    self.taxDb = taxDb if taxDb else NCBITaxa()
    # A list literal as the default would be one shared object mutated by
    # every instance, so the default is created per call. A list passed by
    # the caller is stored as-is (no copy).
    self.genomes = [] if genomes is None else genomes
def ncbi_compare(self, autodetect_duplications=True, cached_content=None):
    """Run NCBITaxa's broken-branch check on this tree.

    When the number of distinct ``species`` values among
    ``cached_content[self]`` differs from the number of entries, the species
    set is printed and the tree is first split with
    ``get_speciation_trees``; otherwise the tree itself is the only target.
    ``NCBITaxa.get_broken_branches`` is then called on every target tree.
    Nothing is returned.

    :param autodetect_duplications: forwarded to ``get_speciation_trees``.
    :param cached_content: content cache to use; computed with
        ``get_cached_content()`` when not given (or empty).
    """
    if not cached_content:
        cached_content = self.get_cached_content()
    own_content = cached_content[self]
    cached_species = set(node.species for node in own_content)

    if len(cached_species) == len(own_content):
        target_trees = [self]
    else:
        print(cached_species)
        ntrees, ndups, target_trees = self.get_speciation_trees(
            autodetect_duplications=autodetect_duplications,
            map_features=["taxid"])

    ncbi = NCBITaxa()
    for subtree in target_trees:
        ncbi.get_broken_branches(subtree, cached_content)
def check_taxa_db_age(dbLocation):
    """Refresh the NCBITaxa database when it is missing or over 30 days old.

    :param dbLocation: path of the local taxonomy database file.
    :returns: a log message describing what was done.
    """
    ncbi = NCBITaxa()
    try:
        filetime = datetime.fromtimestamp(path.getctime(dbLocation))
    except OSError:
        # The file does not exist (or cannot be stat'ed): run the update,
        # which creates it. Only this lookup is guarded, so a failing
        # update is reported to the caller instead of being retried blindly.
        logInfo = "<> NCBITaxa Database didn't exist, downloaded it <>"
        ncbi.update_taxonomy_database()
        return logInfo

    one_month_ago = datetime.now() - timedelta(days=30)
    if filetime < one_month_ago:
        # File older than 1 month, update it:
        logInfo = '<> NCBITaxa Database older than 1 month, updating it <>'
        ncbi.update_taxonomy_database()
    else:
        logInfo = '<> NCBITaxa Database up to date <>'
    return logInfo
def main():
    """Write one annotated line per CAZy FASTA record.

    For every record, the accession and family are taken from the first two
    ``|``-separated header fields. The output line holds the header, the
    family, the family's HMM length (1000 when the family is unknown) and
    the dot-joined taxon path of the accession's taxid (``unclassified``
    when a lookup raises ``KeyError``), separated by tabs.
    """
    args = parser.parse_args()
    cazy_fp, p2taxid_fp, output_fp, len_fp = (
        args.cazy_fp, args.p2taxid_fp, args.output_fp, args.len_fp)
    ncbi = NCBITaxa()

    # NCBI protein accession -> taxid, kept in memory as a dict
    prot_to_taxid = read_NCBI_prot_to_taxid_gz(p2taxid_fp)
    # family -> HMM length
    hmm_len = read_hmm_len_fp(len_fp)
    # iterator over the CAZy FASTA records
    records = fasta_iter(cazy_fp)

    row_template = '{0}\t{1}\t{2}\t{3}\n'
    with open(output_fp, 'w') as out:
        for header, seq in records:
            fields = header.split('|')
            acc, fam = fields[0], fields[1]

            try:
                gene_length = hmm_len[fam]
            except KeyError:
                gene_length = 1000

            try:
                taxonomy = '.'.join(get_taxon_path(prot_to_taxid[acc], ncbi))
            except KeyError:
                taxonomy = 'unclassified'

            out.write(row_template.format(header, fam, gene_length, taxonomy))
def blast2summary_dict(db, blastpath):
    """Merge the rows of a tabular BLAST output with their taxonomy info.

    All subject sequence ids are resolved to taxids with a single
    ``get_taxid`` call; the taxonomy of each taxid is then looked up through
    ``NCBITaxa`` and merged into the corresponding BLAST row.

    :param db: passed to ``get_taxid`` together with the subject ids.
    :param blastpath: path of the tab-separated BLAST output file.
    :returns: the result of ``dictmap`` over a taxid -> row mapping.

    NOTE(review): the intermediate mapping is keyed by taxid, so when
    several rows resolve to the same taxid only the last one is kept.
    """
    fieldnames = [
        'qseqid', 'sseqid', 'pid', 'alnlen', 'gapopen', 'qstart', 'qend',
        'sstart', 'send', 'evalue', 'bitscore'
    ]
    # Read everything up front so the file can be closed right away.
    with open(blastpath) as blast_file:
        rows = list(
            csv.DictReader(blast_file, delimiter='\t', fieldnames=fieldnames))
    seqids = list(map(get('sseqid'), rows))
    taxids = get_taxid(db, seqids)
    # The GI is the second '|'-separated field of the subject id.
    gis = (s.split('|')[1] for s in seqids)
    matches = dict(
        (taxids[gi], row) for gi, row in zip(gis, rows) if gi in taxids)
    # downloads database and creates SQLite database if needed
    ncbi = NCBITaxa()
    return dictmap(lambda tid, row: merge(row, taxonomy(ncbi, tid)), matches)
def get_desired_ranks(taxid, desired_ranks):
    """Get the parent taxids of a taxon node at the requested ranks.

    Args:
        taxid: Taxon node whose parent taxid at the given levels needs to be
            determined.
        desired_ranks: Rank names (taxon levels) at which the taxid must be
            determined.

    Returns:
        List of taxids, one per entry of ``desired_ranks``; the string
        ``'0'`` stands in for a rank that does not occur in the lineage.
    """
    ncbi = NCBITaxa()
    lineage = ncbi.get_lineage(taxid)
    # get_rank() is queried with the lineage itself; translating the ids to
    # names first would only cost an extra database round-trip.
    lineage2ranks = ncbi.get_rank(lineage)
    # Invert taxid -> rank into rank -> taxid. When several lineage members
    # share a rank, the one iterated last wins.
    ranks2lineage = dict(
        (rank, member) for (member, rank) in lineage2ranks.items())
    return [ranks2lineage.get(rank, '0') for rank in desired_ranks]
def blast2summary_dict(db, blastpath, ete2_db):
    """Merge the rows of a tabular BLAST output with their taxonomy info.

    All subject sequence ids are resolved to taxids with a single
    ``get_taxid`` call. Each row whose GI has a taxid gets a ``taxid`` entry
    and is merged with the taxonomy looked up through ``NCBITaxa``; the
    merged rows are finally restricted to the keys in ``csv_fields``.

    :param db: passed to ``get_taxid`` together with the subject ids.
    :param blastpath: path of the tab-separated BLAST output file, read with
        the column names in ``blast_columns``.
    :param ete2_db: passed to ``NCBITaxa`` as its database argument.
    :returns: a lazy iterator (``imap``) over the filtered row dictionaries.
    :raises ValueError: while iterating the rows, when a subject id has no
        ``|``-separated GI field.
    """
    # Read everything up front so the file can be closed right away.
    with open(blastpath) as blast_file:
        rows = list(
            csv.DictReader(blast_file, delimiter='\t',
                           fieldnames=blast_columns))
    seqids = list(map(get('sseqid'), rows))
    taxids = get_taxid(db, seqids)

    def get_gi(s):
        # The GI is the second '|'-separated field of the subject id.
        fields = s.split('|')
        if len(fields) > 1:
            return fields[1]
        else:
            raise ValueError("Seq ID %s is missing GI fields and '|'" % s)

    gis = imap(get_gi, seqids)
    # downloads database and creates SQLite database if needed
    ncbi = NCBITaxa(ete2_db)
    # Rows are kept as a list (not a dict keyed by taxid) so that several
    # hits sharing one taxid are all preserved.
    matches = [
        assoc(row, 'taxid', taxids[gi]) for gi, row in zip(gis, rows)
        if gi in taxids
    ]
    items = [merge(row1, taxonomy(ncbi, row1['taxid'])) for row1 in matches]
    res = imap(partial(keyfilter, csv_fields.__contains__), items)
    return res
# Third component of every HSV triple in `hsv`.
val = [color[2] for color in hsv]
# np.lexsort uses the LAST key as the primary one: order by hue, then
# saturation, then value.
ind = np.lexsort((val, sat, hue))
sorted_colors = [colors_[i] for i in ind]
# Keep only the colour of each (name, colour) pair.
colors_final = []
for i, (name, color) in enumerate(sorted_colors):
    colors_final.append(color)
import random
# Shuffle in place; no seed is set in the visible code.
random.shuffle(colors_final)
colors_mapping = {}
# set up NCBI database
from ete2 import NCBITaxa
# Opens the taxonomy database at a fixed absolute path.
ncbi = NCBITaxa('/dfs/scratch0/manans/.etetoolkit/taxa.sqlite')
# read .nemb file
EMBEDDING_FILE = 'emb/n2v-avg.nemb'
histograms = []
species_ids = []
# Each line is split on single spaces: the first field is appended to
# species_ids, the remaining fields are parsed as floats into one vector.
with open(EMBEDDING_FILE, 'r') as tf:
    for line in tf:
        ls = line.split(' ')
        species_ids.append(ls[0])
        v = []
        for n in ls[1:]:
            v.append(float(n))
        histograms.append(v)
from pandas import DataFrame
from Bio import SeqIO
from pandas import Index
from ete2 import NCBITaxa

# Fixed input/output locations for this one-off import script.
data_path = "/home/moritz/people/MoreData/genomes/img_od1s"
img_fasta = "/home/moritz/people/MoreData/raw_imgs/od1s.fasta"
img_xls = "/home/moritz/people/MoreData/raw_imgs/od1s.xls"
# Prefix used to build the per-genome keys of `metadata` and `seq_dict`.
name = "parcu_from_img_"
taxDb = NCBITaxa()
# Tab-separated table; the code below reads its 'Genome ID' and 'Genome'
# columns.
contigs = DataFrame.from_csv(img_xls, sep="\t", header=0, index_col=0)
# First taxid of the first entry returned for 'Candidatus Parcubacteria';
# the same taxid is assigned to every genome below.
manual_taxo = taxDb.get_name_translator(['Candidatus Parcubacteria'
                                         ]).values()[0][0]
# One metadata record per distinct genome id.
metadata = {
    name + str(g): {
        'IMG_ID': g,
        'name': name + str(g),
        'species_taxid': manual_taxo,
        'long_name': contigs.loc[contigs['Genome ID'] == g]['Genome'].iloc[0]
    }
    for g in set(contigs['Genome ID'])
}
seq_dict = {k: [] for k in metadata}
# The i-th FASTA record is filed under the genome id found in the i-th row
# of `contigs` -- assumes both files list contigs in the same order; TODO
# confirm.
with open(img_fasta, "r") as file:
    for i, c in enumerate(SeqIO.parse(file, "fasta")):
        seq_dict[name + str(contigs.iloc[i]['Genome ID'])] += [c]
import sys
import os
import urllib2
import gzip
import biom
import shutil
from numpy import random as np_rand
from ete2 import NCBITaxa
from scripts.loggingwrapper import LoggingWrapper as logger
# Python 3 module name first, Python 2 name as fallback.
try:
    from configparser import ConfigParser
except ImportError:
    from ConfigParser import ConfigParser

# Taxonomy handle created at import time and shared by the module.
ncbi = NCBITaxa()
# Rank names ordered from species up to superkingdom.
RANKS = ['species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom']
# NOTE(review): presumably the highest rank considered by later code --
# its use is not visible here.
MAX_RANK = 'family'
# Module-level logger slot, unset at import time.
_log = None

# The string below is a bare module-level expression, not the docstring of
# read_taxonomic_profile, although it describes that function.
""" Reads a BIOM file and creates map of OTU: lineage, abundance BIOM file format needs to have a taxonomy field in metadata which contains the taxonomy in the format: RANK__SCINAME; LOWERRANK_LOWERSCINAME """
def read_taxonomic_profile(biom_profile, config, no_samples = None):
    # NOTE(review): only the first statements of this function are visible
    # in this excerpt; its remainder and return value are not shown.
    table = biom.load_table(biom_profile)
    ids = table.ids(axis="observation")
    samples = table.ids()
    # Default to the number of ids returned by table.ids().
    if no_samples is None:
        no_samples = len(samples)
def run(args):
    """Resolve the search terms and dump their NCBI taxonomy or info table.

    Every entry of ``args.search`` that parses as an integer is taken as a
    taxid; the others are treated as names and translated through
    ``NCBITaxa``. With ``args.fuzzy`` set, names that were not found are
    retried with fuzzy matching. Then either the taxonomy tree of all
    collected taxids is dumped (``args.taxonomy``, the default when neither
    mode is selected) or a tab-separated info table is printed
    (``args.info``).

    Logs an error and exits with status -1 when no search term is given.
    """
    # add lineage profiles/stats
    import re
    from ete2 import PhyloTree, NCBITaxa

    if not (args.taxonomy or args.info):
        args.taxonomy = True

    ncbi = NCBITaxa()
    all_taxids = {}
    all_names = set()
    queries = []

    if not args.search:
        log.error('Search terms should be provided (i.e. --search) ')
        sys.exit(-1)

    for term in args.search:
        queries.append(term)
        try:
            all_taxids[int(term)] = None
        except ValueError:
            all_names.add(term.strip())

    # translate names
    name2tax = ncbi.get_name_translator(all_names)
    for translated in name2tax.values():
        all_taxids[translated] = None

    not_found_names = all_names - set(name2tax)
    if args.fuzzy and not_found_names:
        log.warn("%s unknown names", len(not_found_names))
        for name in not_found_names:
            # enable extension loading
            tax, realname, sim = ncbi.get_fuzzy_name_translation(
                name, args.fuzzy)
            if tax:
                all_taxids[tax] = None
                name2tax[name] = tax
                # NOTE(review): name2realname and name2score are not defined
                # in this function -- presumably module-level dicts; verify.
                name2realname[name] = realname
                name2score[name] = "Fuzzy:%0.2f" % sim

    if args.taxonomy:
        log.info("Dumping NCBI taxonomy of %d taxa..." % (len(all_taxids)))
        t = ncbi.get_topology(all_taxids.keys(),
                              intermediate_nodes=args.full_lineage,
                              rank_limit=args.rank_limit,
                              collapse_subspecies=args.collapse_subspecies)
        id2name = ncbi.get_taxid_translator(
            [node.name for node in t.traverse()])
        for node in t.traverse():
            # The taxid must be saved before the node name is rewritten.
            node.add_features(taxid=node.name)
            sci_name = str(id2name.get(int(node.name), "?"))
            node.add_features(sci_name=sci_name)
            node.name = "%s - %s" % (id2name.get(int(node.name), node.name),
                                     node.name)
            lineage = ncbi.get_lineage(node.taxid)
            named_lineage = '|'.join(ncbi.translate_to_names(lineage))
            node.add_features(named_lineage=named_lineage)
        dump(t,
             features=[
                 "taxid", "name", "rank", "bgcolor", "sci_name",
                 "collapse_subspecies", "named_lineage"
             ])
    elif args.info:
        column_titles = [
            "Taxid", "Sci.Name", "Rank", "Named Lineage", "Taxid Lineage"
        ]
        print('# ' + '\t'.join(column_titles))
        translator = ncbi.get_taxid_translator(all_taxids)
        ranks = ncbi.get_rank(all_taxids)
        for taxid, name in translator.items():
            lineage = ncbi.get_lineage(taxid)
            named_lineage = ','.join(ncbi.translate_to_names(lineage))
            lineage_string = ','.join(str(member) for member in lineage)
            print('\t'.join([
                str(taxid), name,
                ranks.get(taxid, ''), named_lineage, lineage_string
            ]))
def get_name(taxid):
    """Return the name NCBITaxa's taxid translator reports for ``taxid``.

    Raises ``KeyError`` when the translator result has no entry under
    ``taxid``.
    """
    translation = NCBITaxa().get_taxid_translator([taxid])
    return translation[taxid]
def taxo_msa(outfile='taxo_msa.svg',
             taxids=[],
             annotation='',
             msa=[],
             title='',
             width=2000):
    """Render an MSA next to the NCBI taxonomy tree of its sequences.

    outfile    - path of the rendered image
    taxids     - list of taxids in the same order as the sequences in msa
    annotation - string drawn on an extra 'annotation' leaf; a blank string
                 as long as the first sequence is used when empty
    msa        - sequence records providing ``.seq`` and ``.id``
    title      - title drawn above the tree
    width      - width passed to the renderer
    """
    ncbi = NCBITaxa()
    taxids = [int(tid) for tid in taxids]
    t = ncbi.get_topology(taxids, intermediate_nodes=False)
    # Extra leaf that carries the annotation row.
    a = t.add_child(name='annotation')
    a.add_feature('sci_name', 'annotation')
    t.sort_descendants(attr='sci_name')
    ts = TreeStyle()

    def layout(node):
        # Label the major ranks on top of their branch.
        if getattr(node, "rank", None) and node.rank in [
                'order', 'class', 'phylum', 'kingdom'
        ]:
            node.add_face(AttrFace("sci_name", fsize=7, fgcolor="indianred"),
                          column=0,
                          position="branch-top")

        if not node.is_leaf():
            return

        node.add_face(AttrFace("sci_name", fsize=9, fgcolor="steelblue"),
                      column=0,
                      position="branch-right")

        if node.name == 'annotation':
            s = annotation if annotation else ' ' * len(msa[0].seq)
            label = ' ' + 'SEQ_ID'
        else:
            # Leaf names are taxids; find the matching sequence by position.
            record = msa[taxids.index(int(node.name))]
            s = str(record.seq)
            label = ' ' + record.id

        seqFace = SeqMotifFace(
            s, [[0, len(s), "seq", 10, 10, None, None, None]], scale_factor=1)
        add_face_to_node(seqFace, node, 0, position="aligned")
        add_face_to_node(TextFace(label), node, column=1, position="aligned")

    ts.layout_fn = layout
    ts.show_leaf_name = False
    ts.title.add_face(TextFace(title, fsize=20), column=0)
    t.render(outfile, w=width, dpi=300, tree_style=ts)
def taxo_seq_architecture(seqreclist=[],
                          outfile='taxo_arch.svg',
                          taxids=[],
                          annotation='',
                          title='',
                          width=2000):
    """Render sequence architecture next to an NCBI taxonomy tree.

    seqreclist - list of sequence records. Each record should carry a list
                 of features in Biopython SeqFeature format. Features of
                 type "domain" are drawn as labelled boxes; features of type
                 "motif" are drawn as sequence stretches, clipped around
                 previously added motif stretches they overlap.
    outfile    - path of the rendered image
    taxids     - list of taxids in the same order as the records; when
                 empty, they are obtained by applying
                 ``get_taxid_from_gbrec`` to every record.
    annotation - only read inside a branch whose drawing code is commented
                 out; it currently has no visible effect.
    title      - title drawn above the tree
    width      - width passed to the renderer
    """
    # NOTE(review): `aa` is not referenced anywhere else in this function.
    aa = [
        'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F',
        'P', 'S', 'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '.', '-'
    ]

    def get_color(str):
        # Pick one of nine fixed colours from the hash of the given value.
        colorlist = [
            'red', 'green', 'yellow', 'lightblue', 'cyan', 'magenta', 'orange',
            'pink', 'lightgreen'
        ]
        return colorlist[hash(str) % 9]

    if len(taxids) == 0:
        taxids = map(get_taxid_from_gbrec, seqreclist)
    ncbi = NCBITaxa()
    taxids = map(int, taxids)
    t = ncbi.get_topology(taxids, intermediate_nodes=False)
    # a=t.add_child(name='annotation')
    # a.add_feature('sci_name','annotation')
    t.sort_descendants(attr='sci_name')
    ts = TreeStyle()

    def layout(node):
        # print node.rank
        # print node.sci_name
        if getattr(node, "rank", None):
            if (node.rank in ['order', 'class', 'phylum', 'kingdom']):
                rank_face = AttrFace("sci_name", fsize=7, fgcolor="indianred")
                node.add_face(rank_face, column=0, position="branch-top")
        if node.is_leaf():
            sciname_face = AttrFace("sci_name", fsize=9, fgcolor="steelblue")
            node.add_face(sciname_face, column=0, position="branch-right")
        if node.is_leaf() and not node.name == 'annotation':
            # here we are adding faces and we need to play with seqmotif face
            # Leaf names are taxids; the record is found by its position in
            # `taxids`.
            seq = str(seqreclist[taxids.index(int(node.name))].seq)
            motifs = []  #[[0,len(seq), "seq", 10, 10, None, None, None]]
            for f in seqreclist[taxids.index(int(node.name))].features:
                if f.type == 'domain':
                    motifs.append([
                        f.location.start, f.location.end, "[]", None, 10,
                        "blue",
                        get_color(f.qualifiers['name']),
                        "arial|8|black|%s" % f.qualifiers['name']
                    ])
                if f.type == 'motif':
                    # It turns out that we need to solve the overlap problem
                    # here (original note: solved only for one overlap).
                    s = f.location.start
                    e = f.location.end
                    # flag stays True while no already-added "seq" motif
                    # overlaps [s, e).
                    flag = True
                    overlappedm = []
                    for m in motifs:
                        if m[2] == 'seq' and m[0] < e and m[
                                1] > s:  #we have an overlap, four cases, preceding motife always is on top
                            flag = False
                            overlappedm.append(m)
                    if not flag:  #we have to solve multiple overlap problem
                        # let's do it by scanning: walk every position and
                        # emit one piece per uncovered stretch.
                        # NOTE: `ts`/`te` below are locals of layout(); they
                        # are unrelated to the TreeStyle `ts` of the
                        # enclosing function.
                        sflag = False
                        eflag = False
                        for x in range(s, e + 1):
                            if not sflag:  #check if we can start
                                overlap = False
                                for m in overlappedm:
                                    if x >= m[0] and x < m[1]:
                                        overlap = True
                                if not overlap:
                                    ts = x
                                    sflag = True
                            #check if is time to end
                            if sflag and not eflag:
                                overlap = False
                                for m in overlappedm:
                                    if x == m[0]:
                                        overlap = True
                                if overlap or x == e:
                                    te = x
                                    eflag = True
                            if sflag and eflag:
                                motifs.append([
                                    ts, te, "seq", 10, 10, "black",
                                    get_color(f.qualifiers['name']), None
                                ])
                                # Reset so a further uncovered stretch can
                                # start.
                                sflag = False
                                eflag = False
                    if flag:
                        # No overlap: add the motif over its full extent.
                        motifs.append([
                            f.location.start, f.location.end, "seq", 10, 10,
                            "black",
                            get_color(f.qualifiers['name']), None
                        ])
            seqFace = SeqMotifFace(seq,
                                   motifs,
                                   scale_factor=1,
                                   seq_format="[]")
            seqFace.overlaping_motif_opacity = 1.0
            # seqFace.fg=aafgcolors
            # seqFace.bg=aabgcolors_gray
            add_face_to_node(seqFace, node, 0, position="aligned")
            # gi=taxid2gi[int(node.name)]
            add_face_to_node(
                TextFace(' ' + seqreclist[taxids.index(int(node.name))].id +
                         ' '),
                node,
                column=1,
                position="aligned")
            # add_face_to_node(TextFace('      '+str(int(node.name))+' '),node,column=2, position = "aligned")
            # add_face_to_node(TextFace('      '+str(gi2variant[gi])+' '),node,column=3, position = "aligned")
        # We currently disable annotation: no 'annotation' leaf is added
        # above and the drawing calls below are commented out, so this
        # branch only computes `s`.
        if node.is_leaf() and node.name == 'annotation':
            if (annotation):
                s = annotation  # get_hist_ss_in_aln_as_string(msa_tr)
            else:
                s = ' ' * max(map(lambda x: len(x.seq), seqreclist))
            # seqFace = SeqMotifFace(s,[[0,len(s), "seq", 10, 10, None, None, None]],scale_factor=1)
            # add_face_to_node(seqFace, node, 0, position="aligned")
            # add_face_to_node(TextFace(' '+'SEQ_ID'),node,column=1, position = "aligned")
            # add_face_to_node(TextFace('       '+'NCBI_TAXID'+' '),node,column=2, position = "aligned")
            # add_face_to_node(TextFace('       '+'Variant'+'       '),node,column=3, position = "aligned")

    ts.layout_fn = layout
    ts.show_leaf_name = False
    ts.title.add_face(TextFace(title, fsize=20), column=0)
    t.render(outfile, w=width, dpi=300, tree_style=ts)
def main():
    """Attach NCBI lineages to the taxa of a HUMAnN2 gene table.

    Reads the gene table at ``args.input_fp`` and, for every row carrying a
    taxon, resolves the best NCBI id and its taxon path (cached per taxon).
    Two outputs are optional:

    * ``args.tdt_out_fp``: a header line followed by one line per row with a
      taxon: gene, the row values and the lineage, tab-separated.
    * ``args.h2gt_out_fp``: a ``# Gene Family`` header followed by every
      row; rows with a taxon get ``gene|lineage`` (dot-joined), or
      ``gene|unknown`` when no NCBI id was found.

    ``args.rank_headers`` and ``args.ranks`` are comma-separated strings.
    """
    args = parser.parse_args()
    input_fp = args.input_fp
    tdt_out_fp = args.tdt_out_fp
    h2gt_out_fp = args.h2gt_out_fp
    ncbi = NCBITaxa()
    rank_headers = args.rank_headers.split(',')
    ranks = args.ranks.split(',')
    tax_dict = {}

    # All handles start as None so the cleanup can test them safely even
    # when an output was not requested or opening one of them failed.
    input_f = tdt_out_f = h2gt_out_f = None
    try:
        input_f = open(input_fp)
        h2gt = read_humann2_genetable_generator(input_f)
        if tdt_out_fp:
            tdt_out_f = open(tdt_out_fp, 'w')
        if h2gt_out_fp:
            h2gt_out_f = open(h2gt_out_fp, 'w')

        first_h = True
        first_t = True
        for gene, header, line, tax in h2gt:
            # Rows without a resolvable taxon keep the rank headers as a
            # placeholder lineage.
            lineage = list(rank_headers)
            if tax and tax not in tax_dict:
                family, genus, species = clean_humann2_taxon(tax)
                best_id = get_best_ncbi_id(family, genus, species, ncbi)
                if best_id is not None:
                    lineage = get_taxon_path(best_id,
                                             ncbi,
                                             ranks=ranks,
                                             rank_headers=rank_headers)
                # Cache hits and misses alike so each taxon is resolved once.
                tax_dict[tax] = lineage
            if tax:
                lineage = tax_dict[tax]

            if tdt_out_f is not None:
                if first_t:
                    first_t = False
                    tdt_out_f.write('Gene Family\t{0}\t{1}\n'.format(
                        '\t'.join(header), '\t'.join(ranks)))
                # Checked independently of the header so the first data row
                # is written too, matching the h2gt output below.
                if tax:
                    tdt_out_f.write('{0}\t{1}\t{2}\n'.format(
                        gene, '\t'.join(line), '\t'.join(lineage)))

            if h2gt_out_f is not None:
                if first_h:
                    h2gt_out_f.write('# Gene Family\t{0}\n'.format(
                        '\t'.join(header)))
                    first_h = False
                if tax:
                    if lineage == rank_headers:
                        h2gt_out_f.write('{0}|{1}\t{2}\n'.format(
                            gene, 'unknown', '\t'.join(line)))
                    else:
                        h2gt_out_f.write('{0}|{1}\t{2}\n'.format(
                            gene, '.'.join(lineage), '\t'.join(line)))
                else:
                    h2gt_out_f.write('{0}\t{1}\n'.format(
                        gene, '\t'.join(line)))
    finally:
        for handle in (input_f, tdt_out_f, h2gt_out_f):
            if handle is not None:
                handle.close()