88770 name: Panarthropoda blast name: Panarthropoda rank: no rank parent: 1206794 1206794 name: Ecdysozoa blast name: Ecdysozoa rank: no rank parent: 33317 33317 name: Protostomia blast name: Protostomia rank: no rank parent: 33213 33213 name: Bilateria blast name: Bilateria rank: no rank parent: 6072 6072 name: Eumetazoa blast name: Eumetazoa rank: no rank parent: 33208 33208 name: Metazoa blast name: animals rank: kingdom parent: 33154 33154 name: Opisthokonta blast name: Opisthokonta rank: no rank parent: 2759 2759 name: Eukaryota blast name: eukaryotes rank: superkingdom parent: 131567 """) sys.exit(0) else: ids=sys.argv[1:] taxa = taxon.read_nodes() names,blastname = taxon.read_names() divs = taxon.read_divisions() for i in ids: c=0 if i not in taxa: sys.stderr.write("Error: ID " + str(i) + " was not found in the taxonomy filei\n") sys.exit(0) while taxa[i].parent != '1' and i != '1': #print " " * c, taxa[i].taxid, "name:", names[i].name, "rank:", taxa[i].rank, "div:", taxa[i].division, "code:", divs[taxa[i].division].code bn=names[i].name if i in blastname: bn=blastname[i].name print("{} name: {} blast name: {} rank: {} parent: {}".format(taxa[i].taxid, names[i].name, bn, taxa[i].rank, taxa[i].parent)) i=taxa[i].parent c+=1
import argparse
import sys

import taxon

# Script entry point: read the taxonomy tables through the project's taxon
# module, then copy the PATRIC metadata header line to the output file with
# the wanted rank names appended as extra tab-separated columns.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Append taxonomy to the patric metadata file. This adds it at column 67")
    parser.add_argument('-f', help='patric metadata file', required=True)
    parser.add_argument('-o', help='output file', required=True)
    parser.add_argument('-t', help='taxonomy directory (default=/home2/db/taxonomy/current/)',
                        default='/home2/db/taxonomy/current/')
    parser.add_argument('-v', help='verbose output', action="store_true")
    args = parser.parse_args()

    # Progress messages go to stderr; this needs the `sys` import above,
    # which the original import list did not include.
    sys.stderr.write("Reading taxonomy\n")
    taxa = taxon.read_nodes(directory=args.t)
    names, blastname = taxon.read_names(directory=args.t)
    divs = taxon.read_divisions(directory=args.t)
    sys.stderr.write("Read taxonomy\n")
    # NOTE(review): taxa, names, blastname, divs and args.v are not referenced
    # again in this part of the script - presumably used by code that follows;
    # confirm before removing.

    # Rank names appended to the header line, in this order.
    want = ['superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']

    # The output file is opened first, then the input, as in the original
    # nested `with` statements.
    with open(args.o, 'w', encoding='utf-8') as out, open(args.f, 'r', encoding='utf-8') as f:
        for l in f:
            # strip() also removes trailing tabs, so a row whose last columns
            # are empty splits into fewer fields; pad with empty strings
            # until the row has at least 69 fields.
            p = l.strip().split("\t")
            while len(p) <= 68:
                p.append("")

            # Header row: written from the stripped original line (not the
            # padded field list) followed by the rank names.
            if l.startswith("genome_id"):
                out.write("{}\t{}\n".format(l.strip(), "\t".join(want)))
                continue
88770 name: Panarthropoda blast name: Panarthropoda rank: no rank parent: 1206794 1206794 name: Ecdysozoa blast name: Ecdysozoa rank: no rank parent: 33317 33317 name: Protostomia blast name: Protostomia rank: no rank parent: 33213 33213 name: Bilateria blast name: Bilateria rank: no rank parent: 6072 6072 name: Eumetazoa blast name: Eumetazoa rank: no rank parent: 33208 33208 name: Metazoa blast name: animals rank: kingdom parent: 33154 33154 name: Opisthokonta blast name: Opisthokonta rank: no rank parent: 2759 2759 name: Eukaryota blast name: eukaryotes rank: superkingdom parent: 131567 """) sys.exit(0) else: ids = sys.argv[1:] taxa = taxon.read_nodes() names, blastname = taxon.read_names() divs = taxon.read_divisions() for i in ids: c = 0 if i not in taxa: sys.stderr.write("Error: ID " + str(i) + " was not found in the taxonomy filei\n") sys.exit(0) while taxa[i].parent != '1' and i != '1': #print " " * c, taxa[i].taxid, "name:", names[i].name, "rank:", taxa[i].rank, "div:", taxa[i].division, "code:", divs[taxa[i].division].code bn = names[i].name if i in blastname: bn = blastname[i].name print( "{} name: {} blast name: {} rank: {} parent: {}".format( taxa[i].taxid, names[i].name, bn, taxa[i].rank, taxa[i].parent))