# only return genus and species ... dump everything after the first two words and remove dots if species: return species.split()[0] + " " + species.split()[1] else: # return None if we didn't find the accession in Uniprot, or there was no species field return None for s in seqs: try: # take "Q9Y0V5_CIOIN" from "Q9Y0V5_CIOIN blabla bla" & retrieve Uniprot record uniprot_id = s.description.split()[0] species = uniprotGetSpecies(uniprot_id) # If nothing was returned from Uniprot the first time # take only "Q9Y0V5" from "Q9Y0V5_CIOIN blabla bla" and try again if not species: uniprot_id = s.description.split()[0].split("_")[0] species = uniprotGetSpecies(uniprot_id) except IndexError: # no [genus species] in the description for this sequence continue #print "Species: ", species lineage_code = server.getLineageCode(species, kingdoms=KINGDOMS) if lineage_code == None: lineage_code = server.getLineageCode(species, kingdoms=SUPERKINGDOMS) if tag_end: print ">%s |||%s" % (s.description, lineage_code) else: print ">tax|%s|%s" % (lineage_code, s.description) print s.seq.tostring() fh.close()
else: server = SOAPpy.SOAPProxy("http://localhost:8999/") #def get_lineage_code(species, kingdoms=KINGDOMS): # lineage = server.getLineage(species) # for l in lineage: # for n in l: # #print "----------------" # for x in n: # if x in kingdoms: # return kingdoms[x] # return None for s in seqs: try: species = s.description.split("[")[1].split("]")[0].strip() except IndexError: # no [genus species] in the description for this sequence continue #print "Species: ", species lineage_code = server.getLineageCode(species, kingdoms=KINGDOMS) if lineage_code == None: lineage_code = server.getLineageCode(species, kingdoms=SUPERKINGDOMS) if tag_end: print ">%s |||%s" % (s.description, lineage_code) else: print ">tax|%s|%s" % (lineage_code, s.description) print s.seq.tostring() fh.close()