def reverse(master_file, organism):
    """
    Reverse blast a fasta file of multiple sequences against the database
    of an organism.

    Arguments:
    ---------
    master_file : str
        filename for fasta containing the sequences. It is split into one
        entry per sequence by `split_fasta`.
    organism :
        organism identifier; passed to `query` as `entrez_query` and to
        `organism_in_blast` when each blast result is checked.

    Sequences for which `organism_in_blast` reports a hit are concatenated
    into "<master_file without extension>_reversed.fasta". For every other
    sequence the per-sequence ".fasta" and "_blast.txt" files are deleted.
    Nothing is returned.
    """
    # grab just the name
    filename = os.path.splitext(master_file)[0]
    fastas = split_fasta(master_file)
    print("Total number of sequences before blasting: " + str(len(fastas)))

    # Run individual blasts. Every query is issued before any result is
    # inspected; the return value of `query` is not needed here.
    for f in fastas:
        query(f + ".fasta", f + "_blast.txt", entrez_query=organism)

    # Sort the sequences by whether the organism shows up in their result.
    good_fastas = []
    bad_fastas = []
    for f in fastas:
        if organism_in_blast(organism, f + "_blast.txt"):
            good_fastas.append(f)
        else:
            bad_fastas.append(f)

    # Context managers make sure both handles are closed even when a read
    # or write fails part-way through.
    with open(filename + "_reversed.fasta", "w") as merged:
        for fasta in good_fastas:
            with open(fasta + ".fasta", "r") as single:
                merged.write(single.read())

    print("Final number of sequences after blasting: " + str(len(good_fastas)))

    # Clean up the files belonging to sequences that did not pass.
    for f in bad_fastas:
        os.remove(f + ".fasta")
        os.remove(f + "_blast.txt")

    print("Done!")
def seeds(fasta, as_homologset=True, rm_blast=False, **kwargs):
    """
    Blast a set of seed sequences.

    Arguments:
    ---------
    fasta : str
        filename for fasta containing seed sequences.
    as_homologset: bool [default=true]
        Convert blast results to homolog set.
    rm_blast : bool [default=false]
        Accepted for interface compatibility; not used by this function.

    kwargs are passed to blasting method.

    Returns the result of `to_homologset` when `as_homologset` is true,
    otherwise None. Blast output files are written to a "blast" directory
    inside the current working directory.
    """
    fastas = split_fasta(fasta)
    print("Total number of sequences before blasting: " + str(len(fastas)))

    # Make a directory for storing the blast results. Reuse it when it is
    # already there so that a second run does not fail in os.mkdir.
    blastpath = os.path.join(os.getcwd(), "blast")
    if not os.path.isdir(blastpath):
        os.mkdir(blastpath)

    # Run individual blasts
    outnames = []
    for f in fastas:
        # Make filenames
        iname = f + ".fasta"
        oname = os.path.join(blastpath, f + "_blast.txt")

        # Send query to NCBI. The options must be unpacked: passing the
        # dict itself would hand it over as a third positional argument.
        query(iname, oname, **kwargs)
        outnames.append(oname)

    # If homolog_set should be made, return homolog_set
    if as_homologset:
        # Convert to homologset
        return to_homologset(outnames, tag_list=DEFAULTS)
    return None
def seeds(fasta, as_homologset=True, rm_blast=False, **kwargs):
    """
    Blast a set of seed sequences.

    Arguments:
    ---------
    fasta : str
        filename for fasta containing seed sequences.
    as_homologset: bool [default=true]
        Convert blast results to homolog set.
    rm_blast : bool [default=false]
        Accepted for interface compatibility; not used by this function.

    kwargs are passed to blasting method.

    Returns the result of `to_homologset` when `as_homologset` is true,
    otherwise None. Blast output files are written to a "blast" directory
    inside the current working directory.
    """
    fastas = split_fasta(fasta)
    print("Total number of sequences before blasting: " + str(len(fastas)))

    # Make a directory for storing the blast results. Reuse it when it is
    # already there so that a second run does not fail in os.mkdir.
    blastpath = os.path.join(os.getcwd(), "blast")
    if not os.path.isdir(blastpath):
        os.mkdir(blastpath)

    # Run individual blasts
    outnames = []
    for f in fastas:
        # Make filenames
        iname = f + ".fasta"
        oname = os.path.join(blastpath, f + "_blast.txt")

        # Send query to NCBI. The options must be unpacked: passing the
        # dict itself would hand it over as a third positional argument.
        query(iname, oname, **kwargs)
        outnames.append(oname)

    # If homolog_set should be made, return homolog_set
    if as_homologset:
        # Convert to homologset
        return to_homologset(outnames, tag_list=DEFAULTS)
    return None
def reverse(master_file, organism):
    """
    Reverse blast a fasta file of multiple sequences against the database
    of an organism.

    Arguments:
    ---------
    master_file : str
        filename for fasta containing the sequences. It is split into one
        entry per sequence by `split_fasta`.
    organism :
        organism identifier; passed to `query` as `entrez_query` and to
        `organism_in_blast` when each blast result is checked.

    Sequences for which `organism_in_blast` reports a hit are concatenated
    into "<master_file without extension>_reversed.fasta". For every other
    sequence the per-sequence ".fasta" and "_blast.txt" files are deleted.
    Nothing is returned.
    """
    # grab just the name
    filename = os.path.splitext(master_file)[0]
    fastas = split_fasta(master_file)
    print("Total number of sequences before blasting: " + str(len(fastas)))

    # Run individual blasts. Every query is issued before any result is
    # inspected; the return value of `query` is not needed here.
    for f in fastas:
        query(f + ".fasta", f + "_blast.txt", entrez_query=organism)

    # Sort the sequences by whether the organism shows up in their result.
    good_fastas = []
    bad_fastas = []
    for f in fastas:
        if organism_in_blast(organism, f + "_blast.txt"):
            good_fastas.append(f)
        else:
            bad_fastas.append(f)

    # Context managers make sure both handles are closed even when a read
    # or write fails part-way through.
    with open(filename + "_reversed.fasta", "w") as merged:
        for fasta in good_fastas:
            with open(fasta + ".fasta", "r") as single:
                merged.write(single.read())

    print("Final number of sequences after blasting: " + str(len(good_fastas)))

    # Clean up the files belonging to sequences that did not pass.
    for f in bad_fastas:
        os.remove(f + ".fasta")
        os.remove(f + "_blast.txt")

    print("Done!")