def run(self, debug=False):
    """Run the AmiGO batch BLAST over the input FASTA and write the counts.

    Steps:
      1. Build a FASTA index over ``self.infile`` if ``self.record_index``
         has not been set yet.
      2. Hand the index to a ``GOConnector`` together with the batch size,
         e-value threshold and temp-file path, and run it in batch mode.
      3. Store every returned sequence in ``self.results`` (keyed by its
         ``seq_id``) and collect its ``combined_terms``.
      4. Build ``self.counter`` from the collected terms and write it out
         with ``output_csv`` to ``self.outfile``.

    :param debug: accepted for interface compatibility; this method does
        not read it. The connector is given ``self.debug`` instead.
    """
    print("Running AmiGO:BLAST_Batch")

    # Identity test: the index is a container-like object, so comparing it
    # to None with ``==`` would go through its own equality logic.
    if self.record_index is None:
        self.record_index = SeqIO.index(self.infile, "fasta")
    # Single-argument parenthesised form prints the same text under both
    # the print statement (Python 2) and the print function (Python 3).
    print("BLAST infile:%s" % self.infile)

    self.tempfile = self.wdir + "/AmiGO_Record.temp"
    go = GOConnector(seq_record=self.record_index,
                     max_query_size=self.batch_size,
                     e_value_cut_off=self.e_threshold,
                     tempfile=self.tempfile,
                     debug=self.debug)
    go.amigo_batch_mode()

    # seq_id -> combined GO terms, for every sequence the connector returned.
    all_orfs = dict()
    for seq in go.all_seqs:
        key = seq.seq_id
        self.results[key] = seq
        all_orfs[key] = seq.combined_terms

    self.counter = self.create_counter(all_orfs)
    output_csv(self.outfile, self.header, self.counter)
def test_batch_mode(self):
    """Check batching, a full batch run, and resuming from the temp file.

    The test verifies, on a 10-record FASTA file:
      * ``create_WebSessions_batches`` creates the expected number of web
        sessions for several batch sizes;
      * ``amigo_batch_mode`` with a batch size of 4 returns
        ``ceil(10 / 4)`` and only yields the expected sequence ids;
      * a second run against the same temp file returns 0;
      * after the temp file is cut back to its first ``ENDResult`` line,
        a further run returns one less than the full count.

    Example request syntax:

    http://amigo1.geneontology.org/cgi-bin/amigo/blast.cgi?action=blast&seq=%3ES1%0ATTGAAAAACCTCCGGCTATGCCGGAGGATATTTATTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGGGTTGCCGATATTCTGGAA%3ES2%0AAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCATCTGGTAGCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTCTGACGGGACTCGCCGCCGCCCAGCCGGGATTTCCGCTGGCACAATTGAAAACTTTCGTCGACCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATCAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTATGGCCGGCGTGTTAGAAGCGCGTGGTCACAACGTTACCGTTATCGATCCGGTCGAAAAA&CMD=Put

    http://amigo1.geneontology.org/cgi-bin/amigo/blast.cgi?action=blast&CMD=PUT&expect=0.001&seq=%3ES1%0ATTGTTATCGATCCGGTCGAAAAA

    http://amigo.geneontology.org/cgi-bin/amigo/blast.cgi?
        action=blast&seq=%3ES1%0ATTGAAAAACCTCCGGCTATGCCGGAGGATATTTATTTC....
        %3ES2%0AAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTC...
        &CMD=Put

    With filter parameters:

    http://amigo1.geneontology.org/cgi-bin/amigo/blast.cgi?action=blast&CMD=Put&maxhits=200&threshold=0.0001&seq=%3ES1%0ATTGTTATCGATCCGGTCGAAAAA
    """
    infile = self.data_dir + "AE014075_subTiny10.fasta"  # "AE014075_subSmall100.fasta"
    record_index = SeqIO.index(infile, "fasta")

    # (max query size, expected number of web sessions) for the 10 records.
    for query_size, session_count in ((6, 2), (3, 4), (2, 5)):
        go = GOConnector(record_index, query_size)
        go.create_WebSessions_batches()
        self.assertEqual(session_count, len(go.web_session_list))

    # Start from a clean slate: a leftover temp file would make the first
    # batch run resume instead of processing everything.
    self.tempfile = self.data_dir + "AmiGO_Record.temp"
    try:
        os.remove(self.tempfile)
    except OSError:
        pass  # nothing to remove

    batch_size = 4
    go = GOConnector(record_index, batch_size, tempfile=self.tempfile)
    count = go.amigo_batch_mode()

    expected = set("lcl|AE014075.1_gene_%d" % i for i in range(1, 11))
    expected_count = math.ceil(10.0 / 4)  # 10/4 round up to 3
    self.assertEqual(expected_count, count)
    for seq in go.all_seqs:
        self.assertTrue(seq.seq_id in expected, seq.seq_id)

    # Everything is already recorded in the temp file, so a rerun is a no-op.
    go = GOConnector(record_index, batch_size, tempfile=self.tempfile)
    count = go.amigo_batch_mode()
    self.assertEqual(0, count)

    # Cut the temp file back to the lines up to and including the first
    # one that starts with "ENDResult". Context managers guarantee the
    # handles are closed even if reading or writing fails.
    with open(self.tempfile, "r") as tempout:
        all_lines = tempout.readlines()
    with open(self.tempfile, "w") as tempout:
        for line in all_lines:
            tempout.write(line)
            if line.startswith("ENDResult"):
                break

    go = GOConnector(record_index, batch_size, tempfile=self.tempfile)
    resume_count = go.amigo_batch_mode()
    self.assertEqual(expected_count - 1, resume_count)