def test_multi_processing_blast(self):
    """
        Test that a set of blast runs using multiprocessing run.

        The first n_seqs records of the test database are blasted against
        that same database, and the resulting output files are then parsed
        with the multiprocessing parser. Both stages must produce exactly
        n_seqs results. The BLAST output files are removed at the end, even
        if one of the assertions fails.
    """
    fn_database = os.path.join(self.datadir, "mini_nr", "nr_test2")
    blaster = BLASTUtilities.BLASTMultiProcessing()
    identifier = "temp.{0}"
    n_seqs = 20
    for i, seq_record in enumerate(SeqIO.parse(fn_database, "fasta")):
        if i == n_seqs:
            break
        # str(seq) replaces the deprecated Seq.tostring(), which recent
        # Biopython releases no longer provide.
        blaster.add_sequence(str(seq_record.seq), identifier.format(i),
                             fn_database)
    fn_identifier_pairs = blaster.run()
    try:
        self.assertEqual(len(fn_identifier_pairs), n_seqs,
                         "Unexpected number of BLAST results")
        blast_parser = BLASTUtilities.BLASTMultiProcessingParser()
        # NOTE(review): the first element of each pair looks like the
        # identifier given to add_sequence(), so formatting it again is
        # probably unintended - confirm. Kept as in the original because
        # the test only checks the number of results.
        for pair_id, fn in fn_identifier_pairs:
            blast_parser.add_file(identifier.format(pair_id), fn)
        parsing_results = blast_parser.run()
        n_parsed = len(parsing_results)
        self.assertEqual(n_parsed, n_seqs,
                         "Unexpected number of parsed results {0}".format(n_parsed))
    finally:
        # Clean up the BLAST output files regardless of the test outcome
        for _, fn in fn_identifier_pairs:
            os.remove(fn)
def blast(seqs):
    """
        Blast a set of sequences and parse the results. The function
        calls the MultiProcessing versions of the blast runner and parser.
        The files written by the blast step are removed before returning,
        even if parsing them fails.

        @seqs A list of tuples of (sequence, identifier for the sequence,
                database to use for the blast procedure)
        @return Returns a list of BLASTResult objects
        @raise ValueError if seqs is empty
    """
    if len(seqs) == 0:
        raise ValueError("No sequences provided")
    blaster = BLASTUtilities.BLASTMultiProcessing()
    log.debug("Running blast from %s sequences", len(seqs))
    for seq in seqs:
        # each tuple is unpacked into the arguments of add_sequence()
        blaster.add_sequence(*seq)
    fns_blast_output = blaster.run()
    try:
        parser = BLASTUtilities.BLASTMultiProcessingParser()
        for identifier, fn in fns_blast_output:
            parser.add_file(identifier, fn)
        return parser.run()
    finally:
        # clean xmls after parsing; done in finally so that a parsing
        # error does not leave the output files behind
        for _, fn in fns_blast_output:
            os.remove(fn)