def test_blast_record_set(self):
    """Check a multi-query BLAST batch against a freshly built database.

    Writes ``self.db_records`` to ``self.db_file``, reads them back to
    confirm the FASTA round trip, builds a nucleotide BLAST database from
    that file and runs ``blasting.blast_record_set`` with
    ``self.multi_records`` as queries. The first hit reported for each
    query is expected to carry the query's own id with a ``match_p100``
    value of 100.
    """
    # prepare database
    seqfile_ops.write_fasta(self.db_file, self.db_records)
    loaded_records = list(seqfile_ops.load_multifasta(self.db_file))
    # Compare lengths up front so the pairwise check below cannot pass
    # vacuously when fewer (or no) records are read back.
    self.assertEqual(len(loaded_records), len(self.db_records))
    for loaded, expected in zip(loaded_records, self.db_records):
        self.assertEqual(loaded.id, expected.id)
        self.assertEqual(str(loaded.seq), str(expected.seq))

    # make database
    self.dbfile_path, db_report = blasting.make_blastDB(
        self.temp_dir, self.db_name, self.db_file, 'nucl')
    # Value equality, not identity: `is` on ints only works by accident of
    # the interpreter's small-integer cache.
    self.assertEqual(db_report['status'], 0)
    self.assertEqual(db_report['message'], 'database exists')

    # run local blast batch (with multiple queries)
    matches_multi = blasting.blast_record_set(
        self.dbfile_path, self.multi_records, self.prefs)
    self.assertEqual(len(matches_multi), 3)
    for record in self.multi_records:
        # Results are looked up by query id; the first hit must be the
        # query itself with a match_p100 of 100.
        best_hit = matches_multi[record.id][0]
        self.assertEqual(best_hit['contig_id'], record.id)
        self.assertEqual(best_hit['details']['match_p100'], 100)
def genome_sets_load(genomes_path, input_file, input_prefs, db_path):
    """Load genome datasets listed in an input file.

    Each row returned by ``adaptive_list_load`` is read as
    ``(genome_name, sequence_file_name, ...)``. For every row the sequence
    file is passed through ``ensure_fasta``, a nucleotide BLAST database is
    requested via ``make_blastDB`` and a ``GenomeSet`` is created.

    Args:
        genomes_path: Directory that each row's sequence file name is
            joined onto.
        input_file: Listing file handed to ``adaptive_list_load``.
        input_prefs: Mapping providing the ``'header'`` and ``'columns'``
            settings forwarded to ``adaptive_list_load``.
        db_path: Location passed to ``make_blastDB`` as its first argument.

    Returns:
        A list of ``GenomeSet`` objects, one per row of the input file.

    Raises:
        SystemExit: If ``make_blastDB`` reports status 1 for any genome.
            The error message is printed first and the exit status is 1.
        Exception: Anything raised by ``ensure_fasta`` propagates unchanged.
    """
    import os
    import sys
    from classes.analysis_obj import GenomeSet
    from analysis.seqfile_ops import ensure_fasta
    from analysis.text_manipulation import adaptive_list_load
    from analysis.blasting import make_blastDB

    header = input_prefs['header']
    columns = input_prefs['columns']
    genomes_list = adaptive_list_load(input_file, header, columns)

    # Single-argument print(...) with %-formatting emits the same text under
    # the Python 2 print statement and the Python 3 print function.
    print("prepping BLAST databases")
    genome_sets = []
    for line in genomes_list:
        genome_name = line[0]
        seq_file = os.path.join(genomes_path, line[1])

        # Errors from ensure_fasta propagate to the caller untouched.
        db_infile = ensure_fasta(seq_file)
        print("genome FASTA sequence available in %s" % (db_infile,))

        # NOTE(review): the database is built from seq_file, not from the
        # db_infile returned by ensure_fasta -- confirm this is intended.
        _dbfile_path, db_report = make_blastDB(db_path, genome_name,
                                               seq_file, 'nucl')
        status = db_report['status']
        if status == 1:
            print("%s : %s" % (genome_name, db_report['message']['error']))
            # Exit non-zero so calling scripts can detect the failure.
            sys.exit(1)
        elif status == 0:
            print("%s : %s" % (genome_name, db_report['message']))

        # Any other status value is not reported; the genome is still kept.
        genome_sets.append(GenomeSet(db_infile, genome_name))

    print("  %d databases ready to search" % len(genome_sets))
    return genome_sets
def test_local_blastn(self):
    """Check a single-query local BLASTN run and the parsing of its output.

    Writes the query and database FASTA files and confirms both round
    trips, builds a nucleotide BLAST database, runs
    ``blasting.local_blastn`` with the single query and parses the output
    file with ``blasting.parse_blast_out6``. Exactly one match is expected,
    carrying the query's id with a ``match_p100`` value of 100.
    """
    # prepare query
    seqfile_ops.write_fasta(self.single_q_file, self.single_record)
    query_record = seqfile_ops.load_fasta(self.single_q_file)
    self.assertEqual(query_record.id, self.record_1.id)
    self.assertEqual(str(query_record.seq), str(self.record_1.seq))

    # prepare database
    seqfile_ops.write_fasta(self.db_file, self.db_records)
    loaded_records = list(seqfile_ops.load_multifasta(self.db_file))
    # Compare lengths up front so the pairwise check below cannot pass
    # vacuously when fewer (or no) records are read back.
    self.assertEqual(len(loaded_records), len(self.db_records))
    for loaded, expected in zip(loaded_records, self.db_records):
        self.assertEqual(loaded.id, expected.id)
        self.assertEqual(str(loaded.seq), str(expected.seq))

    # make database
    self.dbfile_path, db_report = blasting.make_blastDB(
        self.temp_dir, self.db_name, self.db_file, 'nucl')
    # Value equality, not identity: `is` on ints only works by accident of
    # the interpreter's small-integer cache.
    self.assertEqual(db_report['status'], 0)
    self.assertEqual(db_report['message'], 'database exists')

    # run local blast with single query
    self.status = blasting.local_blastn(
        self.single_q_file, self.single_out_file,
        self.dbfile_path, self.prefs)
    self.assertEqual(self.status['output'], '')
    self.assertIsNone(self.status['error'])

    # parse blast output
    matches_single = blasting.parse_blast_out6(self.single_out_file,
                                               self.prefs)
    self.assertEqual(len(matches_single), 1)
    only_match = matches_single[0]
    self.assertEqual(only_match['contig_id'], self.single_record.id)
    self.assertEqual(only_match['details']['match_p100'], 100)