def test_build_blast_db_from_seqs(self):
    """build_blast_db_from_seqs convenience function works as expected

    Builds a database from ``self.in_seqs1`` with ``/tmp`` as output
    directory, checks the returned database name and file list, runs one
    blastn query against it and finally removes the generated files.
    """
    blast_db, db_files = build_blast_db_from_seqs(
        self.in_seqs1, output_dir='/tmp')

    # The database name is a temporary fasta path under the output dir.
    self.assertTrue(blast_db.startswith('/tmp/Blast_tmp_db'))
    self.assertTrue(blast_db.endswith('.fasta'))

    # Exactly one file per expected extension must be reported.
    db_extensions = ('.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log')
    expected_db_files = set(blast_db + ext for ext in db_extensions)
    self.assertEqual(set(db_files), expected_db_files)

    # Blasting against the new db yields a result of length one.
    blast_result = blastn(self.test_seq, blast_db=blast_db)
    self.assertEqual(len(blast_result), 1)

    # Every reported file is present on disk ...
    for path in db_files:
        self.assertTrue(exists(path))

    # ... and none of them is left once they have been removed.
    remove_files(db_files)
    for path in db_files:
        self.assertFalse(exists(path))
def test_blast_against_new_db(self):
    """Formatdb: blastall against a newly created DB functions as expected

    Runs FormatDb (working directory ``/tmp``) on ``self.in_seqs1_fp``,
    blasts ``self.test_seq`` against that path and checks that query
    ``s1`` reports subject ``11472384`` among its hits.
    """
    fdb = FormatDb(WorkingDir="/tmp")
    result = fdb(self.in_seqs1_fp)
    try:
        blast_res = blastn(self.test_seq, blast_db=self.in_seqs1_fp)
    finally:
        # Clean up the formatdb result even when blastn raises, so a
        # failing run does not skip the cleanup step.
        result.cleanUp()

    # Test that a blast result was returned
    self.assertTrue("s1" in blast_res, "Not getting any blast results.")

    # Test that the sequence we expect was a good blast hit
    subject_ids = [r["SUBJECT ID"] for r in blast_res["s1"][0]]
    self.assertTrue("11472384" in subject_ids,
                    "Not getting expected blast results.")
def test_blast_against_new_db(self):
    """Formatdb: blastall against a newly created DB functions as expected

    Formats ``self.in_seqs1_fp`` with FormatDb (working directory
    ``/tmp``), blasts ``self.test_seq`` against it and verifies that the
    hits for query ``s1`` include subject ``11472384``.
    """
    fdb = FormatDb(WorkingDir='/tmp')
    result = fdb(self.in_seqs1_fp)
    try:
        blast_res = blastn(self.test_seq, blast_db=self.in_seqs1_fp)
    finally:
        # Run the cleanup unconditionally; without the finally an
        # exception from blastn would bypass it.
        result.cleanUp()

    # Test that a blast result was returned
    self.assertTrue('s1' in blast_res,
                    "Not getting any blast results.")

    # Test that the sequence we expect was a good blast hit
    subject_ids = [r['SUBJECT ID'] for r in blast_res['s1'][0]]
    self.assertTrue('11472384' in subject_ids,
                    "Not getting expected blast results.")
def test_build_blast_db_from_fasta_path_aln(self):
    """build_blast_db_from_fasta_path works with alignment as input

    Builds a database from the alignment file ``self.in_aln1_fp``, checks
    that the database name equals the input path and that the expected
    index files are reported, queries it once, then removes the files.
    """
    aln_fp = self.in_aln1_fp
    blast_db, db_files = build_blast_db_from_fasta_path(aln_fp)
    self.assertEqual(blast_db, aln_fp)

    # One generated file per extension, each named after the database.
    suffixes = (".nhr", ".nin", ".nsq", ".nsd", ".nsi", ".log")
    expected_db_files = set(blast_db + suffix for suffix in suffixes)
    self.assertEqual(set(db_files), expected_db_files)

    # A blastn run with e_value=0.0 against the new db gives one result.
    hits = blastn(self.test_seq, blast_db=blast_db, e_value=0.0)
    self.assertEqual(len(hits), 1)

    # All generated files exist before removal ...
    for db_file in db_files:
        self.assertTrue(exists(db_file))

    # ... and are gone afterwards.
    remove_files(db_files)
    for db_file in db_files:
        self.assertFalse(exists(db_file))
def test_build_blast_db_from_fasta_path(self):
    """build_blast_db_from_fasta_path convenience function works as expected

    Builds a database from ``self.in_seqs1_fp``, checks that the database
    name equals the input path and that the expected index files are
    reported, queries it once, then removes the generated files.
    """
    seqs_fp = self.in_seqs1_fp
    blast_db, db_files = build_blast_db_from_fasta_path(seqs_fp)
    self.assertEqual(blast_db, seqs_fp)

    # Expected files are derived from the input path, not the return value.
    suffixes = (".nhr", ".nin", ".nsq", ".nsd", ".nsi", ".log")
    expected_db_files = set(seqs_fp + suffix for suffix in suffixes)
    self.assertEqual(set(db_files), expected_db_files)

    # Blasting against the new db yields a result of length one.
    hits = blastn(self.test_seq, blast_db=blast_db)
    self.assertEqual(len(hits), 1)

    # All generated files exist before removal ...
    for db_file in db_files:
        self.assertTrue(exists(db_file))

    # ... and are gone afterwards.
    remove_files(db_files)
    for db_file in db_files:
        self.assertFalse(exists(db_file))
def test_build_blast_db_from_fasta_path_aln(self):
    """build_blast_db_from_fasta_path works with alignment as input

    The database is built from ``self.in_aln1_fp``; the test verifies the
    returned name and file list, performs a single blastn query and then
    deletes the files that were created.
    """
    input_fp = self.in_aln1_fp
    blast_db, db_files = build_blast_db_from_fasta_path(input_fp)
    self.assertEqual(blast_db, input_fp)

    # Collect the file names the build is expected to report.
    expected_db_files = set()
    for ext in ('.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log'):
        expected_db_files.add(blast_db + ext)
    self.assertEqual(set(db_files), expected_db_files)

    # result returned when blasting against new db
    blast_result = blastn(self.test_seq, blast_db=blast_db, e_value=0.0)
    self.assertEqual(len(blast_result), 1)

    # Make sure all db_files exist
    for path in db_files:
        self.assertTrue(exists(path))

    # Remove all db_files
    remove_files(db_files)

    # Make sure the removal left none of them behind
    for path in db_files:
        self.assertFalse(exists(path))
def test_build_blast_db_from_seqs(self):
    """build_blast_db_from_seqs convenience function works as expected

    A database is built from ``self.in_seqs1`` with ``/tmp`` as output
    directory; the test verifies the returned name and file list,
    performs a single blastn query and then deletes the created files.
    """
    built = build_blast_db_from_seqs(self.in_seqs1, output_dir="/tmp")
    blast_db, db_files = built

    # The database name is a temporary fasta path under the output dir.
    self.assertTrue(blast_db.startswith("/tmp/Blast_tmp_db"))
    self.assertTrue(blast_db.endswith(".fasta"))

    # Collect the file names the build is expected to report.
    expected_db_files = set()
    for ext in (".nhr", ".nin", ".nsq", ".nsd", ".nsi", ".log"):
        expected_db_files.add(blast_db + ext)
    self.assertEqual(set(db_files), expected_db_files)

    # result returned when blasting against new db
    blast_result = blastn(self.test_seq, blast_db=blast_db)
    self.assertEqual(len(blast_result), 1)

    # Make sure all db_files exist
    for path in db_files:
        self.assertTrue(exists(path))

    # Remove all db_files
    remove_files(db_files)

    # Make sure the removal left none of them behind
    for path in db_files:
        self.assertFalse(exists(path))
def test_build_blast_db_from_fasta_file(self):
    """build_blast_db_from_fasta_file works with open files as input

    Passes an open handle on ``self.in_aln1_fp`` to the builder with
    ``/tmp/`` as output directory, checks the returned database name and
    file list (which here includes the database fasta itself), queries
    the database once and removes the generated files.
    """
    # Open the input in a context manager so the handle is closed
    # deterministically instead of being left to garbage collection.
    # NOTE(review): assumes the builder has finished reading the handle
    # by the time it returns -- confirm against its implementation.
    with open(self.in_aln1_fp) as fasta_file:
        blast_db, db_files = build_blast_db_from_fasta_file(
            fasta_file, output_dir="/tmp/")

    self.assertTrue(blast_db.startswith("/tmp/BLAST_temp_db"))
    self.assertTrue(blast_db.endswith(".fasta"))

    # The fasta file itself plus one file per index extension.
    expected_db_files = set(
        [blast_db]
        + [blast_db + ext
           for ext in [".nhr", ".nin", ".nsq", ".nsd", ".nsi", ".log"]]
    )
    self.assertEqual(set(db_files), expected_db_files)

    # result returned when blasting against new db
    self.assertEqual(
        len(blastn(self.test_seq, blast_db=blast_db, e_value=0.0)), 1)

    # Make sure all db_files exist
    for fp in db_files:
        self.assertTrue(exists(fp))

    # Remove all db_files
    remove_files(db_files)

    # Make sure nothing weird happened in the remove
    for fp in db_files:
        self.assertFalse(exists(fp))
def pick_parents(blastdb, ref_seqs,target_sequences,nhits=10,params=None):
    """Pick the most likely parents for a chimeric sequence

    blastdb : pre-formatted db of all parents; the path is joined onto
        the current working directory before blasting
    ref_seqs : aligned parent sequences, indexed by BLAST subject id
    target_sequences : iterable of (seq_id, seq) pairs (for example the
        items of a dict)
    nhits : number of "best" parents to take
    params : accepted but not used by this function

    Generator: for every target sequence that has at least one hit,
    yields (ref_db, seq_id, seq), where ref_db maps each hit's subject id
    to its entry in ref_seqs. Targets without any hit are skipped.
    """
    # The database path does not depend on the target sequence, so it is
    # resolved once rather than on every iteration.
    blastdb = os.path.join(os.getcwd(), blastdb)

    for seq_id, seq in target_sequences:
        # Each fragment of the target is blasted under its index as id.
        query_seqs = [("%d" % i, s)
                      for i, s in enumerate(fractionate_sequence(seq))]
        result = blastn(query_seqs, blastdb)

        ref_db = {}
        for _, best_hits in result.bestHitsByQuery(n=nhits):
            for rec in best_hits:
                subject_id = rec['SUBJECT ID']
                ref_db[subject_id] = ref_seqs[subject_id]

        if ref_db:
            yield (ref_db, seq_id, seq)
def pick_parents(blastdb, ref_seqs, target_sequences, nhits=10, params=None):
    """Pick the most likely parents for a chimeric sequence

    blastdb : pre-formatted db of all parents; joined onto the current
        working directory before it is handed to blastn
    ref_seqs : aligned parent sequences, looked up by BLAST subject id
    target_sequences : iterable yielding (seq_id, seq) pairs, e.g. the
        items of a dict
    nhits : number of "best" parents to take
    params : not used by this function

    This is a generator. For each target with at least one hit it yields
    a (ref_db, seq_id, seq) tuple, ref_db being a dict from subject id to
    the matching ref_seqs entry; targets with no hits yield nothing.
    """
    # Loop-invariant: compute the database path a single time up front.
    blastdb = os.path.join(os.getcwd(), blastdb)

    for seq_id, seq in target_sequences:
        # Fragments are numbered so each query carries its index as id.
        fragments = fractionate_sequence(seq)
        query_seqs = [("%d" % i, s) for i, s in enumerate(fragments)]
        result = blastn(query_seqs, blastdb)

        ref_db = {}
        for _, best_hits in result.bestHitsByQuery(n=nhits):
            for rec in best_hits:
                subject_id = rec['SUBJECT ID']
                ref_db[subject_id] = ref_seqs[subject_id]

        # Nothing to report for this target when no parent was hit.
        if not ref_db:
            continue
        yield (ref_db, seq_id, seq)
def test_build_blast_db_from_fasta_file(self):
    """build_blast_db_from_fasta_file works with open files as input

    Hands an open handle on ``self.in_aln1_fp`` to the builder with
    ``/tmp/`` as output directory, verifies the returned database name
    and file list (the database fasta is part of that list here), runs
    one blastn query and deletes the generated files.
    """
    # The with-block guarantees the input handle is closed once the
    # database has been built.
    # NOTE(review): assumes the builder does not keep reading from the
    # handle after it returns -- confirm against its implementation.
    with open(self.in_aln1_fp) as fasta_file:
        blast_db, db_files = build_blast_db_from_fasta_file(
            fasta_file, output_dir='/tmp/')

    self.assertTrue(blast_db.startswith('/tmp/BLAST_temp_db'))
    self.assertTrue(blast_db.endswith('.fasta'))

    # The fasta file itself plus one file per index extension.
    expected_db_files = set(
        [blast_db]
        + [blast_db + ext
           for ext in ['.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log']])
    self.assertEqual(set(db_files), expected_db_files)

    # result returned when blasting against new db
    self.assertEqual(
        len(blastn(self.test_seq, blast_db=blast_db, e_value=0.0)), 1)

    # Make sure all db_files exist
    for fp in db_files:
        self.assertTrue(exists(fp))

    # Remove all db_files
    remove_files(db_files)

    # Make sure nothing weird happened in the remove
    for fp in db_files:
        self.assertFalse(exists(fp))
def test_build_blast_db_from_fasta_path(self):
    """build_blast_db_from_fasta_path convenience function works as expected

    The database is built from ``self.in_seqs1_fp``; the test verifies the
    returned name and file list, performs a single blastn query and then
    deletes the files that were created.
    """
    input_fp = self.in_seqs1_fp
    blast_db, db_files = build_blast_db_from_fasta_path(input_fp)
    self.assertEqual(blast_db, input_fp)

    # Expected names are built from the input path itself.
    expected_db_files = set()
    for ext in ('.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log'):
        expected_db_files.add(input_fp + ext)
    self.assertEqual(set(db_files), expected_db_files)

    # result returned when blasting against new db
    blast_result = blastn(self.test_seq, blast_db=blast_db)
    self.assertEqual(len(blast_result), 1)

    # Make sure all db_files exist
    for path in db_files:
        self.assertTrue(exists(path))

    # Remove all db_files
    remove_files(db_files)

    # Make sure the removal left none of them behind
    for path in db_files:
        self.assertFalse(exists(path))