def test_blastparse(self):
    """Exercise core.blastparse with and without filtering thresholds.

    Four scenarios are run against ``self.sample_data``: no thresholds,
    a coverage threshold only, an identity threshold only, and both
    combined. In each scenario the subjects expected to survive are
    computed independently from ``self.sample_data_as_lists`` and checked
    against the clusters returned by blastparse.
    """
    queries, clusters = core.blastparse(self.sample_data, Record(), 0, 0)

    # check we process the right number of queries
    expected_query_count = len({i[0] for i in self.sample_data_as_lists})
    self.assertEqual(len(queries), expected_query_count)

    # check we have entries for every gene_cluster we found
    subjects = [self.parse_subject_wrapper(i) for i in self.sample_data_as_lists]
    self.verify_subjects_and_clusters_represented(subjects, clusters)

    # test perc_coverage threshold (value arbitrary due to mocking)
    coverage_threshold = 650
    _, clusters = core.blastparse(self.sample_data, Record(), coverage_threshold, 0)
    new_subjects = [s for s in subjects if s.perc_coverage > coverage_threshold]
    # Guard: the threshold must remove some, but not all, subjects; otherwise
    # the check below would pass trivially. Uses a unittest assertion so the
    # guard is not stripped when Python runs with -O.
    self.assertTrue(new_subjects and len(new_subjects) < len(subjects),
                    "coverage test has become meaningless")
    self.verify_subjects_and_clusters_represented(new_subjects, clusters)

    # test perc_identity threshold
    ident_threshold = 35
    _, clusters = core.blastparse(self.sample_data, Record(), 0, ident_threshold)
    new_subjects = [s for s in subjects if s.perc_ident > ident_threshold]
    self.assertTrue(new_subjects and len(new_subjects) < len(subjects),
                    "identity% test has become meaningless")
    self.verify_subjects_and_clusters_represented(new_subjects, clusters)

    # test combo threshold
    _, clusters = core.blastparse(self.sample_data, Record(),
                                  coverage_threshold, ident_threshold)
    new_subjects = [s for s in subjects
                    if s.perc_ident > ident_threshold
                    and s.perc_coverage > coverage_threshold]
    self.assertTrue(new_subjects and len(new_subjects) < len(subjects),
                    "combo test has become meaningless")
    self.verify_subjects_and_clusters_represented(new_subjects, clusters)
def test_blastparse_on_empty(self):
    """Empty or newline-only blast text must produce no queries and no clusters."""
    blank_inputs = ["", "\n", "\r\n", "\n\n"]
    for blank_text in blank_inputs:
        parsed_queries, parsed_clusters = core.blastparse(blank_text, Record(), 0, 0)
        # Both returned collections are expected to be empty for blank input.
        self.assertEqual(len(parsed_queries), 0)
        self.assertEqual(len(parsed_clusters), 0)