def test_parse_all_multi_cluster(self):
    """Check that parsed results are partitioned by cluster number.

    The multi-cluster sample is expected to yield exactly the cluster
    numbers 1, 2 and 4, and the entry stored under each number is
    expected to have a length equal to that number.
    """
    expected_numbers = [1, 2, 4]
    raw_text = self.read_sample_data(
        "data/diamond_output_sample_multicluster.txt"
    )
    # thresholds of 0 for both coverage and identity
    parsed = core.parse_all_clusters(raw_text, Record(), 0, 0)
    clusters_by_number, queries_by_number = parsed

    # both mappings must be keyed by the same three cluster numbers
    self.assertEqual(len(clusters_by_number), 3)
    self.assertEqual(sorted(clusters_by_number), expected_numbers)
    self.assertEqual(len(queries_by_number), 3)
    self.assertEqual(sorted(queries_by_number), expected_numbers)

    # the cluster number doubles as the expected size of its entry
    for number in expected_numbers:
        cluster_entry = clusters_by_number[number]
        self.assertEqual(len(cluster_entry), number)
        query_entry = queries_by_number[number]
        self.assertEqual(len(query_entry), number)
def parse_all_wrapper(coverage_threshold, ident_threshold):
    """Parse the shared sample data and return the entries for cluster 24.

    ``self`` is not a parameter; it is read from the surrounding scope
    (presumably the enclosing test method -- confirm against the caller).

    Args:
        coverage_threshold: passed through to ``core.parse_all_clusters``.
        ident_threshold: passed through to ``core.parse_all_clusters``.

    Returns:
        A ``(queries, clusters)`` pair for cluster number 24. Note the
        order is the reverse of what ``core.parse_all_clusters`` returns.
    """
    only_number = 24
    parsed = core.parse_all_clusters(
        self.sample_data,
        Record(),
        coverage_threshold,
        ident_threshold,
    )
    by_cluster, by_query = parsed

    # exactly one cluster number may be present in either mapping
    self.assertEqual(len(by_cluster), 1)
    self.assertEqual(list(by_cluster), [only_number])
    self.assertEqual(len(by_query), 1)
    self.assertEqual(list(by_query), [only_number])

    # hand back the contents for the caller to inspect, queries first
    return by_query[only_number], by_cluster[only_number]
def test_parse_all_empty(self):
    """Check that blank inputs produce no clusters and no queries."""
    blank_inputs = ("", "\n", "\r\n", "\n\n")
    for sample_data in blank_inputs:
        # thresholds of 0 for both coverage and identity
        parsed = core.parse_all_clusters(sample_data, Record(), 0, 0)
        found_clusters, found_queries = parsed
        self.assertEqual(len(found_clusters), 0)
        self.assertEqual(len(found_queries), 0)