def test_load_file(self):
    '''Test _load_file'''
    infile = os.path.join(data_dir, 'summary_sample_test_load_file.in.tsv')
    with open(infile) as handle:
        stripped = [row.rstrip() for row in handle]

    # The first line of the file is skipped; every other line becomes a dict
    dicts = [
        summary_cluster.SummaryCluster.line2dict(row) for row in stripped[1:]
    ]

    # Cluster name -> indexes into dicts of the rows that belong to it
    layout = (
        ('cluster.n', (0, 1, 2)),
        ('cluster.p', (3, 4)),
        ('cluster.v', (5,)),
    )

    expected = {}
    for cluster_name, row_indexes in layout:
        built = summary_cluster.SummaryCluster()
        for row_index in row_indexes:
            built.add_data_dict(dicts[row_index])
        built.gather_data()
        expected[cluster_name] = built

    # No filter: every cluster in the file is returned
    got = summary_sample.SummarySample._load_file(infile, 90)
    self.assertEqual(expected, got)

    # With only_clusters, just the named cluster is returned
    got = summary_sample.SummarySample._load_file(
        infile, 90, only_clusters={'cluster.n'})
    self.assertEqual({'cluster.n': expected['cluster.n']}, got)
def test_gather_data(self):
    '''test gather_data'''
    report_lines = [
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
        'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
    ]

    data_dicts = []
    for report_line in report_lines:
        data_dicts.append(
            summary_cluster.SummaryCluster.line2dict(report_line))

    cluster = summary_cluster.SummaryCluster()
    for data_dict in data_dicts:
        cluster.add_data_dict(data_dict)
    cluster.gather_data()

    # gather_data is expected to fill in the per-cluster summary ...
    self.assertEqual(
        {
            'assembled': 'yes',
            'match': 'yes',
            'ref_seq': 'ref1',
            'pct_id': '98.33',
            'known_var': 'yes',
            'novel_var': 'no',
        },
        cluster.summary,
    )

    # ... and to keep only the variants flagged as has_nonsynon
    all_variants = [
        summary_cluster_variant.SummaryClusterVariant(data_dict)
        for data_dict in data_dicts
    ]
    expected_variants = set()
    for variant in all_variants:
        if variant.has_nonsynon:
            expected_variants.add(variant)
    self.assertEqual(expected_variants, cluster.variants)
def test_known_noncoding_het_snps(self):
    '''test known_noncoding_het_snps'''
    report_lines = [
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA42T\t1\tA42T\tSNP\t42\t42\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A42T:id1:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tA,T\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T,G\t10,40,50\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs',
    ]

    cluster = summary_cluster.SummaryCluster()
    for report_line in report_lines:
        parsed = summary_cluster.SummaryCluster.line2dict(report_line)
        cluster.add_data_dict(parsed)

    # Expected result is keyed by variant group id ('.' when the row has
    # none), then by variant name, with a percentage as the value
    expected = {
        '.': {'A82T': 40.0},
        'id1': {'A42T': 25.0, 'A14T': 100.0},
        'id2': {'A62T': 75.0},
    }
    self.assertEqual(expected, cluster.known_noncoding_het_snps())
def test_has_match(self):
    '''Test _has_match'''
    report_lines = [
        'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t0\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:1:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t0\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:1:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
    ]

    # One expected answer per report line, in the same order
    expected = [
        'yes', 'yes', 'yes', 'no', 'yes',
        'yes', 'yes', 'yes', 'no', 'no',
    ]

    for report_line, want in zip(report_lines, expected):
        data_dict = summary_cluster.SummaryCluster.line2dict(report_line)
        cluster = summary_cluster.SummaryCluster()
        cluster.add_data_dict(data_dict)

        # When the cluster assembled, the answer depends on the row itself
        for assembled_summary in ('yes', 'yes_nonunique'):
            self.assertEqual(want, cluster._has_match(assembled_summary))

        # Otherwise the answer must be 'no' whatever the row says
        for assembled_summary in ('no', 'fragmented'):
            self.assertEqual('no', cluster._has_match(assembled_summary))
def _load_file(filename, min_pc_id, only_clusters=None):
    '''Load a report file into a dict of cluster name -> SummaryCluster.

    filename: report file, opened with pyfastaq.utils.open_file_read.
    min_pc_id: passed as min_pc_id to every SummaryCluster created.
    only_clusters: optional container of cluster names. When it is not
        None, rows whose cluster is not in it are ignored.

    Lines starting with '#' are treated as header lines: the text after
    the '#', split on tabs, must equal report.columns, otherwise Error is
    raised. Every other line is parsed with SummaryCluster.line2dict and
    added to the cluster named in its 'cluster' field.

    After loading, gather_data() is called on each cluster, and clusters
    whose name is None afterwards are left out of the returned dict.
    '''
    f = pyfastaq.utils.open_file_read(filename)
    clusters = {}

    # try/finally so the file is closed on every exit path, including
    # exceptions raised while parsing a row or adding it to a cluster
    try:
        for line in f:
            if line.startswith('#'):
                if line.rstrip()[1:].split('\t') != report.columns:
                    raise Error('Error parsing the following line.\n' + line)
                continue

            data_dict = summary_cluster.SummaryCluster.line2dict(
                line, filename=filename)
            cluster = data_dict['cluster']

            if only_clusters is not None and cluster not in only_clusters:
                continue

            if cluster not in clusters:
                clusters[cluster] = summary_cluster.SummaryCluster(
                    min_pc_id=min_pc_id)
            clusters[cluster].add_data_dict(data_dict)
    finally:
        pyfastaq.utils.close(f)

    # Names are collected first and deleted afterwards, because a dict
    # must not change size while it is being iterated
    to_delete = set()
    for cluster_name, cluster in clusters.items():
        cluster.gather_data()
        if cluster.name is None:
            to_delete.add(cluster_name)

    for name in to_delete:
        del clusters[name]

    return clusters
def test_non_synon_variants(self):
    '''Test non_synon_variants'''
    # One row that describes a variant, and one row with no variant fields
    variant_line = 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs'
    empty_line = 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text'

    data_dicts = [
        summary_cluster.SummaryCluster.line2dict(report_line)
        for report_line in (variant_line, empty_line)
    ]

    cluster = summary_cluster.SummaryCluster()
    for data_dict in data_dicts:
        cluster.add_data_dict(data_dict)

    self.assertEqual(
        {('ref1', 'A14T', 'grouped', 'id1')},
        cluster.non_synon_variants(),
    )
def test_pc_id_of_longest(self):
    '''Test pc_id_of_longest'''
    cluster = summary_cluster.SummaryCluster()
    self.assertTrue(cluster.name is None)

    # The three rows differ in columns 9 to 11 (100/98.33/ctg_name,
    # 119/98.20/ctg_name2 and 114/98.32/ctg_name3). The row with the
    # largest value in column 9 carries the expected 98.2.
    report_lines = (
        'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t119\t98.20\tctg_name2\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t114\t98.32\tctg_name3\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
    )

    data_dicts = [
        summary_cluster.SummaryCluster.line2dict(report_line)
        for report_line in report_lines
    ]
    for data_dict in data_dicts:
        cluster.add_data_dict(data_dict)

    self.assertEqual(98.2, cluster.pc_id_of_longest())
def test_has_any_part_of_ref_assembled(self):
    '''Test _has_any_part_of_ref_assembled'''
    # The rows differ only in the ninth column: '.', then 0, then 100
    report_lines = (
        'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t.\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t0\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
    )
    data_dicts = [
        summary_cluster.SummaryCluster.line2dict(report_line)
        for report_line in report_lines
    ]

    # Rows are added one at a time and the cluster is re-checked after
    # each; only once the third row is in should the answer become true
    cluster = summary_cluster.SummaryCluster()
    for data_dict, should_be_true in zip(data_dicts, (False, False, True)):
        cluster.add_data_dict(data_dict)
        check = self.assertTrue if should_be_true else self.assertFalse
        check(cluster._has_any_part_of_ref_assembled())
def test_add_data_dict(self):
    '''Test add_data_dict'''
    cluster = summary_cluster.SummaryCluster()
    self.assertTrue(cluster.name is None)

    good_line = 'ariba_refname1\trefname\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
    # Same reference as good_line but a different cluster name
    other_cluster_line = 'ariba_refname1\trefname\t1\t0\t19\t78\tcluster2\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id2:ref has wild type, foo bar\tsome free text'
    # Same cluster name as good_line but a different reference
    other_ref_line = 'ariba_refname2\trefname2\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id3:ref has wild type, foo bar\tsome free text'

    good_dict = summary_cluster.SummaryCluster.line2dict(good_line)
    other_cluster_dict = summary_cluster.SummaryCluster.line2dict(
        other_cluster_line)
    other_ref_dict = summary_cluster.SummaryCluster.line2dict(other_ref_line)

    # The first row added gives the cluster its name and is stored
    cluster.add_data_dict(good_dict)
    self.assertEqual(cluster.name, good_dict['cluster'])
    self.assertEqual(cluster.data, [good_dict])

    # Both mismatching rows must be rejected
    for mismatching_dict in (other_cluster_dict, other_ref_dict):
        with self.assertRaises(summary_cluster.Error):
            cluster.add_data_dict(mismatching_dict)
def test_has_any_known_variant(self):
    '''Test _has_any_known_variant'''
    report_lines = [
        'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
        'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
        'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.',
    ]

    # One expected answer per report line, in the same order
    expected = ['yes', 'no', 'no', 'no', 'no', 'het']
    assert len(report_lines) == len(expected)

    # Each row is checked on its own, in a fresh single-row cluster
    for report_line, want in zip(report_lines, expected):
        data_dict = summary_cluster.SummaryCluster.line2dict(report_line)
        cluster = summary_cluster.SummaryCluster()
        cluster.add_data_dict(data_dict)
        self.assertEqual(want, cluster._has_any_known_variant())
def test_column_summary_data(self):
    '''Test column_summary_data'''
    # One row that describes a variant, and one row with no variant fields
    variant_line = 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:n:A14T:id1:foo_bar\tspam eggs'
    empty_line = 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text'

    data_dicts = [
        summary_cluster.SummaryCluster.line2dict(report_line)
        for report_line in (variant_line, empty_line)
    ]

    cluster = summary_cluster.SummaryCluster()
    for data_dict in data_dicts:
        cluster.add_data_dict(data_dict)

    expected = {
        'assembled': 'yes',
        'match': 'yes',
        'ref_seq': 'ref1',
        'novel_var': 'no',
        'known_var': 'yes',
        'pct_id': '98.33',
    }
    self.assertEqual(expected, cluster.column_summary_data())
def test_has_any_nonsynonymous(self):
    '''Test _has_any_nonsynonymous'''
    report_lines = [
        'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:N_ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t0\tHET\t.\t.\t.\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t.\t.',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tA,T\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs',
    ]

    # One expected answer per report line, in the same order
    expected = ['no', 'yes', 'no', 'yes', 'yes', 'het', 'het']
    assert len(report_lines) == len(expected)

    # Each row is checked on its own, in a fresh single-row cluster
    for report_line, want in zip(report_lines, expected):
        data_dict = summary_cluster.SummaryCluster.line2dict(report_line)
        cluster = summary_cluster.SummaryCluster()
        cluster.add_data_dict(data_dict)
        self.assertEqual(want, cluster._has_any_nonsynonymous())
def test_has_var_groups(self):
    '''Test has_var_groups'''
    # The rows carry group ids id1 to id7 (id7 appears twice)
    report_lines = [
        'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id2:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id3:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id4:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id5:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id6:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id7:ref has wild type, foo bar\tsome free text',
        'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id7:ref has wild type, foo bar\tsome free text',
    ]

    data_dicts = []
    for report_line in report_lines:
        data_dicts.append(
            summary_cluster.SummaryCluster.line2dict(report_line))

    # Unlike the per-row tests, all rows go into one cluster here
    cluster = summary_cluster.SummaryCluster()
    for data_dict in data_dicts:
        cluster.add_data_dict(data_dict)

    self.assertEqual({'id1', 'id3', 'id6'}, cluster.has_var_groups())
def test_to_cluster_summary_number(self):
    '''Test _to_cluster_summary_assembled'''
    line = 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text'
    data_dict = summary_cluster.SummaryCluster.line2dict(line)

    # Each case is (value for 'gene', integer passed to flag.Flag,
    # expected assembled summary)
    cases = [
        ('0', 0, 'partial'),
        ('0', 64, 'no'),
        ('0', 1024, 'no'),
        ('0', 1, 'fragmented'),
        ('0', 3, 'yes_nonunique'),
        ('0', 19, 'yes'),
        ('0', 23, 'yes_nonunique'),
        ('0', 51, 'yes_nonunique'),
        ('0', 147, 'yes_nonunique'),
        ('0', 275, 'yes_nonunique'),
        ('1', 0, 'partial'),
        ('1', 64, 'no'),
        ('1', 1024, 'no'),
        ('1', 1, 'fragmented'),
        ('1', 11, 'yes_nonunique'),
        ('1', 27, 'yes'),
        ('1', 29, 'fragmented'),
        ('1', 59, 'yes_nonunique'),
        ('1', 155, 'yes_nonunique'),
        ('1', 283, 'yes_nonunique'),
    ]

    for gene, flag_number, want in cases:
        cluster = summary_cluster.SummaryCluster()
        # The same data_dict is reused for every case and patched in place
        data_dict['gene'] = gene
        data_dict['flag'] = flag.Flag(flag_number)
        cluster.add_data_dict(data_dict)
        self.assertEqual(want, cluster._to_cluster_summary_assembled())

        if want != 'partial':
            continue

        # A 'partial' cluster must become 'no' once nothing of the
        # reference is assembled. The value is put back afterwards so
        # the following cases see the original number.
        saved_number = cluster.data[0]['ref_base_assembled']
        cluster.data[0]['ref_base_assembled'] = 0
        self.assertEqual('no', cluster._to_cluster_summary_assembled())
        cluster.data[0]['ref_base_assembled'] = saved_number