def test_gather_data(self):
    '''gather_data should populate both cluster.summary and cluster.variants'''
    report_lines = [
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
        'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
    ]

    # Parse every report line up front, then feed the dicts to a fresh
    # cluster in the same order before asking it to gather its data.
    parsed = list(map(summary_cluster.SummaryCluster.line2dict, report_lines))
    cluster = summary_cluster.SummaryCluster()
    for entry in parsed:
        cluster.add_data_dict(entry)
    cluster.gather_data()

    expected_summary = dict(
        assembled='yes',
        match='yes',
        ref_seq='ref1',
        pct_id='98.33',
        known_var='yes',
        novel_var='no',
    )
    self.assertEqual(expected_summary, cluster.summary)

    # The expected variants are those built from the parsed lines whose
    # has_nonsynon attribute is truthy.
    all_variants = list(
        map(summary_cluster_variant.SummaryClusterVariant, parsed))
    expected_variants = set(
        filter(lambda variant: variant.has_nonsynon, all_variants))
    self.assertEqual(expected_variants, cluster.variants)
def test_init(self):
    '''test __init__

    Each report line is parsed with SummaryCluster.line2dict and used to
    build a SummaryClusterVariant; the attributes named in the matching
    entry of `expected` must then hold the listed values.
    '''
    lines = [
        'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\t.\t.\t13\t13\tA\t84\t84\tA\t100\tA,T\t90,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
    ]

    expected = [
        {'coding': True, 'known': True, 'var_string': 'I14L', 'var_group': '.', 'het_percent': None},
        {'coding': False, 'known': True, 'var_string': '14T', 'var_group': 'id1', 'het_percent': 100.0},
        {'coding': False, 'known': True, 'var_string': '14AT', 'var_group': 'id1', 'het_percent': 25.0},
        {'coding': False, 'known': True, 'var_string': '14AGT', 'var_group': 'id1', 'het_percent': 50.0},
        {'coding': False, 'known': True, 'var_string': '14AT', 'var_group': 'id1', 'het_percent': 10.0},
    ]

    # Fixture sanity check. A unittest assertion is used rather than a bare
    # `assert` so the check still runs under `python -O`.
    self.assertEqual(len(lines), len(expected))

    for line, expected_attrs in zip(lines, expected):
        data_dict = summary_cluster.SummaryCluster.line2dict(line)
        cluster_var = summary_cluster_variant.SummaryClusterVariant(data_dict)
        for key, expected_value in expected_attrs.items():
            # getattr performs the attribute lookup directly; no need to
            # build and eval() a source string.
            got_value = getattr(cluster_var, key)
            self.assertEqual(expected_value, got_value, msg='attribute: ' + key)
def _get_all_nonsynon_variants_set(cls, data_dicts):
    '''Return the set of variants that are nonsynonymous or het.

    One SummaryClusterVariant is built per entry of data_dicts; it is kept
    when its has_nonsynon attribute is truthy or, failing that, when its
    is_het attribute is truthy. An empty input gives an empty set.
    '''
    # Lazy generator: each variant is constructed only as the set
    # comprehension below pulls it.
    candidates = (
        summary_cluster_variant.SummaryClusterVariant(entry)
        for entry in data_dicts
    )
    return {
        variant for variant in candidates
        if variant.has_nonsynon or variant.is_het
    }
def _has_known_variant(cls, data_dict):
    '''Classify data_dict as 'yes', 'no' or 'het' for a known variant.

    Returns 'yes' when data_dict['has_known_var'] is '1'. Returns 'no' when
    data_dict['known_var'] is '0' or data_dict['gene'] is '1'. Otherwise a
    SummaryClusterVariant is built from data_dict and the result is 'het'
    if its is_het attribute is truthy, else 'no'.
    '''
    if data_dict['has_known_var'] == '1':
        return 'yes'

    if data_dict['known_var'] == '0':
        return 'no'

    # we don't yet call hets in genes
    if data_dict['gene'] == '1':
        return 'no'

    variant = summary_cluster_variant.SummaryClusterVariant(data_dict)
    if variant.is_het:
        return 'het'
    return 'no'
def _has_nonsynonymous(cls, data_dict):
    '''Classify data_dict as 'yes', 'no' or 'het' for a nonsynonymous change.

    A SummaryClusterVariant is built from data_dict. If its is_het
    attribute is truthy the result is 'het'. Otherwise the result is 'yes'
    when the non-het condition below holds, and 'no' when it does not.
    '''
    cluster_var = summary_cluster_variant.SummaryClusterVariant(data_dict)

    # Non-het condition, evaluated in order:
    #   - ref_ctg_effect of 'SYN' never counts;
    #   - otherwise has_known_var of '1' always counts;
    #   - otherwise known_var of '1' never counts;
    #   - otherwise it counts if either ref_ctg_change or ref_ctg_effect
    #     is something other than '.'.
    effect = data_dict['ref_ctg_effect']
    if effect == 'SYN':
        has_non_het = False
    elif data_dict['has_known_var'] == '1':
        has_non_het = True
    elif data_dict['known_var'] == '1':
        has_non_het = False
    else:
        has_non_het = data_dict['ref_ctg_change'] != '.' or effect != '.'

    # A het call takes precedence over the non-het result.
    if cluster_var.is_het:
        return 'het'
    return 'yes' if has_non_het else 'no'
def test_get_all_nonsynon_variants_set(self):
    '''_get_all_nonsynon_variants_set should return the has_nonsynon variants'''
    report_lines = [
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
        'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
        'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
    ]

    parsed = list(map(summary_cluster.SummaryCluster.line2dict, report_lines))

    # Expected: the variants built from the parsed lines whose has_nonsynon
    # attribute is truthy.
    all_variants = list(
        map(summary_cluster_variant.SummaryClusterVariant, parsed))
    expected = set(filter(lambda variant: variant.has_nonsynon, all_variants))

    got = summary_cluster.SummaryCluster._get_all_nonsynon_variants_set(parsed)
    self.assertEqual(expected, got)