コード例 #1
0
    def test_gather_data(self):
        '''gather_data should fill in the cluster summary and its variant set'''
        report_lines = [
            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
            'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
        ]

        # Parse every report line once; the same dicts feed both the cluster
        # under test and the independently built expected variants.
        parsed_rows = [
            summary_cluster.SummaryCluster.line2dict(line)
            for line in report_lines
        ]

        cluster = summary_cluster.SummaryCluster()
        for row in parsed_rows:
            cluster.add_data_dict(row)
        cluster.gather_data()

        self.assertEqual(
            {
                'assembled': 'yes',
                'match': 'yes',
                'ref_seq': 'ref1',
                'pct_id': '98.33',
                'known_var': 'yes',
                'novel_var': 'no',
            },
            cluster.summary,
        )

        # The cluster is expected to hold exactly the variants whose
        # has_nonsynon attribute is truthy.
        wanted_variants = set()
        for row in parsed_rows:
            variant = summary_cluster_variant.SummaryClusterVariant(row)
            if variant.has_nonsynon:
                wanted_variants.add(variant)
        self.assertEqual(wanted_variants, cluster.variants)
コード例 #2
0
    def test_init(self):
        '''test __init__

        Each report line is parsed with SummaryCluster.line2dict, wrapped in a
        SummaryClusterVariant, and the resulting object's attributes are
        compared with the matching entry of ``expected``.
        '''
        lines = [
            'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\t.\t.\t13\t13\tA\t84\t84\tA\t100\tA,T\t90,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
        ]

        expected = [
            {
                'coding': True,
                'known': True,
                'var_string': 'I14L',
                'var_group': '.',
                'het_percent': None
            },
            {
                'coding': False,
                'known': True,
                'var_string': '14T',
                'var_group': 'id1',
                'het_percent': 100.0
            },
            {
                'coding': False,
                'known': True,
                'var_string': '14AT',
                'var_group': 'id1',
                'het_percent': 25.0
            },
            {
                'coding': False,
                'known': True,
                'var_string': '14AGT',
                'var_group': 'id1',
                'het_percent': 50.0
            },
            {
                'coding': False,
                'known': True,
                'var_string': '14AT',
                'var_group': 'id1',
                'het_percent': 10.0
            },
        ]
        # Use a real test assertion (a bare assert is stripped under -O); a
        # length mismatch would otherwise make zip() silently skip cases.
        self.assertEqual(len(lines), len(expected))

        for line, expected_attrs in zip(lines, expected):
            data_dict = summary_cluster.SummaryCluster.line2dict(line)
            cluster_var = summary_cluster_variant.SummaryClusterVariant(
                data_dict)
            for key, expected_value in expected_attrs.items():
                # getattr performs the attribute lookup directly, without
                # building and evaluating source text.
                got_value = getattr(cluster_var, key)
                self.assertEqual(
                    expected_value, got_value, msg='attribute: ' + key)
コード例 #3
0
    def _get_all_nonsynon_variants_set(cls, data_dicts):
        variants = set()

        for data_dict in data_dicts:
            cluster_var = summary_cluster_variant.SummaryClusterVariant(
                data_dict)
            if cluster_var.has_nonsynon or cluster_var.is_het:
                variants.add(cluster_var)

        return variants
コード例 #4
0
 def _has_known_variant(cls, data_dict):
     if data_dict['has_known_var'] == '1':
         return 'yes'
     elif data_dict['known_var'] == '0':
         return 'no'
     elif data_dict['gene'] == '1':  # we don't yet call hets in genes
         return 'no'
     else:
         cluster_var = summary_cluster_variant.SummaryClusterVariant(
             data_dict)
         return 'het' if cluster_var.is_het else 'no'
コード例 #5
0
    def _has_nonsynonymous(cls, data_dict):
        '''Return 'yes', 'het' or 'no' for the nonsynonymous status of a row.

        'het' is returned whenever the SummaryClusterVariant built from the
        row reports is_het. Otherwise the result is 'yes' when the row
        qualifies as a non-het nonsynonymous change (see below), else 'no'.
        '''
        variant = summary_cluster_variant.SummaryClusterVariant(data_dict)

        # Decide whether the row qualifies, reading the keys in a fixed order
        # and stopping as soon as the answer is known.
        effect = data_dict['ref_ctg_effect']
        if effect == 'SYN':
            qualifies = False
        elif data_dict['has_known_var'] == '1':
            qualifies = True
        elif data_dict['known_var'] == '1':
            qualifies = False
        else:
            # Any recorded change or effect (i.e. not the '.' placeholder).
            qualifies = data_dict['ref_ctg_change'] != '.' or effect != '.'

        # is_het takes precedence over the yes/no outcome.
        if variant.is_het:
            return 'het'
        return 'yes' if qualifies else 'no'
コード例 #6
0
    def test_get_all_nonsynon_variants_set(self):
        '''_get_all_nonsynon_variants_set is compared with the has_nonsynon variants'''
        report_lines = [
            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
            'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
        ]

        parsed_rows = [
            summary_cluster.SummaryCluster.line2dict(line)
            for line in report_lines
        ]

        # Build the expectation independently of the method under test:
        # wrap each row and keep those whose has_nonsynon is truthy.
        wanted = set()
        for row in parsed_rows:
            variant = summary_cluster_variant.SummaryClusterVariant(row)
            if variant.has_nonsynon:
                wanted.add(variant)

        actual = summary_cluster.SummaryCluster._get_all_nonsynon_variants_set(
            parsed_rows)
        self.assertEqual(wanted, actual)