Python Analyze.data Examples

Programming Language: Python

Namespace/Package Name: hgtector.analyze

Class/Type: Analyze

Method/Function: data

Examples at hotexamples.com: 4

Python Analyze.data - 4 examples found. These are the top-rated real-world Python examples of hgtector.analyze.Analyze.data, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

Analyze(22)

df(9)

output(6)

data(4)

bw_steps(4)

low_part(3)

groups(3)

noise(3)

density_func(3)

bandwidth(3)

close_tax(2)

close_size(2)

donor_name(2)

donor_rank(2)

first_hill(2)

match_th(2)

calc_cluster_props(2)

lca(2)

input_tax(2)

outliers_zscore(1)

outliers_boxplot(1)

input_map(1)

outliers(1)

perform_kde(1)

make_score_table(1)

grid_kde(1)

input_cov(1)

input(1)

infer_self_group(1)

infer_genome_tax(1)

infer_close_group(1)

assign_taxonomy(1)

fixed(1)

find_match(1)

define_groups(1)

cluster_kde(1)

cfg(1)

calc_scores(1)

plot_density(1)

Example #1

Show file

    def test_sum_taxids(self):
        # sum_taxids is expected to return the set of TaxIDs found in the
        # input taxonomy together with those in every protein's hit table.

        def _as_hit_table(taxids):
            # one-column ('taxid') data frame indexed by the dict's keys
            series = pd.Series(taxids, name='taxid')
            return series.to_frame()

        analyzer = Analyze()
        analyzer.input_tax = {'S1': '1', 'S2': '3'}
        analyzer.data = {
            'S1': [
                {'hits': _as_hit_table({'a': '4', 'b': '6'})},
                {'hits': _as_hit_table({'a': '4', 'c': '8'})},
            ],
            'S2': [
                {'hits': _as_hit_table({'b': '6', 'd': '1'})},
            ],
        }

        # duplicates ('4', '6', '1') must collapse into single entries
        observed = analyzer.sum_taxids()
        self.assertSetEqual(observed, {'1', '3', '4', '6', '8'})

Example #2

Show file

    def test_calc_scores(self):
        # calc_scores is expected to tag every hit with a group label and to
        # store per-protein 'self' / 'close' / 'distal' scores plus 'match'.
        hit_fields = ('id', 'taxid', 'score')

        def _hit_table(rows):
            # hit table built from (id, taxid, score) rows, indexed by id
            table = pd.DataFrame(rows, columns=hit_fields)
            return table.set_index('id')

        def _group_scores(protein):
            # the three group scores in a fixed order
            return [protein['self'], protein['close'], protein['distal']]

        analyzer = Analyze()
        analyzer.taxdump = taxdump_from_text(taxdump_proteo)
        add_children(analyzer.taxdump)
        analyzer.groups = {
            'self': {'561', '562', '585056'},
            'close': {'543', '91347', '1236'},
        }
        analyzer.data = {
            'S1': [
                {
                    'score': 100,
                    'hits': _hit_table((
                        ('P1', '561', 100),
                        ('P2', '562', 95),
                    )),
                },
                {
                    'score': 90,
                    'hits': _hit_table((
                        ('P3', '561', 81),
                        ('P4', '543', 72),
                    )),
                },
            ],
            'S2': [
                {
                    'score': 96,
                    'hits': _hit_table((
                        ('P5', '561', 90),
                        ('P6', '543', 84),
                        ('P7', '620', 66),
                    )),
                },
            ],
        }
        analyzer.weighted = True
        analyzer.match_th = 0.9
        analyzer.calc_scores()

        # (protein, expected hit groups, expected scores, expected match);
        # the expected scores are consistent with each hit's score divided by
        # the protein's own score, summed per group (e.g. (100 + 95) / 100)
        expectations = (
            (analyzer.data['S1'][0],
             ['self', 'self'], [1.95, 0.0, 0.0], '0'),
            (analyzer.data['S1'][1],
             ['self', 'close'], [0.9, 0.8, 0.0], '0'),
            (analyzer.data['S2'][0],
             ['self', 'close', 'distal'], [0.9375, 0.875, 0.6875], '620'),
        )
        for protein, groups, scores, matched in expectations:
            self.assertListEqual(protein['hits']['group'].tolist(), groups)
            self.assertListEqual(_group_scores(protein), scores)
            self.assertEqual(protein['match'], matched)

Example #3

Show file

 def test_make_score_table(self):
     # make_score_table is expected to fill me.df with one row per protein
     # (sample, id, length, number of hits, self, close, distal, match) and
     # to write the same rows to scores.tsv in the output directory.
     me = Analyze()
     me.output = self.tmpdir
     me.data = {
         'S1': [{
             'id': 'P1',
             'length': 100,
             'match': '0',
             'self': 1.5,
             'close': 0.75,
             'distal': 0.0,
             'hits': pd.DataFrame([0] * 3)
         }, {
             'id': 'P2',
             'length': 120,
             'match': '1224',
             'self': 1.625,
             'close': 0.225,
             'distal': 0.375,
             'hits': pd.DataFrame([0] * 5)
         }],
         'S2': [{
             'id': 'P1',
             'length': 225,
             'match': '620',
             'self': 2.35,
             'close': 1.05,
             'distal': 0.75,
             'hits': pd.DataFrame([0] * 6)
         }]
     }
     fp = join(self.tmpdir, 'scores.tsv')
     try:
         me.make_score_table()
         obs = me.df.values.tolist()
         exp = [['S1', 'P1', 100, 3, 1.5, 0.75, 0, '0'],
                ['S1', 'P2', 120, 5, 1.625, 0.225, 0.375, '1224'],
                ['S2', 'P1', 225, 6, 2.35, 1.05, 0.75, '620']]
         self.assertListEqual(obs, exp)
         # the first line of the file (presumably the header) is skipped
         with open(fp, 'r') as f:
             obs = [x.split('\t') for x in f.read().splitlines()[1:]]
         exp = [[str(y) for y in x] for x in exp]
         self.assertListEqual(obs, exp)
     finally:
         # always clean up, so that a failed assertion does not leave
         # scores.tsv behind in the shared temporary directory; the file may
         # not exist if make_score_table itself failed, which must not mask
         # the original error
         try:
             remove(fp)
         except FileNotFoundError:
             pass

Example #4

Show file

    def test_assign_taxonomy(self):

        def _taxid_table(taxids):
            # one-column ('taxid') hit table keyed by hit id
            series = pd.Series(taxids, name='taxid', dtype=object)
            return series.to_frame()

        # --- two genomes, both with user-defined taxonomy ---
        analyzer = Analyze()
        analyzer.input_tax = 'S1:561,S2:620'  # Escherichia and Shigella
        analyzer.data = {}
        analyzer.taxdump = taxdump_from_text(taxdump_proteo)
        analyzer.assign_taxonomy()
        # the taxonomy string is parsed into a sample-to-TaxID mapping
        self.assertDictEqual(analyzer.input_tax, {'S1': '561', 'S2': '620'})
        # the taxonomy database is trimmed down to these TaxIDs
        kept = set(analyzer.taxdump.keys())
        self.assertSetEqual(kept, {
            '1', '131567', '2', '1224', '1236', '91347', '543', '561', '620',
        })
        # the LCA of both genomes is found
        self.assertEqual(analyzer.lca, '543')

        # --- a single genome with user-defined taxonomy ---
        analyzer = Analyze()
        empty_hits = pd.DataFrame(columns=['taxid'])
        analyzer.data = {'S1': [{'hits': empty_hits}]}
        analyzer.input_tax = '561'  # Escherichia
        analyzer.taxdump = taxdump_from_text(taxdump_proteo)
        analyzer.assign_taxonomy()
        self.assertDictEqual(analyzer.input_tax, {'S1': '561'})

        # --- user-defined TaxID absent from the database ---
        analyzer.input_tax = '1234'
        analyzer.taxdump = taxdump_from_text(taxdump_proteo)
        with self.assertRaises(ValueError) as caught:
            analyzer.assign_taxonomy()
        self.assertEqual(
            str(caught.exception),
            'TaxID 1234 is not present in taxonomy database.')

        # --- two genomes whose taxonomies must be inferred from the search
        # results ---
        analyzer = Analyze()
        analyzer.input_tax = None
        analyzer.data = {
            'S1': [
                {'hits': _taxid_table({'P1': '561', 'P2': '562'})},
                {'hits': _taxid_table({'P3': '543', 'P4': '561'})},
            ],
            'S2': [
                {'hits': _taxid_table({'P5': '562', 'P6': '585056'})},
                {'hits': _taxid_table({'P7': '561', 'P8': '1038927'})},
                {'hits': _taxid_table({'P9': '2580236'})},
            ],
        }
        analyzer.input_cov = 75
        analyzer.taxdump = taxdump_from_text(taxdump_proteo)
        analyzer.assign_taxonomy()
        self.assertDictEqual(analyzer.input_tax, {'S1': '543', 'S2': '561'})
        self.assertEqual(analyzer.lca, '543')

        # --- a genome without any hit cannot have its taxonomy inferred ---
        analyzer.data['S3'] = [{'hits': _taxid_table({})}]
        analyzer.taxdump = taxdump_from_text(taxdump_proteo)
        with self.assertRaises(ValueError) as caught:
            analyzer.assign_taxonomy()
        self.assertEqual(
            str(caught.exception),
            'Cannot auto-infer taxonomy for S3. Please specify manually.')

        # --- a bare TaxID is rejected now that data holds several genomes ---
        analyzer.input_tax = '561'
        with self.assertRaises(ValueError) as caught:
            analyzer.assign_taxonomy()
        self.assertEqual(
            str(caught.exception), 'Invalid input taxonomy format.')