Esempio n. 1
0
    def test_write_hgt_list(self):
        # Drive `write_hgt_list` with a small five-row score table and compare
        # the text it writes to <output>/hgts/<sample>.txt with expected rows
        # under several donor_rank / donor_name settings.
        me = Analyze()
        me.output = self.tmpdir
        hgt_dir = join(me.output, 'hgts')
        makedirs(hgt_dir, exist_ok=True)
        me.donor_name = False
        me.donor_rank = None
        me.taxdump = taxdump_from_text(taxdump_proteo)
        add_children(me.taxdump)
        rows = [
            ['S1', 'P1', 0.85, '562', True],
            ['S1', 'P2', 0.95, '622', True],
            ['S1', 'P3', 1.05, '0', True],
            ['S2', 'P4', 0.80, '766', True],
            ['S2', 'P5', 0.20, '0', False],
        ]
        me.df = pd.DataFrame(
            rows, columns=['sample', 'protein', 'silh', 'match', 'hgt'])

        def written(sample):
            # (re)generate the list for one sample and return the file content
            me.write_hgt_list(sample)
            with open(join(hgt_dir, f'{sample}.txt'), 'r') as fh:
                return fh.read()

        # default
        expected = 'P1\t0.85\t562\n' + 'P2\t0.95\t622\n' + 'P3\t1.05\t0\n'
        self.assertEqual(written('S1'), expected)

        # number format and negative result: 0.80 is expected as "0.8", and
        # the row flagged hgt=False (P5) is expected to be absent
        self.assertEqual(written('S2'), 'P4\t0.8\t766\n')

        # raise to family
        me.donor_rank = 'family'
        expected = 'P1\t0.85\t543\n' + 'P2\t0.95\t543\n' + 'P3\t1.05\t0\n'
        self.assertEqual(written('S1'), expected)

        # report taxon name
        me.donor_rank = None
        me.donor_name = True
        expected = ('P1\t0.85\tEscherichia coli\n'
                    + 'P2\t0.95\tShigella dysenteriae\n'
                    + 'P3\t1.05\tN/A\n')
        self.assertEqual(written('S1'), expected)

        # clean up the files produced by this test
        rmtree(hgt_dir)
Esempio n. 2
0
    def test_predict_hgt(self):
        # Run `predict_hgt` on a synthetic 1000-protein score table, check the
        # returned count and the files left in the output directory, then
        # repeat with a constant "close" column.
        me = Analyze()

        # populate score table
        # NOTE(review): the expected count below depends on NumPy's global
        # random state -- presumably seeded by a fixture; the draws are made
        # in the same order as the columns (self, close, distal).
        n = 1000
        half = n // 2
        quarter = n // 4
        three_quarters = n * 3 // 4
        self_scores = np.random.choice(self.dist_gamma, n)
        close_scores = np.concatenate((
            np.random.choice(self.dist_norm1, half) / 3,
            np.random.choice(self.dist_norm2, half)))
        distal_scores = np.concatenate((
            np.random.choice(self.dist_lognorm, three_quarters),
            np.random.choice(self.dist_gamma, quarter) / 2))
        me.df = pd.DataFrame({
            'sample': ['S1'] * n,
            'protein': [f'P{i}' for i in range(n)],
            'self': self_scores,
            'close': close_scores,
            'distal': distal_scores,
            'match': ['0'] * n,
        })

        # default setting
        settings = {
            'output': self.tmpdir,
            'self_low': False,
            'bandwidth': 'auto',
            'bw_steps': 20,
            'low_part': 75,
            'fixed': 25,
            'noise': 50,
            'silhouette': 0.5,
            'taxdump': {},
            'donor_name': False,
            'donor_rank': None,
        }
        for attr, value in settings.items():
            setattr(me, attr, value)

        # run prediction
        self.assertEqual(me.predict_hgt(), 96)

        # one histogram per non-self group plus the scatter plot must exist;
        # each is deleted right after it is checked
        plots = [f'{group}.hist.png' for group in ('close', 'distal')]
        plots.append('scatter.png')
        for name in plots:
            path = join(self.tmpdir, name)
            self.assertTrue(isfile(path))
            remove(path)

        # the per-sample HGT list must exist as well
        hgt_dir = join(self.tmpdir, 'hgts')
        self.assertTrue(isfile(join(hgt_dir, 'S1.txt')))
        rmtree(hgt_dir)

        # constant values
        me.df['close'] = 1
        me.df.drop(columns='hgt', inplace=True)
        self.assertEqual(me.predict_hgt(), 0)
        self.assertNotIn('hgt', me.df.columns)
        remove(join(self.tmpdir, 'close.hist.png'))