def test_write_hgt_list(self):
    """Check the per-sample list files produced by `write_hgt_list`.

    Four configurations are exercised against the same small table:
    default output, a sample containing a non-HGT row, donors raised to
    family rank, and donors reported by taxon name.
    """
    me = Analyze()
    me.output = self.tmpdir

    def hgt_path(*parts):
        """Build a path under the `hgts` folder of the output directory."""
        return join(me.output, 'hgts', *parts)

    def written(fname):
        """Return the entire content of one list file."""
        with open(hgt_path(fname), 'r') as fh:
            return fh.read()

    makedirs(hgt_path(), exist_ok=True)
    me.donor_name = False
    me.donor_rank = None
    me.taxdump = taxdump_from_text(taxdump_proteo)
    add_children(me.taxdump)

    header = ['sample', 'protein', 'silh', 'match', 'hgt']
    records = [
        ['S1', 'P1', 0.85, '562', True],
        ['S1', 'P2', 0.95, '622', True],
        ['S1', 'P3', 1.05, '0', True],
        ['S2', 'P4', 0.80, '766', True],
        ['S2', 'P5', 0.20, '0', False],
    ]
    me.df = pd.DataFrame(records, columns=header)

    # default
    me.write_hgt_list('S1')
    expected = ('P1\t0.85\t562\n'
                'P2\t0.95\t622\n'
                'P3\t1.05\t0\n')
    self.assertEqual(written('S1.txt'), expected)

    # number format and negative result: P5 (hgt=False) must be absent and
    # the score 0.80 must be printed as 0.8
    me.write_hgt_list('S2')
    self.assertEqual(written('S2.txt'), 'P4\t0.8\t766\n')

    # raise to family
    me.donor_rank = 'family'
    me.write_hgt_list('S1')
    expected = ('P1\t0.85\t543\n'
                'P2\t0.95\t543\n'
                'P3\t1.05\t0\n')
    self.assertEqual(written('S1.txt'), expected)

    # report taxon name
    me.donor_rank = None
    me.donor_name = True
    me.write_hgt_list('S1')
    expected = ('P1\t0.85\tEscherichia coli\n'
                'P2\t0.95\tShigella dysenteriae\n'
                'P3\t1.05\tN/A\n')
    self.assertEqual(written('S1.txt'), expected)

    # clean up the files written above
    rmtree(hgt_path())
def test_predict_hgt(self):
    """Run `predict_hgt` on a synthetic score table and check its outputs.

    The first run uses the settings assigned below and verifies the
    returned count plus the plot and list files written to the temporary
    directory. The second run makes the `close` column constant and
    expects a return value of 0 with no `hgt` column added.
    """
    me = Analyze()

    # populate score table
    # NOTE: the draws below all use the global NumPy random state, so
    # their order must not change. The expected count further down
    # presumably relies on seeding done outside this method -- confirm.
    n = 1000
    half = n // 2
    quarter = n // 4
    three_quarters = n * 3 // 4
    self_scores = np.random.choice(self.dist_gamma, n)
    close_scores = np.concatenate((
        np.random.choice(self.dist_norm1, half) / 3,
        np.random.choice(self.dist_norm2, half)))
    distal_scores = np.concatenate((
        np.random.choice(self.dist_lognorm, three_quarters),
        np.random.choice(self.dist_gamma, quarter) / 2))
    me.df = pd.DataFrame({
        'sample': ['S1'] * n,
        'protein': [f'P{x}' for x in range(n)],
        'self': self_scores,
        'close': close_scores,
        'distal': distal_scores,
        'match': ['0'] * n,
    })

    # default setting
    settings = {
        'output': self.tmpdir,
        'self_low': False,
        'bandwidth': 'auto',
        'bw_steps': 20,
        'low_part': 75,
        'fixed': 25,
        'noise': 50,
        'silhouette': 0.5,
        'taxdump': {},
        'donor_name': False,
        'donor_rank': None,
    }
    for attr, value in settings.items():
        setattr(me, attr, value)

    # run prediction
    self.assertEqual(me.predict_hgt(), 96)

    # histograms are checked for the close and distal groups only,
    # followed by the scatter plot; each file is removed once verified
    plots = [f'{group}.hist.png' for group in ('close', 'distal')]
    plots.append('scatter.png')
    for fname in plots:
        plot_fp = join(self.tmpdir, fname)
        self.assertTrue(isfile(plot_fp))
        remove(plot_fp)

    hgt_dir = join(self.tmpdir, 'hgts')
    self.assertTrue(isfile(join(hgt_dir, 'S1.txt')))
    rmtree(hgt_dir)

    # constant values
    me.df['close'] = 1
    me.df.drop(columns='hgt', inplace=True)
    self.assertEqual(me.predict_hgt(), 0)
    self.assertNotIn('hgt', me.df.columns)
    remove(join(self.tmpdir, 'close.hist.png'))