Beispiel #1
0
    def testFuncBar(self):
        """Check the namespace column of the bar-plot tabfile per ontology.

        For each ``target_onto`` code (bp, cc, mf) the ``bar`` viz is run in
        function mode with a tabfile, the tabfile is read back, and its
        ``namespace`` column must contain exactly the one expected namespace.
        """
        infile = testfile('eggnog_out.tab')
        tabfile = testfile('viz_f_filt.tab')
        expected_namespaces = (
            ('bp', 'biological_process'),
            ('cc', 'cellular_component'),
            ('mf', 'molecular_function'),
        )
        for onto, expected in expected_namespaces:
            with self.subTest(target_onto=onto):
                run_viz('bar', self.img, infile, mode='f',
                        nterms='2', meancol='NS_mean',
                        target_onto=onto, barcol='1', tabfile=tabfile)
                df = pd.read_csv(tabfile, sep='\t')
                # unique() returns a numpy array. Comparing an array to a list
                # with assertEqual raises "truth value of an array is
                # ambiguous" as soon as more than one namespace is present,
                # so compare plain lists to get a regular assertion failure.
                self.assertEqual(list(df.namespace.unique()), [expected])
 def testMultCols(self):
     """Expand taxonomy with three intensity columns and check one phylum value.

     The Proteobacteria phylum row must have ``int3`` equal to log2(70).
     """
     tax_path = testfile('multiple_tax.tab')
     int_path = testfile('multiple_int.tab')
     expanded = expand.expand(
         't',
         sinfo='{"s1": ["int1", "int2", "int3"]}',
         int_file=int_path,
         pep_colname_int='peptide',
         pep_colname_func='peptide',
         pep_colname_tax='peptide',
         data_dir=TEST_DIR,
         tax_file=tax_path,
         tax_colname='lca',
     )
     proteobacteria = expanded.query("rank == 'phylum' and taxon_name == 'Proteobacteria'")
     self.assertEqual(proteobacteria['int3'].values[0], np.log2(70))
Beispiel #3
0
    def testFtDist(self):
        """Run the ``ft_dist`` plot in both directions, then once with a tabfile.

        The first two calls only need to complete; the third must create the
        requested tabfile, which is removed afterwards.
        """
        infile = testfile('ft_out.tab')
        # keyword arguments shared by both taxonomy-distribution calls
        t_dist_kwargs = dict(meancol="s1_mean",
                             whichway='t_dist',
                             id="GO:0008150",
                             target_rank="genus",
                             nterms="all")

        # t dist
        run_viz('ft_dist', self.img, infile, **t_dist_kwargs)

        # f dist
        run_viz('ft_dist', self.img, infile,
                meancol="s1_mean", whichway='f_dist',
                target_onto="bp", nterms="all", id=209)

        # test tabfile
        tabfile = testfile("tmp")
        run_viz('ft_dist', self.img, infile, tabfile=tabfile, **t_dist_kwargs)
        self.assertTrue(os.path.exists(tabfile))
        os.remove(tabfile)
 def testSingleBasic(self):
     """Expand a single-sample taxonomy and check the H. pylori intensity.

     Exactly one row is expected for 'Helicobacter pylori', and its ``int``
     value must equal log2(100).
     """
     tax_path = testfile('simple_tax.tab')
     int_path = testfile('simple_int.tab')
     tax_df = expand.expand('t', sinfo='{"s1": ["int"]}', int_file=int_path, pep_colname_int='peptide',
                            pep_colname_func='peptide', pep_colname_tax='peptide', data_dir=TEST_DIR,
                            tax_file=tax_path, tax_colname='lca')
     h_pylori_ints = tax_df.query("taxon_name == 'Helicobacter pylori'")['int'].values
     # Compare as a list rather than array-vs-scalar: assertEqual on a numpy
     # array only behaves for exactly one element and raises an ambiguous
     # truth-value error otherwise. The list form also pins "exactly one row".
     self.assertEqual(list(h_pylori_ints), [np.log2(100)])
Beispiel #5
0
 def testDA(self):
     """Expand GO function data, run the parametric unpaired test, check p-values.

     GO:0008152 must have p > 0.05; GO:0022610, GO:0000003 and GO:0032505
     must all have p <= 0.05.
     """
     func_path = testfile('multiple_func.tab')
     int_path = testfile('int_ttest.tab')
     expanded_path = testfile('go_expanded_ttest.tab')
     tested_path = testfile('go_tested.tab')
     # the expanded table is consumed through the file written to expanded_path
     expand.expand('f', sinfo=TTEST_SINFO, int_file=int_path,
                   pep_colname_int='peptide', pep_colname_func='peptide',
                   pep_colname_tax='peptide', data_dir=TEST_DIR,
                   outfile=expanded_path, func_file=func_path,
                   func_colname='go', ontology='go')
     tested = stat.stat(expanded_path, sinfo=TTEST_SINFO, paired=False,
                        parametric=True, ontology='go', mode='f',
                        outfile=tested_path)
     p_values = tested['p']
     # make sure false is > 0.05 and trues are less than 0.05
     self.assertTrue(p_values['GO:0008152'] > 0.05)
     significant = ['GO:0022610', 'GO:0000003', 'GO:0032505']
     self.assertTrue(p_values[significant].le(0.05).all())
Beispiel #6
0
    def testReadAndDE(self):
        """Expand in function-taxonomy ('ft') mode and verify one group mean.

        The ``s1_mean`` of the (Clostridioides, GO:0008150) row must equal the
        log2 of the mean of the summed intensities of peptides B and C over
        int1..int3.
        """
        func_path = testfile('multiple_func.tab')
        int_path = testfile('int_ttest.tab')
        # Intensity fixture, as noted by the original author:
        #   peptide  int1  int2  int3  int4  int5  int6
        #   A        12    20    15    12    21    10
        #   B        20    30    20    3500  2000  3000
        #   C        1000  1200  900   12    13    10
        # todo - add test for non-slim

        tax_path = testfile('multiple_tax.tab')
        ft_out = testfile('ft_out.tab')
        ft_df = expand.expand(mode='ft',
                              sinfo=tu.TTEST_SINFO,
                              int_file=int_path,
                              pep_colname_int='peptide',
                              pep_colname_func='peptide',
                              pep_colname_tax='peptide',
                              data_dir=TEST_DIR,
                              outfile=ft_out,
                              func_file=func_path,
                              func_colname='go',
                              ontology='go',
                              slim_down=True,
                              tax_file=tax_path,
                              tax_colname='lca')
        # make sure calculated mean is accurate
        # b and c both map to 8150
        exp_s1_mean = np.log2(((20 + 1000) + (1200 + 30) + (900 + 20)) / 3)
        matching = ft_df.loc[(ft_df['taxon_name'] == 'Clostridioides') &
                             (ft_df['go'] == 'GO:0008150'), 's1_mean']
        # Take the first match by position. Plain ``[0]`` is a label lookup on
        # an integer index and a deprecated positional fallback otherwise.
        obtained_mean = matching.iloc[0]

        self.assertEqual(exp_s1_mean, obtained_mean)
Beispiel #7
0
 def testTaxTTests(self):
     """Expand taxonomy data, run the non-parametric unpaired test, check results.

     Checks p-values for ids 210, 1496 and 1870884 and the ``int1`` value of
     id 1239.
     """
     tax_path = testfile('multiple_tax.tab')
     int_path = testfile('int_ttest.tab')
     expanded_path = testfile('expand_taxttest.tab')
     # the expanded table is consumed through the file written to expanded_path
     expand.expand('t', sinfo=TTEST_SINFO, int_file=int_path,
                   pep_colname_int='peptide', pep_colname_func='peptide',
                   pep_colname_tax='peptide', data_dir=TEST_DIR,
                   outfile=expanded_path, tax_file=tax_path, tax_colname='lca')
     tested = stat.stat(expanded_path, sinfo=TTEST_SINFO, paired=False,
                        parametric=False, ontology=None, mode=None,
                        outfile=None)
     p_values = tested['p']
     # make sure false is > 0.05 and trues are less than 0.05
     self.assertTrue(p_values[210] > 0.05)
     self.assertTrue(p_values[[1496, 1870884]].le(0.05).all())
     # also, make sure firmicutes phylum is sum of c difficile and clostridiaceae
     self.assertEqual(tested['int1'][1239], np.log2(1020))
Beispiel #8
0
 def testDiffAbundEc(self):
     """Expand EC function data, run the parametric unpaired test, check p-values.

     EC 3.4.11.- must have p > 0.05; 3.4.21.70 and 1.2.-.- must have p <= 0.05.
     """
     func_path = testfile('multiple_func.tab')
     int_path = testfile('int_ttest.tab')
     expanded_path = testfile('ec_ttest.tab')
     tested_path = testfile('ec_ttest_tested.tab')
     expand.expand('f', sinfo=TTEST_SINFO, int_file=int_path,
                   pep_colname_int='peptide', pep_colname_func='peptide',
                   pep_colname_tax='peptide', data_dir=TEST_DIR,
                   outfile=expanded_path, func_file=func_path,
                   func_colname='ec', ontology='ec')
     tested = stat.stat(expanded_path, sinfo=TTEST_SINFO, paired=False,
                        parametric=True, ontology='ec', mode='f',
                        outfile=tested_path)
     p_values = tested['p']
     # make sure false is > 0.05 and trues are less than 0.05
     self.assertTrue(p_values['3.4.11.-'] > 0.05)
     self.assertTrue(p_values[['3.4.21.70', '1.2.-.-']].le(0.05).all())
 def testSimpleEc(self):
     """Expand single-sample EC data and check two EC intensities."""
     func_path = testfile('simple_ec.tab')
     int_path = testfile('simple_int.tab')
     expanded = expand.expand(
         'f',
         sinfo='{"s1": ["int"]}',
         int_file=int_path,
         pep_colname_int='peptide',
         pep_colname_func='peptide',
         pep_colname_tax='peptide',
         data_dir=TEST_DIR,
         func_file=func_path,
         func_colname='ec',
         ontology='ec',
     )
     expected_by_ec = (("3.4.11.-", 100), ("3.4.-.-", 300))
     for ec_number, raw_intensity in expected_by_ec:
         self.assertEqual(expanded.loc[ec_number]['int'], np.log2(raw_intensity))
 def testCog(self):
     """Expand COG function data and check a group mean and one sample value."""
     func_path = testfile('multiple_func.tab')
     int_path = testfile('multiple_int.tab')
     expanded = expand.expand(
         'f',
         sinfo='{"s1": ["int1", "int2", "int3"]}',
         int_file=int_path,
         pep_colname_int='peptide',
         pep_colname_func='peptide',
         pep_colname_tax='peptide',
         func_file=func_path,
         func_colname='cog',
         ontology='cog',
     )
     category_c = expanded.loc["C"]
     category_n = expanded.loc["N"]
     self.assertEqual(category_c['s1_mean'], np.log2((10+20+70)/3))
     self.assertEqual(category_n['int2'], np.log2(30))
 def testSingleInt(self):
     """Expand single-sample GO data and check two GO term intensities."""
     func_path = testfile('simple_func.tab')
     int_path = testfile('simple_int.tab')
     expanded = expand.expand(
         'f',
         sinfo='{"s1": ["int"]}',
         int_file=int_path,
         pep_colname_int='peptide',
         pep_colname_func='peptide',
         pep_colname_tax='peptide',
         data_dir=TEST_DIR,
         func_file=func_path,
         func_colname='go',
         ontology='go',
     )
     expected_by_term = (("GO:0022610", 200), ("GO:0008152", 100))
     for term, raw_intensity in expected_by_term:
         self.assertEqual(expanded.loc[term]['int'], np.log2(raw_intensity))
 def testDifferentNames(self):
     """Expand in 'ft' mode when the intensity file's peptide column is 'Sequence'.

     The result must expose an ``A_mean`` entry when iterated.
     """
     tax_path = testfile('ft_tax.tab')
     func_path = testfile('ft_func.tab')
     int_path = testfile('ft_int.tab')
     expanded = expand.expand(
         'ft',
         sinfo='{"A": ["int"]}',
         int_file=int_path,
         pep_colname_int='Sequence',
         pep_colname_func='peptide',
         pep_colname_tax='peptide',
         data_dir=TEST_DIR,
         tax_file=tax_path,
         tax_colname='lca',
         func_file=func_path,
         func_colname="go",
     )
     names = list(expanded)
     self.assertIn("A_mean", names)
 def testWrite(self):
     """Expand taxonomy with an outfile and verify a value read back from disk."""
     tax_path = testfile('simple_tax.tab')
     int_path = testfile('simple_int.tab')
     out_path = testfile('taxonomy_write_simple.tab')
     # only the written file is inspected, not the returned table
     expand.expand(
         mode='t',
         sinfo='{"samp1": ["int"]}',
         int_file=int_path,
         pep_colname_int='peptide',
         pep_colname_func='peptide',
         pep_colname_tax='peptide',
         data_dir=TEST_DIR,
         outfile=out_path,
         tax_file=tax_path,
         tax_colname='lca',
     )
     written = pd.read_table(out_path)
     c_difficile = written.query("taxon_name == 'Clostridioides difficile'")
     self.assertAlmostEqual(c_difficile['samp1_mean'].values[0], np.log2(200))
Beispiel #14
0
 def testPCABig(self):
     """Run the PCA plot through the CLI and expect exit status 0."""
     infile = testfile('tax_filt_out.tab')
     sampfile = testfile('rudney_samples.tab')
     imgfile = testfile('cli_pca_viz.png')
     # Pass an argument list instead of a shell string so that file paths
     # containing spaces or shell metacharacters reach the CLI unchanged.
     cmd = [
         'python3', 'metaquantome/cli.py', 'viz',
         '-m', 't',
         '--plottype', 'pca',
         '--infile', infile,
         '--img', imgfile,
         '--samps', sampfile,
         '--calculate_sep',
     ]
     test_status = subprocess.call(cmd)
     self.assertEqual(test_status, 0)
 def testMultipleEc(self):
     """Expand multi-sample EC data; check two values and one missing value."""
     func_path = testfile('multiple_func.tab')
     int_path = testfile('multiple_int.tab')
     expanded = expand.expand(
         'f',
         sinfo='{"s1": ["int1", "int2", "int3"]}',
         int_file=int_path,
         pep_colname_int='peptide',
         pep_colname_func='peptide',
         pep_colname_tax='peptide',
         data_dir=TEST_DIR,
         func_file=func_path,
         func_colname='ec',
         ontology='ec',
     )
     oxidoreductase = expanded.loc['1.2.-.-']
     self.assertEqual(expanded.loc['3.4.-.-']['int1'], np.log2(50))
     self.assertEqual(oxidoreductase['int2'], np.log2(50))
     # missing values (zeros, nans, NA's, etc) are turned into NaN's
     self.assertTrue(np.isnan(oxidoreductase['int3']))
Beispiel #16
0
 def testViz(self):
     """Run the taxonomy bar plot through the CLI and expect exit status 0."""
     infile = testfile('taxonomy_write_simple.tab')
     imgfile = testfile('cli_bar_viz.png')
     # Pass an argument list instead of a shell string: no shell quoting of
     # the JSON sample description is needed and paths with spaces are safe.
     cmd = [
         'python3', 'metaquantome/cli.py', 'viz',
         '-m', 't',
         '--plottype', 'bar',
         '--infile', infile,
         '--img', imgfile,
         '--samps', '{"samp1": ["int"]}',
         '--nterms', '2',
         '--meancol', 'samp1_mean',
         '--target_rank', 'genus',
     ]
     test_status = subprocess.call(cmd)
     self.assertEqual(test_status, 0)
 def testMultipleInt(self):
     """Expand multi-sample GO data; check two values and one missing value.

     Returns the expanded table, as the original implementation does.
     """
     func_path = testfile('multiple_func.tab')
     int_path = testfile('multiple_int.tab')
     go_df = expand.expand(
         'f',
         sinfo='{"s1": ["int1", "int2", "int3"]}',
         int_file=int_path,
         pep_colname_int='peptide',
         pep_colname_func='peptide',
         pep_colname_tax='peptide',
         data_dir=TEST_DIR,
         func_file=func_path,
         func_colname='go',
         ontology='go',
     )
     expected_log2 = (('GO:0008152', 'int1', 10), ('GO:0022610', 'int2', 30))
     for term, sample, raw_intensity in expected_log2:
         self.assertEqual(go_df.loc[term][sample], np.log2(raw_intensity))
     # missing values (zeros, nans, NA's, etc) are turned into NaN's
     self.assertTrue(np.isnan(go_df.loc['GO:0000003']['int3']))
     # NOTE(review): returning a value from a test method is unusual; kept
     # because callers outside this view may rely on it - confirm.
     return go_df
Beispiel #18
0
 def testBasicTaxBar(self):
     """Run the taxonomy bar plot with a tabfile, then again without one.

     The first run must create the tabfile, which is removed before the
     second run.
     """
     infile = testfile('taxonomy_write_simple.tab')
     tabfile = testfile('taxonomy_plot_out.tab')
     # keyword arguments common to both runs
     bar_kwargs = dict(mode='t', nterms='2', meancol='samp1_mean',
                       target_rank="genus", barcol="6")

     run_viz('bar', self.img, infile, tabfile=tabfile, **bar_kwargs)
     self.assertTrue(os.path.exists(tabfile))
     os.remove(tabfile)

     run_viz('bar', self.img, infile, **bar_kwargs)
Beispiel #19
0
 def testFuncBar(self):
     """Run the function bar plot (with tabfile) through the CLI; expect status 0."""
     infile = testfile('eggnog_out.tab')
     imgfile = testfile('test_eggnog_viz.png')
     samps = testfile('rudney_samples.tab')
     tabfile = testfile("eggnog_viz_file.tab")
     # Pass an argument list instead of a shell string so paths with spaces
     # are safe. The original also depended on implicit string concatenation
     # (a missing comma before '--infile'); each argument is now explicit.
     cmd = [
         'python3', 'metaquantome/cli.py', 'viz',
         '-m', 'f',
         '--plottype', 'bar',
         '--infile', infile,
         '--img', imgfile,
         '--samps', samps,
         '--nterms', '20',
         '--meancol', 'NS_mean',
         '--target_onto', 'bp',
         '--tabfile', tabfile,
     ]
     test_status = subprocess.call(cmd)
     self.assertEqual(test_status, 0)
Beispiel #20
0
 def testCogTTest(self):
     """Expand COG data, run the parametric test with control group s2, check p-values.

     After indexing the result by ``id``, category C must have
     ``p_s1_over_s2`` > 0.05 and categories N and D must be <= 0.05.
     """
     func_path = testfile('multiple_func.tab')
     int_path = testfile('int_ttest.tab')
     expanded_path = testfile('cog_ttest.tab')
     # the expanded table is consumed through the file written to expanded_path
     expand.expand(
         'f',
         sinfo=TTEST_SINFO,
         int_file=int_path,
         pep_colname_int='peptide',
         pep_colname_func='peptide',
         pep_colname_tax='peptide',
         outfile=expanded_path,
         func_file=func_path,
         func_colname='cog',
         ontology='cog',
     )
     tested = stat.stat(
         expanded_path,
         sinfo=TTEST_SINFO,
         paired=False,
         parametric=True,
         ontology='cog',
         mode='f',
         control_group='s2',
         outfile=None,
     )
     # make sure false is > 0.05 and trues are less than 0.05
     tested.set_index('id', inplace=True)
     p_values = tested['p_s1_over_s2']
     self.assertTrue(p_values['C'] > 0.05)
     self.assertTrue(p_values[['N', 'D']].le(0.05).all())
 def testSlimDown(self):
     """Expand GO data with ``slim_down=True``; every returned id must be in the slim.

     The placeholder id 'unknown' is ignored in the comparison.
     """
     func_path = testfile('func_eggnog.tab')
     int_path = testfile('int_eggnog.tab')
     out_path = testfile('eggnog_out.tab')
     sample_groups = '{"NS": ["int737NS", "int852NS", "int867NS"], "WS": ["int737WS", "int852WS", "int867WS"]}'
     expanded = expand.expand(
         'f',
         sinfo=sample_groups,
         int_file=int_path,
         pep_colname_int='peptide',
         pep_colname_func='peptide',
         pep_colname_tax='peptide',
         data_dir=TEST_DIR,
         func_file=func_path,
         func_colname='go',
         ontology='go',
         slim_down=True,
         outfile=out_path,
     )
     # test that all go terms are in slim;
     # potential of unknown, so just drop that
     slim_candidates = set(expanded['id']) - {'unknown'}
     self.assertTrue(slim_candidates.issubset(self.db.goslim.keys()))
Beispiel #22
0
 def testGOVolcano(self):
     """Run the volcano plot on the GO test output with ``gosplit=True``."""
     tested_go = testfile('go_tested.tab')
     volcano_kwargs = dict(textannot="id",
                           fc_name="log2fc_s1_over_s2",
                           fc_corr_p="corrected_p_s1_over_s2",
                           gosplit=True)
     run_viz('volcano', self.img, tested_go, **volcano_kwargs)
Beispiel #23
0
    def testVolcano(self):
        """Run the volcano plot without and then with a tabfile.

        The second run must create the tabfile, which is removed afterwards.
        """
        infile = testfile('cli_mult_test_out.tab')
        # keyword arguments common to both runs
        volcano_kwargs = dict(textannot="id",
                              fc_name="log2fc_s1_over_s2",
                              fc_corr_p="corrected_p_s1_over_s2")

        run_viz('volcano', self.img, infile, **volcano_kwargs)

        # test tabfile
        tabfile = testfile('taxonomy_plot_out.tab')
        run_viz('volcano', self.img, infile, tabfile=tabfile, **volcano_kwargs)
        self.assertTrue(os.path.exists(tabfile))
        os.remove(tabfile)
Beispiel #24
0
 def testPCA(self):
     """Run the PCA plot on the GO test output with ``calculate_sep=False``."""
     tested_go = testfile('go_tested.tab')
     run_viz('pca', self.img, tested_go,
             sinfo=TTEST_SINFO, calculate_sep=False)
 def testNopep(self):
     """Expand taxonomy from a ``nopep`` file (no intensity file) and check one value.

     The Proteobacteria phylum row must have ``int3`` equal to log2(70).
     """
     nopep_path = testfile('nopep.tab')
     expanded = expand.expand(
         't',
         sinfo='{"s1": ["int1", "int2", "int3"]}',
         int_file=None,
         pep_colname_int='peptide',
         pep_colname_func='peptide',
         pep_colname_tax='peptide',
         data_dir=TEST_DIR,
         tax_colname='lca',
         nopep=True,
         nopep_file=nopep_path,
     )
     proteobacteria = expanded.query("rank == 'phylum' and taxon_name == 'Proteobacteria'")
     self.assertEqual(proteobacteria['int3'].values[0], np.log2(70))
Beispiel #26
0
 def testHeatmapViz(self):
     """Run the EC heatmap through the CLI with and without significance filtering.

     Both invocations must exit with status 0.
     """
     infile = testfile('ec_ttest_tested.tab')
     imgfile = testfile('cli_heatmap_viz.png')
     # Pass argument lists instead of shell strings: the sample JSON no longer
     # needs shell quoting (the original passed it padded with spaces inside
     # the quotes) and file paths containing spaces are safe.
     base_cmd = [
         'python3', 'metaquantome/cli.py', 'viz',
         '-m', 'f',
         '--ontology', 'ec',
         '--plottype', 'heatmap',
         '--infile', infile,
         '--img', imgfile,
         '--samps', TTEST_SINFO,
     ]
     filtered_status = subprocess.call(base_cmd + ['--filter_to_sig', '--alpha', '0.5'])
     self.assertEqual(filtered_status, 0)
     unfiltered_status = subprocess.call(base_cmd)
     self.assertEqual(unfiltered_status, 0)
 def testParentIntensityHigher(self):
     """
     Make sure that parents always have intensity at least as high as children.
     """
     tax_path = testfile('test_root_sum_uni.tab')
     int_path = testfile('test_root_sum_int.tab')
     expanded = expand.expand(
         't',
         sinfo='{"A": ["int"]}',
         int_file=int_path,
         pep_colname_int='peptide',
         pep_colname_func='peptide',
         pep_colname_tax='peptide',
         data_dir=TEST_DIR,
         tax_file=tax_path,
         tax_colname='taxon_id',
     )
     # filter to phylum and below
     not_no_rank = expanded["rank"] != 'no rank'
     not_superkingdom = expanded["rank"] != 'superkingdom'
     intensities = expanded[not_no_rank & not_superkingdom]['int']
     # firmicutes phylum should be highest
     self.assertEqual(intensities.max(), intensities[1239])
     # strep genus intensity should be greater than or equal to that of strep species
     genus_intensity = intensities[1301]
     for species_id in (1302, 1305):
         self.assertGreaterEqual(genus_intensity, intensities[species_id])
    def testFilter(self):
        """Filter an expanded taxonomy table with no threshold and with qthreshold=3.

        With every threshold at 0 the id set must match the expanded table;
        with ``qthreshold=3`` ids 1496 and 1870884 must be absent.
        """
        intfile = testfile('filt_int.tab')
        taxfile = testfile('multiple_tax.tab')
        expandfile = testfile('expand_out.tab')

        expanded = expand('t', TTEST_SINFO,
                          int_file=intfile,
                          pep_colname_int='peptide',
                          pep_colname_func='peptide',
                          pep_colname_tax='peptide',
                          data_dir=TEST_DIR,
                          outfile=expandfile,
                          tax_file=taxfile,
                          tax_colname='lca')
        exp_ids = set(expanded['id'])

        # settings that stay fixed across both filter runs
        fixed_kwargs = dict(ontology=None,
                            mode="t",
                            min_child_non_leaf=0,
                            min_child_nsamp=0,
                            min_peptides=0,
                            min_pep_nsamp=0)

        # no filtering: make sure that ids are the same as in the expanded table
        unfiltered = run_filter(expandfile, TTEST_SINFO, qthreshold=0, **fixed_kwargs)
        self.assertSetEqual(set(unfiltered['id']), exp_ids)

        # now, require 3 intensities per group. we shouldn't see 1496 or 1870884
        filtered = run_filter(expandfile, TTEST_SINFO, qthreshold=3, **fixed_kwargs)
        remaining_ids = set(filtered['id'])
        for dropped_id in (1496, 1870884):
            self.assertNotIn(dropped_id, remaining_ids)
Beispiel #29
0
 def testVizTabfile(self):
     """Run the taxonomy bar plot via the CLI with a tabfile and check its length.

     The CLI must exit with status 0 and the tabfile must contain exactly
     three newline-terminated lines; the tabfile is removed afterwards.
     """
     infile = testfile('taxonomy_write_simple.tab')
     imgfile = testfile('cli_bar_viz2.png')
     tabfile = testfile("tmp")
     # Pass an argument list instead of a shell string: no shell quoting of
     # the JSON sample description is needed and paths with spaces are safe.
     cmd = [
         'python3', 'metaquantome/cli.py', 'viz',
         '-m', 't',
         '--plottype', 'bar',
         '--infile', infile,
         '--img', imgfile,
         '--samps', '{"samp1": ["int"]}',
         '--nterms', '2',
         '--meancol', 'samp1_mean',
         '--target_rank', 'genus',
         '--tabfile', tabfile,
     ]
     test_status = subprocess.call(cmd)
     self.assertEqual(test_status, 0)
     # Count newline characters in-process (what ``wc -l`` reports) instead
     # of shelling out to ``wc``, which is not available on every platform.
     with open(tabfile, 'rb') as handle:
         newline_count = handle.read().count(b'\n')
     self.assertEqual(newline_count, 3)
     os.remove(tabfile)
Beispiel #30
0
    def testMultipleInt(self):
        """Run ``expand`` then ``stat`` through the CLI on COG data and check p-values.

        Both CLI calls must exit with status 0. In the written result,
        category C must have ``corrected_p`` > 0.05 and categories N and D
        must be <= 0.05.
        """
        # Both commands are passed as argument lists rather than concatenated
        # shell strings, so the sample JSON needs no shell quoting and paths
        # containing spaces are handled correctly.
        exp_out = testfile('cli_mult_out.tab')
        exp_command = [
            'python3', 'metaquantome/cli.py', 'expand',
            '-m', 'f',
            '--pep_colname_int', 'peptide',
            '--pep_colname_func', 'peptide',
            '--outfile', exp_out,
            '-i', 'metaquantome/data/test/int_ttest.tab',
            '--func_file', 'metaquantome/data/test/multiple_func.tab',
            '--func_colname', 'cog',
            '--ontology', 'cog',
            '--samps', TTEST_SINFO,
            '--data_dir', TEST_DIR,
        ]
        exp_status = subprocess.call(exp_command)
        self.assertEqual(exp_status, 0)

        test_out = testfile('cli_mult_test_out.tab')
        test_command = [
            'python3', 'metaquantome/cli.py', 'stat',
            '-m', 'f',
            '--outfile', test_out,
            '--file', exp_out,
            '--ontology', 'cog',
            '--samps', TTEST_SINFO,
            '--parametric', 'True',
        ]
        test_status = subprocess.call(test_command)
        self.assertEqual(test_status, 0)

        test_df = pd.read_csv(test_out, sep="\t", index_col='id')
        # make sure false is > 0.05 and trues are less than 0.05
        self.assertTrue(test_df['corrected_p']['C'] > 0.05)
        self.assertTrue(test_df['corrected_p'][['N', 'D']].le(0.05).all())