def test_standalone_rpca(self):
        """Run the standalone CTF command end to end and check its output.

        Despite this test's name, the command invoked is ``standalone_ctf``.
        This is more of an "integration test" than a unit test -- the
        details of the algorithm used by the standalone CTF script are
        checked in more detail in gemelli/tests/test_factorization.py.

        The test passes when the command exits with code 0, every compared
        component (PC1-PC3) of both ordinations has a mean of ~0, and the
        ordinations agree with the stored expected ones.
        """
        in_table = get_data_path('test-small.biom')
        in_meta = get_data_path('test-small.tsv')
        # The output directory is the directory holding the input table
        # (the table's path with its last component dropped).
        out_ = os_path_sep.join(in_table.split(os_path_sep)[:-1])
        runner = CliRunner()
        result = runner.invoke(standalone_ctf,
                               ['--in-biom',
                                in_table,
                                '--sample-metadata-file',
                                in_meta,
                                '--individual-id-column',
                                'host_subject_id',
                                '--state-column-1',
                                'context',
                                '--output-dir',
                                out_])
        # check exit code was 0 (indicating success)
        CliTestCase().assertExitCode(0, result)
        # Read the results.
        # NOTE(review): presumably get_data_path() resolves into the same
        # directory as out_, so these are the files the command just wrote
        # -- confirm against get_data_path's definition.
        samp_res = pd.read_csv(
            get_data_path('context-subject-ordination.tsv'),
            sep='\t',
            index_col=0)
        feat_res = pd.read_csv(
            get_data_path('context-features-ordination.tsv'),
            sep='\t',
            index_col=0)
        # Read the expected results
        samp_exp = pd.read_csv(
            get_data_path('expected-context-subject-ordination.tsv'),
            sep='\t',
            index_col=0)
        feat_exp = pd.read_csv(
            get_data_path('expected-context-features-ordination.tsv'),
            sep='\t',
            index_col=0)
        comp_col = ['PC1', 'PC2', 'PC3']
        # Check that the ordinations are centered. The largest *absolute*
        # per-component mean is bounded: taking the plain maximum would let
        # a strongly negative mean slip through unnoticed.
        for ordination in (samp_res, feat_res):
            cent_ = ordination[comp_col].mean().abs().max()
            self.assertAlmostEqual(cent_, 0)
        # Check that the ordinations match the expected ones. Both sides go
        # through absolute_sort() first (presumably to ignore sign and row
        # order -- confirm against its definition); atol is loose (0.5).
        assert_allclose(absolute_sort(samp_res[comp_col].values),
                        absolute_sort(samp_exp[comp_col].values),
                        atol=.5)
        assert_allclose(absolute_sort(feat_res[comp_col].values),
                        absolute_sort(feat_exp[comp_col].values),
                        atol=.5)
# Example #2
# 0
    def test_qiime2_ctf(self):
        """Check that CTF run through QIIME 2 agrees with the standalone CLI.

        The ctf action is called through q2gemelli on the fixtures stored
        on ``self``, then the ``standalone_ctf`` command is invoked on
        ``self.in_table`` / ``self.in_meta``. The subject and feature
        trajectories from both runs are compared on PC1-PC3 with an
        absolute tolerance of 0.5.
        """
        components = ['PC1', 'PC2', 'PC3']

        # --- QIIME 2 side (Artifact API) ---
        q2_results = q2gemelli.actions.ctf(table=self.q2table,
                                           sample_metadata=self.q2meta,
                                           individual_id_column=self.subj,
                                           state_column=self.state)
        # Four results come back; only the last two are compared below.
        _, _, subject_qza, feature_qza = q2_results
        q2_subject = subject_qza.view(pd.DataFrame)
        q2_feature = feature_qza.view(pd.DataFrame)

        # --- Standalone side ---
        # The output directory is the directory holding the input table
        # (the table's path with its last component dropped).
        output_dir = os_path_sep.join(self.in_table.split(os_path_sep)[:-1])
        cli_args = [
            '--in-biom', self.in_table,
            '--sample-metadata-file', self.in_meta,
            '--individual-id-column', 'host_subject_id',
            '--state-column-1', 'context',
            '--output-dir', output_dir,
        ]
        result = CliRunner().invoke(standalone_ctf, cli_args)
        # An exit code of 0 indicates the command succeeded.
        self.assertEqual(result.exit_code, 0)

        # Load the ordination files for the standalone run.
        # NOTE(review): presumably get_data_path() resolves into output_dir,
        # so these are the files just written -- confirm.
        standalone_subject = read_csv(
            get_data_path('context-subject-ordination.tsv'),
            sep='\t',
            index_col=0)
        standalone_feature = read_csv(
            get_data_path('context-features-ordination.tsv'),
            sep='\t',
            index_col=0)

        # Compare subject trajectories first, then feature trajectories.
        # Both sides are passed through absolute_sort() before comparison.
        pairs = ((standalone_subject, q2_subject),
                 (standalone_feature, q2_feature))
        for standalone, from_q2 in pairs:
            assert_allclose(absolute_sort(standalone[components].values),
                            absolute_sort(from_q2[components].values),
                            atol=.5)