예제 #1
0
    def test_empty_metadata_values(self):
        """``group`` rejects metadata containing None, NaN or '' values.

        Each case expects a ``ValueError`` whose message matches the given
        ``missing ... value`` pattern ending in the offending ID.
        """
        # Not every axis/value combination is exercised: we trust that the
        # code is sane enough to not invent a distinction between feature
        # and sample metadata where there is none.
        def expect_missing(metadata, axis, mode, pattern):
            with self.assertRaisesRegex(ValueError, pattern):
                group(table, axis=axis, metadata=metadata, mode=mode)

        none_mc = qiime2.MetadataCategory(
            pd.Series(['a_new', 'a_new', None], index=['a', 'b', 'c']))
        ids_from_metadata = none_mc.to_series().index

        counts = np.array([[1, 2, 3], [30, 20, 10]])
        table = biom.Table(counts,
                           sample_ids=ids_from_metadata,
                           observation_ids=['x', 'y'])

        # None as a sample metadata value.
        expect_missing(none_mc, 'sample', 'sum', 'missing.*value.*{\'c\'}')

        # NaN as a sample metadata value.
        nan_mc = qiime2.MetadataCategory(
            pd.Series(['a_new', float('nan'), 'a_new'],
                      index=['a', 'b', 'c']))
        expect_missing(nan_mc, 'sample', 'sum', 'missing.*value.*{\'b\'}')

        # Empty string as a feature metadata value.
        blank_mc = qiime2.MetadataCategory(
            pd.Series(['', 'y_new'], index=['x', 'y']))
        expect_missing(blank_mc, 'feature', 'median-ceiling',
                       'missing.*value.*{\'x\'}')
예제 #2
0
    def _shared_setup(self):
        """Create the metadata categories and count table used by tests.

        Returns:
            A ``(sample_mc, feature_mc, table)`` tuple. The table's sample
            IDs and observation IDs are the indexes of ``sample_mc`` and
            ``feature_mc`` respectively.
        """
        sample_groups = ['treatment', 'treatment', 'control', 'other',
                         'control', 'other', 'other']
        sample_mc = qiime2.MetadataCategory(
            pd.Series(sample_groups,
                      index=['a', 'b', 'c', 'd', 'e', 'f', 'g']))

        feature_groups = ['g0', 'g1', 'g1', 'g1', 'g0']
        feature_mc = qiime2.MetadataCategory(
            pd.Series(feature_groups, index=['v', 'w', 'x', 'y', 'z']))

        # One row per feature (v-z) and one column per sample (a-g). The
        # two header comments give each column's group initial and sample
        # ID; the trailing comment on a row gives its feature ID and group.
        counts = np.array([
            # t  t   c   o    c     o    o
            # a  b   c   d    e     f    g
            [0, 0, 0, 0, 1, 0, 2],          # v  g0
            [10, 10, 10, 10, 10, 100, 1],   # w  g1
            [12, 3, 14, 0, 0, 3, 34],       # x  g1
            [1, 1, 1, 1, 1, 1, 1],          # y  g1
            [0, 1, 11, 111, 1111, 20, 20],  # z  g0
        ])

        sample_ids = sample_mc.to_series().index
        feature_ids = feature_mc.to_series().index
        table = biom.Table(counts,
                           sample_ids=sample_ids,
                           observation_ids=feature_ids)

        return sample_mc, feature_mc, table
예제 #3
0
    def test_numeric(self):
        """Numeric-looking metadata values make ``group`` raise.

        Integer-like, float-like and mixed string values must each produce
        a ``ValueError`` whose message matches ``numeric``.
        """
        counts = np.array([[1, 2, 3], [30, 20, 10]])
        table = biom.Table(counts,
                           sample_ids=['a', 'b', 'c'],
                           observation_ids=['x', 'y'])

        numeric_looking = (
            ['1', '2', '3'],           # ints
            ['1.1', '2.2', '3.3333'],  # floats
            ['0', '42', '4.2'],        # mixed
        )
        for values in numeric_looking:
            sample_mc = qiime2.MetadataCategory(
                pd.Series(values, index=['a', 'b', 'c']))

            with self.assertRaisesRegex(ValueError, 'numeric'):
                group(table, axis='sample', metadata=sample_mc, mode='sum')
예제 #4
0
    def setUp(self):
        """Create the barcode and sequence fixtures for each test.

        Sets the following attributes:

        * ``self.barcodes`` / ``self.sequences``: lists of 4-tuples of
          strings (header, sequence, ``'+'``, quality), eleven each.
        * ``self.bsi``: a ``BarcodeSequenceFastqIterator`` built from the
          two lists.
        * ``self.barcode_map``: a ``qiime2.MetadataCategory`` wrapping a
          series of barcode strings indexed by sample ID.
        """
        # Stored on the instance rather than as a local so that tests
        # which slice ``self.barcodes`` to build smaller iterators can
        # reach it, the same way they already reach ``self.sequences``.
        self.barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
                         ('@s2/2 abc/2', 'TTAA', '+', 'PPPP'),
                         ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
                         ('@s4/2 abc/2', 'TTAA', '+', 'PPPP'),
                         ('@s5/2 abc/2', 'AACC', '+', 'PPPP'),
                         ('@s6/2 abc/2', 'AAAA', '+', 'PPPP'),
                         ('@s7/2 abc/2', 'CGGC', '+', 'PPPP'),
                         ('@s8/2 abc/2', 'GGAA', '+', 'PPPP'),
                         ('@s9/2 abc/2', 'CGGC', '+', 'PPPP'),
                         ('@s10/2 abc/2', 'CGGC', '+', 'PPPP'),
                         ('@s11/2 abc/2', 'GGAA', '+', 'PPPP')]

        self.sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
                          ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
                          ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
                          ('@s4/1 abc/1', 'TTT', '+', 'PPP'),
                          ('@s5/1 abc/1', 'ATA', '+', 'PPP'),
                          ('@s6/1 abc/1', 'TAT', '+', 'PPP'),
                          ('@s7/1 abc/1', 'CGC', '+', 'PPP'),
                          ('@s8/1 abc/1', 'GCG', '+', 'PPP'),
                          ('@s9/1 abc/1', 'ACG', '+', 'PPP'),
                          ('@s10/1 abc/1', 'GCA', '+', 'PPP'),
                          ('@s11/1 abc/1', 'TGA', '+', 'PPP')]
        self.bsi = BarcodeSequenceFastqIterator(self.barcodes,
                                                self.sequences)

        barcode_map = pd.Series(['AAAA', 'AACC', 'TTAA', 'GGAA', 'CGGC'],
                                index=['sample1', 'sample2', 'sample3',
                                       'sample4', 'sample5'])
        self.barcode_map = qiime2.MetadataCategory(barcode_map)
예제 #5
0
    def test_single_sample(self):
        """Summarize a demux result built from one barcode/sequence pair.

        Checks that ``summarize`` returns None, that ``overview.html`` and
        the per-sample counts CSV exist and are non-empty, that no
        ``demultiplex-summary`` PDF/PNG is written, and that the overview
        page reports a minimum and a maximum of 1.
        """
        first_barcode = self.barcodes[:1]
        first_sequence = self.sequences[:1]
        bsi = BarcodeSequenceFastqIterator(first_barcode, first_sequence)

        barcode_map = qiime2.MetadataCategory(
            pd.Series(['AAAA'], index=['sample1']))

        demux_data = emp_single(bsi, barcode_map)
        with tempfile.TemporaryDirectory() as output_dir:
            # TODO: Remove _PlotQualView wrapper
            view = _PlotQualView(demux_data, paired=False)
            result = summarize(output_dir, view, n=1)
            self.assertTrue(result is None)

            # The overview page and the counts table must both be created
            # with size > 0.
            index_fp = os.path.join(output_dir, 'overview.html')
            csv_fp = os.path.join(output_dir, 'per-sample-fastq-counts.csv')
            for produced_fp in (index_fp, csv_fp):
                self.assertTrue(os.path.exists(produced_fp))
                self.assertTrue(os.path.getsize(produced_fp) > 0)

            # Neither rendering of the demultiplex summary may be present.
            for absent_name in ('demultiplex-summary.pdf',
                                'demultiplex-summary.png'):
                absent_fp = os.path.join(output_dir, absent_name)
                self.assertFalse(os.path.exists(absent_fp))

            with open(index_fp, 'r') as fh:
                html = fh.read()
            self.assertIn('<td>Minimum:</td><td>1</td>', html)
            self.assertIn('<td>Maximum:</td><td>1</td>', html)
예제 #6
0
    def test_paired_end(self):
        """Summarize a paired-end demux result built from three reads.

        Checks that ``summarize`` returns None and that
        ``quality-plot.html`` contains both the "Forward Reads" and the
        "Reverse Reads" headings.
        """
        reverse = [('@s1/1 abc/1', 'CCC', '+', 'YYY'),
                   ('@s2/1 abc/1', 'GGG', '+', 'PPP'),
                   ('@s3/1 abc/1', 'TTT', '+', 'PPP')]

        # The first three fixture records serve as barcodes and forward
        # reads.
        bpsi = BarcodePairedSequenceFastqIterator(self.barcodes[:3],
                                                  self.sequences[:3],
                                                  reverse)

        barcode_map = qiime2.MetadataCategory(
            pd.Series(['AAAA', 'AACC', 'TTAA'],
                      index=['sample1', 'sample2', 'sample3']))

        demux_data = emp_paired(bpsi, barcode_map)
        with tempfile.TemporaryDirectory() as output_dir:
            view = _PlotQualView(demux_data, paired=True)
            result = summarize(output_dir, view, n=2)
            self.assertTrue(result is None)

            plot_fp = os.path.join(output_dir, 'quality-plot.html')
            with open(plot_fp, 'r') as fh:
                html = fh.read()
            for heading in ('<h5 class="text-center">Forward Reads</h5>',
                            '<h5 class="text-center">Reverse Reads</h5>'):
                self.assertIn(heading, html)
예제 #7
0
    def test_permanova_pairwise(self):
        """Pairwise ``beta_group_significance`` writes the expected files.

        With groups 'a' and 'b', the output directory must contain
        ``index.html`` plus exactly one PDF and one PNG boxplot per group.
        The index page must mention both 'PERMANOVA results' and
        'Pairwise permanova' and must not contain 'Warning'.
        """
        dm = skbio.DistanceMatrix(
            [[0.00, 0.25, 0.25], [0.25, 0.00, 0.00], [0.25, 0.00, 0.00]],
            ids=['sample1', 'sample2', 'sample3'])
        md = qiime2.MetadataCategory(
            pd.Series(['a', 'b', 'b'],
                      name='a or b',
                      index=['sample1', 'sample2', 'sample3']))

        with tempfile.TemporaryDirectory() as output_dir:
            beta_group_significance(output_dir, dm, md, pairwise=True)
            index_fp = os.path.join(output_dir, 'index.html')
            self.assertTrue(os.path.exists(index_fp))

            # all expected boxplots are generated
            for group_name in ('a', 'b'):
                for ext in ('pdf', 'png'):
                    boxplot_fp = os.path.join(
                        output_dir, '%s-boxplots.%s' % (group_name, ext))
                    self.assertTrue(os.path.exists(boxplot_fp),
                                    '%s was not created' % boxplot_fp)

            # no extra boxplots are generated
            for ext in ('pdf', 'png'):
                pattern = os.path.join(output_dir, '*-boxplots.%s' % ext)
                self.assertEqual(len(glob.glob(pattern)), 2)

            # Read the page once through a context manager; the previous
            # three bare ``open(...).read()`` calls never closed their
            # file handles.
            with open(index_fp) as fh:
                index_html = fh.read()
            self.assertIn('PERMANOVA results', index_html)
            self.assertIn('Pairwise permanova', index_html)
            self.assertNotIn('Warning', index_html)
    def test_with_metadata(self):
        """``heatmap`` run with a metadata category passes viz validation."""
        pets = pd.Series(['milo', 'summer', 'russ'],
                         name='pet',
                         index=['S1', 'S2', 'S3'])
        md = qiime2.MetadataCategory(pets)

        heatmap(self.output_dir, self.table, metadata=md)
        self.assertBasicVizValidity(self.output_dir)
예제 #9
0
    def test_one_sample(self):
        """A one-sample category gives a 1x1 distance matrix of 0.0."""
        single_value = pd.Series([1.5], name='number', index=['sample1'])
        md = qiime2.MetadataCategory(single_value)
        exp = skbio.DistanceMatrix([[0.0]], ids=['sample1'])

        self.assertEqual(exp, distance_matrix(md))
예제 #10
0
    def test_missing_values(self):
        """A NaN entry makes ``distance_matrix`` raise ``ValueError``."""
        values = pd.Series(
            [1.0, 2.0, np.nan, 4.0],
            name='number',
            index=['sample1', 'sample2', 'sample3', 'sample4'])
        md = qiime2.MetadataCategory(values)

        with self.assertRaisesRegex(ValueError, 'missing values'):
            distance_matrix(md)
예제 #11
0
    def test_identity_groups(self):
        """Grouping every ID onto itself returns a table equal to the input.

        Exercised for both axes ('sample', 'feature') crossed with the
        three modes 'sum', 'mean-ceiling' and 'median-ceiling'.
        """
        # Each metadata value equals its own index label, so every ID maps
        # to the same value as before.
        sample_mc = qiime2.MetadataCategory(
            pd.Series(['a', 'b', 'c'], index=['a', 'b', 'c']))
        feature_mc = qiime2.MetadataCategory(
            pd.Series(['x', 'y'], index=['x', 'y']))

        counts = np.array([[1, 2, 3], [30, 20, 10]])
        table = biom.Table(counts,
                           sample_ids=sample_mc.to_series().index,
                           observation_ids=feature_mc.to_series().index)

        metadata_by_axis = (('sample', sample_mc), ('feature', feature_mc))
        modes = ('sum', 'mean-ceiling', 'median-ceiling')

        for axis, metadata in metadata_by_axis:
            for mode in modes:
                result = group(table,
                               axis=axis,
                               metadata=metadata,
                               mode=mode)
                self.assertEqual(table, result)
    def test_no_sample_cluster(self):
        """``heatmap`` with ``cluster='features'`` passes viz validation."""
        pets = pd.Series(['milo', 'summer', 'russ'],
                         name='pet',
                         index=['S1', 'S2', 'S3'])
        md = qiime2.MetadataCategory(pets)

        heatmap(self.output_dir, self.table, metadata=md, cluster='features')
        self.assertBasicVizValidity(self.output_dir)
예제 #13
0
    def test_non_numeric_category(self):
        """Non-numeric strings make ``distance_matrix`` raise.

        The ``ValueError`` message must match 'non-numeric values',
        followed by a blank line and then text containing 'x1'.
        """
        values = pd.Series(
            ['x1', 'x2', '3', '4'],
            name='number',
            index=['sample1', 'sample2', 'sample3', 'sample4'])
        md = qiime2.MetadataCategory(values)

        expected_message = 'non-numeric values.*\n\n.*x1'
        with self.assertRaisesRegex(ValueError, expected_message):
            distance_matrix(md)
예제 #14
0
 def test_bad_method(self):
     """``alpha_correlation`` raises ``ValueError`` for method='bad!'."""
     sample_ids = ['sample1', 'sample2', 'sample3']
     alpha_div = pd.Series([2.0, 4.0, 6.0], name='alpha-div',
                           index=sample_ids)
     md = qiime2.MetadataCategory(
         pd.Series(['1.0', '2.0', '3.0'], name='value',
                   index=['sample1', 'sample2', 'sample3']))

     with tempfile.TemporaryDirectory() as output_dir, \
             self.assertRaises(ValueError):
         alpha_correlation(output_dir, alpha_div, md, method='bad!')
예제 #15
0
    def test_error_on_missing_metadata(self):
        """``beta_correlation`` raises when a sample has no metadata.

        The distance matrix holds sample1-sample3 but the metadata only
        covers sample1 and sample2; the ``ValueError`` message must match
        'no data: sample3'.
        """
        distances = [[0.00, 0.25, 0.25],
                     [0.25, 0.00, 0.00],
                     [0.25, 0.00, 0.00]]
        dm = skbio.DistanceMatrix(distances,
                                  ids=['sample1', 'sample2', 'sample3'])
        md = qiime2.MetadataCategory(
            pd.Series([1, 2], name='number', index=['sample1', 'sample2']))

        with tempfile.TemporaryDirectory() as output_dir, \
                self.assertRaisesRegex(ValueError, 'no data: sample3'):
            beta_correlation(output_dir, dm, md)
예제 #16
0
    def test_float_category(self):
        """Float values give the expected pairwise distance matrix."""
        sample_ids = ['sample1', 'sample2', 'sample3']
        md = qiime2.MetadataCategory(
            pd.Series([1.5, 2.0, 3.0], name='number', index=sample_ids))

        expected_rows = [[0.0, 0.5, 1.5],
                         [0.5, 0.0, 1.0],
                         [1.5, 1.0, 0.0]]
        exp = skbio.DistanceMatrix(expected_rows,
                                   ids=['sample1', 'sample2', 'sample3'])

        self.assertEqual(exp, distance_matrix(md))
예제 #17
0
    def test_empty_table(self):
        """``group`` raises 'empty table' on either axis for an empty table."""
        mc = qiime2.MetadataCategory(
            pd.Series(['a_new', 'b_new'], index=['a', 'b']))

        no_counts = np.array([[]])
        table = biom.Table(no_counts, sample_ids=[], observation_ids=[])

        for axis in ('sample', 'feature'):
            with self.assertRaisesRegex(ValueError, 'empty table'):
                group(table, axis=axis, metadata=mc, mode='sum')
예제 #18
0
    def test_int_category(self):
        """Integer values give the expected pairwise distance matrix."""
        values = pd.Series([1, 2, 3],
                           name='number',
                           index=['sample1', 'sample2', 'sample3'])
        md = qiime2.MetadataCategory(values)

        expected_rows = [[0, 1, 2],
                         [1, 0, 1],
                         [2, 1, 0]]
        exp = skbio.DistanceMatrix(expected_rows,
                                   ids=['sample1', 'sample2', 'sample3'])

        self.assertEqual(exp, distance_matrix(md))
예제 #19
0
    def test_invalid_method(self):
        """``beta_group_significance`` raises for method='bad!'."""
        distances = [[0.00, 0.25, 0.25],
                     [0.25, 0.00, 0.00],
                     [0.25, 0.00, 0.00]]
        dm = skbio.DistanceMatrix(distances,
                                  ids=['sample1', 'sample2', 'sample3'])

        groups = pd.Series(['a', 'b', 'b'],
                           name='a or b',
                           index=['sample1', 'sample2', 'sample3'])
        md = qiime2.MetadataCategory(groups)

        # assertRaises stays the outer context so the temporary directory
        # is cleaned up before the exception is inspected.
        with self.assertRaises(ValueError), \
                tempfile.TemporaryDirectory() as output_dir:
            beta_group_significance(output_dir, dm, md, method='bad!')
예제 #20
0
    def test_error_on_non_numeric_metadata(self):
        """``beta_correlation`` raises when a metadata value is a string.

        The ``ValueError`` message must match 'Non-numeric data was'.
        """
        distances = [[0.00, 0.25, 0.25],
                     [0.25, 0.00, 0.00],
                     [0.25, 0.00, 0.00]]
        dm = skbio.DistanceMatrix(distances,
                                  ids=['sample1', 'sample2', 'sample3'])

        mixed_values = pd.Series([1.0, 2.0, 'hello-world'],
                                 name='number',
                                 index=['sample1', 'sample2', 'sample3'])
        md = qiime2.MetadataCategory(mixed_values)

        with tempfile.TemporaryDirectory() as output_dir, \
                self.assertRaisesRegex(ValueError, 'Non-numeric data was'):
            beta_correlation(output_dir, dm, md)
예제 #21
0
    def test_extra_metadata(self):
        """Metadata may list a sample that is absent from the matrix.

        The metadata covers sample1-sample4 while the distance matrix only
        holds sample1-sample3. The run must succeed and the rendered
        ``index.html`` must contain a ``<td>2</td>`` cell.
        """
        dm = skbio.DistanceMatrix(
            [[0.00, 0.25, 0.25], [0.25, 0.00, 0.00], [0.25, 0.00, 0.00]],
            ids=['sample1', 'sample2', 'sample3'])
        md = qiime2.MetadataCategory(
            pd.Series(['a', 'b', 'b', 'c'],
                      name='a or b',
                      index=['sample1', 'sample2', 'sample3', 'sample4']))

        with tempfile.TemporaryDirectory() as output_dir:
            beta_group_significance(output_dir, dm, md, permutations=42)
            index_fp = os.path.join(output_dir, 'index.html')
            # Read through a context manager so the file handle is closed
            # instead of being left to the garbage collector.
            with open(index_fp) as fh:
                index_html = fh.read()
            self.assertIn('<td>2</td>', index_html)
예제 #22
0
    def test_missing_feature_ids(self):
        """``group`` raises when a table feature has no metadata entry.

        Feature 'b' is in the table but not in the metadata; the
        ``ValueError`` message must match ``metadata.*missing: {'b'}``.
        """
        feature_mc = qiime2.MetadataCategory(
            pd.Series(['g0', 'g1', 'g2', 'g1', 'g2', 'extra'],
                      index=['a', 'c', 'd', 'e', 'f', 'g']))

        counts = np.array([
            [1, 0, 0],
            [1, 10, 10],
            [0, 0, 100],
            [5, 5, 5],
            [0, 1, 100],
            [7, 8, 9],
        ])
        # g is missing from the table on purpose
        table = biom.Table(counts,
                           sample_ids=['s1', 's2', 's3'],
                           observation_ids=['a', 'b', 'c', 'd', 'e', 'f'])

        expected_message = 'metadata.*missing: {\'b\'}'
        with self.assertRaisesRegex(ValueError, expected_message):
            group(table, axis='feature', metadata=feature_mc, mode='sum')
예제 #23
0
 def test_filtered_samples_str_metadata(self):
     """An empty-string metadata value produces a warning in the report.

     sample4 carries '' as its metadata value; after running
     ``beta_group_significance`` the rendered ``index.html`` must contain
     'Warning'. (Presumably the warning reports that sample4 was filtered
     out - the test name suggests so, but only the text is checked.)
     """
     dm = skbio.DistanceMatrix(
         [[0.00, 0.25, 0.25, 0.66], [0.25, 0.00, 0.00, 0.66],
          [0.25, 0.00, 0.00, 0.66], [0.66, 0.66, 0.66, 0.00]],
         ids=['sample1', 'sample2', 'sample3', 'sample4'])
     md = qiime2.MetadataCategory(
         pd.Series(['a', 'b', 'b', ''],
                   name='a or b',
                   index=['sample1', 'sample2', 'sample3', 'sample4']))
     with tempfile.TemporaryDirectory() as output_dir:
         beta_group_significance(output_dir, dm, md)
         index_fp = os.path.join(output_dir, 'index.html')
         # Read through a context manager so the file handle is closed
         # instead of being left to the garbage collector.
         with open(index_fp) as fh:
             index_html = fh.read()
         self.assertIn('Warning', index_html)
예제 #24
0
    def test_str_casting(self):
        """Numeric strings give the same distances as their numbers would.

        The values '1'..'4' must produce the matrix of absolute
        differences between 1.0, 2.0, 3.0 and 4.0.
        """
        values = pd.Series(
            ['1', '2', '3', '4'],
            name='number',
            index=['sample1', 'sample2', 'sample3', 'sample4'])
        md = qiime2.MetadataCategory(values)

        expected_rows = [[0.0, 1.0, 2.0, 3.0],
                         [1.0, 0.0, 1.0, 2.0],
                         [2.0, 1.0, 0.0, 1.0],
                         [3.0, 2.0, 1.0, 0.0]]
        exp = skbio.DistanceMatrix(
            expected_rows,
            ids=['sample1', 'sample2', 'sample3', 'sample4'])

        self.assertEqual(exp, distance_matrix(md))
예제 #25
0
    def test_missing_sample_ids(self):
        """``group`` raises when table samples have no metadata entry.

        Samples 's2' and 's5' are in the table but not in the metadata;
        the ``ValueError`` message must match 'metadata.*missing:' and
        mention both IDs.
        """
        sample_mc = qiime2.MetadataCategory(
            pd.Series(['g0', 'g2', 'g0', 'g2'],
                      index=['s1', 's3', 's4', 's6']))

        counts = np.array([[0, 1, 2, 3],
                           [10, 11, 12, 13],
                           [100, 110, 120, 130]])
        table = biom.Table(counts,
                           sample_ids=['s1', 's2', 's4', 's5'],
                           observation_ids=['x', 'y', 'z'])

        with self.assertRaisesRegex(ValueError, 'metadata.*missing:') as ctx:
            group(table, axis='sample', metadata=sample_mc, mode='sum')

        message = str(ctx.exception)
        for absent_id in ('s2', 's5'):
            self.assertIn(absent_id, message)
예제 #26
0
    def test_reorder(self):
        """Identity grouping with reversed metadata order reorders samples.

        The metadata lists the samples as c, b, a; the grouped table is
        expected to have its columns in that order.
        """
        reversed_ids = pd.Series(['c', 'b', 'a'], index=['c', 'b', 'a'])
        sample_mc = qiime2.MetadataCategory(reversed_ids)

        table = biom.Table(np.array([[1, 2, 3], [30, 20, 10]]),
                           sample_ids=['a', 'b', 'c'],
                           observation_ids=['x', 'y'])

        expected_counts = np.array([[3, 2, 1], [10, 20, 30]])
        expected = biom.Table(expected_counts,
                              sample_ids=['c', 'b', 'a'],
                              observation_ids=['x', 'y'])

        result = group(table, axis='sample', metadata=sample_mc, mode='sum')
        self.assertEqual(expected, result)
예제 #27
0
    def test_superset_sample_group(self):
        """Sample metadata may cover more samples than the table holds.

        The metadata lists s1-s6 while the table only has s1, s2, s4 and
        s5; summing by group is expected to yield just 'g0' and 'g1'.
        """
        sample_mc = qiime2.MetadataCategory(
            pd.Series(['g0', 'g1', 'g2', 'g0', 'g1', 'g2'],
                      index=['s1', 's2', 's3', 's4', 's5', 's6']))

        counts = np.array([[0, 1, 2, 3],
                           [10, 11, 12, 13],
                           [100, 110, 120, 130]])
        table = biom.Table(counts,
                           sample_ids=['s1', 's2', 's4', 's5'],
                           observation_ids=['x', 'y', 'z'])

        expected_counts = np.array([[2, 4], [22, 24], [220, 240]])
        expected = biom.Table(expected_counts,
                              sample_ids=['g0', 'g1'],
                              observation_ids=['x', 'y', 'z'])

        result = group(table, axis='sample', metadata=sample_mc, mode='sum')
        self.assertEqual(expected, result)
예제 #28
0
 def test_evaluate_composition_metadata_not_superset(self):
     """``_evaluate_composition`` rejects metadata lacking samples.

     The metadata only has an entry for 's3'; the call must raise a
     ``ValueError`` matching "Missing samples in metadata".
     """
     frame = pd.DataFrame({'mock_id': ['there_can_only_be_one']},
                          index=['s3'])
     incomplete_md = qiime2.MetadataCategory(frame['mock_id'])

     # Every plot option is switched on.
     plot_flags = dict(plot_tar=True,
                       plot_tdr=True,
                       plot_r_value=True,
                       plot_r_squared=True,
                       plot_observed_features=True,
                       plot_observed_features_ratio=True)

     with self.assertRaisesRegex(ValueError, "Missing samples in metadata"):
         _evaluate_composition(self.exp_one_sample,
                               self.obs,
                               depth=7,
                               palette='Set1',
                               metadata=incomplete_md,
                               **plot_flags)
예제 #29
0
    def test_superset_feature_group(self):
        """Feature metadata may cover more features than the table holds.

        The metadata lists a-g while the table only has a-f; summing by
        group is expected to yield 'g0', 'g1' and 'g2' with no 'extra'
        row.
        """
        feature_mc = qiime2.MetadataCategory(
            pd.Series(['g0', 'g0', 'g1', 'g2', 'g1', 'g2', 'extra'],
                      index=['a', 'b', 'c', 'd', 'e', 'f', 'g']))

        counts = np.array([
            [1, 0, 0],
            [1, 10, 10],
            [0, 0, 100],
            [5, 5, 5],
            [0, 1, 100],
            [7, 8, 9],
        ])
        # g is missing from the table on purpose
        table = biom.Table(counts,
                           sample_ids=['s1', 's2', 's3'],
                           observation_ids=['a', 'b', 'c', 'd', 'e', 'f'])

        expected_counts = np.array([[2, 10, 10],
                                    [0, 1, 200],
                                    [12, 13, 14]])
        expected = biom.Table(expected_counts,
                              sample_ids=['s1', 's2', 's3'],
                              observation_ids=['g0', 'g1', 'g2'])

        result = group(table, axis='feature', metadata=feature_mc, mode='sum')
        self.assertEqual(expected, result)
예제 #30
0
    def test_subsample_higher_than_seqs_count(self):
        """Requesting n=50 from a single-read result shows a warning.

        Checks that ``summarize`` returns None and that
        ``quality-plot.html`` contains a bold 'Warning:' marker.
        """
        bsi = BarcodeSequenceFastqIterator(self.barcodes[:1],
                                           self.sequences[:1])

        barcode_map = qiime2.MetadataCategory(
            pd.Series(['AAAA'], index=['sample1']))

        demux_data = emp_single(bsi, barcode_map)
        with tempfile.TemporaryDirectory() as output_dir:
            view = _PlotQualView(demux_data, paired=False)
            result = summarize(output_dir, view, n=50)
            self.assertTrue(result is None)

            plot_fp = os.path.join(output_dir, 'quality-plot.html')
            with open(plot_fp, 'r') as fh:
                html = fh.read()
            self.assertIn('<strong>Warning:</strong>', html)