def test_empty_metadata_values(self):
    """``group`` rejects None, NaN and empty-string metadata values.

    Each case expects a ``ValueError`` whose message matches
    ``missing.*value`` followed by the id that carries the empty value.
    """
    # Trusting that the code is sane enough to not invent a distinction
    # between feature and sample metadata where there is none
    none_mc = qiime2.MetadataCategory(
        pd.Series(['a_new', 'a_new', None], index=['a', 'b', 'c']))
    ids = none_mc.to_series().index
    counts = np.array([[1, 2, 3], [30, 20, 10]])
    table = biom.Table(counts, sample_ids=ids,
                       observation_ids=['x', 'y'])

    # None on sample 'c'
    with self.assertRaisesRegex(ValueError, 'missing.*value.*{\'c\'}'):
        group(table, axis='sample', metadata=none_mc, mode='sum')

    # NaN on sample 'b'
    nan_mc = qiime2.MetadataCategory(
        pd.Series(['a_new', float('nan'), 'a_new'],
                  index=['a', 'b', 'c']))
    with self.assertRaisesRegex(ValueError, 'missing.*value.*{\'b\'}'):
        group(table, axis='sample', metadata=nan_mc, mode='sum')

    # Empty string on feature 'x'
    blank_mc = qiime2.MetadataCategory(
        pd.Series(['', 'y_new'], index=['x', 'y']))
    with self.assertRaisesRegex(ValueError, 'missing.*value.*{\'x\'}'):
        group(table, axis='feature', metadata=blank_mc,
              mode='median-ceiling')
def _shared_setup(self):
    """Build a 5-feature x 7-sample fixture with grouping metadata.

    Returns
    -------
    tuple
        ``(sample_mc, feature_mc, table)``: the sample metadata
        category, the feature metadata category, and a ``biom.Table``
        whose ids are taken from the two categories' indexes.
    """
    sample_groups = pd.Series(
        ['treatment', 'treatment', 'control', 'other',
         'control', 'other', 'other'],
        index=['a', 'b', 'c', 'd', 'e', 'f', 'g'])
    sample_mc = qiime2.MetadataCategory(sample_groups)

    feature_groups = pd.Series(
        ['g0', 'g1', 'g1', 'g1', 'g0'],
        index=['v', 'w', 'x', 'y', 'z'])
    feature_mc = qiime2.MetadataCategory(feature_groups)

    counts = np.array([
        # t   t   c   o    c     o    o
        # a   b   c   d    e     f    g
        [0, 0, 0, 0, 1, 0, 2],             # v  g0
        [10, 10, 10, 10, 10, 100, 1],      # w  g1
        [12, 3, 14, 0, 0, 3, 34],          # x  g1
        [1, 1, 1, 1, 1, 1, 1],             # y  g1
        [0, 1, 11, 111, 1111, 20, 20],     # z  g0
    ])

    table = biom.Table(
        counts,
        sample_ids=sample_mc.to_series().index,
        observation_ids=feature_mc.to_series().index)

    return sample_mc, feature_mc, table
def test_numeric(self):
    """Numeric-looking string metadata makes ``group`` raise.

    Integer-like, float-like and mixed values are each expected to
    produce a ``ValueError`` matching ``numeric``.
    """
    table = biom.Table(np.array([[1, 2, 3], [30, 20, 10]]),
                       sample_ids=['a', 'b', 'c'],
                       observation_ids=['x', 'y'])

    numeric_like_values = (
        ['1', '2', '3'],             # ints
        ['1.1', '2.2', '3.3333'],    # floats
        ['0', '42', '4.2'],          # mixed
    )
    for values in numeric_like_values:
        sample_mc = qiime2.MetadataCategory(
            pd.Series(values, index=['a', 'b', 'c']))
        with self.assertRaisesRegex(ValueError, 'numeric'):
            group(table, axis='sample', metadata=sample_mc, mode='sum')
def setUp(self):
    """Create the barcode/sequence iterator and barcode map fixtures.

    Sets ``self.sequences``, ``self.bsi`` (built from eleven barcode
    records paired with ``self.sequences``) and ``self.barcode_map``.
    """
    barcode_records = [
        ('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
        ('@s2/2 abc/2', 'TTAA', '+', 'PPPP'),
        ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
        ('@s4/2 abc/2', 'TTAA', '+', 'PPPP'),
        ('@s5/2 abc/2', 'AACC', '+', 'PPPP'),
        ('@s6/2 abc/2', 'AAAA', '+', 'PPPP'),
        ('@s7/2 abc/2', 'CGGC', '+', 'PPPP'),
        ('@s8/2 abc/2', 'GGAA', '+', 'PPPP'),
        ('@s9/2 abc/2', 'CGGC', '+', 'PPPP'),
        ('@s10/2 abc/2', 'CGGC', '+', 'PPPP'),
        ('@s11/2 abc/2', 'GGAA', '+', 'PPPP'),
    ]

    self.sequences = [
        ('@s1/1 abc/1', 'GGG', '+', 'YYY'),
        ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
        ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
        ('@s4/1 abc/1', 'TTT', '+', 'PPP'),
        ('@s5/1 abc/1', 'ATA', '+', 'PPP'),
        ('@s6/1 abc/1', 'TAT', '+', 'PPP'),
        ('@s7/1 abc/1', 'CGC', '+', 'PPP'),
        ('@s8/1 abc/1', 'GCG', '+', 'PPP'),
        ('@s9/1 abc/1', 'ACG', '+', 'PPP'),
        ('@s10/1 abc/1', 'GCA', '+', 'PPP'),
        ('@s11/1 abc/1', 'TGA', '+', 'PPP'),
    ]

    self.bsi = BarcodeSequenceFastqIterator(barcode_records,
                                            self.sequences)

    # Barcode sequence for each sample id.
    sample_barcodes = pd.Series(
        ['AAAA', 'AACC', 'TTAA', 'GGAA', 'CGGC'],
        index=['sample1', 'sample2', 'sample3', 'sample4', 'sample5'])
    self.barcode_map = qiime2.MetadataCategory(sample_barcodes)
def test_single_sample(self):
    """Summarize a single-end demux result containing one sample.

    Verifies that ``summarize`` returns ``None``, that ``overview.html``
    and ``per-sample-fastq-counts.csv`` are written and non-empty, that
    no ``demultiplex-summary`` PDF/PNG is produced, and that the
    overview reports a minimum and maximum of 1.
    """
    bsi = BarcodeSequenceFastqIterator(self.barcodes[:1],
                                       self.sequences[:1])
    barcode_map = pd.Series(['AAAA'], index=['sample1'])
    barcode_map = qiime2.MetadataCategory(barcode_map)

    demux_data = emp_single(bsi, barcode_map)
    # TODO: Remove _PlotQualView wrapper
    with tempfile.TemporaryDirectory() as output_dir:
        result = summarize(output_dir,
                           _PlotQualView(demux_data, paired=False), n=1)
        self.assertIsNone(result)

        # overview.html must be created and have size > 0
        index_fp = os.path.join(output_dir, 'overview.html')
        self.assertTrue(os.path.exists(index_fp))
        self.assertGreater(os.path.getsize(index_fp), 0)

        # so must the per-sample counts table
        csv_fp = os.path.join(output_dir, 'per-sample-fastq-counts.csv')
        self.assertTrue(os.path.exists(csv_fp))
        self.assertGreater(os.path.getsize(csv_fp), 0)

        # the summary plots are expected to be absent in this case
        pdf_fp = os.path.join(output_dir, 'demultiplex-summary.pdf')
        self.assertFalse(os.path.exists(pdf_fp))
        png_fp = os.path.join(output_dir, 'demultiplex-summary.png')
        self.assertFalse(os.path.exists(png_fp))

        with open(index_fp, 'r') as fh:
            html = fh.read()
            self.assertIn('<td>Minimum:</td><td>1</td>', html)
            self.assertIn('<td>Maximum:</td><td>1</td>', html)
def test_paired_end(self):
    """Summarize a paired-end demux result built from three records.

    Verifies that ``summarize`` returns ``None`` and that
    ``quality-plot.html`` contains both the forward-reads and the
    reverse-reads headings.
    """
    barcodes = self.barcodes[:3]
    forward = self.sequences[:3]
    reverse = [('@s1/1 abc/1', 'CCC', '+', 'YYY'),
               ('@s2/1 abc/1', 'GGG', '+', 'PPP'),
               ('@s3/1 abc/1', 'TTT', '+', 'PPP')]
    bpsi = BarcodePairedSequenceFastqIterator(barcodes, forward, reverse)

    barcode_map = pd.Series(['AAAA', 'AACC', 'TTAA'],
                            index=['sample1', 'sample2', 'sample3'])
    barcode_map = qiime2.MetadataCategory(barcode_map)

    demux_data = emp_paired(bpsi, barcode_map)
    with tempfile.TemporaryDirectory() as output_dir:
        result = summarize(output_dir,
                           _PlotQualView(demux_data, paired=True), n=2)
        # assertIsNone reports the actual value if this ever fails
        self.assertIsNone(result)

        plot_fp = os.path.join(output_dir, 'quality-plot.html')
        with open(plot_fp, 'r') as fh:
            html = fh.read()
            self.assertIn('<h5 class="text-center">Forward Reads</h5>',
                          html)
            self.assertIn('<h5 class="text-center">Reverse Reads</h5>',
                          html)
def test_permanova_pairwise(self):
    """Run ``beta_group_significance`` with ``pairwise=True``.

    Verifies that ``index.html`` exists, that exactly one PDF and one
    PNG boxplot is written per group ('a' and 'b'), and that the index
    page mentions the PERMANOVA and pairwise results without a warning.
    """
    dm = skbio.DistanceMatrix(
        [[0.00, 0.25, 0.25],
         [0.25, 0.00, 0.00],
         [0.25, 0.00, 0.00]],
        ids=['sample1', 'sample2', 'sample3'])
    md = qiime2.MetadataCategory(
        pd.Series(['a', 'b', 'b'], name='a or b',
                  index=['sample1', 'sample2', 'sample3']))

    with tempfile.TemporaryDirectory() as output_dir:
        beta_group_significance(output_dir, dm, md, pairwise=True)
        index_fp = os.path.join(output_dir, 'index.html')
        self.assertTrue(os.path.exists(index_fp))

        # all expected boxplots are generated
        self.assertTrue(
            os.path.exists(os.path.join(output_dir, 'a-boxplots.pdf')))
        self.assertTrue(
            os.path.exists(os.path.join(output_dir, 'a-boxplots.png')))
        self.assertTrue(
            os.path.exists(os.path.join(output_dir, 'b-boxplots.pdf')))
        self.assertTrue(
            os.path.exists(os.path.join(output_dir, 'b-boxplots.png')))

        # no extra boxplots are generated
        self.assertEqual(
            len(glob.glob('%s/*-boxplots.pdf' % output_dir)), 2)
        self.assertEqual(
            len(glob.glob('%s/*-boxplots.png' % output_dir)), 2)

        # Read the page once and close the handle before the temporary
        # directory is removed.
        with open(index_fp) as fh:
            html = fh.read()
        self.assertIn('PERMANOVA results', html)
        self.assertIn('Pairwise permanova', html)
        self.assertNotIn('Warning', html)
def test_with_metadata(self):
    """``heatmap`` with a metadata category produces a valid viz."""
    pets = pd.Series(['milo', 'summer', 'russ'], name='pet',
                     index=['S1', 'S2', 'S3'])
    category = qiime2.MetadataCategory(pets)

    heatmap(self.output_dir, self.table, metadata=category)

    self.assertBasicVizValidity(self.output_dir)
def test_one_sample(self):
    """A single-sample category yields a 1x1 zero distance matrix."""
    values = pd.Series([1.5], name='number', index=['sample1'])
    category = qiime2.MetadataCategory(values)

    expected = skbio.DistanceMatrix([[0.0]], ids=['sample1'])
    observed = distance_matrix(category)

    self.assertEqual(expected, observed)
def test_missing_values(self):
    """A NaN entry makes ``distance_matrix`` raise ``ValueError``."""
    sample_ids = ['sample1', 'sample2', 'sample3', 'sample4']
    values = pd.Series([1.0, 2.0, np.nan, 4.0], name='number',
                       index=sample_ids)
    category = qiime2.MetadataCategory(values)

    with self.assertRaisesRegex(ValueError, 'missing values'):
        distance_matrix(category)
def test_identity_groups(self):
    """Grouping each id into itself returns an equal table.

    Exercised for both axes and for the sum, mean-ceiling and
    median-ceiling modes.
    """
    # These map to the same values as before
    sample_mc = qiime2.MetadataCategory(
        pd.Series(['a', 'b', 'c'], index=['a', 'b', 'c']))
    feature_mc = qiime2.MetadataCategory(
        pd.Series(['x', 'y'], index=['x', 'y']))
    table = biom.Table(np.array([[1, 2, 3], [30, 20, 10]]),
                       sample_ids=sample_mc.to_series().index,
                       observation_ids=feature_mc.to_series().index)

    # Sample x {Sum, Mean, Median}, then Feature x {Sum, Mean, Median}
    axes = (('sample', sample_mc), ('feature', feature_mc))
    modes = ('sum', 'mean-ceiling', 'median-ceiling')
    for axis, metadata in axes:
        for mode in modes:
            result = group(table, axis=axis, metadata=metadata,
                           mode=mode)
            self.assertEqual(table, result)
def test_no_sample_cluster(self):
    """``heatmap`` with ``cluster='features'`` produces a valid viz."""
    pets = pd.Series(['milo', 'summer', 'russ'], name='pet',
                     index=['S1', 'S2', 'S3'])
    category = qiime2.MetadataCategory(pets)

    heatmap(self.output_dir, self.table, metadata=category,
            cluster='features')

    self.assertBasicVizValidity(self.output_dir)
def test_non_numeric_category(self):
    """Non-numeric strings make ``distance_matrix`` raise.

    The error message is expected to mention ``non-numeric values``
    and, after a blank line, the offending value ``x1``.
    """
    sample_ids = ['sample1', 'sample2', 'sample3', 'sample4']
    values = pd.Series(['x1', 'x2', '3', '4'], name='number',
                       index=sample_ids)
    category = qiime2.MetadataCategory(values)

    with self.assertRaisesRegex(ValueError,
                                'non-numeric values.*\n\n.*x1'):
        distance_matrix(category)
def test_bad_method(self):
    """An unknown ``method`` makes ``alpha_correlation`` raise."""
    sample_ids = ['sample1', 'sample2', 'sample3']
    alpha_div = pd.Series([2.0, 4.0, 6.0], name='alpha-div',
                          index=sample_ids)
    category = qiime2.MetadataCategory(
        pd.Series(['1.0', '2.0', '3.0'], name='value',
                  index=sample_ids))

    with tempfile.TemporaryDirectory() as output_dir, \
            self.assertRaises(ValueError):
        alpha_correlation(output_dir, alpha_div, category,
                          method='bad!')
def test_error_on_missing_metadata(self):
    """``beta_correlation`` raises when a sample lacks metadata.

    The distance matrix has three samples but the category only covers
    two; the error is expected to match ``no data: sample3``.
    """
    distances = skbio.DistanceMatrix(
        [[0.00, 0.25, 0.25],
         [0.25, 0.00, 0.00],
         [0.25, 0.00, 0.00]],
        ids=['sample1', 'sample2', 'sample3'])
    category = qiime2.MetadataCategory(
        pd.Series([1, 2], name='number',
                  index=['sample1', 'sample2']))

    with tempfile.TemporaryDirectory() as output_dir, \
            self.assertRaisesRegex(ValueError, 'no data: sample3'):
        beta_correlation(output_dir, distances, category)
def test_float_category(self):
    """Float metadata gives the expected pairwise distance matrix.

    The expected entries equal the absolute differences between the
    values 1.5, 2.0 and 3.0.
    """
    sample_ids = ['sample1', 'sample2', 'sample3']
    category = qiime2.MetadataCategory(
        pd.Series([1.5, 2.0, 3.0], name='number', index=sample_ids))

    expected = skbio.DistanceMatrix(
        [[0.0, 0.5, 1.5],
         [0.5, 0.0, 1.0],
         [1.5, 1.0, 0.0]],
        ids=['sample1', 'sample2', 'sample3'])
    observed = distance_matrix(category)

    self.assertEqual(expected, observed)
def test_empty_table(self):
    """``group`` raises ``ValueError`` on an empty table for both axes."""
    category = qiime2.MetadataCategory(
        pd.Series(['a_new', 'b_new'], index=['a', 'b']))
    empty = biom.Table(np.array([[]]), sample_ids=[],
                       observation_ids=[])

    for axis in ('sample', 'feature'):
        with self.assertRaisesRegex(ValueError, 'empty table'):
            group(empty, axis=axis, metadata=category, mode='sum')
def test_int_category(self):
    """Integer metadata gives the expected pairwise distance matrix.

    The expected entries equal the absolute differences between the
    values 1, 2 and 3.
    """
    sample_ids = ['sample1', 'sample2', 'sample3']
    category = qiime2.MetadataCategory(
        pd.Series([1, 2, 3], name='number', index=sample_ids))

    expected = skbio.DistanceMatrix(
        [[0, 1, 2],
         [1, 0, 1],
         [2, 1, 0]],
        ids=['sample1', 'sample2', 'sample3'])
    observed = distance_matrix(category)

    self.assertEqual(expected, observed)
def test_invalid_method(self):
    """An unknown ``method`` makes ``beta_group_significance`` raise."""
    sample_ids = ['sample1', 'sample2', 'sample3']
    distances = skbio.DistanceMatrix(
        [[0.00, 0.25, 0.25],
         [0.25, 0.00, 0.00],
         [0.25, 0.00, 0.00]],
        ids=['sample1', 'sample2', 'sample3'])
    category = qiime2.MetadataCategory(
        pd.Series(['a', 'b', 'b'], name='a or b', index=sample_ids))

    with self.assertRaises(ValueError), \
            tempfile.TemporaryDirectory() as output_dir:
        beta_group_significance(output_dir, distances, category,
                                method='bad!')
def test_error_on_non_numeric_metadata(self):
    """``beta_correlation`` raises on a non-numeric metadata value.

    The error is expected to match ``Non-numeric data was``.
    """
    sample_ids = ['sample1', 'sample2', 'sample3']
    distances = skbio.DistanceMatrix(
        [[0.00, 0.25, 0.25],
         [0.25, 0.00, 0.00],
         [0.25, 0.00, 0.00]],
        ids=['sample1', 'sample2', 'sample3'])
    category = qiime2.MetadataCategory(
        pd.Series([1.0, 2.0, 'hello-world'], name='number',
                  index=sample_ids))

    with tempfile.TemporaryDirectory() as output_dir, \
            self.assertRaisesRegex(ValueError, 'Non-numeric data was'):
        beta_correlation(output_dir, distances, category)
def test_extra_metadata(self):
    """Metadata may cover more samples than the distance matrix.

    The category has an extra ``sample4`` entry that is absent from the
    distance matrix; the generated ``index.html`` is expected to
    contain ``<td>2</td>``.
    """
    dm = skbio.DistanceMatrix(
        [[0.00, 0.25, 0.25],
         [0.25, 0.00, 0.00],
         [0.25, 0.00, 0.00]],
        ids=['sample1', 'sample2', 'sample3'])
    md = qiime2.MetadataCategory(
        pd.Series(['a', 'b', 'b', 'c'], name='a or b',
                  index=['sample1', 'sample2', 'sample3', 'sample4']))

    with tempfile.TemporaryDirectory() as output_dir:
        beta_group_significance(output_dir, dm, md, permutations=42)
        index_fp = os.path.join(output_dir, 'index.html')
        # Close the handle explicitly instead of leaking it until
        # garbage collection.
        with open(index_fp) as fh:
            html = fh.read()
        self.assertIn('<td>2</td>', html)
def test_missing_feature_ids(self):
    """``group`` raises when a table feature has no metadata entry.

    Feature ``b`` is in the table but not in the metadata index; the
    error is expected to match ``metadata.*missing: {'b'}``.
    """
    groups = pd.Series(['g0', 'g1', 'g2', 'g1', 'g2', 'extra'],
                       index=['a', 'c', 'd', 'e', 'f', 'g'])
    feature_mc = qiime2.MetadataCategory(groups)

    counts = np.array([[1, 0, 0],
                       [1, 10, 10],
                       [0, 0, 100],
                       [5, 5, 5],
                       [0, 1, 100],
                       [7, 8, 9]])
    # g is missing on purpose
    table = biom.Table(counts, sample_ids=['s1', 's2', 's3'],
                       observation_ids=['a', 'b', 'c', 'd', 'e', 'f'])

    with self.assertRaisesRegex(ValueError,
                                'metadata.*missing: {\'b\'}'):
        group(table, axis='feature', metadata=feature_mc, mode='sum')
def test_filtered_samples_str_metadata(self):
    """An empty-string metadata value produces a warning in the output.

    ``sample4`` carries ``''`` as its category value; the generated
    ``index.html`` is expected to contain ``Warning``.
    """
    dm = skbio.DistanceMatrix(
        [[0.00, 0.25, 0.25, 0.66],
         [0.25, 0.00, 0.00, 0.66],
         [0.25, 0.00, 0.00, 0.66],
         [0.66, 0.66, 0.66, 0.00]],
        ids=['sample1', 'sample2', 'sample3', 'sample4'])
    md = qiime2.MetadataCategory(
        pd.Series(['a', 'b', 'b', ''], name='a or b',
                  index=['sample1', 'sample2', 'sample3', 'sample4']))

    with tempfile.TemporaryDirectory() as output_dir:
        beta_group_significance(output_dir, dm, md)
        index_fp = os.path.join(output_dir, 'index.html')
        # Close the handle explicitly instead of leaking it until
        # garbage collection.
        with open(index_fp) as fh:
            html = fh.read()
        self.assertIn('Warning', html)
def test_str_casting(self):
    """Numeric strings are accepted by ``distance_matrix``.

    The expected entries equal the absolute differences between
    1, 2, 3 and 4, expressed as floats.
    """
    sample_ids = ['sample1', 'sample2', 'sample3', 'sample4']
    category = qiime2.MetadataCategory(
        pd.Series(['1', '2', '3', '4'], name='number',
                  index=sample_ids))

    expected = skbio.DistanceMatrix(
        [[0.0, 1.0, 2.0, 3.0],
         [1.0, 0.0, 1.0, 2.0],
         [2.0, 1.0, 0.0, 1.0],
         [3.0, 2.0, 1.0, 0.0]],
        ids=['sample1', 'sample2', 'sample3', 'sample4'])
    observed = distance_matrix(category)

    self.assertEqual(expected, observed)
def test_missing_sample_ids(self):
    """``group`` raises when table samples have no metadata entry.

    Samples ``s2`` and ``s5`` are in the table but not in the metadata
    index; both ids are expected to appear in the error message.
    """
    groups = pd.Series(['g0', 'g2', 'g0', 'g2'],
                       index=['s1', 's3', 's4', 's6'])
    sample_mc = qiime2.MetadataCategory(groups)

    counts = np.array([[0, 1, 2, 3],
                       [10, 11, 12, 13],
                       [100, 110, 120, 130]])
    table = biom.Table(counts, sample_ids=['s1', 's2', 's4', 's5'],
                       observation_ids=['x', 'y', 'z'])

    with self.assertRaisesRegex(ValueError,
                                'metadata.*missing:') as caught:
        group(table, axis='sample', metadata=sample_mc, mode='sum')

    # The captured exception is inspected after the context exits.
    self.assertIn('s2', str(caught.exception))
    self.assertIn('s5', str(caught.exception))
def test_reorder(self):
    """Identity grouping with metadata listed in c, b, a order.

    The result is expected to equal a table whose sample ids and
    columns are in c, b, a order.
    """
    reversed_ids = pd.Series(['c', 'b', 'a'], index=['c', 'b', 'a'])
    sample_mc = qiime2.MetadataCategory(reversed_ids)

    table = biom.Table(np.array([[1, 2, 3], [30, 20, 10]]),
                       sample_ids=['a', 'b', 'c'],
                       observation_ids=['x', 'y'])
    expected = biom.Table(np.array([[3, 2, 1], [10, 20, 30]]),
                          sample_ids=['c', 'b', 'a'],
                          observation_ids=['x', 'y'])

    result = group(table, axis='sample', metadata=sample_mc,
                   mode='sum')

    self.assertEqual(expected, result)
def test_superset_sample_group(self):
    """Sample metadata may list ids that are absent from the table.

    Metadata covers s1..s6 while the table holds s1, s2, s4 and s5; the
    summed result is expected to contain only groups g0 and g1.
    """
    groups = pd.Series(['g0', 'g1', 'g2', 'g0', 'g1', 'g2'],
                       index=['s1', 's2', 's3', 's4', 's5', 's6'])
    sample_mc = qiime2.MetadataCategory(groups)

    counts = np.array([[0, 1, 2, 3],
                       [10, 11, 12, 13],
                       [100, 110, 120, 130]])
    table = biom.Table(counts, sample_ids=['s1', 's2', 's4', 's5'],
                       observation_ids=['x', 'y', 'z'])

    expected = biom.Table(np.array([[2, 4], [22, 24], [220, 240]]),
                          sample_ids=['g0', 'g1'],
                          observation_ids=['x', 'y', 'z'])

    result = group(table, axis='sample', metadata=sample_mc,
                   mode='sum')

    self.assertEqual(expected, result)
def test_evaluate_composition_metadata_not_superset(self):
    """``_evaluate_composition`` raises when metadata lacks samples.

    The metadata category only contains ``s3``; the error is expected
    to match ``Missing samples in metadata``.
    """
    frame = pd.DataFrame({'mock_id': ['there_can_only_be_one']},
                         index=['s3'])
    incomplete_md = qiime2.MetadataCategory(frame['mock_id'])

    # Every plot option is switched on for this call.
    plot_options = dict(
        plot_tar=True,
        plot_tdr=True,
        plot_r_value=True,
        plot_r_squared=True,
        plot_observed_features=True,
        plot_observed_features_ratio=True,
    )

    with self.assertRaisesRegex(ValueError,
                                "Missing samples in metadata"):
        _evaluate_composition(self.exp_one_sample, self.obs, depth=7,
                              palette='Set1', metadata=incomplete_md,
                              **plot_options)
def test_superset_feature_group(self):
    """Feature metadata may list ids that are absent from the table.

    Metadata covers a..g while the table holds a..f; the summed result
    is expected to contain groups g0, g1 and g2 only.
    """
    groups = pd.Series(['g0', 'g0', 'g1', 'g2', 'g1', 'g2', 'extra'],
                       index=['a', 'b', 'c', 'd', 'e', 'f', 'g'])
    feature_mc = qiime2.MetadataCategory(groups)

    counts = np.array([[1, 0, 0],
                       [1, 10, 10],
                       [0, 0, 100],
                       [5, 5, 5],
                       [0, 1, 100],
                       [7, 8, 9]])
    # g is missing on purpose
    table = biom.Table(counts, sample_ids=['s1', 's2', 's3'],
                       observation_ids=['a', 'b', 'c', 'd', 'e', 'f'])

    expected = biom.Table(
        np.array([[2, 10, 10], [0, 1, 200], [12, 13, 14]]),
        sample_ids=['s1', 's2', 's3'],
        observation_ids=['g0', 'g1', 'g2'])

    result = group(table, axis='feature', metadata=feature_mc,
                   mode='sum')

    self.assertEqual(expected, result)
def test_subsample_higher_than_seqs_count(self):
    """Requesting ``n=50`` from a one-record demux result warns.

    Verifies that ``summarize`` returns ``None`` and that
    ``quality-plot.html`` contains a ``<strong>Warning:</strong>``
    marker.
    """
    barcodes = self.barcodes[:1]
    sequences = self.sequences[:1]
    bsi = BarcodeSequenceFastqIterator(barcodes, sequences)

    barcode_map = pd.Series(['AAAA'], index=['sample1'])
    barcode_map = qiime2.MetadataCategory(barcode_map)

    demux_data = emp_single(bsi, barcode_map)
    with tempfile.TemporaryDirectory() as output_dir:
        result = summarize(output_dir,
                           _PlotQualView(demux_data, paired=False), n=50)
        # assertIsNone reports the actual value if this ever fails
        self.assertIsNone(result)

        plot_fp = os.path.join(output_dir, 'quality-plot.html')
        with open(plot_fp, 'r') as fh:
            html = fh.read()
            self.assertIn('<strong>Warning:</strong>', html)