def test_first_differences_single_sample(self):
    """Expect a ValueError for metadata filtered to ind 1 at Time 1.

    The raised message must match "state_column must contain".
    """
    # Keep only the rows where both ind == 1 and Time == 1.
    row_mask = (md['ind'] == 1) & (md['Time'] == 1)
    lone_sample_md = qiime2.Metadata(md[row_mask])
    call_kwargs = dict(
        metadata=lone_sample_md,
        state_column='Time',
        individual_id_column='ind',
        metric='Value',
        replicate_handling='drop',
    )
    with self.assertRaisesRegex(ValueError, "state_column must contain"):
        first_differences(**call_kwargs)
def test_first_differences_nonnumeric_metric_error(self):
    """Using 'delivery' as the metric must raise "not a numeric"."""
    # Callable form of assertRaisesRegex: the trailing keyword arguments
    # are forwarded to first_differences.
    self.assertRaisesRegex(
        ValueError,
        "not a numeric",
        first_differences,
        metadata=self.md_ecam_fp,
        state_column='month',
        individual_id_column='studyid',
        metric='delivery',
        replicate_handling='drop',
    )
def test_first_differences_baseline_invalid_baseline(self):
    """Passing baseline=27 must raise "must be a valid state"."""
    with self.assertRaisesRegex(ValueError, "must be a valid state"):
        # The metadata is constructed inside the guarded region.
        subject_md = qiime2.Metadata(md_one_subject_many_times)
        first_differences(
            metadata=subject_md,
            state_column='Time',
            individual_id_column='ind',
            metric='Value',
            replicate_handling='drop',
            baseline=27,
        )
def test_first_differences_taxa(self):
    """Compare first differences for a table-backed metric to a stored TSV.

    Expected values are read from 'ecam-taxa-first-differences.tsv'; the
    metric is presumably a feature ID found in ``self.table_ecam_fp``.
    """
    exp_fp = self.get_data_path('ecam-taxa-first-differences.tsv')
    # read_csv's ``squeeze`` keyword was deprecated in pandas 1.4 and
    # removed in 2.0. Squeezing the parsed single-column frame along the
    # column axis is the documented replacement and yields the same Series.
    exp = pd.read_csv(exp_fp, sep='\t', index_col=0).squeeze('columns')
    obs = first_differences(
        metadata=self.md_ecam_fp,
        state_column='month',
        individual_id_column='studyid',
        metric='e2c3ff4f647112723741aa72087f1bfa',
        replicate_handling='drop',
        table=self.table_ecam_fp)
    pdt.assert_series_equal(obs, exp)
def test_first_differences_static(self):
    """Every difference computed from md_static is expected to be 0."""
    sample_ids = pd.Index(['3', '4', '5', '9', '10', '11'], name='#SampleID')
    expected = pd.Series(
        [0.] * len(sample_ids), index=sample_ids, name='Difference')

    observed = first_differences(
        metadata=qiime2.Metadata(md_static),
        state_column='Time',
        individual_id_column='ind',
        metric='Value',
        replicate_handling='drop',
    )

    # Sort both sides so the comparison does not depend on row order.
    pdt.assert_series_equal(observed.sort_index(), expected.sort_index())
def test_first_differences_single_individual(self):
    """Metadata limited to ind == 1 yields one difference, 0.08 at '3'."""
    rows_for_ind_one = md[md['ind'] == 1]
    single_ind = qiime2.Metadata(rows_for_ind_one)

    expected = pd.Series(
        [0.08],
        index=pd.Index(['3'], name='#SampleID'),
        name='Difference',
    )
    observed = first_differences(
        metadata=single_ind,
        state_column='Time',
        individual_id_column='ind',
        metric='Value',
        replicate_handling='drop',
    )

    pdt.assert_series_equal(observed.sort_index(), expected.sort_index())
def test_first_differences_baseline(self):
    """Check the differences returned when baseline=0 is supplied."""
    # Sample IDs '1' through '11', one per expected difference.
    sample_ids = pd.Index(
        [str(i) for i in range(1, 12)], name='#SampleID')
    expected_values = [
        -0.01, 0., 0.01, 0.07, 0.06, 0.09, 0.07, 0.1, 0.15, 0.12, 0.16,
    ]
    expected = pd.Series(
        expected_values, index=sample_ids, name='Difference')

    observed = first_differences(
        metadata=qiime2.Metadata(md_one_subject_many_times),
        state_column='Time',
        individual_id_column='ind',
        metric='Value',
        replicate_handling='drop',
        baseline=0,
    )

    pdt.assert_series_equal(observed, expected)
def test_first_distances_numeric_values_represented_as_strings(self):
    """Numeric values stored as strings still produce a 0.03 difference.

    Despite "first_distances" in the name, this test calls
    first_differences.
    """
    # Every cell is a string, including the Time, Value and ind columns.
    string_typed_frame = pd.DataFrame(
        {
            'Time': ['0', '1'],
            'Value': ['0.18', '0.21'],
            'ind': ['1', '1'],
        },
        index=['0', '1'],
    )
    expected = pd.Series(
        [0.03],
        index=pd.Index(['1'], name='#SampleID'),
        name='Difference',
    )

    observed = first_differences(
        metadata=qiime2.Metadata(string_typed_frame),
        state_column='Time',
        individual_id_column='ind',
        metric='Value',
        replicate_handling='drop',
    )

    pdt.assert_series_equal(observed, expected)
def test_first_differences_drop_duplicates(self):
    """Check first differences on md_dup with replicate_handling='random'.

    One value depends on which replicate is picked, so it is validated
    against its two admissible outcomes (0.05 or 0.06) and then copied into
    the expected series; every other value is fixed.
    """
    obs = first_differences(
        metadata=qiime2.Metadata(md_dup),
        state_column='Time',
        individual_id_column='ind',
        metric='Value',
        replicate_handling='random')

    # The first diff of individual 2 is subject to random rep handling.
    # Look it up by sample ID rather than by position: the expected series
    # below stores it under '4', and the final comparison is order-agnostic,
    # so the lookup should not depend on row order either.
    mystery_number = obs.loc['4']
    admissible_value = 0.05 if mystery_number < 0.051 else 0.06
    self.assertAlmostEqual(mystery_number, admissible_value)

    # but other values are constant, so we will just drop in the mystery
    # value and the exp/obs series should match.
    exp = pd.Series(
        [0.08, mystery_number, 0.12, 0.14, 0.14999999999999997],
        index=['3', '4', '9', '10', '11'],
        name='Difference')
    exp.index.name = '#SampleID'
    pdt.assert_series_equal(obs.sort_index(), exp.sort_index())
def test_first_differences_empty(self):
    """Metadata built from a zero-row frame must raise "Metadata is empty"."""
    # Two columns ('a', 'b') and no rows.
    zero_row_frame = pd.DataFrame({'a': [], 'b': []})
    with self.assertRaisesRegex(ValueError, "Metadata is empty"):
        # Metadata construction stays inside the guarded region.
        first_differences(
            metadata=qiime2.Metadata(zero_row_frame),
            state_column='Time',
            individual_id_column='ind',
            metric='Value',
            replicate_handling='drop',
        )