Exemple #1
0
 def test_complex_sum_class(self):
     """Complex (non-simple) aggregation at 'class' level matches the stored CSV."""
     aggregated = aggregate_at_taxon_level(
         self.df,
         taxon_level='class',
         simple_aggregation=False,
         grouping_cols=self.grouping_cols,
     )
     # When the module-level flag is set, dump the fresh result to disk
     # (presumably to regenerate the expected fixture -- confirm with the
     # module that defines generate_output_files).
     if generate_output_files:
         aggregated.to_csv('complex_sum_class.csv', index=False)
     expected = pd.read_csv(self.complex_sum_class)
     frames_match = aggregated.equals(expected)
     self.assertTrue(frames_match)
Exemple #2
0
 def test_simple_sum(self):
     """Simple aggregation at 'genus' level matches the stored CSV."""
     aggregated = aggregate_at_taxon_level(
         self.df,
         taxon_level='genus',
         simple_aggregation=True,
         grouping_cols=self.grouping_cols,
     )
     # Optionally write the computed frame out when the module-level flag is on.
     if generate_output_files:
         aggregated.to_csv('simple_sum.csv', index=False)
     expected = pd.read_csv(self.simple_sum)
     frames_match = aggregated.equals(expected)
     self.assertTrue(frames_match)
Exemple #3
0
 def test_complex_keep_all_cols(self):
     """With keep_all_cols=True, complex genus aggregation drops only 'species'."""
     aggregated = aggregate_at_taxon_level(
         self.df,
         taxon_level='genus',
         simple_aggregation=False,
         grouping_cols=self.grouping_cols,
         keep_all_cols=True,
     )
     actual_cols = set(aggregated.columns)
     # Expected columns: everything in the input frame except 'species'.
     wanted_cols = set(self.df.columns)
     wanted_cols.remove('species')
     self.assertEqual(actual_cols, wanted_cols)
Exemple #4
0
 def test_simple_keep_all_cols(self):
     """With keep_all_cols=True, simple genus aggregation keeps 'genus' as the only taxon column."""
     aggregated = aggregate_at_taxon_level(
         self.df,
         taxon_level='genus',
         simple_aggregation=True,
         grouping_cols=self.grouping_cols,
         keep_all_cols=True,
     )
     actual_cols = set(aggregated.columns)
     # Expected columns: input columns minus every entry of self.taxon_cols,
     # with 'genus' put back in.
     non_taxon_cols = set(self.df.columns).difference(self.taxon_cols)
     wanted_cols = non_taxon_cols | {'genus'}
     self.assertEqual(actual_cols, wanted_cols)
Exemple #5
0
 def test_custom_dummy_values(self):
     """Simple genus aggregation with a caller-supplied dummy value matches the stored CSV.

     Works on a copy of the fixture in which every 'genus' entry equal to
     'g__' is replaced by 'CUSTOM', and passes that same value through
     ``dummy_values`` so the aggregation is told about it.
     """
     temp_df = self.df.copy()
     temp_df.loc[temp_df['genus'] == 'g__', 'genus'] = 'CUSTOM'
     # NOTE: the stray debugging print of temp_df was removed; it only
     # cluttered the test runner's output.
     out_df = aggregate_at_taxon_level(temp_df, taxon_level='genus',
                                       dummy_values={'genus': 'CUSTOM'},
                                       simple_aggregation=True,
                                       grouping_cols=self.grouping_cols)
     # Optionally write the computed frame out when the module-level flag is on.
     if generate_output_files:
         out_df.to_csv('simple_sum_custom_dummy.csv', index=False)
     exp_df = pd.read_csv(self.simple_sum_custom_dummy)
     self.assertTrue(out_df.equals(exp_df))
Exemple #6
0
 def test_not_all_required_grouping_cols_in_df(self):
     """A frame lacking the 'experiment_id' column makes the aggregation raise ValueError.

     'experiment_id' is presumably one of ``self.grouping_cols`` -- confirm
     against the fixture setup.
     """
     # Rename into a new frame instead of in place so the self.df fixture
     # keeps its original columns for any other test that shares it.
     df_without_col = self.df.rename(columns={'experiment_id': 'NONE'})
     with self.assertRaises(ValueError):
         aggregate_at_taxon_level(df_without_col, taxon_level='genus',
                                  simple_aggregation=True,
                                  grouping_cols=self.grouping_cols)
Exemple #7
0
 def test_no_taxon_level_in_df(self):
     """Requesting 'genus' aggregation on a frame with no 'genus' column raises ValueError."""
     # Rename into a new frame instead of in place so the self.df fixture
     # keeps its 'genus' column for any other test that shares it.
     df_without_genus = self.df.rename(columns={'genus': 'no_genus'})
     with self.assertRaises(ValueError):
         aggregate_at_taxon_level(df_without_genus, taxon_level='genus',
                                  simple_aggregation=True,
                                  grouping_cols=self.grouping_cols)
Exemple #8
0
 def test_unrecognized_taxon_level(self):
     """An unknown taxon_level name ('gibberish') makes the aggregation raise ValueError."""
     # Callable form of assertRaises: the remaining arguments are forwarded
     # to aggregate_at_taxon_level.
     self.assertRaises(
         ValueError,
         aggregate_at_taxon_level,
         self.df,
         taxon_level='gibberish',
         simple_aggregation=True,
         grouping_cols=self.grouping_cols,
     )
Exemple #9
0
 def test_unrecognized_dummy_value_key(self):
     """A dummy_values key that is not a known taxon level raises ValueError."""
     call_kwargs = {
         'taxon_level': 'genus',
         'dummy_values': {'gibberish': 'g__'},
         'simple_aggregation': True,
         'grouping_cols': self.grouping_cols,
     }
     with self.assertRaises(ValueError):
         aggregate_at_taxon_level(self.df, **call_kwargs)