Esempio n. 1
0
 def test_add_hist_single_column_sets_bin_list(self):
     """With ``bins=2`` and a single column, three bin boundaries are expected."""
     histogram = Histogram(bins=2)
     value_column = self.create_test_df().select(F.col('value'))
     histogram.add_column(value_column)

     # Assign the boundaries by hand before invoking the private _add_hist step.
     histogram.bin_boundaries = histogram._calculate_bins()
     histogram._add_hist(value_column, 'value')

     boundary_count = len(histogram.bin_boundaries)
     self.assertEqual(3, boundary_count)
Esempio n. 2
0
 def test_add_hist_single_column(self):
     """Adding one column should create exactly one ``hist_dict`` entry.

     The entry is keyed by the column name passed to ``_add_hist`` and holds
     the list of per-bin values; for the test frame that list is ``[1, 2]``.
     """
     histogram = Histogram(bins=2)
     value_column = self.create_test_df().select(F.col('value'))
     histogram.add_column(value_column)

     # Assign the boundaries by hand before invoking the private _add_hist step.
     histogram.bin_boundaries = histogram._calculate_bins()
     histogram._add_hist(value_column, 'value')

     entry_count = len(histogram.hist_dict)
     self.assertEqual(1, entry_count)
     # Look the key up only after the size check, so a missing key is
     # reported by the assertion above rather than as a KeyError.
     bin_values = histogram.hist_dict['value']
     self.assertListEqual([1, 2], bin_values)
Esempio n. 3
0
 def test_add_hist_multiple_column_rename_column(self):
     """Adding the same column name twice should store the second under a renamed key.

     Both selections use the ``value`` column and both are registered under
     the name ``'value'``; the test expects two entries in ``hist_dict``,
     one of them keyed ``'value (1)'``.
     """
     hist = Histogram(bins=2)
     test_df = self.create_test_df()
     column_to_add = test_df.select(F.col('value'))
     column_to_add_2 = test_df.select(F.col('value'))
     hist.add_column(column_to_add)
     hist.add_column(column_to_add_2)
     # Assign the boundaries by hand before invoking the private _add_hist step.
     hist.bin_boundaries = hist._calculate_bins()
     hist._add_hist(column_to_add, 'value')
     hist._add_hist(column_to_add_2, 'value')
     self.assertEqual(2, len(hist.hist_dict))
     # assertIn reports the missing key and the container on failure,
     # unlike assertTrue(... in ...), which only says "False is not true".
     self.assertIn('value (1)', hist.hist_dict)
Esempio n. 4
0
 def test_add_hist_multiple_column(self):
     """Two differently named columns should each get their own ``hist_dict`` entry.

     The entry for ``'value2'`` is expected to hold ``[1, 2]`` for the test frame.
     """
     histogram = Histogram(bins=2)
     frame = self.create_test_df()
     named_columns = (
         (frame.select(F.col('value')), 'value'),
         (frame.select(F.col('value2')), 'value2'),
     )

     # Register every column first, so the boundaries are calculated after
     # both have been added.
     for column, _ in named_columns:
         histogram.add_column(column)
     histogram.bin_boundaries = histogram._calculate_bins()

     for column, label in named_columns:
         histogram._add_hist(column, label)

     entry_count = len(histogram.hist_dict)
     self.assertEqual(2, entry_count)
     second_bin_values = histogram.hist_dict['value2']
     self.assertListEqual([1, 2], second_bin_values)