Beispiel #1
0
 def test_calculate_bins_multiple_columns(self):
     """With several columns, bins are evenly spaced edges spanning the overall min and max.

     The expected edges in this test run from 1 to 4 in three equal steps.
     """
     histogram = Histogram(bins=3)
     # In the fixture DataFrame the lowest value is 1 and the highest is 4.
     frame = self.create_test_df()
     for column_name in ('value', 'value2'):
         histogram.add_column(frame.select(F.col(column_name)))
     expected_edges = [1, 2, 3, 4]
     self.assertListEqual(expected_edges, histogram._calculate_bins())
Beispiel #2
0
 def test_add_hist_single_column_sets_bin_list(self):
     """_add_hist should turn bin_list into a list when it starts out as a single number.

     With bins=2 the resulting bin list is expected to hold 3 entries.
     """
     histogram = Histogram(bins=2)
     value_column = self.create_test_df().select(F.col('value'))
     histogram.add_column(value_column)
     histogram.bin_list = histogram._calculate_bins()
     histogram._add_hist(value_column, 'value')
     entries_in_bin_list = len(histogram.bin_list)
     self.assertEqual(3, entries_in_bin_list)
Beispiel #3
0
 def test_add_hist_single_column(self):
     """_add_hist should store one entry in hist_dict, keyed by the column name.

     The entry is a list of bin values (the number of values that fall in each bin);
     for the 'value' column with two bins this test expects [1, 2].
     Per the original note, a number is appended to the key when several columns
     share a name; that case is not exercised here.
     """
     histogram = Histogram(bins=2)
     value_column = self.create_test_df().select(F.col('value'))
     histogram.add_column(value_column)
     histogram.bin_list = histogram._calculate_bins()
     histogram._add_hist(value_column, 'value')
     stored = histogram.hist_dict
     self.assertEqual(1, len(stored))
     self.assertListEqual([1, 2], stored['value'])
Beispiel #4
0
 def test_add_hist_multiple_column_rename_column(self):
     """Adding the same column name twice should yield two hist_dict entries, one of them renamed.

     After both 'value' columns are added, hist_dict is expected to hold two
     entries, one of which is stored under the key 'value (1)'.
     """
     hist = Histogram(bins=2)
     test_df = self.create_test_df()
     column_to_add = test_df.select(F.col('value'))
     column_to_add_2 = test_df.select(F.col('value'))
     hist.add_column(column_to_add)
     hist.add_column(column_to_add_2)
     hist.bin_list = hist._calculate_bins()
     hist._add_hist(column_to_add, 'value')
     hist._add_hist(column_to_add_2, 'value')
     self.assertEqual(2, len(hist.hist_dict))
     # assertIn reports the missing key and the actual container on failure,
     # whereas assertTrue(... in ...) would only say "False is not true".
     self.assertIn('value (1)', hist.hist_dict)
Beispiel #5
0
 def test_add_hist_multiple_column(self):
     """A second, differently named column should get its own list of bin values in hist_dict."""
     histogram = Histogram(bins=2)
     frame = self.create_test_df()
     first_column = frame.select(F.col('value'))
     second_column = frame.select(F.col('value2'))
     for column in (first_column, second_column):
         histogram.add_column(column)
     histogram.bin_list = histogram._calculate_bins()
     # Same order as registration: 'value' first, then 'value2'.
     for column, name in ((first_column, 'value'), (second_column, 'value2')):
         histogram._add_hist(column, name)
     self.assertEqual(2, len(histogram.hist_dict))
     self.assertListEqual([1, 2], histogram.hist_dict['value2'])
Beispiel #6
0
 def test_calculate_bins_single_column(self):
     """With one column and no min/max set, _calculate_bins should return just the number of bins."""
     requested_bins = 5
     histogram = Histogram(bins=requested_bins)
     single_column = self.create_test_df().select(F.col('value'))
     histogram.add_column(single_column)
     self.assertEqual(requested_bins, histogram._calculate_bins())
Beispiel #7
0
 def test_calculate_bins_bins_set(self):
     """Bin edges passed to the constructor as a list should come back as that same list of edges."""
     histogram = Histogram(bins=[1, 2, 3])
     calculated = histogram._calculate_bins()
     # A separate literal is used for the expectation so it shares no object with the input.
     self.assertListEqual([1, 2, 3], calculated)
Beispiel #8
0
 def test_calculate_bins(self):
     """With a range and a bin count set, bins should be evenly spaced edges from min to max."""
     histogram = Histogram(range=(5, 10), bins=2)
     expected_edges = [5, 7.5, 10]
     self.assertListEqual(expected_edges, histogram._calculate_bins())