Esempio n. 1
0
    def test_bhattacharyya_no_column_not_in_both_crossover_cat_to_cont(self):
        """Mixed call: only 'cat_not_in_both' is null, every other entry has a value.

        The column name suggests it exists in just one of the two frames;
        that is defined by the fixtures, which are set up outside this test.
        """
        missing = 'cat_not_in_both'
        categorical = ['cat_same_dist', 'cat_diff_dist', missing]
        continuous = ['cont_same_dist', 'cont_diff_dist']
        d = sf.bhattacharyya(
            self.df_1,
            self.df_2,
            categorical_variables=categorical,
            continuous_variables=continuous,
        )

        null_mask = d.isnull()
        # The unmatched categorical column must come back as null ...
        self.assertTrue(null_mask[missing])
        # ... while none of the remaining entries may be null.
        self.assertTrue((~null_mask.drop(missing)).all())
Esempio n. 2
0
 def test_bhattacharyya_cat_distance(self):
     """Categorical-only call returns a Series indexed exactly by the requested columns.

     Pins the expected distances for the two categorical fixture columns
     (rounded to four decimals for the differing one).
     """
     var = ['cat_same_dist', 'cat_diff_dist']
     d = sf.bhattacharyya(self.df_1, self.df_2, categorical_variables=var)
     self.assertIsInstance(d, pd.Series)
     # Set equality covers both directions: every requested column is in the
     # result and the result has no unrequested columns.
     self.assertSetEqual(set(var), set(d.index))
     self.assertEqual(0.0, d['cat_same_dist'])
     self.assertEqual(0.0614, np.round(d['cat_diff_dist'], decimals=4))
Esempio n. 3
0
 def test_bhattacharyya_cont_distance(self):
     """Continuous-only call returns a Series indexed exactly by the requested columns.

     Pins the expected distances (rounded to four decimals) for the two
     continuous fixture columns.
     """
     var = ['cont_same_dist', 'cont_diff_dist']
     d = sf.bhattacharyya(self.df_1, self.df_2, continuous_variables=var)
     self.assertIsInstance(d, pd.Series)
     # Set equality covers both directions: every requested column is in the
     # result and the result has no unrequested columns.
     self.assertSetEqual(set(var), set(d.index))
     self.assertEqual(0.0138, np.round(d['cont_same_dist'], decimals=4))
     self.assertEqual(0.4205, np.round(d['cont_diff_dist'], decimals=4))
Esempio n. 4
0
 def test_bhattacharyya_full_calculation(self):
     """Categorical and continuous columns passed together.

     The result must be a Series whose index is exactly the union of both
     lists, with the same pinned distances as the single-kind tests.
     """
     cat = ['cat_same_dist', 'cat_diff_dist']
     cont = ['cont_same_dist', 'cont_diff_dist']
     d = sf.bhattacharyya(self.df_1, self.df_2, categorical_variables=cat, continuous_variables=cont)
     self.assertIsInstance(d, pd.Series)
     # All requested columns are present and nothing else is: the index,
     # viewed as a set, equals the union of the two input lists.
     self.assertSetEqual(set(cat) | set(cont), set(d.index))
     self.assertEqual(0.0, d['cat_same_dist'])
     self.assertEqual(0.0614, np.round(d['cat_diff_dist'], decimals=4))
     self.assertEqual(0.0138, np.round(d['cont_same_dist'], decimals=4))
     self.assertEqual(0.4205, np.round(d['cont_diff_dist'], decimals=4))
Esempio n. 5
0
 def test_bhattacharyya_cat_column_not_in_both(self):
     """Categorical call including 'cat_not_in_both' still reports that column, as null.

     The result index must match the requested columns exactly, and the
     entry for 'cat_not_in_both' must be null.
     """
     var = ['cat_same_dist', 'cat_diff_dist', 'cat_not_in_both']
     d = sf.bhattacharyya(self.df_1, self.df_2, categorical_variables=var)
     self.assertIsInstance(d, pd.Series)
     # Set equality covers both directions: every requested column is in the
     # result and the result has no unrequested columns.
     self.assertSetEqual(set(var), set(d.index))
     self.assertTrue(d.isnull()['cat_not_in_both'])
Esempio n. 6
0
 def test_bhattacharyya_cat_str_input(self):
     """A single categorical column given as a plain string is accepted.

     The result must be a Series containing that column, with distance 0.0.
     """
     var = 'cat_same_dist'
     d = sf.bhattacharyya(self.df_1, self.df_2, categorical_variables=var)
     self.assertIsInstance(d, pd.Series)
     self.assertIn(var, d.index)
     self.assertEqual(0.0, d[var])
Esempio n. 7
0
 def test_bhattacharyya_wrong_number_of_integration_points(self):
     """Passing continuous_integration_points=1024 raises AssertionError.

     1024 is 2**10, not 2**n+1, so the call must fail with the exact
     message asserted below.
     """
     cont = ['cont_same_dist', 'cont_diff_dist']
     with self.assertRaises(AssertionError) as ctx:
         # The call is expected to raise, so its return value is not bound.
         sf.bhattacharyya(self.df_1, self.df_2, continuous_variables=cont, continuous_integration_points=1024)
     self.assertEqual('The number of integration points must be 2**n+1 where n is a non-negative integer',
                      str(ctx.exception))
Esempio n. 8
0
 def test_bhattacharyya_no_columns_passed(self):
     """With neither variable list supplied, the call must return None."""
     result = sf.bhattacharyya(self.df_1, self.df_2)
     self.assertIsNone(result)
Esempio n. 9
0
 def test_bhattacharyya_wrong_type_continuous_variables(self):
     """An integer for continuous_variables raises AssertionError with a fixed message."""
     expected_message = "continuous_variables must be a string, a list or None"
     with self.assertRaises(AssertionError) as raised:
         sf.bhattacharyya(self.df_1, self.df_2, continuous_variables=123)
     self.assertEqual(expected_message, str(raised.exception))
Esempio n. 10
0
 def test_bhattacharyya_wrong_type_df_2(self):
     """Passing the self.str_2 fixture as the second frame raises AssertionError.

     NOTE(review): self.str_2 is presumably a string fixture; it is defined
     outside this test.
     """
     expected_message = "df_2 must be a pandas DataFrame"
     with self.assertRaises(AssertionError) as raised:
         sf.bhattacharyya(self.df_1, self.str_2)
     self.assertEqual(expected_message, str(raised.exception))
Esempio n. 11
0
 def test_bhattacharyya_missing_one_input(self):
     """Calling bhattacharyya with no arguments at all raises TypeError."""
     # Callable form of assertRaises: invokes sf.bhattacharyya() with no args.
     self.assertRaises(TypeError, sf.bhattacharyya)
Esempio n. 12
0
 def test_bhattacharyya_cont_non_numeric_column(self):
     """A string-valued column requested as continuous raises AssertionError.

     A 'cont_not_in_both' column of random letters is added to a copy of
     df_2 only; the letters themselves do not affect the outcome.
     """
     letters = np.random.choice(['a','b','c'], size=self.df_2.shape[0])
     df_with_text = self.df_2.assign(cont_not_in_both=letters)
     requested = ['cont_same_dist', 'cont_diff_dist', 'cont_not_in_both']
     with self.assertRaises(AssertionError) as raised:
         sf.bhattacharyya(self.df_1, df_with_text, continuous_variables=requested)
     self.assertEqual('All continuous variables must be numerical', str(raised.exception))
Esempio n. 13
0
 def test_bhattacharyya_cont_str_input(self):
     """A single continuous column given as a plain string is accepted.

     The result must be a Series containing that column, with the pinned
     distance once rounded to four decimals.
     """
     var = 'cont_same_dist'
     d = sf.bhattacharyya(self.df_1, self.df_2, continuous_variables=var)
     self.assertIsInstance(d, pd.Series)
     self.assertIn(var, d.index)
     self.assertEqual(0.0138, np.round(d[var], decimals=4))