Esempio n. 1
0
 def test_eval_type_result_1(self):
     """Check the type array returned by ``data.eval_type``.

     The frame has two rows: numeric strings plus a NaN, and plain
     letters.  The returned types are expected to be ``[float, object]``.
     """
     columns = {
         'sample_1': ['24', 'a'],
         'sample_2': ['13.6', 'b'],
         'sample_3': [np.nan, 'a'],
     }
     frame = pd.DataFrame(data=columns)
     # Only the second element of the returned pair matters here.
     _unused_frame, inferred_types = data.eval_type(frame)
     expected_types = np.array([float, object])
     self.assertTrue(compare_numpy_array(inferred_types, expected_types))
Esempio n. 2
0
 def test_dicretize_vector_continuous_jenks(self):
     """Check ``data.discretize_vector`` with ``func='jenks'`` and 3 bins.

     The two NaN entries of the input are expected to receive label 3,
     the remaining values labels 0, 1 and 2.
     """
     values = np.array([
         1, 1, 1, 1, 1, 1,
         2, 2, 2,
         3, 3, 3,
         np.nan,
         4, 4, 4, 4,
         5, 5, 5,
         6, 6,
         np.nan,
     ])
     expected_labels = np.array([
         0, 0, 0, 0, 0, 0,
         1, 1, 1,
         1, 1, 1,
         3,
         2, 2, 2, 2,
         2, 2, 2,
         2, 2,
         3,
     ])
     labels = data.discretize_vector(values, func='jenks', num_bins=3)
     self.assertTrue(compare_numpy_array(labels, expected_labels))
Esempio n. 3
0
 def test_eval_type_result_3(self):
     """Check the second row of the frame returned by ``data.eval_type``.

     That row is expected to read ``['a', 'b', 'a']``, the same letters
     that were put into the input frame.
     """
     columns = {
         'sample_1': ['24', 'a'],
         'sample_2': ['13.6', 'b'],
         'sample_3': [np.nan, 'a'],
     }
     frame = pd.DataFrame(data=columns)
     # Only the first element of the returned pair matters here.
     converted_frame, _unused_types = data.eval_type(frame)
     second_row = converted_frame.iloc[1].to_numpy()
     expected_row = np.array(['a', 'b', 'a'])
     self.assertTrue(compare_numpy_array(second_row, expected_row))
Esempio n. 4
0
 def test_transform_log_2(self):
     """Check the object-typed row after ``data.transform`` with ``'log'``.

     The second row is declared as ``object`` in the types array and is
     expected to still read ``['a', 'b', 'a']`` in the result.
     """
     columns = {
         'sample_1': [24, 'a'],
         'sample_2': [13.6, 'b'],
         'sample_3': [np.nan, 'a'],
     }
     frame = pd.DataFrame(data=columns)
     row_types = np.array([float, object])
     transformed = data.transform(frame, row_types, ['log'])
     second_row = transformed.iloc[1].to_numpy()
     expected_row = np.array(['a', 'b', 'a'])
     self.assertTrue(compare_numpy_array(second_row, expected_row))
Esempio n. 5
0
 def test_transform_sqrt(self):
     """Check the float-typed row after ``data.transform`` with ``'sqrt'``.

     The first row of the result is expected to equal the signed square
     root ``sqrt(|x|) * sign(x)`` of the input values.
     """
     columns = {
         'sample_1': [24, 'a'],
         'sample_2': [-13.6, 'b'],
         'sample_3': [np.nan, 'a'],
     }
     frame = pd.DataFrame(data=columns)
     row_types = np.array([float, object])
     numeric_row = np.array([24, -13.6, np.nan])
     transformed = data.transform(frame, row_types, ['sqrt'])
     first_row = transformed.iloc[0].to_numpy()
     # Reference value: magnitude goes through sqrt, sign is restored after.
     signed_root = np.sqrt(np.abs(numeric_row)) * np.sign(numeric_row)
     self.assertTrue(compare_numpy_array(first_row, signed_root))
Esempio n. 6
0
 def test_transform_multiple_functions(self):
     """Check ``data.transform`` with the list ``['zscore', 'sqrt']``.

     The expected first row is the z-score of the numeric values (NaN
     omitted from the statistics) passed through the signed square root
     ``sqrt(|x|) * sign(x)``.
     """
     columns = {
         'sample_1': [24, 'a'],
         'sample_2': [-13.6, 'b'],
         'sample_3': [np.nan, 'a'],
         'sample_4': [10, 'a'],
     }
     frame = pd.DataFrame(data=columns)
     row_types = np.array([float, object])
     transformed = data.transform(frame, row_types, ['zscore', 'sqrt'])
     # Build the reference value step by step: standardize, then root.
     numeric_row = np.array([24, -13.6, np.nan, 10])
     standardized = zscore(numeric_row, nan_policy='omit')
     signed_root = np.sqrt(np.abs(standardized)) * np.sign(standardized)
     first_row = transformed.iloc[0].to_numpy()
     self.assertTrue(compare_numpy_array(first_row, signed_root))
Esempio n. 7
0
 def test_discretize_vector_categorical_missing_data(self):
     """Check ``data.discretize_vector`` on categories with missing values.

     Both missing entries are expected to share label 3; the letters get
     the labels 0, 1, 2, 4 and 5.
     """
     # NOTE(review): numpy builds a string array from this list, so the
     # NaN entries are stored as the text 'nan' -- confirm that this is
     # the intended way to model missing categorical data.
     categories = np.array([
         'a', 'c', 'b', 'b', 'c', np.nan,
         'a', 'd', 'a', 'd', 'e', np.nan,
     ])
     labels = data.discretize_vector(categories, ar_type=object)
     expected_labels = np.array([
         0, 1, 2, 2, 1, 3,
         0, 4, 0, 4, 5, 3,
     ])
     self.assertTrue(compare_numpy_array(labels, expected_labels))
Esempio n. 8
0
 def test_discretize_vector_categorical(self):
     """Check ``data.discretize_vector`` on purely categorical input.

     The expected labels number the letters in order of first appearance:
     a -> 0, c -> 1, b -> 2, d -> 3, e -> 4.
     """
     categories = np.array([
         'a', 'c', 'b', 'b', 'c', 'b',
         'a', 'd', 'a', 'd', 'e',
     ])
     labels = data.discretize_vector(categories, ar_type=object)
     expected_labels = np.array([
         0, 1, 2, 2, 1, 2,
         0, 3, 0, 3, 4,
     ])
     self.assertTrue(compare_numpy_array(labels, expected_labels))