def test_expand_col_no_dummy(data_raw):
    """Check ``_expand_col`` on 'pid' and 'animal' with ``dummy_na=None``.

    The transformer is fit on ``data_raw`` with 'fruits' dropped and
    ``fill_value=-1.0``. 'pid' is expected to expand to three indicator
    columns, and 'animal' to two, with the third 'animal' row holding
    -1.0 in every position.
    """
    etl = DataFrameETL(
        cols_to_drop=['fruits'],
        dummy_na=None,
        fill_value=-1.0,
    )
    etl.fit(data_raw)

    expected_by_column = (
        ('pid', np.array([[1., 0., 0.],
                          [0., 1., 0.],
                          [0., 0., 1.]])),
        ('animal', np.array([[1., 0.],
                             [0., 1.],
                             [-1., -1.]])),
    )
    for column, expected in expected_by_column:
        actual = etl._expand_col(data_raw, column)
        assert_almost_equal(actual, expected)
def test_expand_col(data_raw):
    """Check ``_expand_col`` on 'pid' and 'animal' with ``dummy_na='expanded'``.

    The transformer is fit on ``data_raw`` with 'fruits' dropped and
    ``fill_value=-1.0``. Both expected arrays carry one extra trailing
    column compared with the plain indicator columns: all zeros for
    'pid', and 1.0 only in the third row for 'animal', whose other
    entries in that row are -1.0.
    """
    etl = DataFrameETL(
        cols_to_drop=['fruits'],
        dummy_na='expanded',
        fill_value=-1.0,
    )
    etl.fit(data_raw)

    # should expand even if there are no NaNs
    pid_expected = np.array([[1., 0., 0., 0.],
                             [0., 1., 0., 0.],
                             [0., 0., 1., 0.]])
    assert_almost_equal(etl._expand_col(data_raw, 'pid'), pid_expected)

    animal_expected = np.array([[1., 0., 0.],
                                [0., 1., 0.],
                                [-1., -1., 1.]])
    assert_almost_equal(etl._expand_col(data_raw, 'animal'), animal_expected)
def test_expand_col_few_levels_no_dummy(data_few_levels, few_levels_expected):
    """Check ``_expand_col`` against the ``few_levels_expected`` fixture.

    The transformer is fit on ``data_few_levels`` with 'pid', 'fruits'
    and 'animal' expanded, ``dummy_na=False`` and ``fill_value=99.``.
    For each source column, the expanded array must match the listed
    columns of ``few_levels_expected`` converted to an array.
    """
    etl = DataFrameETL(
        cols_to_expand=['pid', 'fruits', 'animal'],
        dummy_na=False,
        fill_value=99.,
    )
    etl.fit(data_few_levels)

    # source column -> names of the expected output columns, in order
    expected_columns = (
        ('pid', ['pid_NaN', 'pid_a']),
        ('fruits', ['fruits_1.0']),
        ('animal', ['animal_cat']),
    )
    for source_col, output_cols in expected_columns:
        actual = etl._expand_col(data_few_levels, source_col)
        expected = np.asarray(few_levels_expected[output_cols])
        assert_almost_equal(actual, expected)
def test_expand_col_numeric_no_dummy(data_raw):
    """Check ``_expand_col`` on 'fruits' with ``dummy_na=False``.

    The transformer is fit on ``data_raw`` with 'pid', 'djinn_type' and
    'animal' dropped, 'fruits' explicitly expanded, and
    ``fill_value=np.nan``. The expected result has two indicator
    columns, with the second row entirely NaN.
    """
    etl = DataFrameETL(
        cols_to_drop=['pid', 'djinn_type', 'animal'],
        cols_to_expand=['fruits'],
        dummy_na=False,
        fill_value=np.nan,
    )
    etl.fit(data_raw)

    expected = np.array([[1., 0.],
                         [np.nan, np.nan],
                         [0., 1.]])
    actual = etl._expand_col(data_raw, 'fruits')
    assert_almost_equal(actual, expected)