def test_create_col_names_numeric(data_raw):
    """Check `_create_col_names` when 'pid' and 'fruits' are expanded.

    The transformer is built with ``dummy_na=True`` and its private state is
    populated by hand, so only `_create_levels` and `_create_col_names` are
    exercised against the `data_raw` fixture.
    """
    # NOTE(review): another test named `test_create_col_names_numeric` appears
    # in this file; if both live in the same module, the later definition
    # shadows this one and this test is never collected -- confirm intent.
    expected_names = [
        'pid_a', 'pid_b', 'pid_c', 'pid_NaN',
        'fruits_1.0', 'fruits_3.0', 'fruits_NaN',
        'age',
    ]
    expected_unexpanded = ['pid', 'fruits', 'age']

    etl = DataFrameETL(
        cols_to_expand=['pid', 'fruits'],
        cols_to_drop=['djinn_type', 'animal'],
        dummy_na=True,
    )

    # Manually set the private attributes this test assigns before calling
    # the helpers under test.
    etl._is_numeric = dict.fromkeys(('pid', 'djinn_type', 'fruits'), 0)
    etl._nan_numeric = NAN_NUMERIC
    etl._nan_string = NAN_STRING
    etl._cols_to_drop = etl.cols_to_drop
    etl._cols_to_expand = etl.cols_to_expand
    etl.levels_ = etl._create_levels(data_raw)

    result = etl._create_col_names(data_raw)
    col_names, source_cols = result

    assert col_names == expected_names
    assert source_cols == expected_unexpanded
def test_create_col_names_no_dummy(data_raw):
    """Check `_create_col_names` with ``dummy_na=False``.

    'pid', 'djinn_type' and 'animal' are expanded and 'fruits' is dropped;
    the expected column list contains no ``*_NaN`` entries.
    """
    expected_names = [
        'pid_a', 'pid_b', 'pid_c',
        'djinn_type_effrit', 'djinn_type_marid', 'djinn_type_sila',
        'age',
        'animal_cat', 'animal_dog',
    ]
    expected_unexpanded = ['pid', 'djinn_type', 'age', 'animal']

    etl = DataFrameETL(
        cols_to_expand=['pid', 'djinn_type', 'animal'],
        cols_to_drop=['fruits'],
        dummy_na=False,
    )

    # Manually set the private attributes this test assigns before calling
    # the helpers under test.
    etl._nan_sentinel = NAN_STRING
    etl._cols_to_drop = etl.cols_to_drop
    etl._cols_to_expand = etl.cols_to_expand
    etl.levels_ = etl._create_levels(data_raw)

    result = etl._create_col_names(data_raw)
    col_names, source_cols = result

    assert col_names == expected_names
    assert source_cols == expected_unexpanded
def test_create_col_names_numeric(data_raw):
    """Check `_create_col_names` when 'pid' and 'fruits' are expanded.

    The transformer is built with ``dummy_na='expanded'``; levels and the
    unexpanded-NaN flags are computed from the `data_raw` fixture before the
    column names are generated.
    """
    # NOTE(review): another test named `test_create_col_names_numeric` appears
    # in this file; if both live in the same module, whichever is defined
    # later shadows the other -- confirm which one is meant to run.
    expected_names = [
        'pid_a', 'pid_b', 'pid_c', 'pid_NaN',
        'fruits_1.0', 'fruits_3.0', 'fruits_NaN',
        'age',
    ]
    expected_unexpanded = ['pid', 'fruits', 'age']

    etl = DataFrameETL(
        cols_to_expand=['pid', 'fruits'],
        cols_to_drop=['djinn_type', 'animal'],
        dummy_na='expanded',
    )

    # Manually set the private attributes this test assigns before calling
    # the helpers under test.
    etl._nan_sentinel = NAN_STRING
    etl._cols_to_drop = etl.cols_to_drop
    etl._cols_to_expand = etl.cols_to_expand
    etl._dummy_na = 'expanded'
    etl.levels_ = etl._create_levels(data_raw)
    etl._unexpanded_nans = etl._flag_unexpanded_nans(data_raw)

    result = etl._create_col_names(data_raw)
    col_names, source_cols = result

    assert col_names == expected_names
    assert source_cols == expected_unexpanded
def test_create_col_names(data_raw):
    """Check `_create_col_names` with ``dummy_na=True``.

    'pid', 'djinn_type' and 'animal' are expanded and 'fruits' is dropped;
    each expanded column is expected to end with a ``*_NaN`` entry.
    """
    expected_names = [
        'pid_a', 'pid_b', 'pid_c', 'pid_NaN',
        'djinn_type_effrit', 'djinn_type_marid', 'djinn_type_sila',
        'djinn_type_NaN',
        'age',
        'animal_cat', 'animal_dog', 'animal_NaN',
    ]
    expected_unexpanded = ['pid', 'djinn_type', 'age', 'animal']

    etl = DataFrameETL(
        cols_to_expand=['pid', 'djinn_type', 'animal'],
        cols_to_drop=['fruits'],
        dummy_na=True,
    )

    # Manually set the private attributes this test assigns before calling
    # the helpers under test.
    etl._is_numeric = dict.fromkeys(('pid', 'djinn_type', 'animal'), 0)
    etl._nan_numeric = NAN_NUMERIC
    etl._nan_string = NAN_STRING
    etl._cols_to_drop = etl.cols_to_drop
    etl._cols_to_expand = etl.cols_to_expand
    etl.levels_ = etl._create_levels(data_raw)

    result = etl._create_col_names(data_raw)
    col_names, source_cols = result

    assert col_names == expected_names
    assert source_cols == expected_unexpanded