def test_check_sentinels(data_raw):
    """Verify that ``_check_sentinels`` replaces the seeded NaN sentinels.

    The expander's private state is populated by hand (instead of calling
    ``fit``) and its sentinels are seeded with ``'effrit'`` and ``1.0``.
    NOTE(review): these seeds are presumably values that occur in the
    ``data_raw`` fixture, forcing a collision -- confirm against the fixture.

    After ``_check_sentinels`` runs, the test expects that:

    * both sentinels differ from their seeded values, and
    * the string sentinel does not occur in the 'pid', 'djinn_type' or
      'animal' columns, and the numeric sentinel does not occur in 'fruits'.

    Parameters
    ----------
    data_raw : fixture
        Raw input data; indexed by column name and passed to
        ``_check_sentinels``.
    """
    expander = DataFrameETL(
        cols_to_expand=['pid', 'djinn_type', 'animal', 'fruits'])

    # fill in necessary parameters
    expander._nan_string = 'effrit'
    expander._nan_numeric = 1.0
    expander._is_numeric = {}
    expander.levels_ = {}
    expander._cols_to_drop = expander.cols_to_drop
    expander._cols_to_expand = expander.cols_to_expand
    for col in expander.cols_to_expand:
        expander._is_numeric[col] = expander._flag_numeric(
            pd.unique(data_raw[col]))

    expander._check_sentinels(data_raw)

    # Compare by value, not identity: ``is not`` against a str/float literal
    # only tests object identity, which depends on interpreter constant
    # caching and could pass even if an equal-valued sentinel were kept.
    assert expander._nan_string != 'effrit'
    assert expander._nan_numeric != 1.0

    # DataFrame comparison: the first .any() reduces over rows (one flag per
    # column), the second reduces over the columns.
    assert not (data_raw[['pid', 'djinn_type', 'animal']] ==
                expander._nan_string).any().any()
    # Single-column comparison yields a Series, so one .any() is enough.
    assert not (data_raw['fruits'] == expander._nan_numeric).any()
def test_flag_numeric():
    """Check the boolean returned by ``DataFrameETL._flag_numeric``.

    A list mixing an int with strings must yield exactly ``False``; a list
    of a float, an int and NaN must yield exactly ``True``.
    """
    etl = DataFrameETL()

    mixed_values = [1, 'a', 'b']
    numeric_values = [5.55, 0, np.nan]

    mixed_flag = etl._flag_numeric(mixed_values)
    # Identity checks are intentional: the result must be the bool singleton.
    assert mixed_flag is False

    numeric_flag = etl._flag_numeric(numeric_values)
    assert numeric_flag is True