def test_encode_nans(self, X, y, expected, columns):
    # Fits the encoder on a single 'cat' column built from X, checks the
    # transformed output against `expected` (3 decimal places), and then
    # inspects the mapping table the encoder stored for that column.
    # (Kept as comments rather than a docstring so the test runner's
    # reported test description is unchanged.)
    enc = WeightOfEvidenceEncoder(cols=['cat'])
    frame = pd.DataFrame(X, columns=['cat'])
    target = pd.Series(y)
    result = enc.fit_transform(frame, target)
    assert_array_almost_equal(result, pd.DataFrame(expected), decimal=3)

    # The fitted mapping must hold a DataFrame keyed by the column name.
    mapping = enc._mapping
    ok_('cat' in mapping)
    table = mapping['cat']
    ok_(isinstance(table, pd.DataFrame))

    # First index entry is the -99999 key; the remaining entries must match
    # the `columns` fixture.
    # NOTE(review): -99999 is presumably the placeholder used for missing
    # values -- confirm against the encoder implementation.
    eq_(table.index[0], -99999)
    assert_array_equal(table.index[1:], columns)
    assert_array_equal(table.columns, ['pos', 'count', 'neg', 'value'])
def test_encode_multiple_cols(self, X, y, expected):
    # Fits the encoder on two columns ('cat1', 'cat2') at once, checks the
    # transformed output against `expected` (2 decimal places), and then
    # verifies that a mapping table was stored for each column.
    # (Kept as comments rather than a docstring so the test runner's
    # reported test description is unchanged.)
    names = ['cat1', 'cat2']
    enc = WeightOfEvidenceEncoder(cols=['cat1', 'cat2'])
    frame = pd.DataFrame(X, columns=['cat1', 'cat2'])
    target = pd.Series(y)
    result = enc.fit_transform(frame, target)
    assert_array_almost_equal(result, pd.DataFrame(expected), decimal=2)

    mapping = enc._mapping

    # Each kind of check runs over both columns before moving to the next
    # kind, which keeps the assertion order the same as checking them one
    # statement at a time.
    for name in names:
        ok_(name in mapping)
    for name in names:
        ok_(isinstance(mapping[name], pd.DataFrame))

    # Index labels expected in each column's mapping table.
    expected_index = (['a', 'b'], ['bar', 'foo'])
    for name, labels in zip(names, expected_index):
        assert_array_equal(mapping[name].index, labels)

    # Every mapping table exposes the same four columns.
    for name in names:
        assert_array_equal(mapping[name].columns, ['pos', 'count', 'neg', 'value'])
'addr_state']) dumy_df = pd.DataFrame() dumy_df['Default_Binary'] = df.loan_status.isin([ 'Default', 'Charged Off', 'Late (31-120 days)', 'Does not meet the credit policy. Status:Charged Off' ]) dumy_df['Default_Binary'] = dumy_df.Default_Binary.astype(int) y = pd.Series(dumy_df.Default_Binary) encoder = WeightOfEvidenceEncoder(cols=['sub_grade', 'zip_code', 'addr_state']) df_woe_1 = encoder.fit_transform(df_woe, y) # Creating encoded data dataset frames = [encoded_df_2, df_woe_1] encoded_data = pd.concat(objs=frames, axis=1, join='outer', copy=False, sort=False) # Extracting used columns names frames = [df_encoder, df_woe] encoded_col = pd.concat(objs=frames, axis=1,