def test_cross_val_stack(self):
    """Run chaos_feature_importance on training rows stacked with a copy of themselves.

    The training split is duplicated, the original rows are tagged
    ``source == 0`` and the copy ``source == 1``, and both are concatenated
    into one frame. After the call, ``feat_dic`` must hold more entries than
    either tagged frame has columns.
    """
    x, y = DataGenerator.get_digits_data()
    # Cast two columns to str so the data contains some categorical columns.
    x['i63'] = x['i63'].map(str)
    x['i62'] = x['i62'].map(str)
    # Only the training part of the split is used by this test.
    x_train, _, y_train, _ = train_test_split(x, y, test_size=0.33, random_state=42)
    dic = {}
    # Copy before tagging so each frame carries its own 'source' value.
    x_shadow = x_train.copy()
    x_train.loc[:, 'source'] = 0
    x_shadow.loc[:, 'source'] = 1
    x_all = pd.concat([x_train, x_shadow])
    # Despite its name, the selector flags the source == 0 rows, i.e. the
    # original training rows that y_train was split alongside.
    shadow_selector = x_all['source'] == 0
    ChaosGeneration.chaos_feature_importance(
        x_all,
        y_train,
        shadow_selector,
        feat_dic=dic,
        feat_iter=10,
        nb_features=20,
        chaos_gen_iter=30,
    )
    self.assertGreater(len(dic), len(x_train.columns))
    self.assertGreater(len(dic), len(x_shadow.columns))
def chaosize(x_feat, x_train1, x_valid, y_feat, **chaos_args):
    """Run chaos feature generation over three stacked frames and split the result.

    The three frames are tagged with a ``'source'`` marker (0 for ``x_feat``,
    1 for ``x_train1``, 2 for ``x_valid``), concatenated, and handed to
    ``Cg.chaos_feature_importance`` together with ``y_feat`` and a boolean
    selector for the ``source == 0`` rows. The rows tagged 1 and 2 are then
    sliced back out of the combined frame with the marker column dropped.

    The return value of ``Cg.chaos_feature_importance`` is ignored, so it is
    presumably expected to modify the combined frame in place -- confirm
    against its implementation.

    Args:
        x_feat: DataFrame whose rows are selected for the feature-importance call.
        x_train1: DataFrame returned (after processing) as the first result.
        x_valid: DataFrame returned (after processing) as the second result.
        y_feat: Target passed through to ``Cg.chaos_feature_importance``.
        **chaos_args: Extra keyword arguments forwarded unchanged.

    Returns:
        Tuple ``(x_train_res, x_valid_res)``: the ``source == 1`` and
        ``source == 2`` rows of the combined frame, without ``'source'``.
    """
    # assign() tags copies instead of writing into the caller's frames: the
    # inputs do not gain a stray 'source' column, and passing the same frame
    # for two roles no longer lets the later tag overwrite the earlier one.
    x_all = pd.concat([
        x_feat.assign(source=0),
        x_train1.assign(source=1),
        x_valid.assign(source=2),
    ])
    Cg.chaos_feature_importance(x_all, y_feat, x_all['source'] == 0, **chaos_args)
    # Masks are rebuilt after the call so they reflect x_all as it is now.
    x_train_res = x_all[x_all['source'] == 1].drop(['source'], axis=1)
    x_valid_res = x_all[x_all['source'] == 2].drop(['source'], axis=1)
    return x_train_res, x_valid_res
def chaosize(x_feat, x_mirror, x_valid, y_feat, **chaos_args):
    """Run chaos feature generation over three stacked frames and split the result.

    The three frames are tagged with a ``'source'`` marker (0 for ``x_feat``,
    1 for ``x_mirror``, 2 for ``x_valid``), concatenated, and handed to
    ``Cg.chaos_feature_importance`` together with ``y_feat`` and a boolean
    selector for the ``source == 0`` rows. The rows tagged 1 and 2 are then
    sliced back out of the combined frame with the marker column dropped.

    The return value of ``Cg.chaos_feature_importance`` is ignored, so it is
    presumably expected to modify the combined frame in place -- confirm
    against its implementation.

    Args:
        x_feat: DataFrame whose rows are selected for the feature-importance call.
        x_mirror: DataFrame returned (after processing) as the first result.
        x_valid: DataFrame returned (after processing) as the second result.
        y_feat: Target passed through to ``Cg.chaos_feature_importance``.
        **chaos_args: Extra keyword arguments forwarded unchanged.

    Returns:
        Tuple ``(x_train_res, x_valid_res)``: the ``source == 1`` and
        ``source == 2`` rows of the combined frame, without ``'source'``.
    """
    # assign() tags copies instead of writing into the caller's frames: the
    # inputs do not gain a stray 'source' column, and passing the same frame
    # for two roles no longer lets the later tag overwrite the earlier one.
    x_all = pd.concat([
        x_feat.assign(source=0),
        x_mirror.assign(source=1),
        x_valid.assign(source=2),
    ])
    Cg.chaos_feature_importance(x_all, y_feat, x_all['source'] == 0, **chaos_args)
    # Masks are rebuilt after the call so they reflect x_all as it is now.
    x_train_res = x_all[x_all['source'] == 1].drop(['source'], axis=1)
    x_valid_res = x_all[x_all['source'] == 2].drop(['source'], axis=1)
    return x_train_res, x_valid_res
def test_cross_val_stack_none(self):
    """Run chaos_feature_importance on the training rows alone, with no stacked copy.

    Every training row is tagged ``source == 0``, so the selector passed to
    the call is true for all rows. After the call, ``feat_dic`` must hold
    more entries than the training frame has columns.
    """
    x, y = DataGenerator.get_digits_data()
    # Cast two columns to str so the data contains some categorical columns.
    x["i63"] = x["i63"].map(str)
    x["i62"] = x["i62"].map(str)
    # Only the training part of the split is used by this test.
    x_train, _, y_train, _ = train_test_split(x, y, test_size=0.33, random_state=42)
    dic = {}
    x_train.loc[:, "source"] = 0
    shadow_selector = x_train["source"] == 0
    ChaosGeneration.chaos_feature_importance(
        x_train, y_train, shadow_selector, feat_dic=dic, feat_iter=10, nb_features=20, chaos_gen_iter=30
    )
    # The column count is read after the call, on the frame that was passed in.
    self.assertGreater(len(dic), len(x_train.columns))
def test_handle_nocategoric_nonreg(self):
    """Non-regression check on digits data with no columns cast to str.

    The training split is tagged ``source == 0`` and passed as-is to
    ``ChaosGeneration.chaos_feature_importance``. Afterwards ``feat_dic``
    must hold more entries than the training frame has columns.
    """
    features, target = DataGenerator.get_digits_data()
    train_x, _test_x, train_y, _test_y = train_test_split(
        features, target, test_size=0.33, random_state=42
    )

    feature_dict = {}
    train_x.loc[:, 'source'] = 0
    # Every row carries source == 0, so this mask selects the whole frame.
    all_rows = train_x['source'] == 0

    ChaosGeneration.chaos_feature_importance(
        train_x,
        train_y,
        all_rows,
        feat_dic=feature_dict,
        feat_iter=10,
        nb_features=20,
        chaos_gen_iter=30,
    )

    # Counted after the call, on the same frame that was passed in.
    column_count = len(train_x.columns)
    self.assertGreater(len(feature_dict), column_count)