def drop_weak_columns(ctx, feature_selector=None):
    import numpy as np
    import sklearn.feature_selection as fs

    # Default to a family-wise-error-rate test matched to the task type.
    if feature_selector is None and ctx.is_regression:
        feature_selector = fs.SelectFwe(fs.f_regression)
    if feature_selector is None and ctx.is_classification:
        feature_selector = fs.SelectFwe(fs.f_classif)

    X, y = ctx.training_data()
    X = X._float_array()
    feature_selector.fit(X, y)

    # get_support() is True for kept columns; invert it to find the weak ones.
    weak_cols = np.where(np.invert(feature_selector.get_support()))[0]

    # For now, ignore the selector if it wants to drop every column.
    if 0 < len(weak_cols) < len(ctx.matrix.columns):
        _drop_weak_columns(ctx, weak_cols.tolist())
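# For context: a minimal, self-contained sketch (synthetic data, not from the
# codebase above) of the get_support() mask semantics that drop_weak_columns
# relies on.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectFwe, f_classif

X_demo, y_demo = make_classification(n_samples=200, n_features=10,
                                     n_informative=3, random_state=0)
selector = SelectFwe(f_classif, alpha=0.05).fit(X_demo, y_demo)
kept = selector.get_support()    # boolean mask, True = column survives the FWE test
print(np.where(~kept)[0])        # indices the selector would drop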
def get_fs_model(model, method, train, target=None, cv=None):
    """Wrap the given model in the specified feature-selection method and
    return the fitted or assembled structure."""
    import sys
    import sklearn.feature_selection as fs_scikit
    from sklearn.feature_selection import SelectFromModel
    from sklearn.pipeline import Pipeline
    from sklearn.svm import LinearSVC

    if method == "RFE":
        model = fs_scikit.RFE(model, n_features_to_select=2, step=5)
        # RFE needs a target to rank features against.
        if target is not None:
            return model.fit(train, target)
        else:
            return model.fit(train)
    elif method == "RFECV":
        model = fs_scikit.RFECV(model, step=3, cv=cv)
        if target is not None:
            return model.fit(train, target)
        else:
            return model.fit(train)
    elif method == "linearSVC":
        sel = SelectFromModel(LinearSVC(penalty='l1', dual=False))
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    elif method == "fromModel":
        fm = fs_scikit.SelectFromModel(model)
        if target is not None:
            fm.fit(train, target)
        else:
            fm.fit(train)
        model = Pipeline([('feature_selection', fm), ('data_mining', model)])
    # elif method == "Anova":
    #     # ANOVA SVM-C
    #     anova_filter = fs_scikit.SelectKBest(f_regression, k=5)
    #     model = Pipeline([
    #         ('feature_selection', anova_filter),
    #         ('data_mining', model)
    #     ])
    elif method == "VarianceThreshold":
        sel = fs_scikit.VarianceThreshold(threshold=(.8 * (1 - .8)))
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    elif method == "SelectPercentile":
        sel = fs_scikit.SelectPercentile(fs_scikit.f_classif, percentile=30)
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    elif method == "SelectFpr":
        sel = fs_scikit.SelectFpr(alpha=0.2)
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    elif method == "SelectFdr":
        sel = fs_scikit.SelectFdr(alpha=0.2)
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    elif method == "SelectFwe":
        sel = fs_scikit.SelectFwe(alpha=0.2)
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    elif method == "ch2":
        # chi-squared test ("ch2" is the method key used by callers)
        sel = fs_scikit.SelectKBest(fs_scikit.chi2, k=2)
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    else:
        print("Feature selection method was not found: " + method)
        sys.exit(1)
    return model
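# Note the asymmetry above: the "RFE", "RFECV", and "fromModel" branches return
# an already-fitted object, while the Pipeline branches return an unfitted
# pipeline that still needs .fit(). Hypothetical usage (names assumed, not from
# the original project):
#
#   from sklearn.ensemble import RandomForestClassifier
#   model = get_fs_model(RandomForestClassifier(), "SelectFwe", X_train, y_train)
#   model.fit(X_train, y_train)
#   preds = model.predict(X_test)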
def selection_process(b_data, labels):
    # Assumes module-level helpers: benchmark(), RANDOM_SEED, and the usual
    # sklearn imports (ms = sklearn.model_selection, fs = sklearn.feature_selection).
    # scatter_variables(all_data, 'MONTH')
    # n_data = normalize(b_data)
    # The idea is to compare possible outcome combinations.
    X_train, X_test, y_train, y_test = ms.train_test_split(
        b_data, labels, test_size=0.33, random_state=RANDOM_SEED,
        stratify=labels)

    # This works:
    # clf = Pipeline([('RandomForestClassifier',
    #                  RandomForestClassifier(n_estimators=100, n_jobs=-1,
    #                                         random_state=RANDOM_SEED))])
    # results = benchmark(clf, 'RandomForestClassifier',
    #                     X_train, y_train, X_test, y_test)
    # for i in all_data.columns.values:
    #     print(all_data[i].min(), '-', all_data[i].max())
    # clf = svm_classify(X_train, y_train)
    # y_pred = clf.predict(X_test)
    # print(classification_report(y_test, y_pred))

    # This doesn't work:
    # attrib = attributes_selection(b_data, labels, k=50, invariant=True,
    #                               function=f_classif)  # mutual_info_classif
    # red_data = b_data[attrib[0]]
    # n_data = normalize(red_data)
    # X_train, X_test, y_train, y_test = ms.train_test_split(
    #     n_data, labels, test_size=0.33, random_state=RANDOM_SEED)
    # results = benchmark(clf, X_train, y_train, X_test, y_test)
    # sel_clf = svm_classify(X_train, y_train)
    # sel_y_pred = sel_clf.predict(X_test)
    # print(classification_report(y_test, sel_y_pred))

    # NB: SelectFwe expects a score function that returns p-values (e.g.
    # f_classif); mutual_info_classif returns scores only, so that entry
    # will fail at fit time.
    for sel_name, slt in (
            ('LinearSVCselection',
             SelectFromModel(LinearSVC(penalty="l1", dual=False))),
            ('SelectKBest', SelectKBest(mutual_info_classif, k=50)),
            ('SelectFwe', fs.SelectFwe(mutual_info_classif, alpha=0.05)),
    ):
        print('&' * 80)
        print(sel_name)
        results = []
        for clf_name, clf in (
                ("RidgeClassifier", RidgeClassifier(tol=1e-2, solver="lsqr")),
                ("Perceptron", Perceptron(max_iter=50)),
                ("PassiveAggressive",
                 PassiveAggressiveClassifier(max_iter=50)),
                ("kNN", KNeighborsClassifier(n_neighbors=10)),
                ("RandomForest", RandomForestClassifier(n_estimators=100))):
            print('=' * 80)
            print(clf_name)
            # Pipeline steps must be (name, estimator) pairs; build a fresh
            # pipeline per selector/classifier pair instead of mutating one.
            pip = Pipeline([(sel_name, slt), (clf_name, clf)])
            results.append(
                benchmark(pip, clf_name, X_train, y_train, X_test, y_test))
def select_best():
    df = pd.merge(
        acw.gen_long_data(tpt)
        .normalize(columns="metric")
        .add_net_meta(tpt.net_hierarchy(HierarchyName.RESTRICTED_PERIPHERY_CORE))
        .groupby(["task", "subject", "region", "net_meta"]).mean().reset_index()
        .rename(columns={"metric": "acw"}),
        acz.gen_long_data(tpt)
        .normalize(columns="metric")
        .add_net_meta(tpt.net_hierarchy(HierarchyName.RESTRICTED_PERIPHERY_CORE))
        .groupby(["task", "subject", "region", "net_meta"]).mean().reset_index()
        .rename(columns={"metric": "acz"}),
        on=["task", "subject", "region", "net_meta"],
        sort=False).and_filter(NOTnet_meta="M")
    X = df.iloc[:, -2:].values
    y = df.net_meta.map({"C": 0, "P": 1}).values

    # Caveats: chi2 requires non-negative features, and SelectFdr/SelectFpr/
    # SelectFwe need a score function that returns p-values, which
    # mutual_info_classif does not provide.
    functions = [fs.mutual_info_classif, fs.f_classif, fs.chi2]
    for func in functions:
        for method in [fs.SelectKBest(func, k=1), fs.SelectPercentile(func),
                       fs.SelectFdr(func), fs.SelectFpr(func),
                       fs.SelectFwe(func)]:
            method.fit(X, y)
            # Report the 1-based index of the highest-scoring feature.
            print(f'{str(method).split("(")[0]} {func.__name__}: '
                  f'{np.argmax(method.scores_) + 1}')
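# A minimal, synthetic illustration (not the data above) of the scores_
# attribute that the loop ranks with np.argmax:
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest, f_classif

X_demo, y_demo = make_classification(n_samples=100, n_features=2,
                                     n_informative=2, n_redundant=0,
                                     random_state=0)
sel = SelectKBest(f_classif, k=1).fit(X_demo, y_demo)
print(sel.scores_)  # one ANOVA F-statistic per column; argmax picks the best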
def get_feature(n):
    import numpy as np
    import sklearn.feature_selection as fs
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import StratifiedKFold

    x_train, y_train = train_data(n)
    # x_test, y_test = test_data()
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    # x_test = np.array(x_test)
    # y_test = np.array(y_test)
    kf = StratifiedKFold(n_splits=5, shuffle=True)
    p = 0
    # y_train = np.ravel(y_train)
    # res = sm.Logit(y_train, x_train).fit(method='bfgs')
    # print(res.summary())
    a = []
    for train_index, test_index in kf.split(x_train, y_train):
        X_train1, X_test1 = x_train[train_index], x_train[test_index]
        y_train1, y_test1 = y_train[train_index], y_train[test_index]
        # Fit the selector inside the fold to avoid selection leakage.
        select_feature = fs.SelectFwe()
        select_feature.fit(X_train1, y_train1)
        print(select_feature.get_support(True))
        # Reduce both splits to the selected columns.
        X_train1 = select_feature.transform(X_train1)
        X_test1 = select_feature.transform(X_test1)
        # y_train = np.ravel(y_train)
        clf = LogisticRegression()
        clf.fit(X_train1, y_train1)
        y_pred = clf.predict(X_test1)
        a.append(np.mean(y_pred == y_test1))
        p = p + np.mean(y_pred == y_test1)
    print(a)
    print("accuracy:")  # mean fraction of correct predictions, not precision
    print(p / 5.0)
    # Note: returns the selector fitted on the last fold only.
    return select_feature
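# Equivalent, more compact sketch (an aside, not from the original project),
# assuming the same x_train / y_train arrays as above. A Pipeline refits the
# selector inside every fold, which is exactly what the loop above does by hand.
from sklearn.feature_selection import SelectFwe
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline

pipe = Pipeline([("select", SelectFwe()), ("clf", LogisticRegression())])
scores = cross_val_score(pipe, x_train, y_train,
                         cv=StratifiedKFold(n_splits=5, shuffle=True))
print(scores.mean())  # per-fold accuracy, no selection leakage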
_feature_selectors = []
_feature_selectors.append(
    (feature_selection.SelectKBest(k=1),
     pd_feature_selection.SelectKBest(k=1),
     True))
_feature_selectors.append(
    (feature_selection.SelectKBest(k=1),
     pickle.loads(pickle.dumps(pd_feature_selection.SelectKBest(k=1))),
     True))
_feature_selectors.append(
    (feature_selection.SelectKBest(k=2),
     pd_feature_selection.SelectKBest(k=2),
     True))
_feature_selectors.append(
    (feature_selection.SelectPercentile(),
     pd_feature_selection.SelectPercentile(),
     True))
_feature_selectors.append(
    (feature_selection.SelectFdr(),
     pd_feature_selection.SelectFdr(),
     True))
_feature_selectors.append(
    (feature_selection.SelectFwe(),
     pd_feature_selection.SelectFwe(),
     True))
# Tmp Ami
if False:
    _feature_selectors.append(
        (feature_selection.RFE(linear_model.LogisticRegression()),
         pd_feature_selection.RFE(pd_linear_model.LogisticRegression()),
         True))

_keras_estimators = []
if _level > 0:
    _keras_estimators.append(
        (KerasClassifier(_build_classifier_nn, verbose=0),
         PdKerasClassifier(_build_classifier_nn,
                           _load_iris()[0]['class'].unique(),
                           verbose=0),
         False))
    _keras_estimators.append(
        (KerasRegressor(_build_regressor_nn, verbose=0),
         # The source snippet was truncated here; the continuation below is
         # reconstructed by symmetry with the classifier entry above.
         PdKerasRegressor(_build_regressor_nn, verbose=0),
         False))
#
# MAIN
#
synonyms_filepath = io_utils.get_synonyms_filepath()

UNIVARIATE = {
    "uv_kbest_def": feature_selection.SelectKBest(f_classif, k=10),
    "uv_kbest_chi2_def": feature_selection.SelectKBest(chi2, k=10),
    "uv_percentile_def": feature_selection.SelectPercentile(f_classif,
                                                            percentile=10),
    "uv_fpr_def": feature_selection.SelectFpr(f_classif),
    "uv_fwe_def": feature_selection.SelectFwe(f_classif)
}

print("Preparing Train Collection")
X_train, y_train = create_train_data(io_utils.get_train_vectors_list())

print("Preparing Test Collection")
X_test, test_collections = create_test_data(io_utils.get_train_vectors_list())

# Univariate selection: fit on the train split, then transform both splits.
for univariate_model_name in UNIVARIATE:
    model = UNIVARIATE[univariate_model_name]
    model.fit(X_train, y_train)
    X_train_new = model.transform(X_train)
    X_test_new = model.transform(X_test)
    for method_name in CLASSIFIERS:
        name = "{}_{}".format(method_name, univariate_model_name)
def _eval_search_params(params_builder):
    search_params = {}

    for p in params_builder['param_set']:
        search_list = p['sp_list'].strip()
        if search_list == '':
            continue

        param_name = p['sp_name']
        if param_name.lower().endswith(NON_SEARCHABLE):
            print("Warning: `%s` is not eligible for search and was "
                  "omitted!" % param_name)
            continue

        if not search_list.startswith(':'):
            safe_eval = SafeEval(load_scipy=True, load_numpy=True)
            ev = safe_eval(search_list)
            search_params[param_name] = ev
        else:
            # A leading `:` before the search list asks for estimator evaluation.
            safe_eval_es = SafeEval(load_estimators=True)
            search_list = search_list[1:].strip()
            # TODO: maybe add a regular-expression check
            ev = safe_eval_es(search_list)
            preprocessings = (
                preprocessing.StandardScaler(), preprocessing.Binarizer(),
                preprocessing.MaxAbsScaler(), preprocessing.Normalizer(),
                preprocessing.MinMaxScaler(),
                preprocessing.PolynomialFeatures(),
                preprocessing.RobustScaler(),
                feature_selection.SelectKBest(),
                feature_selection.GenericUnivariateSelect(),
                feature_selection.SelectPercentile(),
                feature_selection.SelectFpr(),
                feature_selection.SelectFdr(),
                feature_selection.SelectFwe(),
                feature_selection.VarianceThreshold(),
                decomposition.FactorAnalysis(random_state=0),
                decomposition.FastICA(random_state=0),
                decomposition.IncrementalPCA(),
                decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
                decomposition.LatentDirichletAllocation(random_state=0,
                                                        n_jobs=N_JOBS),
                decomposition.MiniBatchDictionaryLearning(random_state=0,
                                                          n_jobs=N_JOBS),
                decomposition.MiniBatchSparsePCA(random_state=0,
                                                 n_jobs=N_JOBS),
                decomposition.NMF(random_state=0),
                decomposition.PCA(random_state=0),
                decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
                decomposition.TruncatedSVD(random_state=0),
                kernel_approximation.Nystroem(random_state=0),
                kernel_approximation.RBFSampler(random_state=0),
                kernel_approximation.AdditiveChi2Sampler(),
                kernel_approximation.SkewedChi2Sampler(random_state=0),
                cluster.FeatureAgglomeration(),
                skrebate.ReliefF(n_jobs=N_JOBS),
                skrebate.SURF(n_jobs=N_JOBS),
                skrebate.SURFstar(n_jobs=N_JOBS),
                skrebate.MultiSURF(n_jobs=N_JOBS),
                skrebate.MultiSURFstar(n_jobs=N_JOBS),
                imblearn.under_sampling.ClusterCentroids(random_state=0,
                                                         n_jobs=N_JOBS),
                imblearn.under_sampling.CondensedNearestNeighbour(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.EditedNearestNeighbours(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.RepeatedEditedNearestNeighbours(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.InstanceHardnessThreshold(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.NearMiss(random_state=0,
                                                 n_jobs=N_JOBS),
                imblearn.under_sampling.NeighbourhoodCleaningRule(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.OneSidedSelection(random_state=0,
                                                          n_jobs=N_JOBS),
                imblearn.under_sampling.RandomUnderSampler(random_state=0),
                imblearn.under_sampling.TomekLinks(random_state=0,
                                                   n_jobs=N_JOBS),
                imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.RandomOverSampler(random_state=0),
                imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.SVMSMOTE(random_state=0,
                                                n_jobs=N_JOBS),
                imblearn.over_sampling.BorderlineSMOTE(random_state=0,
                                                       n_jobs=N_JOBS),
                imblearn.over_sampling.SMOTENC(categorical_features=[],
                                               random_state=0,
                                               n_jobs=N_JOBS),
                imblearn.combine.SMOTEENN(random_state=0),
                imblearn.combine.SMOTETomek(random_state=0))

            newlist = []
            for obj in ev:
                if obj is None:
                    newlist.append(None)
                elif obj == 'all_0':
                    newlist.extend(preprocessings[0:35])
                elif obj == 'sk_prep_all':
                    # no KernelCenterer()
                    newlist.extend(preprocessings[0:7])
                elif obj == 'fs_all':
                    newlist.extend(preprocessings[7:14])
                elif obj == 'decomp_all':
                    newlist.extend(preprocessings[14:25])
                elif obj == 'k_appr_all':
                    newlist.extend(preprocessings[25:29])
                elif obj == 'reb_all':
                    newlist.extend(preprocessings[30:35])
                elif obj == 'imb_all':
                    newlist.extend(preprocessings[35:54])
                elif type(obj) is int and -1 < obj < len(preprocessings):
                    newlist.append(preprocessings[obj])
                elif hasattr(obj, 'get_params'):  # user-uploaded object
                    if 'n_jobs' in obj.get_params():
                        newlist.append(obj.set_params(n_jobs=N_JOBS))
                    else:
                        newlist.append(obj)
                else:
                    sys.exit("Unsupported estimator type: %r" % (obj,))

            search_params[param_name] = newlist

    return search_params
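# Hypothetical downstream use of the returned dict (names assumed, not part of
# the tool's documented API): the keys/lists are shaped like a scikit-learn
# parameter grid, so they can feed a searcher directly.
#
#   from sklearn.model_selection import GridSearchCV
#   search_params = _eval_search_params(params_builder)
#   searcher = GridSearchCV(pipeline, search_params, cv=5, n_jobs=N_JOBS)
#   searcher.fit(X, y)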
def get_search_params(params_builder):
    search_params = {}
    safe_eval = SafeEval(load_scipy=True, load_numpy=True)
    safe_eval_es = SafeEval(load_estimators=True)

    for p in params_builder['param_set']:
        search_p = p['search_param_selector']['search_p']
        if search_p.strip() == '':
            continue
        param_type = p['search_param_selector']['selected_param_type']

        lst = search_p.split(':')
        assert (
            len(lst) == 2
        ), "Error, make sure there is one and only one colon in search parameter input."
        literal = lst[1].strip()
        param_name = lst[0].strip()
        if param_name:
            if param_name.lower() == 'n_jobs':
                sys.exit("Parameter `%s` is invalid for search." % param_name)
            elif not param_name.endswith('-'):
                ev = safe_eval(literal)
                if param_type == 'final_estimator_p':
                    search_params['estimator__' + param_name] = ev
                else:
                    search_params['preprocessing_' + param_type[5:6] +
                                  '__' + param_name] = ev
            else:
                # only for estimator eval; `-` at the end of the param name
                # TODO: maybe add a regular-expression check
                ev = safe_eval_es(literal)
                for obj in ev:
                    if 'n_jobs' in obj.get_params():
                        obj.set_params(n_jobs=N_JOBS)
                if param_type == 'final_estimator_p':
                    search_params['estimator__' + param_name[:-1]] = ev
                else:
                    search_params['preprocessing_' + param_type[5:6] +
                                  '__' + param_name[:-1]] = ev
        elif param_type != 'final_estimator_p':
            # TODO: regular-expression check?
            ev = safe_eval_es(literal)
            preprocessors = [
                preprocessing.StandardScaler(),
                preprocessing.Binarizer(),
                preprocessing.Imputer(),  # removed in scikit-learn >= 0.22
                preprocessing.MaxAbsScaler(),
                preprocessing.Normalizer(),
                preprocessing.MinMaxScaler(),
                preprocessing.PolynomialFeatures(),
                preprocessing.RobustScaler(),
                feature_selection.SelectKBest(),
                feature_selection.GenericUnivariateSelect(),
                feature_selection.SelectPercentile(),
                feature_selection.SelectFpr(),
                feature_selection.SelectFdr(),
                feature_selection.SelectFwe(),
                feature_selection.VarianceThreshold(),
                decomposition.FactorAnalysis(random_state=0),
                decomposition.FastICA(random_state=0),
                decomposition.IncrementalPCA(),
                decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
                decomposition.LatentDirichletAllocation(random_state=0,
                                                        n_jobs=N_JOBS),
                decomposition.MiniBatchDictionaryLearning(random_state=0,
                                                          n_jobs=N_JOBS),
                decomposition.MiniBatchSparsePCA(random_state=0,
                                                 n_jobs=N_JOBS),
                decomposition.NMF(random_state=0),
                decomposition.PCA(random_state=0),
                decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
                decomposition.TruncatedSVD(random_state=0),
                kernel_approximation.Nystroem(random_state=0),
                kernel_approximation.RBFSampler(random_state=0),
                kernel_approximation.AdditiveChi2Sampler(),
                kernel_approximation.SkewedChi2Sampler(random_state=0),
                cluster.FeatureAgglomeration(),
                skrebate.ReliefF(n_jobs=N_JOBS),
                skrebate.SURF(n_jobs=N_JOBS),
                skrebate.SURFstar(n_jobs=N_JOBS),
                skrebate.MultiSURF(n_jobs=N_JOBS),
                skrebate.MultiSURFstar(n_jobs=N_JOBS),
                imblearn.under_sampling.ClusterCentroids(random_state=0,
                                                         n_jobs=N_JOBS),
                imblearn.under_sampling.CondensedNearestNeighbour(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.EditedNearestNeighbours(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.RepeatedEditedNearestNeighbours(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.InstanceHardnessThreshold(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.NearMiss(random_state=0,
                                                 n_jobs=N_JOBS),
                imblearn.under_sampling.NeighbourhoodCleaningRule(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.OneSidedSelection(random_state=0,
                                                          n_jobs=N_JOBS),
                imblearn.under_sampling.RandomUnderSampler(random_state=0),
                imblearn.under_sampling.TomekLinks(random_state=0,
                                                   n_jobs=N_JOBS),
                imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.RandomOverSampler(random_state=0),
                imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.SVMSMOTE(random_state=0,
                                                n_jobs=N_JOBS),
                imblearn.over_sampling.BorderlineSMOTE(random_state=0,
                                                       n_jobs=N_JOBS),
                imblearn.over_sampling.SMOTENC(categorical_features=[],
                                               random_state=0,
                                               n_jobs=N_JOBS),
                imblearn.combine.SMOTEENN(random_state=0),
                imblearn.combine.SMOTETomek(random_state=0)
            ]
            newlist = []
            for obj in ev:
                if obj is None:
                    newlist.append(None)
                elif obj == 'all_0':
                    newlist.extend(preprocessors[0:36])
                elif obj == 'sk_prep_all':
                    # no KernelCenterer()
                    newlist.extend(preprocessors[0:8])
                elif obj == 'fs_all':
                    newlist.extend(preprocessors[8:15])
                elif obj == 'decomp_all':
                    newlist.extend(preprocessors[15:26])
                elif obj == 'k_appr_all':
                    newlist.extend(preprocessors[26:30])
                elif obj == 'reb_all':
                    newlist.extend(preprocessors[31:36])
                elif obj == 'imb_all':
                    newlist.extend(preprocessors[36:55])
                elif type(obj) is int and -1 < obj < len(preprocessors):
                    newlist.append(preprocessors[obj])
                elif hasattr(obj, 'get_params'):  # user object
                    if 'n_jobs' in obj.get_params():
                        newlist.append(obj.set_params(n_jobs=N_JOBS))
                    else:
                        newlist.append(obj)
                else:
                    sys.exit("Unsupported preprocessor type: %r" % (obj,))
            search_params['preprocessing_' + param_type[5:6]] = newlist
        else:
            sys.exit("Parameter name of the final estimator can't be skipped!")

    return search_params
def fit(actions, dataset):
    X_train, X_test, y_train, y_test = dataset
    seq = {}

    # fit transformer
    if actions[1].item() == 0:
        log1p_fit_transformer = preprocessing.FunctionTransformer()
        seq[1] = log1p_fit_transformer
    else:
        quantile_fit_transformer = preprocessing.QuantileTransformer(
            random_state=0)
        seq[1] = quantile_fit_transformer

    # scaler
    if actions[3].item() == 0:
        standard_scaler = preprocessing.StandardScaler()
        seq[2] = standard_scaler
    elif actions[3].item() == 1:
        robust_scaler = preprocessing.RobustScaler()
        seq[2] = robust_scaler
    else:
        min_max_scaler = preprocessing.MinMaxScaler()
        seq[2] = min_max_scaler

    # constructors
    if actions[5].item() == 0:
        seq[3] = preprocessing.PolynomialFeatures(interaction_only=True)
    else:
        seq[3] = FeatureAgglomeration(n_clusters=5)

    # selector
    if actions[7].item() == 0:
        selecter = feature_selection.SelectFwe()
        seq[4] = selecter
    elif actions[7].item() == 1:
        selecter = feature_selection.SelectPercentile()
        seq[4] = selecter
    elif actions[7].item() == 2:
        selecter = feature_selection.RFE(
            sklearn.ensemble.ExtraTreesClassifier())
        seq[4] = selecter
    else:
        selecter = feature_selection.SelectFromModel(
            sklearn.ensemble.ExtraTreesClassifier(), threshold="median")
        seq[4] = selecter

    # models
    if actions[-1].item() == 0:
        model = sklearn.naive_bayes.GaussianNB()
        seq[5] = model
    elif actions[-1].item() == 1:
        model = sklearn.ensemble.RandomForestClassifier()
        seq[5] = model
    elif actions[-1].item() == 2:
        model = sklearn.naive_bayes.BernoulliNB()
        seq[5] = model
    elif actions[-1].item() == 3:
        model = linear_model.LogisticRegression()
        seq[5] = model
    elif actions[-1].item() == 4:
        model = sklearn.tree.DecisionTreeClassifier()
        seq[5] = model
    else:
        model = sklearn.ensemble.ExtraTreesClassifier()
        seq[5] = model

    # connectivity: even-indexed actions pick each node's input
    transformed = {}
    # Node 1
    transformed[1] = seq[1].fit_transform(X_train)
    # Node 2
    if actions[2].item() == 0:
        transformed[2] = seq[2].fit_transform(X_train)
    elif actions[2].item() == 1:
        transformed[2] = seq[2].fit_transform(transformed[1])
    # Node 3
    if actions[4].item() == 0:
        transformed[3] = seq[3].fit_transform(X_train)
    elif actions[4].item() == 1:
        transformed[3] = seq[3].fit_transform(transformed[1])
    elif actions[4].item() == 2:
        transformed[3] = seq[3].fit_transform(transformed[2])
    # Node 4 (selection needs the target)
    if actions[6].item() == 0:
        transformed[4] = seq[4].fit_transform(X_train, y_train)
    elif actions[6].item() == 1:
        transformed[4] = seq[4].fit_transform(transformed[1], y_train)
    elif actions[6].item() == 2:
        transformed[4] = seq[4].fit_transform(transformed[2], y_train)
    elif actions[6].item() == 3:
        transformed[4] = seq[4].fit_transform(transformed[3], y_train)

    # leaf nodes: nodes whose output no other node consumes
    leaf_nodes = set(range(5)) - {i.item() for i in actions[0:-1:2]}
    # print(leaf_nodes)
    merge_data = np.concatenate([transformed[i] for i in leaf_nodes], axis=1)
    last_selecter = feature_selection.SelectFromModel(
        sklearn.ensemble.ExtraTreesClassifier(), threshold="median")
    merge_data = last_selecter.fit_transform(merge_data, y_train)
    clf = seq[5].fit(merge_data, y_train)

    # test data
    test_transformed = {}
    # Node 1
    test_transformed[1] = seq[1].transform(X_test)
    # Node 2
    if actions[2].item() == 0:
        test_transformed[2] = seq[2].transform(X_test)
    elif actions[2].item() == 1:
        test_transformed[2] = seq[2].transform(test_transformed[1])
    # Node 3
    if actions[4].item() == 0:
        test_transformed[3] = seq[3].transform(X_test)
    elif actions[4].item() == 1:
        test_transformed[3] = seq[3].transform(test_transformed[1])
    elif actions[4].item() == 2:
        test_transformed[3] = seq[3].transform(test_transformed[2])
    # Node 4
    if actions[6].item() == 0:
        test_transformed[4] = seq[4].transform(X_test)
    elif actions[6].item() == 1:
        test_transformed[4] = seq[4].transform(test_transformed[1])
    elif actions[6].item() == 2:
        test_transformed[4] = seq[4].transform(test_transformed[2])
    elif actions[6].item() == 3:
        test_transformed[4] = seq[4].transform(test_transformed[3])
    # print([test_transformed[i].shape for i in leaf_nodes])
    merge_data = np.concatenate([test_transformed[i] for i in leaf_nodes],
                                axis=1)
    merge_data = last_selecter.transform(merge_data)
    pred_test = clf.predict(merge_data)

    # reward = metrics.accuracy_score(y_test, pred_test)
    reward = balanced_accuracy_score(y_test, pred_test)
    # print(clf)
    # print(reward)
    # print('\nPrediction accuracy for the normal test dataset with log transformer')
    # print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred_test)))
    return reward
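# Hypothetical invocation (assumes torch-style action tensors, as implied by
# the .item() calls above; the exact action encoding comes from the caller's
# search agent, so the values here are illustrative only):
#
#   import torch
#   from sklearn.model_selection import train_test_split
#   actions = torch.tensor([0, 0, 1, 1, 2, 0, 3, 0, 4])
#   reward = fit(actions, train_test_split(X, y, random_state=0))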