Example 1
def get_fs_model(model, method, train, target=None, cv=None):
    """Connects given model with specified feature selection method and trains
    the final structure.
    """
    if method == "RFE":
        model = fs_scikit.RFE(model, n_features_to_select=2, step=5)
        if target is not None:
            return model.fit(train, target)
        else:
            return model.fit(train)
    if method == "RFECV":
        model = fs_scikit.RFECV(model, 3, cv=cv)
        if target is not None:
            return model.fit(train, target)
        else:
            return model.fit(train)
    elif method == "linearSVC":
        sel = SelectFromModel(LinearSVC(penalty='l1', dual=False))
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    elif method == "fromModel":
        fm = fs_scikit.SelectFromModel(model)
        if target is not None:
            fm.fit(train, target)
        else:
            fm.fit(train)
        model = Pipeline([('feature_selection', fm), ('data_mining', model)])

    # elif method == "Anova":
    # ANOVA SVM-C
    # anova_filter = fs_scikit.SelectKBest(f_regression, k=5)
    # model = Pipeline([
    #     ('feature_selection', anova_filter),
    #     ('data_mining', model)
    # ])
    elif method == "VarianceThreshold":
        sel = fs_scikit.VarianceThreshold(threshold=(.8 * (1 - .8)))
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    elif method == "SelectPercentile":
        sel = fs_scikit.SelectPercentile(fs_scikit.f_classif, percentile=30)
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    elif method == "SelectFpr":
        sel = fs_scikit.SelectFpr(alpha=0.2)
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    elif method == "SelectFdr":
        sel = fs_scikit.SelectFdr(alpha=0.2)
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    elif method == "SelectFwe":
        sel = fs_scikit.SelectFwe(alpha=0.2)
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    elif method == "ch2":
        sel = fs_scikit.SelectKBest(fs_scikit.chi2, k=2)
        model = Pipeline([('feature_selection', sel), ('data_mining', model)])
    else:
        print("Feature selection method was not found: " + method)
        sys.exit(1)
    return model
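
A minimal usage sketch for the helper above, assuming the module-level imports the snippet relies on (fs_scikit as sklearn.feature_selection, Pipeline, SelectFromModel, LinearSVC, sys); the classifier and toy data are made up for illustration:

# Hypothetical driver for get_fs_model; the import aliases are assumptions,
# since the snippet does not show its own imports.
import sys  # used by get_fs_model's fallback branch
import sklearn.feature_selection as fs_scikit
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=200, n_features=20, random_state=0)

# "SelectFpr" wires the selector in front of the model and returns an
# untrained Pipeline, so fit it before predicting.
clf = get_fs_model(LogisticRegression(max_iter=1000), "SelectFpr", X, target=y)
clf.fit(X, y)
print(clf.predict(X[:5]))

# "RFE" fits inside the helper and returns the fitted selector directly.
rfe = get_fs_model(LogisticRegression(max_iter=1000), "RFE", X, target=y)
print(rfe.support_)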
Example 2
def univCV(data, labels, cut_level):
    '''Pick the best SelectFpr alpha cut-off by leave-one-out cross-validation.'''
    # loop over the candidate alpha levels
    correlations = np.zeros(len(cut_level))
    for inx, i in enumerate(cut_level):
        cross = sklcv.KFold(n=len(labels), n_folds=len(labels))
        prediction = np.zeros_like(labels)
        for train, test in cross:
            univ = sklfs.SelectFpr(sklfs.f_regression, alpha=i)
            prep_data = data[train]
            prep_test = data[test]

            #if use_modules.find('a') != -1:
            #    univ_agglo = sklcl.WardAgglomeration(connectivity=connect, n_clusters=ward_level)
            #    prep_data = univ_agglo.fit_transform(prep_data)
            #    prep_test = univ_agglo.transform(prep_test)

            #if use_modules.find('b') != -1:
            #    bool_pos, bool_neg = direction_cutoff(prep_data)
            #    prep_data = prep_data[:, bool_pos]
            #    prep_test = prep_test[:, bool_pos]

            #if use_modules.find('c') != -1:
            #    scaler = sklpre.StandardScaler()
            #    prep_data = scaler.fit_transform(prep_data)
            #    prep_test = scaler.transform(prep_test)

            prep_data = univ.fit_transform(prep_data, labels[train])
            mod = sklsvm.NuSVR(kernel='linear', nu=1, C=100)  #Change model
            mod.fit(prep_data, labels[train])
            prep_test = univ.transform(prep_test)
            pred = mod.predict(prep_test)
            prediction[test] = pred
        # correlate the LOO predictions with the true labels
        correlations[inx], _ = ss.spearmanr(prediction, labels)
    #TODO - smooth this?
    correlations = ssig.medfilt(correlations)
    best_cut = cut_level[correlations.argmax()]

    return best_cut
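
A hypothetical call to univCV on synthetic data. The aliases below (sklcv, sklfs, sklsvm, ss, ssig, np) are assumptions inferred from the snippet, and sklcv implies the legacy sklearn.cross_validation module removed in scikit-learn 0.20:

import numpy as np
import scipy.signal as ssig
import scipy.stats as ss
import sklearn.cross_validation as sklcv   # legacy module, pre-0.20 scikit-learn
import sklearn.feature_selection as sklfs
import sklearn.svm as sklsvm

rng = np.random.RandomState(0)
data = rng.randn(30, 200)                          # 30 samples, 200 features
labels = data[:, 0] * 2.0 + 0.1 * rng.randn(30)    # continuous target

# candidate alpha cut-offs for SelectFpr, from permissive to strict
cut_levels = np.array([1.0, 0.5, 0.1, 0.05, 0.01])
best_alpha = univCV(data, labels, cut_levels)
print("cross-validated SelectFpr alpha:", best_alpha)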
Example 3
def select_features(x, y):
    """

    :param x: dataframe of features
    :param y: dataframe of target property
    :return: Outputs of feature selection process
    """
    x = pd.DataFrame(x)

    # Removing features with low variance
    var_threshold = f_selection.VarianceThreshold(threshold=(.8 * (1 - .8)))

    # Kbest-based and Percentile-based feature selection using regression
    # score_func must be a callable, not a precomputed (F, p-values) tuple;
    # functools.partial binds the center=False keyword (requires `import functools`)
    f_regress = functools.partial(f_selection.f_regression, center=False)
    kbest = f_selection.SelectKBest(score_func=f_regress, k=2)
    percent = f_selection.SelectPercentile(score_func=f_regress, percentile=10)

    # Tree-based feature selection using a number of randomized decision trees
    # pass an estimator instance; prefit=True would require an already-fitted model
    trees = f_selection.SelectFromModel(ExtraTreesRegressor(n_estimators=100))

    # "False positive rate"-based feature selection using regression
    fpr = f_selection.SelectFpr(score_func=f_regress, alpha=0.05)

    # PCA-component evaluation
    pca = PCA(n_components=2)

    # Recursive feature elimination and cross-validated feature selection
    estimator = SVR(kernel="linear")
    selector = f_selection.RFECV(estimator, step=1, cv=5)

    # Build estimator from PCA and Univariate selection:
    combined_features = FeatureUnion([("pca_based", pca),
                                      ("univ_kbest", kbest),
                                      ("false_positive_rate", fpr),
                                      ("percentile_based", percent),
                                      ("RFECV_selector", selector),
                                      ("variance_threshold", var_threshold),
                                      ("trees_based", trees)])
    x_union_features = combined_features.fit_transform(x, y)

    svm = SVC(kernel="linear")

    # Do grid search over all parameters:
    pipeline = Pipeline([("features", combined_features), ("svm", svm)])

    # Nested parameter names use double underscores: <step>__<substep>__<param>;
    # float grids use list comprehensions since range() cannot take float steps.
    grid = dict(features__pca_based__n_components=range(1, 101),
                features__univ_kbest__k=range(1, 101),
                features__false_positive_rate__alpha=[i / 100 for i in range(1, 100)],
                features__percentile_based__percentile=range(1, 20, 1),
                features__RFECV_selector__cv=range(2, 5),
                features__variance_threshold__threshold=[i / 100 for i in range(0, 100)],
                svm__C=[0.01, 0.1, 1.0, 10.0])

    grid_search = GridSearchCV(pipeline, param_grid=grid, verbose=0)
    grid_search.fit(x, y)
    # transform x with the feature union from the best pipeline found
    x_features = grid_search.best_estimator_.named_steps["features"].transform(x)

    # Pickling feature reduction outputs
    with open(FS_PICKLE, 'wb') as result:
        # rf_sorted_score is assumed to be defined elsewhere in the module
        pickle.dump(rf_sorted_score, result, pickle.HIGHEST_PROTOCOL)
        pickle.dump(grid_search.best_estimator_, result,
                    pickle.HIGHEST_PROTOCOL)

    print(grid_search.best_estimator_)

    return x_features
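
For reference, the double-underscore naming used in the corrected grid above is scikit-learn's standard convention for nested Pipeline/FeatureUnion parameters; a small self-contained illustration, unrelated to the data in the snippet:

# Minimal illustration of nested parameter names with a FeatureUnion inside a
# Pipeline, grid-searched end to end on synthetic regression data.
from sklearn.datasets import make_regression
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectFpr, f_regression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.svm import SVR

X, y = make_regression(n_samples=100, n_features=20, random_state=0)
union = FeatureUnion([("pca_based", PCA(n_components=2)),
                      ("false_positive_rate", SelectFpr(f_regression))])
pipe = Pipeline([("features", union), ("svm", SVR(kernel="linear"))])

grid = {"features__pca_based__n_components": [2, 5, 10],
        "features__false_positive_rate__alpha": [0.01, 0.05, 0.1],
        "svm__C": [0.1, 1.0, 10.0]}
search = GridSearchCV(pipe, param_grid=grid, cv=3)
search.fit(X, y)
print(search.best_params_)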
Example 4
def select_best():
    df = pd.merge(
        acw.gen_long_data(tpt)
            .normalize(columns="metric")
            .add_net_meta(tpt.net_hierarchy(HierarchyName.RESTRICTED_PERIPHERY_CORE))
            .groupby(["task", "subject", "region", "net_meta"]).mean().reset_index()
            .rename(columns={"metric": "acw"}),
        acz.gen_long_data(tpt)
            .normalize(columns="metric")
            .add_net_meta(tpt.net_hierarchy(HierarchyName.RESTRICTED_PERIPHERY_CORE))
            .groupby(["task", "subject", "region", "net_meta"]).mean().reset_index()
            .rename(columns={"metric": "acz"}),
        on=["task", "subject", "region", "net_meta"], sort=False).and_filter(NOTnet_meta="M")

    X = df.iloc[:, -2:].values
    y = df.net_meta.map({"C": 0, "P": 1}).values

    functions = [fs.mutual_info_classif, fs.f_classif, fs.chi2]
    for func in functions:
        for method in [fs.SelectKBest(func, k=1), fs.SelectPercentile(func), fs.SelectFdr(func), fs.SelectFpr(func),
                       fs.SelectFwe(func)]:
            method.fit(X, y)
            print(f'{str(method).split("(")[0]} {func.__name__}: {np.argmax(method.scores_) + 1}')
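
The data preparation above relies on project-specific helpers (acw, acz, tpt); the selector loop at the end can be reproduced on made-up data, assuming fs is sklearn.feature_selection and np is NumPy:

# Stand-alone version of the scoring loop on synthetic two-feature data.
import numpy as np
import sklearn.feature_selection as fs

rng = np.random.RandomState(0)
X = np.abs(rng.randn(100, 2))                    # chi2 requires non-negative X
y = (X[:, 1] > np.median(X[:, 1])).astype(int)   # class driven by feature 2

for func in [fs.mutual_info_classif, fs.f_classif, fs.chi2]:
    for method in [fs.SelectKBest(func, k=1), fs.SelectPercentile(func),
                   fs.SelectFdr(func), fs.SelectFpr(func), fs.SelectFwe(func)]:
        method.fit(X, y)
        # scores_ ranks the candidate features; +1 gives a 1-based index
        print(f'{str(method).split("(")[0]} {func.__name__}: '
              f'{np.argmax(method.scores_) + 1}')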
Example 5
from classifiers.estimators_all import CLASSIFIERS

#
# MAIN
#
synonyms_filepath = io_utils.get_synonyms_filepath()

UNIVARIATE = {
    "uv_kbest_def":
    feature_selection.SelectKBest(f_classif, k=10),
    "uv_kbest_chi2_def":
    feature_selection.SelectKBest(chi2, k=10),
    "uv_percentile_def":
    feature_selection.SelectPercentile(f_classif, percentile=10),
    "uv_fpr_def":
    feature_selection.SelectFpr(f_classif),
    "uv_fwe_def":
    feature_selection.SelectFwe(f_classif)
}

print "Preparing Train Collection"
X_train, y_train = create_train_data(io_utils.get_train_vectors_list())
print "Preparing Test Collection"
X_test, test_collections = create_test_data(io_utils.get_train_vectors_list())

# Univariate
for univariate_model_name in UNIVARIATE:
    model = UNIVARIATE[univariate_model_name]
    model.fit(X_train, y_train)
    X_train_new = model.transform(X_train)
    X_test_new = model.transform(X_test)
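
The create_train_data / create_test_data helpers and io_utils are project-specific; a self-contained sketch of the same univariate loop on synthetic data, using only scikit-learn and NumPy:

# Stand-alone sketch of the univariate selection loop above.
import numpy as np
from sklearn import feature_selection
from sklearn.feature_selection import f_classif

rng = np.random.RandomState(0)
X_train = np.abs(rng.randn(80, 50))
y_train = (X_train[:, 0] > X_train[:, 0].mean()).astype(int)
X_test = np.abs(rng.randn(20, 50))

selectors = {
    "uv_kbest_def": feature_selection.SelectKBest(f_classif, k=10),
    "uv_percentile_def": feature_selection.SelectPercentile(f_classif, percentile=10),
    "uv_fpr_def": feature_selection.SelectFpr(f_classif),
}

for name, model in selectors.items():
    model.fit(X_train, y_train)
    print(name, model.transform(X_train).shape, model.transform(X_test).shape)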
Example 6
def _eval_search_params(params_builder):
    search_params = {}

    for p in params_builder['param_set']:
        search_list = p['sp_list'].strip()
        if search_list == '':
            continue

        param_name = p['sp_name']
        if param_name.lower().endswith(NON_SEARCHABLE):
            print("Warning: `%s` is not eligible for search and was "
                  "omitted!" % param_name)
            continue

        if not search_list.startswith(':'):
            safe_eval = SafeEval(load_scipy=True, load_numpy=True)
            ev = safe_eval(search_list)
            search_params[param_name] = ev
        else:
            # A leading `:` before the search list asks for estimator evaluation
            safe_eval_es = SafeEval(load_estimators=True)
            search_list = search_list[1:].strip()
            # TODO: maybe add a regular expression check
            ev = safe_eval_es(search_list)
            preprocessings = (
                preprocessing.StandardScaler(), preprocessing.Binarizer(),
                preprocessing.MaxAbsScaler(), preprocessing.Normalizer(),
                preprocessing.MinMaxScaler(),
                preprocessing.PolynomialFeatures(),
                preprocessing.RobustScaler(), feature_selection.SelectKBest(),
                feature_selection.GenericUnivariateSelect(),
                feature_selection.SelectPercentile(),
                feature_selection.SelectFpr(), feature_selection.SelectFdr(),
                feature_selection.SelectFwe(),
                feature_selection.VarianceThreshold(),
                decomposition.FactorAnalysis(random_state=0),
                decomposition.FastICA(random_state=0),
                decomposition.IncrementalPCA(),
                decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
                decomposition.LatentDirichletAllocation(random_state=0,
                                                        n_jobs=N_JOBS),
                decomposition.MiniBatchDictionaryLearning(random_state=0,
                                                          n_jobs=N_JOBS),
                decomposition.MiniBatchSparsePCA(random_state=0,
                                                 n_jobs=N_JOBS),
                decomposition.NMF(random_state=0),
                decomposition.PCA(random_state=0),
                decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
                decomposition.TruncatedSVD(random_state=0),
                kernel_approximation.Nystroem(random_state=0),
                kernel_approximation.RBFSampler(random_state=0),
                kernel_approximation.AdditiveChi2Sampler(),
                kernel_approximation.SkewedChi2Sampler(random_state=0),
                cluster.FeatureAgglomeration(),
                skrebate.ReliefF(n_jobs=N_JOBS), skrebate.SURF(n_jobs=N_JOBS),
                skrebate.SURFstar(n_jobs=N_JOBS),
                skrebate.MultiSURF(n_jobs=N_JOBS),
                skrebate.MultiSURFstar(n_jobs=N_JOBS),
                imblearn.under_sampling.ClusterCentroids(random_state=0,
                                                         n_jobs=N_JOBS),
                imblearn.under_sampling.CondensedNearestNeighbour(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.EditedNearestNeighbours(random_state=0,
                                                                n_jobs=N_JOBS),
                imblearn.under_sampling.RepeatedEditedNearestNeighbours(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.InstanceHardnessThreshold(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.NearMiss(random_state=0,
                                                 n_jobs=N_JOBS),
                imblearn.under_sampling.NeighbourhoodCleaningRule(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.OneSidedSelection(random_state=0,
                                                          n_jobs=N_JOBS),
                imblearn.under_sampling.RandomUnderSampler(random_state=0),
                imblearn.under_sampling.TomekLinks(random_state=0,
                                                   n_jobs=N_JOBS),
                imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.RandomOverSampler(random_state=0),
                imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.BorderlineSMOTE(random_state=0,
                                                       n_jobs=N_JOBS),
                imblearn.over_sampling.SMOTENC(categorical_features=[],
                                               random_state=0,
                                               n_jobs=N_JOBS),
                imblearn.combine.SMOTEENN(random_state=0),
                imblearn.combine.SMOTETomek(random_state=0))
            newlist = []
            for obj in ev:
                if obj is None:
                    newlist.append(None)
                elif obj == 'all_0':
                    newlist.extend(preprocessings[0:35])
                elif obj == 'sk_prep_all':  # no KernelCenterer()
                    newlist.extend(preprocessings[0:7])
                elif obj == 'fs_all':
                    newlist.extend(preprocessings[7:14])
                elif obj == 'decomp_all':
                    newlist.extend(preprocessings[14:25])
                elif obj == 'k_appr_all':
                    newlist.extend(preprocessings[25:29])
                elif obj == 'reb_all':
                    newlist.extend(preprocessings[30:35])
                elif obj == 'imb_all':
                    newlist.extend(preprocessings[35:54])
                elif type(obj) is int and -1 < obj < len(preprocessings):
                    newlist.append(preprocessings[obj])
                elif hasattr(obj, 'get_params'):  # user uploaded object
                    if 'n_jobs' in obj.get_params():
                        newlist.append(obj.set_params(n_jobs=N_JOBS))
                    else:
                        newlist.append(obj)
                else:
                    sys.exit("Unsupported estimator type: %r" % (obj))

            search_params[param_name] = newlist

    return search_params
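
A hypothetical params_builder payload for the function above. Only the 'param_set', 'sp_name' and 'sp_list' keys are read by the code; everything else, including the exact expression grammar accepted by SafeEval, is an assumption:

# Illustrative input: 'sp_list' holds an expression string that SafeEval
# evaluates; a leading ':' switches to estimator evaluation.
params_builder = {
    'param_set': [
        # plain parameter: the expression becomes a list of candidate values
        {'sp_name': 'estimator__C', 'sp_list': '[0.1, 1.0, 10.0]'},
        # leading ':' asks for estimator evaluation; 'fs_all' expands to the
        # block of feature-selection preprocessors defined above
        {'sp_name': 'preprocessing', 'sp_list': ": ['fs_all']"},
    ]
}
search_params = _eval_search_params(params_builder)
# roughly: {'estimator__C': [0.1, 1.0, 10.0],
#           'preprocessing': [SelectKBest(), ..., VarianceThreshold()]}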
Example 7
def run_pipe(input_files, input_labels, use_modules, no_proc):
    '''Run the SVR workflow on the data.'''

    #--------------Organise inputs
    #calculate matrix
    #feature_matrix = prepare_modality(input_files, input_mask)
    #--------------Execute analysis
    #prepare feature agglomeration
    #mask_handle = nb.load(input_mask)
    connect = sklim.grid_to_graph(*input_files[0].shape,
                                  mask=np.invert(
                                      np.isnan(np.sum(input_files, 0))))
    inshape = input_files.shape

    feature_matrix = input_files.reshape((inshape[0], -1))

    #remove nans
    sum_features = np.sum(feature_matrix, 0)
    feature_matrix = feature_matrix[:, np.invert(np.isnan(sum_features))]

    #cross validation
    loo = sklcv.KFold(len(input_labels), n_folds=len(input_labels))
    print('Starting svr')

    cv_pred = jl.Parallel(n_jobs=no_proc, verbose=1, pre_dispatch=no_proc * 2)(
        jl.delayed(do_model)(feature_matrix[train], input_labels[train],
                             feature_matrix[test], connect, use_modules)
        for train, test in loo)
    cv_pred = np.array(cv_pred)
    corr, p = ss.pearsonr(cv_pred[:, 0], input_labels)

    #creating final model
    print('creating final model')
    if use_modules.find('a') != -1:
        final_agglo = sklcl.WardAgglomeration(connectivity=connect,
                                              n_clusters=int(
                                                  np.median(cv_pred[:, 1])))
        feature_matrix = final_agglo.fit_transform(feature_matrix)
    else:
        final_agglo = 0

    if use_modules.find('b') != -1:
        bool_pos, bool_neg = direction_cutoff(feature_matrix)
        feature_matrix = feature_matrix[:, bool_pos]
    else:
        bool_pos = 0

    if use_modules.find('c') != -1:
        final_scaler = sklpre.StandardScaler()
        feature_matrix = final_scaler.fit_transform(feature_matrix)
    else:
        final_scaler = 0

    if use_modules.find('d') != -1:
        final_univ = sklfs.SelectFpr(alpha=np.median(cv_pred[:, 2]))
        feature_matrix = final_univ.fit_transform(feature_matrix, input_labels)
    else:
        final_univ = 0

    final_model = sklsvm.NuSVR(kernel='linear',
                               C=100,
                               degree=1,
                               nu=np.median(cv_pred[:, 3]))
    final_model.fit(feature_matrix, input_labels)

    return cv_pred, corr, p, final_agglo, final_univ, final_scaler, bool_pos, final_model
Example 8
def do_model(train_d, train_l, test_d, connect, use_modules):

    #ward clustering (a)
    if use_modules.find('a') != -1:
        no_feat = len(train_d[0, :])
        ward_sizes = np.array([
            int(no_feat),
            int(no_feat * 0.8),
            int(no_feat * 0.5),
            int(no_feat * 0.1),
            int(no_feat * 0.01)
        ])  # set to about 100, 50 and 10% add 1/10000 for dbm
        use_wardsize = wardCV(train_d, train_l, ward_sizes, connect)
        agglo = sklcl.WardAgglomeration(connectivity=connect,
                                        n_clusters=use_wardsize)

        train_d = agglo.fit_transform(train_d)
        test_d = agglo.transform(test_d)
    else:
        use_wardsize = '0'

    #include positive values only(b)
    if use_modules.find('b') != -1:
        bool_pos, bool_neg = direction_cutoff(train_d)

        train_d = train_d[:, bool_pos]
        test_d = test_d[:, bool_pos]

    #scale features to z scores(c)
    if use_modules.find('c') != -1:
        scaler = sklpre.StandardScaler()

        train_d = scaler.fit_transform(train_d)
        test_d = scaler.transform(test_d)

    #univariate selection(d)
    if use_modules.find('d') != -1:
        univ_levels = np.array([1, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001, 0.0001])
        #use_cut = univCV(train_d, train_l, univ_levels,use_wardsize,connect,use_modules)
        use_cut = univCV(train_d, train_l, univ_levels)
        univ_select = sklfs.SelectFpr(alpha=use_cut)

        train_d = univ_select.fit_transform(train_d, train_l)
        test_d = univ_select.transform(test_d)
    else:
        use_cut = '0'

    #train model

    nus = np.array([1])  #set nu threshold
    params = dict(nu=nus)
    model = GridSearchCV(
        estimator=sklsvm.NuSVR(kernel='linear', C=100, degree=1),  # changed from 1000 to 10 for dbm
        param_grid=params,
        cv=10,
        n_jobs=1,
        scoring='r2')  # TODO: changed from mse

    model.fit(train_d, train_l)
    pred = model.predict(test_d)

    use_nu = model.best_params_['nu']
    results = [pred, use_wardsize, use_cut, use_nu]

    return results
def get_search_params(params_builder):
    search_params = {}
    safe_eval = SafeEval(load_scipy=True, load_numpy=True)
    safe_eval_es = SafeEval(load_estimators=True)

    for p in params_builder['param_set']:
        search_p = p['search_param_selector']['search_p']
        if search_p.strip() == '':
            continue
        param_type = p['search_param_selector']['selected_param_type']

        lst = search_p.split(':')
        assert (
            len(lst) == 2
        ), "Error, make sure there is one and only one colon in search parameter input."
        literal = lst[1].strip()
        param_name = lst[0].strip()
        if param_name:
            if param_name.lower() == 'n_jobs':
                sys.exit("Parameter `%s` is invalid for search." % param_name)
            elif not param_name.endswith('-'):
                ev = safe_eval(literal)
                if param_type == 'final_estimator_p':
                    search_params['estimator__' + param_name] = ev
                else:
                    search_params['preprocessing_' + param_type[5:6] + '__' +
                                  param_name] = ev
            else:
                # only for estimator eval, add `-` to the end of param
                # TODO: maybe add a regular expression check
                ev = safe_eval_es(literal)
                for obj in ev:
                    if 'n_jobs' in obj.get_params():
                        obj.set_params(n_jobs=N_JOBS)
                if param_type == 'final_estimator_p':
                    search_params['estimator__' + param_name[:-1]] = ev
                else:
                    search_params['preprocessing_' + param_type[5:6] + '__' +
                                  param_name[:-1]] = ev
        elif param_type != 'final_estimator_p':
            #TODO regular express check ?
            ev = safe_eval_es(literal)
            preprocessors = [
                preprocessing.StandardScaler(),
                preprocessing.Binarizer(),
                preprocessing.Imputer(),
                preprocessing.MaxAbsScaler(),
                preprocessing.Normalizer(),
                preprocessing.MinMaxScaler(),
                preprocessing.PolynomialFeatures(),
                preprocessing.RobustScaler(),
                feature_selection.SelectKBest(),
                feature_selection.GenericUnivariateSelect(),
                feature_selection.SelectPercentile(),
                feature_selection.SelectFpr(),
                feature_selection.SelectFdr(),
                feature_selection.SelectFwe(),
                feature_selection.VarianceThreshold(),
                decomposition.FactorAnalysis(random_state=0),
                decomposition.FastICA(random_state=0),
                decomposition.IncrementalPCA(),
                decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
                decomposition.LatentDirichletAllocation(random_state=0,
                                                        n_jobs=N_JOBS),
                decomposition.MiniBatchDictionaryLearning(random_state=0,
                                                          n_jobs=N_JOBS),
                decomposition.MiniBatchSparsePCA(random_state=0,
                                                 n_jobs=N_JOBS),
                decomposition.NMF(random_state=0),
                decomposition.PCA(random_state=0),
                decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
                decomposition.TruncatedSVD(random_state=0),
                kernel_approximation.Nystroem(random_state=0),
                kernel_approximation.RBFSampler(random_state=0),
                kernel_approximation.AdditiveChi2Sampler(),
                kernel_approximation.SkewedChi2Sampler(random_state=0),
                cluster.FeatureAgglomeration(),
                skrebate.ReliefF(n_jobs=N_JOBS),
                skrebate.SURF(n_jobs=N_JOBS),
                skrebate.SURFstar(n_jobs=N_JOBS),
                skrebate.MultiSURF(n_jobs=N_JOBS),
                skrebate.MultiSURFstar(n_jobs=N_JOBS),
                imblearn.under_sampling.ClusterCentroids(random_state=0,
                                                         n_jobs=N_JOBS),
                imblearn.under_sampling.CondensedNearestNeighbour(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.EditedNearestNeighbours(random_state=0,
                                                                n_jobs=N_JOBS),
                imblearn.under_sampling.RepeatedEditedNearestNeighbours(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.InstanceHardnessThreshold(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.NearMiss(random_state=0,
                                                 n_jobs=N_JOBS),
                imblearn.under_sampling.NeighbourhoodCleaningRule(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.OneSidedSelection(random_state=0,
                                                          n_jobs=N_JOBS),
                imblearn.under_sampling.RandomUnderSampler(random_state=0),
                imblearn.under_sampling.TomekLinks(random_state=0,
                                                   n_jobs=N_JOBS),
                imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.RandomOverSampler(random_state=0),
                imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.BorderlineSMOTE(random_state=0,
                                                       n_jobs=N_JOBS),
                imblearn.over_sampling.SMOTENC(categorical_features=[],
                                               random_state=0,
                                               n_jobs=N_JOBS),
                imblearn.combine.SMOTEENN(random_state=0),
                imblearn.combine.SMOTETomek(random_state=0)
            ]
            newlist = []
            for obj in ev:
                if obj is None:
                    newlist.append(None)
                elif obj == 'all_0':
                    newlist.extend(preprocessors[0:36])
                elif obj == 'sk_prep_all':  # no KernelCenterer()
                    newlist.extend(preprocessors[0:8])
                elif obj == 'fs_all':
                    newlist.extend(preprocessors[8:15])
                elif obj == 'decomp_all':
                    newlist.extend(preprocessors[15:26])
                elif obj == 'k_appr_all':
                    newlist.extend(preprocessors[26:30])
                elif obj == 'reb_all':
                    newlist.extend(preprocessors[31:36])
                elif obj == 'imb_all':
                    newlist.extend(preprocessors[36:55])
                elif type(obj) is int and -1 < obj < len(preprocessors):
                    newlist.append(preprocessors[obj])
                elif hasattr(obj, 'get_params'):  # user object
                    if 'n_jobs' in obj.get_params():
                        newlist.append(obj.set_params(n_jobs=N_JOBS))
                    else:
                        newlist.append(obj)
                else:
                    sys.exit("Unsupported preprocessor type: %r" % (obj))
            search_params['preprocessing_' + param_type[5:6]] = newlist
        else:
            sys.exit("Parameter name of the final estimator can't be skipped!")

    return search_params
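
A hypothetical input for get_search_params, illustrating the single-colon 'name: literal' format it asserts on. The 'selected_param_type' values are illustrative; the function only checks for 'final_estimator_p' and otherwise slices characters 5:6 of that string to build the 'preprocessing_N' prefix:

params_builder = {
    'param_set': [
        {'search_param_selector': {
            'search_p': 'C: [0.1, 1.0, 10.0]',
            'selected_param_type': 'final_estimator_p'}},
        {'search_param_selector': {
            'search_p': 'k: [5, 10, 20]',
            'selected_param_type': 'prep_1_p'}},
    ]
}
search_params = get_search_params(params_builder)
# roughly: {'estimator__C': [0.1, 1.0, 10.0], 'preprocessing_1__k': [5, 10, 20]}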