Example #1
def stacking_gaussian(var_smoothing=1e-9):
    estimadores = [('svm', svm()), ('xgboost', xgboost()),
                   ('random_forest', random_forest())]
    cv = utils.kfold_for_cross_validation()
    stacking = StackingClassifier(
        estimators=estimadores,
        final_estimator=GaussianNB(var_smoothing=var_smoothing),
        stack_method="predict_proba",
        cv=cv)
    return stacking
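Example #1 relies on project-local helpers (svm(), xgboost(), random_forest(), utils.kfold_for_cross_validation()). A minimal self-contained sketch with plain scikit-learn estimators standing in for those factories (all settings below are assumptions):

# Self-contained sketch of Example #1; estimator settings are assumptions
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

def stacking_gaussian_sketch(var_smoothing=1e-9):
    estimators = [('svm', SVC(probability=True)),  # probability=True enables predict_proba
                  ('random_forest', RandomForestClassifier(random_state=0))]
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    return StackingClassifier(
        estimators=estimators,
        final_estimator=GaussianNB(var_smoothing=var_smoothing),
        stack_method="predict_proba",
        cv=cv)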
Example #2
 def stackModel(self):
     x_train_smote, y_train_smote = self.over_sampling()
     estimators = [
         ('rf', RandomForestClassifier(random_state=42)),
         ('xgboost', XGBClassifier())
     ]
     clf = StackingClassifier(
         estimators=estimators, final_estimator=LogisticRegression()
     )
     clf.fit(x_train_smote, y_train_smote.values.ravel())
     return clf
Example #3
def week10(C, random_state, criterion, min_samples_leaf, max_leaf_samples,
           n_estimators, solver, cv, clazz, images):
    trainData, Y = catsvsdogs.train
    Y = [(y + 1) % 2 for y in Y]  # flip the binary labels (0 <-> 1)

    from sklearn.ensemble import BaggingClassifier
    from sklearn.tree import DecisionTreeClassifier

    tree = DecisionTreeClassifier(
        criterion=criterion,  # split criterion
        min_samples_leaf=min_samples_leaf,  # minimum number of samples per leaf
        max_leaf_nodes=max_leaf_samples,  # maximum number of leaf nodes
        random_state=random_state)
    bagging = BaggingClassifier(
        tree,  # base estimator
        n_estimators=n_estimators,  # number of trees
        random_state=random_state)
    bagging.fit(trainData, Y)

    from sklearn.svm import LinearSVC

    svm = LinearSVC(random_state=random_state, C=C)
    svm.fit(trainData, Y)

    from sklearn.ensemble import RandomForestClassifier

    forest = RandomForestClassifier(
        n_estimators=n_estimators,  # number of trees
        criterion=criterion,  # split criterion
        min_samples_leaf=min_samples_leaf,  # minimum number of samples per leaf
        max_leaf_nodes=max_leaf_samples,  # maximum number of leaf nodes
        random_state=random_state)
    forest.fit(trainData, Y)

    from sklearn.linear_model import LogisticRegression

    lr = LogisticRegression(solver=solver, random_state=random_state)

    from sklearn.ensemble import StackingClassifier

    base_estimators = [('SVM', svm), ('Bagging DT', bagging),
                       ('DecisionForest', forest)]
    sclf = StackingClassifier(estimators=base_estimators,
                              final_estimator=lr,
                              cv=cv)
    sclf.fit(trainData, Y)

    accuracy = sclf.score(trainData, Y)  # note: training-set accuracy, not a held-out estimate

    probas = []
    for img in images:
        histt = catsvsdogs.test[img].reshape(1, -1)
        probas += [(img, sclf.predict_proba(histt)[0][clazz])]

    return {'accuracy': accuracy, 'probas': probas}
Example #4
    def stacking_model(
            self,
            estimator=None,
            final_estimator=sklearn.linear_model.LogisticRegression(),
            cv=2,
            scoring=['roc_auc_ovr'],
            sort=None,
            estimator_params={},
            fit_params={},
            verbose=True,
            n_jobs=-1):
        if sort is None:
            sort = scoring[0]
        estimator_model = self.choose_model(estimator=estimator,
                                            estimator_params=estimator_params,
                                            fit_params=fit_params)

        model_stacking = []
        for name_model, model in estimator_model.items():
            try:
                estimator = model.estimator
            except AttributeError:
                estimator = model
            model_stacking.append((name_model, estimator))
        name_model = 'classification-stacking_model'
        LOGGER.info('TRY STACKING MODEL')
        estimator = StackingClassifier(estimators=model_stacking,
                                       final_estimator=final_estimator,
                                       cv=cv,
                                       n_jobs=n_jobs,
                                       verbose=verbose)
        scores = sklearn.model_selection.cross_validate(
            estimator=estimator,
            X=self.X,
            y=self.y,
            scoring=scoring,
            cv=cv,
            n_jobs=n_jobs,
            verbose=verbose,
            fit_params=fit_params,
            return_train_score=True,
            return_estimator=True,
            error_score=-1)
        self.estimator['classification-stackingclassifer'] = scores[
            'estimator'][np.argmax(scores['test_' + sort])]
        scores.pop('estimator')
        name_model = ''.join(name_model.split('-')[1:])
        for key, values in scores.items():
            for i, value in enumerate(values):
                if i not in self.metrics.keys():
                    self.metrics[i] = {}
                if name_model not in self.metrics[i].keys():
                    self.metrics[i][name_model] = dict()
                self.metrics[i][name_model][key] = value
        return self
Example #5
def get_stacked_model():
    level0 = [
        ('DecisionTree', decision_tree),
        ('K_NearestNeighbors', k_nearest_neighbors),
        ('RandomForest', random_forest),
        ('SVM_SVC', svm_svc),
        ('SVM_NuSVC', svm_nu),
        ('MLPClassifier', mlpc),
    ]
    level1 = LogisticRegression(random_state=RANDOM_STATE)
    return StackingClassifier(estimators=level0, final_estimator=level1, cv=10)
Example #6
def get_stacking():
    level0 = list()
    level0.append(('randomforest', RandomForestClassifier()))
    level0.append(('cart', DecisionTreeClassifier()))
    level0.append(('svm', SVC()))
    level0.append(('xgb', XGBClassifier()))
    #level0.append(('mlp', MLPClassifier()))
    level1 = LogisticRegression(max_iter=3000)  # define meta learner model
    model = StackingClassifier(estimators=level0, final_estimator=level1,
                               cv=5)  # define the stacking ensemble
    return model
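A usage sketch for a factory like get_stacking(), assuming a feature matrix X and labels y are already loaded:

# Usage sketch: cross-validated evaluation of get_stacking() (X, y assumed defined)
import numpy as np
from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score

model = get_stacking()
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.3f (%.3f)' % (np.mean(scores), np.std(scores)))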
Example #7
def test_stacking_classifier_sample_weight_fit_param():
    # check sample_weight is passed to all invocations of fit
    stacker = StackingClassifier(
        estimators=[
            ('lr', CheckingClassifier(expected_fit_params=['sample_weight']))
        ],
        final_estimator=CheckingClassifier(
            expected_fit_params=['sample_weight']
        )
    )
    stacker.fit(X_iris, y_iris, sample_weight=np.ones(X_iris.shape[0]))
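The same sample_weight routing works outside the test mocks; a minimal sketch with real estimators (the uniform weights are illustrative):

# Sketch: sample_weight is forwarded to each base estimator's fit and to the
# final estimator's fit on the out-of-fold predictions
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
clf = StackingClassifier(
    estimators=[('rf', RandomForestClassifier(random_state=0))],
    final_estimator=LogisticRegression(max_iter=1000))
clf.fit(X, y, sample_weight=np.ones(len(y)))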
Example #8
def make_model(X_train, y_train):
    '''
    fits and returns a stacking model based on the data passed in
    '''
    estimators = [('rf', RandomForestClassifier()),
                  ('log', LogisticRegression(solver='liblinear')),
                  ('grad', GradientBoostingClassifier())]
    stack = StackingClassifier(estimators=estimators,
                               final_estimator=LogisticRegression(),
                               cv=5)
    stack.fit(X_train, y_train)
    return stack
Example #9
 def get_mod_stacking(self, given_modalities, clf):
     estimators = []
     for m in given_modalities:
         text_pipe = Pipeline([('select',
                                filter_cols(self.modalities_df.loc[
                                    self.modalities_df.modality.isin([m]),
                                    'feature'])), ('pred', clf)])
         estimators.append((m, text_pipe))
     clf = StackingClassifier(estimators=estimators,
                              final_estimator=LogisticRegression(),
                              n_jobs=-1)
     return clf
Example #10
def train(x, y):
    # keyword arguments are required by recent scikit-learn versions
    cw = list(class_weight.compute_class_weight('balanced', classes=np.unique(y), y=y))
    lr = LogisticRegressionCV(cv=5,
                              class_weight='balanced',
                              scoring='f1_macro',
                              verbose=10,
                              random_state=0)
    clf1 = lgb.LGBMClassifier(
        objective='multiclass',  # LightGBM's multiclass objective ('multi:softmax' is XGBoost syntax)
        n_estimators=900,  #900
        max_depth=11,  #8
        num_leaves=90,  #90
        learning_rate=0.17,
        feature_fraction=0.7,
        min_child_samples=5,
        min_child_weight=0.001,
        bagging_fraction=1,
        bagging_freq=0,
        reg_alpha=0.015,
        reg_lambda=0,
        cat_smooth=0,
        #device= 'gpu',
        #gpu_platform_id= 1,
        #gpu_device_id= 0,
        class_weight='balanced',
        random_state=0,
        n_jobs=-1)
    # Heard that LightGBM's random forest is faster than sklearn's, but my score inexplicably dropped, so I switched back
    clf2 = RandomForestClassifier(n_estimators=1000,
                                  random_state=0,
                                  n_jobs=-1,
                                  class_weight='balanced')
    clf3 = CatBoostClassifier(
        iterations=2000,
        verbose=400,
        early_stopping_rounds=200,  #task_type='GPU',
        #border_count=254,
        loss_function='MultiClass',
        class_weights=cw,
        depth=8,
        l2_leaf_reg=0.06,
        random_strength=0.01,
        random_state=0)
    clf = StackingClassifier(estimators=[('lgb', clf1), ('rf', clf2),
                                         ('catboost', clf3)],
                             cv=5,
                             final_estimator=lr,
                             stack_method='predict_proba',
                             verbose=10,
                             n_jobs=1)

    clf.fit(x, y)
    return clf
Example #11
def get_stacking():
    # define the base models
    level0 = list()
    level0.append(('lr', LogisticRegression()))  # logistic regression
    level0.append(('knn', KNeighborsClassifier()))  # k-nearest neighbors
    level0.append(('rf', RandomForestClassifier()))  # random forest
    level0.append(('bayes', GaussianNB()))  # Gaussian naive Bayes
    # define meta learner model
    level1 = LogisticRegression()  # logistic regression as the meta-model
    # define the stacking ensemble
    model = StackingClassifier(estimators=level0, final_estimator=level1, cv=5)
    return model
Example #12
def model_stack(X_train, y_train, X_test, y_test):
    estimators = [('xgb', XGBClassifier()), ('lgb', lgb.LGBMClassifier())]
    model = StackingClassifier(estimators=estimators,
                               final_estimator=LogisticRegression())

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    stack_accuracy = accuracy_score(y_test, y_pred)
    stack_f1 = f1_score(y_test, y_pred, average='weighted')

    return stack_accuracy, stack_f1
Example #13
def test_stacking_classifier_drop_estimator():
    # prescale the data to avoid convergence warning without using a pipeline
    # for later assert
    X_train, X_test, y_train, _ = train_test_split(scale(X_iris),
                                                   y_iris,
                                                   stratify=y_iris,
                                                   random_state=42)
    estimators = [('lr', 'drop'), ('svc', LinearSVC(random_state=0))]
    rf = RandomForestClassifier(n_estimators=10, random_state=42)
    clf = StackingClassifier(estimators=[('svc', LinearSVC(random_state=0))],
                             final_estimator=rf,
                             cv=5)
    clf_drop = StackingClassifier(estimators=estimators,
                                  final_estimator=rf,
                                  cv=5)

    clf.fit(X_train, y_train)
    clf_drop.fit(X_train, y_train)
    assert_allclose(clf.predict(X_test), clf_drop.predict(X_test))
    assert_allclose(clf.predict_proba(X_test), clf_drop.predict_proba(X_test))
    assert_allclose(clf.transform(X_test), clf_drop.transform(X_test))
Example #14
def run_StackingClassifier(params: Dict[str, Any]):
    from sklearn.svm import SVC
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier, StackingClassifier
    clf1 = LogisticRegression(multi_class='multinomial')
    clf2 = RandomForestClassifier(n_estimators=50)
    clf3 = SVC()

    clf = StackingClassifier(**params,
                             estimators=[('lr', clf1), ('rf', clf2),
                                         ("svc", clf3)])
    return clf
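A hypothetical call, forwarding StackingClassifier keyword arguments through params (the values are assumptions):

# Hypothetical usage of run_StackingClassifier; keys map to StackingClassifier kwargs
from sklearn.linear_model import LogisticRegression

model = run_StackingClassifier({'final_estimator': LogisticRegression(),
                                'cv': 5,
                                'stack_method': 'auto'})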
Example #15
def test_stacking():
    irep = IREP(random_state=42)
    rip = RIPPER(random_state=42)

    df = DF.copy()
    numeric_cols = df.select_dtypes("number").columns
    categorical_cols = [
        col for col in df.columns
        if (col not in numeric_cols and not col == CLASS_FEAT)
    ]
    dum_df = pd.get_dummies(df[categorical_cols])
    for col in numeric_cols:
        dum_df[col] = df[col]
    dum_df[CLASS_FEAT] = df[CLASS_FEAT]
    sktrain, sktest = df_shuffled_split(dum_df, random_state=42)
    sktrain_x, sktrain_y = sktrain.drop(CLASS_FEAT, axis=1), sktrain[CLASS_FEAT]
    sktest_x, sktest_y = sktest.drop(CLASS_FEAT, axis=1), sktest[CLASS_FEAT]

    lone_tree = DecisionTreeClassifier(random_state=42)
    lone_tree.fit(sktrain_x, sktrain_y)
    lone_tree_score = lone_tree.score(sktest_x, sktest_y)
    # print('lone_tree_score',lone_tree_score)

    irep_tree = SVC(random_state=42)  # an SVC despite the "tree" name
    irep_stack_estimators = [("irep", irep), ("tree", irep_tree)]
    irep_stack = StackingClassifier(estimators=irep_stack_estimators,
                                    final_estimator=LogisticRegression())
    irep_stack.fit(sktrain_x, sktrain_y)
    irep_stack_score = irep_stack.score(sktest_x, sktest_y)
    # print('irep_stack_score', irep_stack_score)
    assert irep_stack_score != lone_tree_score

    rip_tree = DecisionTreeClassifier(random_state=42)
    rip_stack_estimators = [("rip", rip), ("tree", rip_tree)]
    rip_stack = StackingClassifier(estimators=rip_stack_estimators,
                                   final_estimator=LogisticRegression())
    rip_stack.fit(sktrain_x, sktrain_y)
    rip_stack_score = rip_stack.score(sktest_x, sktest_y)
    # print('rip_stack_score',rip_stack_score)
    assert rip_stack_score != lone_tree_score
Example #16
def test_stacking_classifier_drop_binary_prob():
    # check that the classifier drops one of the two probability columns
    # for binary classification problems

    # select only the first two classes
    X_, y_ = scale(X_iris[:100]), y_iris[:100]

    estimators = [('lr', LogisticRegression()),
                  ('rf', RandomForestClassifier())]
    clf = StackingClassifier(estimators=estimators)
    clf.fit(X_, y_)
    X_meta = clf.transform(X_)
    assert X_meta.shape[1] == 2
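For contrast, with all three iris classes no probability column is dropped, so transform yields n_estimators * n_classes meta-features; a sketch reusing the same fixtures:

# With 3 classes nothing is dropped: expect 2 estimators * 3 classes = 6 columns
clf_multi = StackingClassifier(estimators=[('lr', LogisticRegression()),
                                           ('rf', RandomForestClassifier())])
clf_multi.fit(scale(X_iris), y_iris)
assert clf_multi.transform(scale(X_iris)).shape[1] == 6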
Example #17
def get_stacking():
    # define the base models
    level0 = list()
    level0.append(('lr', LogisticRegression()))
    level0.append(('knn', KNeighborsClassifier()))
    level0.append(('cart', DecisionTreeClassifier()))
    level0.append(('svm', SVC()))
    level0.append(('bayes', GaussianNB()))
    # define meta learner model
    level1 = XGBClassifier(verbosity=0)
    # define the stacking ensemble
    model = StackingClassifier(estimators=level0, final_estimator=level1, cv=5)
    return model
Example #18
 def perform_stacking(self):
     eclfs = [(k, v) for k, v in self.__classifiers.items()]
     clf = StackingClassifier(estimators=eclfs,
                              final_estimator=LogisticRegression(),
                              cv=5,
                              verbose=1,
                              n_jobs=-1)
     clf.fit(self.__train_x, self.__train_y)
     # clf.score returns a single float, so mean()/std() do not apply here
     score = clf.score(self.__test_x, self.__test_y)
     print("Accuracy: %0.2f [%s]" % (score, 'StackingClassifier'))
     self.plot_conf_mat(clf, 'StackingClassifier')
     return clf
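If the mean/std in the original format string was the intent, fold-wise cross-validation scores provide it; a sketch under that assumption (train_x and train_y stand in for the private class attributes):

# Sketch: per-fold scores, if a mean +/- std report was intended
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(clf, train_x, train_y, cv=5, n_jobs=-1)
print("Accuracy: %0.2f (+/- %0.2f) [%s]"
      % (cv_scores.mean(), cv_scores.std(), 'StackingClassifier'))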
Example #19
    def __init__(self,
                 in_model_code,
                 db,
                 y_col="party",
                 label_col="county_fips",
                 where_clauses=None,
                 data_view="master_data",
                 year_col="year",
                 year_test=2020):
        self.db = db
        self.mc = in_model_code
        self.drop_cols = db.query(ModelDropCol).filter_by(
            model_code_id=self.mc.id).all()

        where = self.db.query(ModelWhereClause).filter_by(
            model_code=self.mc).all()
        if where:
            self.where = " where " + (" and ".join([wc.sql for wc in where]))
        else:
            self.where = ""

        self.engine_string = database_string
        self.query = f"select * from {data_view}{self.where}"
        self.df = pandas.read_sql_query(
            self.query,
            database_string).drop(columns=[dc.column for dc in self.drop_cols])

        self.y = self.df[y_col].to_numpy()
        self.x = self.df.drop(columns=y_col).to_numpy()

        self.model_obj = self.db.query(Model).filter_by(
            model_code=self.mc).first()
        if not self.model_obj:

            rf = RandomForestClassifier(n_estimators=10, random_state=42)
            svr = make_pipeline(
                StandardScaler(),
                LinearSVC(random_state=42, dual=False, max_iter=1000))
            knn = KNeighborsClassifier(n_neighbors=3)
            nb = GaussianNB()
            classifiers = [("rf", rf), ("svr", svr), ("knn", knn), ("nb", nb)]
            self.model = StackingClassifier(
                estimators=classifiers, final_estimator=LogisticRegression())
            self.accuracy = None
            self.model_obj = Model(model_code=self.mc, accuracy=self.accuracy)
            self.db.add(self.model_obj)
            self.train()
            self.save()
        else:
            self.model = pickle.loads(self.model_obj.model_object)
            self.accuracy = self.model_obj.accuracy
Example #20
    def _initClassifier(self):
        """
        Initiate Classifer

        Parameters
        ----------
    
    
        Returns
        -------
        Init Classifier


        Raises
        ------
        ValueError if class argument `classifierClass` unknown.

        """
        if self.classifierClass in [
                "random_forest", "random forest", "ensemble tree"
        ]:
            return RandomForestClassifier(n_estimators=200,
                                          oob_score=True,
                                          min_samples_split=2,
                                          n_jobs=self.n_jobs,
                                          random_state=42)

        elif self.classifierClass == "SVM":

            return SVC(gamma=2, C=1, probability=True)

        elif self.classifierClass == "GradientBoost":

            return GradientBoostingClassifier(n_estimators=200,
                                              random_state=42)

        elif self.classifierClass == "GaussianNB":

            return GaussianNB()

        elif self.classifierClass == "StackedClassifiers":
            estimators = [("rf",
                           RandomForestClassifier(n_estimators=100,
                                                  random_state=42)),
                          ("NB", GaussianNB()),
                          ("SVM", SVC(gamma=2, C=1, probability=True))]

            return StackingClassifier(estimators)

        else:
            raise ValueError("Argument `classifierClass` is not known.")
Example #21
 def _get_stacker(self, mode, estimators, ensemble_config):
     if self.configs['fit']['train_mode'] == 'clf':
         stacker = StackingClassifier(
             estimators=estimators,
             final_estimator=self.get_base_estimator(
                 ensemble_config['model']),
             n_jobs=-1)
     elif self.configs['fit']['train_mode'] == 'reg':
         stacker = StackingRegressor(
             estimators=estimators,
             final_estimator=self.get_base_estimator(
                 ensemble_config['model']),
             n_jobs=-1)
     else:
         raise ValueError(
             'unknown train_mode: %s' % self.configs['fit']['train_mode'])
     return stacker
Example #22
def get_nlp_model():
    return StackingClassifier(
        estimators=[('modified_huber_SGD',
                     SGDClassifier(loss="modified_huber",
                                   alpha=0.002,
                                   penalty="l2",
                                   max_iter=10000)),
                    ('LogisticRegression',
                     LogisticRegression(max_iter=10000,
                                        C=0.1,
                                        class_weight='balanced'))],
        final_estimator=LogisticRegression(max_iter=10000,
                                           C=0.1,
                                           class_weight='balanced'))
Example #23
 def Stacking(self):
     estimators3 = [
         ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
         ('knn', KNeighborsClassifier(n_neighbors=5)),
         ('svm', SVC())]
     estimators2 = [
         ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
         ('svm', SVC())]
     estimators1 = [
         ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
         ('knn', KNeighborsClassifier(n_neighbors=5))]
     estimators4 = [  # used when only SVM and KNN are selected
         ('knn', KNeighborsClassifier(n_neighbors=5)),
         ('svm', SVC())]
     try:
         if (self.svmStackingcheckBox.isChecked() and self.rfcStackingcheckBox.isChecked() and self.knnStackingcheckBox.isChecked()):
             estimators = estimators3
             clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
             stackingAccuracy = clf.fit(self.X_train, self.y_train).score(self.X_test, self.y_test)
             self.accuracyEnsembleLBL.setText(str(stackingAccuracy))
         elif (self.svmStackingcheckBox.isChecked() and self.rfcStackingcheckBox.isChecked()):
             estimators = estimators2
             clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
             stackingAccuracy = clf.fit(self.X_train, self.y_train).score(self.X_test, self.y_test)
             self.accuracyEnsembleLBL.setText(str(stackingAccuracy))
         elif(self.rfcStackingcheckBox.isChecked() and self.knnStackingcheckBox.isChecked()):
             estimators = estimators1
             clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
             stackingAccuracy = clf.fit(self.X_train, self.y_train).score(self.X_test, self.y_test)
             self.accuracyEnsembleLBL.setText(str(stackingAccuracy))
         elif(self.svmStackingcheckBox.isChecked() and self.knnStackingcheckBox.isChecked()):
             estimators = estimators4
             clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
             stackingAccuracy = clf.fit(self.X_train, self.y_train).score(self.X_test, self.y_test)
             self.accuracyEnsembleLBL.setText(str(stackingAccuracy))
     except Exception as a:
         print(a)
Example #24
def train_model(model, X_train, y_train):
    '''
    Train one model on the given training data.
    Input: X_train, y_train
    Output: fitted classifier
    '''
    if model == 'XG':
        clsfr = XGBClassifier(n_estimators=400,max_depth=6, learning_rate=0.05, subsample=0.9, colsample_bytree=0.65, min_child_weight=11)
    elif model == 'ADA':
        clsfr = AdaBoostClassifier()
    elif model == 'DT':
        clsfr = DecisionTreeClassifier()
    elif model == 'SVC':
        clsfr = SVC()
    elif model == 'KN':
        clsfr = KNeighborsClassifier(n_neighbors=5, weights="uniform", algorithm="auto",
            leaf_size=30, p=2, metric="minkowski", metric_params=None)
    elif model == 'BG':
        clsfr = BaggingClassifier(base_estimator=RandomForestClassifier())
    elif model == 'ET':
        clsfr = ExtraTreesClassifier()
    elif model == 'RF':
        clsfr = RandomForestClassifier()
    elif model == 'ST':
        estimators = [
            ('MLP',MLPClassifier()),
            ('RF',RandomForestClassifier()),
            ('XG',XGBClassifier()),
            ('ADA',AdaBoostClassifier())
        ]
        clsfr = StackingClassifier(estimators=estimators)
    elif model == 'NSVC':
        clsfr = NuSVC()
    elif model == 'LSVC':
        clsfr = LinearSVC()
    elif model == 'ST2':
        estimators = [
            XGBClassifier(),
            AdaBoostClassifier(),
            RandomForestClassifier(),
            MLPClassifier()]
        clsfr = StackingCVClassifier(classifiers=estimators, meta_classifier=MLPClassifier())
    elif model == 'MLP':
        clsfr = MLPClassifier(learning_rate='adaptive', max_iter=1000)
    elif model == 'GB':
        clsfr = GradientBoostingClassifier()

    clsfr.fit(X_train,y_train)
    return clsfr
Example #25
def test_stacking_classsifer(final_estimator):
    estimators = [('mlp', MLPClassifier(alpha=0.001)),
                  ('tree', DecisionTreeClassifier())]
    clf = StackingClassifier(
        estimators=estimators, final_estimator=final_estimator)

    html_output = estimator_html_repr(clf)

    assert str(clf) in html_output
    # If final_estimator's default changes from LogisticRegression
    # this should be updated
    if final_estimator is None:
        assert "LogisticRegression(" in html_output
    else:
        assert final_estimator.__class__.__name__ in html_output
Example #26
def get_stacking():
    # define the base models
    level0 = list()
    level0.append(('lr', LogisticRegression()))     #Analysis showed this model underperformed
    level0.append(('knn', KNeighborsClassifier()))   #Analysis showed this model underperformed
    level0.append(('rf_1',RandomForestClassifier(class_weight='balanced')))
    level0.append(('rf_2',RandomForestClassifier(class_weight='balanced_subsample')))
    level0.append(('cart', DecisionTreeClassifier()))
    
    # define meta learner model
    level1 = RandomForestClassifier()
    
    # define the stacking ensemble
    model = StackingClassifier(estimators=level0, final_estimator=level1, cv=5)
    return model
Example #27
def get_stacked_models():
    models = list()
    models.append(('SVM', SVC()))
    models.append(('NB', GaussianNB()))
    models.append(('KNN', KNeighborsClassifier(n_neighbors=6)))
    models.append(('DecTree', DecisionTreeClassifier(random_state=1)))
    models.append(('RF', RandomForestClassifier(n_estimators=500)))

    #Meta classifier:
    final_model = LogisticRegression()

    model = StackingClassifier(estimators=models,
                               final_estimator=final_model,
                               cv=5)
    return model
Example #28
def hyperparam_tuned_ensemble_classifier():
    """Ensemble classifier with custom hyperparameters.

    Returns
    -------
    sklearn StackingClassifier object
        The ensemble classifier with custom hyperparameters
    """
    # List of the individual classifiers to be used in the ensemble
    # classifier with their names
    estimators = [('Log Reg', hyperparam_tuned_log_regression()),
                  ('RForest', hyperparam_tuned_random_forest() ),
                  ('SVM'    , hyperparam_tuned_support_vector())] 

    # Ensemble classifier
    return StackingClassifier(estimators=estimators)
Example #29
def test_stacking_classifier_sparse_passthrough(fmt):
    # Check passthrough behavior on a sparse X matrix
    X_train, X_test, y_train, _ = train_test_split(
        sparse.coo_matrix(scale(X_iris)).asformat(fmt),
        y_iris, random_state=42
    )
    estimators = [('lr', LogisticRegression()), ('svc', LinearSVC())]
    rf = RandomForestClassifier(n_estimators=10, random_state=42)
    clf = StackingClassifier(
        estimators=estimators, final_estimator=rf, cv=5, passthrough=True
    )
    clf.fit(X_train, y_train)
    X_trans = clf.transform(X_test)
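    # passthrough=True appends the original 4 iris features after the stacked
    # predictions, preserving both sparsity and the sparse format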
    assert_allclose_dense_sparse(X_test, X_trans[:, -4:])
    assert sparse.issparse(X_trans)
    assert X_test.format == X_trans.format
Example #30
def rank_stacking_classifer(X, Y):
    # rf = RandomForestClassifier()
    # gbdt = GradientBoostingClassifier()
    # adaboost = AdaBoostRegressor()
    # clf = StackingClassifier(classiers=).fit(X, Y)

    estimators = [('rf', RandomForestClassifier(n_jobs=20)),
                  ('gbdt', GradientBoostingClassifier()),
                  ('adaboost', AdaBoostClassifier())]

    clf = StackingClassifier(estimators=estimators,
                             final_estimator=LogisticRegression())

    clf.fit(X, Y)

    return clf