def stacking_gaussian(var_smoothing=1e-9):
    """Build an unfitted stacking ensemble topped by Gaussian naive Bayes.

    The base layer is produced by the ``svm()``, ``xgboost()`` and
    ``random_forest()`` factories; their ``predict_proba`` outputs feed the
    meta-learner.

    Args:
        var_smoothing: Forwarded to ``GaussianNB(var_smoothing=...)``.

    Returns:
        The configured, not yet fitted, ``StackingClassifier``.
    """
    base_learners = [
        ('svm', svm()),
        ('xgboost', xgboost()),
        ('random_forest', random_forest()),
    ]
    folds = utils.kfold_for_cross_validation()
    meta_learner = GaussianNB(var_smoothing=var_smoothing)
    return StackingClassifier(
        estimators=base_learners,
        final_estimator=meta_learner,
        stack_method="predict_proba",
        cv=folds,
    )
def stackModel(self):
    """Fit a random-forest + XGBoost stacking ensemble and return it.

    The training data comes from ``self.over_sampling()``; the variable
    names suggest SMOTE-resampled data (presumably pandas objects, since
    ``.values`` is read from the labels -- confirm against that method).

    Returns:
        The fitted ``StackingClassifier``. Earlier versions returned
        ``None`` and the trained model was lost; callers that ignore the
        return value are unaffected.
    """
    x_train_smote, y_train_smote = self.over_sampling()

    estimators = [
        ('rf', RandomForestClassifier(random_state=42)),
        ('xgboost', XGBClassifier()),
    ]
    clf = StackingClassifier(
        estimators=estimators,
        final_estimator=LogisticRegression(),
    )
    # ravel() flattens the label column into the 1-D shape fit() expects.
    clf.fit(x_train_smote, y_train_smote.values.ravel())
    return clf
def week10(C, random_state, criterion, min_samples_leaf, max_leaf_samples,
           n_estimators, solver, cv, clazz, images):
    """Train a stacked SVM / bagged-trees / random-forest classifier.

    Training data is read from ``catsvsdogs.train`` and every label ``y`` is
    remapped to ``(y + 1) % 2`` (this swaps the two classes if labels are
    0/1 -- confirm against the dataset). Each base estimator is fitted on
    its own first, then the stack is fitted on the same data.

    Args:
        C: Regularisation parameter of the ``LinearSVC``.
        random_state: Seed shared by every estimator.
        criterion: Split criterion for the trees.
        min_samples_leaf: Minimum number of samples per leaf.
        max_leaf_samples: Passed as ``max_leaf_nodes`` (maximum leaf count).
        n_estimators: Number of trees in the bagging ensemble and the forest.
        solver: Solver of the logistic-regression meta-learner.
        cv: Cross-validation setting forwarded to ``StackingClassifier``.
        clazz: Index of the class whose probability is reported.
        images: Keys into ``catsvsdogs.test`` to score.

    Returns:
        ``{'accuracy': ..., 'probas': [(img, probability), ...]}`` where the
        accuracy is measured on the training data itself.
    """
    from sklearn.ensemble import BaggingClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.ensemble import StackingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import LinearSVC
    from sklearn.tree import DecisionTreeClassifier

    features, raw_labels = catsvsdogs.train
    labels = [(label + 1) % 2 for label in raw_labels]

    # Base learner 1: bagging over a single decision tree.
    base_tree = DecisionTreeClassifier(
        criterion=criterion,                # split criterion
        min_samples_leaf=min_samples_leaf,  # minimum samples in a leaf
        max_leaf_nodes=max_leaf_samples,    # maximum number of leaves
        random_state=random_state,
    )
    bagged_trees = BaggingClassifier(
        base_tree,                  # base algorithm
        n_estimators=n_estimators,  # number of trees
        random_state=random_state,
    )
    bagged_trees.fit(features, labels)

    # Base learner 2: linear SVM.
    linear_svm = LinearSVC(random_state=random_state, C=C)
    linear_svm.fit(features, labels)

    # Base learner 3: random forest with the same tree settings.
    random_forest = RandomForestClassifier(
        n_estimators=n_estimators,
        criterion=criterion,
        min_samples_leaf=min_samples_leaf,
        max_leaf_nodes=max_leaf_samples,
        random_state=random_state,
    )
    random_forest.fit(features, labels)

    meta_learner = LogisticRegression(solver=solver, random_state=random_state)
    stack = StackingClassifier(
        estimators=[
            ('SVM', linear_svm),
            ('Bagging DT', bagged_trees),
            ('DecisionForest', random_forest),
        ],
        final_estimator=meta_learner,
        cv=cv,
    )
    stack.fit(features, labels)
    train_accuracy = stack.score(features, labels)

    # One probability per requested image, for the requested class only.
    probas = [
        (img, stack.predict_proba(catsvsdogs.test[img].reshape(1, -1))[0][clazz])
        for img in images
    ]
    return {'accuracy': train_accuracy, 'probas': probas}
def stacking_model(
        self,
        estimator=None,
        final_estimator=sklearn.linear_model.LogisticRegression(),
        cv=2,
        scoring=['roc_auc_ovr'],
        sort=None,
        estimator_params={},
        fit_params={},
        verbose=True,
        n_jobs=-1):
    """Cross-validate a stacking classifier built from the chosen models.

    The base estimators come from ``self.choose_model``. The stack is
    evaluated with ``cross_validate``; the fold estimator with the best
    ``test_<sort>`` score is stored in
    ``self.estimator['classification-stackingclassifer']`` and every
    per-fold score is recorded in ``self.metrics[fold]['stacking_model']``.

    NOTE(review): the list/dict defaults are shared between calls. They are
    only read here, but callees must not mutate them.

    Args:
        estimator: Model selection forwarded to ``self.choose_model``.
        final_estimator: Meta-learner of the stack.
        cv: Cross-validation setting used both inside the stack and for
            the outer ``cross_validate`` call.
        scoring: Scorer names passed to ``cross_validate``.
        sort: Scorer used to pick the best fold; defaults to ``scoring[0]``.
        estimator_params: Forwarded to ``self.choose_model``.
        fit_params: Forwarded to ``self.choose_model`` and ``cross_validate``.
        verbose: Verbosity for the stack and ``cross_validate``.
        n_jobs: Parallelism for the stack and ``cross_validate``.

    Returns:
        ``self``, to allow chaining.
    """
    if sort is None:
        sort = scoring[0]

    estimator_model = self.choose_model(estimator=estimator,
                                        estimator_params=estimator_params,
                                        fit_params=fit_params)

    # Unwrap objects exposing an ``estimator`` attribute; use anything else
    # as-is. getattr only absorbs the missing-attribute case, unlike the
    # previous bare ``except:`` which also hid unrelated failures.
    model_stacking = [
        (name, getattr(model, 'estimator', model))
        for name, model in estimator_model.items()
    ]

    LOGGER.info('TRY STACKING MODEL')
    stacking = StackingClassifier(estimators=model_stacking,
                                  final_estimator=final_estimator,
                                  cv=cv,
                                  n_jobs=n_jobs,
                                  verbose=verbose)
    scores = sklearn.model_selection.cross_validate(
        estimator=stacking,
        X=self.X,
        y=self.y,
        scoring=scoring,
        cv=cv,
        n_jobs=n_jobs,
        verbose=verbose,
        fit_params=fit_params,
        return_train_score=True,
        return_estimator=True,
        error_score=-1)

    best_fold = np.argmax(scores['test_' + sort])
    self.estimator['classification-stackingclassifer'] = (
        scores['estimator'][best_fold])
    scores.pop('estimator')

    # 'classification-stacking_model' without its task prefix.
    name_model = 'stacking_model'
    for key, values in scores.items():
        for fold, value in enumerate(values):
            fold_metrics = self.metrics.setdefault(fold, {})
            fold_metrics.setdefault(name_model, {})[key] = value
    return self
def get_stacked_model():
    """Return an unfitted six-model stack with a logistic-regression head.

    The base estimators are the module-level objects referenced below; the
    stack uses 10-fold cross-validation.
    """
    names = (
        'DecisionTree',
        'K_NearestNeighbors',
        'RandomForest',
        'SVM_SVC',
        'SVM_NuSVC',
        'MLPClassifier',
    )
    models = (
        decision_tree,
        k_nearest_neighbors,
        random_forest,
        svm_svc,
        svm_nu,
        mlpc,
    )
    base_learners = list(zip(names, models))
    meta_learner = LogisticRegression(random_state=RANDOM_STATE)
    return StackingClassifier(estimators=base_learners,
                              final_estimator=meta_learner,
                              cv=10)
def get_stacking():
    """Return an unfitted stack of RF, CART, SVM and XGBoost classifiers.

    The meta-learner is a logistic regression with ``max_iter=3000`` and the
    stack uses 5-fold cross-validation. An MLP base learner is currently
    left out.
    """
    base_learners = [
        ('randomforest', RandomForestClassifier()),
        ('cart', DecisionTreeClassifier()),
        ('svm', SVC()),
        ('xgb', XGBClassifier()),
    ]
    # Meta learner that combines the base predictions.
    meta_learner = LogisticRegression(max_iter=3000)
    return StackingClassifier(estimators=base_learners,
                              final_estimator=meta_learner,
                              cv=5)
def test_stacking_classifier_sample_weight_fit_param():
    """Check that ``sample_weight`` is passed to every invocation of fit."""

    def make_checker():
        # Each checker expects to receive ``sample_weight`` in ``fit``.
        return CheckingClassifier(expected_fit_params=['sample_weight'])

    stacker = StackingClassifier(
        estimators=[('lr', make_checker())],
        final_estimator=make_checker(),
    )
    uniform_weights = np.ones(X_iris.shape[0])
    stacker.fit(X_iris, y_iris, sample_weight=uniform_weights)
def make_model(X_train, y_train):
    '''
    Fit a stacking model on the given data and return it.

    Base learners: random forest, liblinear logistic regression and
    gradient boosting; meta-learner: logistic regression; 5-fold CV.
    '''
    base_learners = [
        ('rf', RandomForestClassifier()),
        ('log', LogisticRegression(solver='liblinear')),
        ('grad', GradientBoostingClassifier()),
    ]
    fitted_stack = StackingClassifier(
        estimators=base_learners,
        final_estimator=LogisticRegression(),
        cv=5,
    ).fit(X_train, y_train)
    return fitted_stack
def get_mod_stacking(self, given_modalities, clf):
    """Build a stack with one ``select -> pred`` pipeline per modality.

    For each modality, the features listed for it in ``self.modalities_df``
    are handed to ``filter_cols`` (presumably a column-selecting
    transformer -- confirm) and followed by ``clf``.

    Args:
        given_modalities: Iterable of modality names.
        clf: Estimator used as the ``pred`` step of every pipeline.

    Returns:
        An unfitted ``StackingClassifier`` with a logistic-regression
        meta-learner and ``n_jobs=-1``.
    """

    def modality_pipeline(modality):
        # Features whose ``modality`` column matches this modality.
        in_modality = self.modalities_df.modality.isin([modality])
        features = self.modalities_df.loc[in_modality, 'feature']
        return Pipeline([('select', filter_cols(features)), ('pred', clf)])

    per_modality = [(m, modality_pipeline(m)) for m in given_modalities]
    return StackingClassifier(estimators=per_modality,
                              final_estimator=LogisticRegression(),
                              n_jobs=-1)
def train(x, y):
    """Fit a LightGBM / random-forest / CatBoost stack and return it.

    The meta-learner is a class-balanced ``LogisticRegressionCV`` scored
    with macro F1. Balanced class weights computed from ``y`` are handed to
    CatBoost explicitly.

    Args:
        x: Training features.
        y: Training labels.

    Returns:
        The fitted ``StackingClassifier``.
    """
    # ``classes`` and ``y`` are passed by keyword: recent scikit-learn
    # releases reject them positionally, while older releases accept both.
    cw = list(
        class_weight.compute_class_weight('balanced',
                                          classes=np.unique(y),
                                          y=y))

    lr = LogisticRegressionCV(cv=5,
                              class_weight='balanced',
                              scoring='f1_macro',
                              verbose=10,
                              random_state=0)

    clf1 = lgb.LGBMClassifier(
        objective='multi:softmax',
        n_estimators=900,
        max_depth=11,  # previously 8
        num_leaves=90,
        learning_rate=0.17,
        feature_fraction=0.7,
        min_child_samples=5,
        min_child_weight=0.001,
        bagging_fraction=1,
        bagging_freq=0,
        reg_alpha=0.015,
        reg_lambda=0,
        cat_smooth=0,
        # GPU options (device, gpu_platform_id, gpu_device_id) are not set.
        class_weight='balanced',
        random_state=0,
        n_jobs=-1)

    # LightGBM's random forest is said to be faster than scikit-learn's, but
    # the score dropped for no obvious reason, so this was reverted.
    clf2 = RandomForestClassifier(n_estimators=1000,
                                  random_state=0,
                                  n_jobs=-1,
                                  class_weight='balanced')

    clf3 = CatBoostClassifier(
        iterations=2000,
        verbose=400,
        early_stopping_rounds=200,
        # GPU options (task_type, border_count) are not set.
        loss_function='MultiClass',
        class_weights=cw,
        depth=8,
        l2_leaf_reg=0.06,
        random_strength=0.01,
        random_state=0)

    clf = StackingClassifier(estimators=[('lgb', clf1), ('rf', clf2),
                                         ('catboost', clf3)],
                             cv=5,
                             final_estimator=lr,
                             stack_method='predict_proba',
                             verbose=10,
                             n_jobs=1)
    clf.fit(x, y)
    return clf
def get_stacking():
    """Return an unfitted stack of LR, KNN, random forest and naive Bayes.

    A logistic regression acts as the meta-model; the stack uses 5-fold
    cross-validation.
    """
    # Base models.
    base_learners = [
        ('lr', LogisticRegression()),      # logistic regression
        ('knn', KNeighborsClassifier()),   # k-nearest neighbours
        ('rf', RandomForestClassifier()),  # random forest
        ('bayes', GaussianNB()),           # naive Bayes
    ]
    # Logistic regression is used as the meta-model.
    meta_learner = LogisticRegression()
    return StackingClassifier(estimators=base_learners,
                              final_estimator=meta_learner,
                              cv=5)
def model_stack(X_train, y_train, X_test, y_test):
    """Fit an XGBoost + LightGBM stack and score it on the test split.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        ``(accuracy, weighted_f1)`` measured on the test split. Earlier
        versions assigned the two metrics to each other's variables, so the
        tuple actually carried ``(weighted_f1, accuracy)``.
    """
    estimators = [('xgb', XGBClassifier()), ('lgb', lgb.LGBMClassifier())]
    model = StackingClassifier(estimators=estimators,
                               final_estimator=LogisticRegression())
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    stack_accuracy = accuracy_score(y_test, y_pred)
    stack_f1 = f1_score(y_test, y_pred, average='weighted')
    return stack_accuracy, stack_f1
def test_stacking_classifier_drop_estimator():
    """A ``'drop'`` entry must behave as if that estimator were absent."""
    # The data is pre-scaled, rather than scaled in a pipeline, to avoid
    # convergence warnings while keeping the later assertions simple.
    X_train, X_test, y_train, _ = train_test_split(
        scale(X_iris), y_iris, stratify=y_iris, random_state=42
    )
    estimators = [('lr', 'drop'), ('svc', LinearSVC(random_state=0))]
    rf = RandomForestClassifier(n_estimators=10, random_state=42)

    reference = StackingClassifier(
        estimators=[('svc', LinearSVC(random_state=0))],
        final_estimator=rf,
        cv=5,
    )
    with_drop = StackingClassifier(
        estimators=estimators, final_estimator=rf, cv=5
    )
    reference.fit(X_train, y_train)
    with_drop.fit(X_train, y_train)

    # Every public output must match between the two stacks.
    for method in ('predict', 'predict_proba', 'transform'):
        expected = getattr(reference, method)(X_test)
        actual = getattr(with_drop, method)(X_test)
        assert_allclose(expected, actual)
def run_StackingClassifier(params: Dict[str, Any]):
    """Return an unfitted LR / random-forest / SVC stack.

    Args:
        params: Extra keyword arguments for ``StackingClassifier``. It must
            not contain ``estimators``, which is supplied here.
    """
    from sklearn.ensemble import RandomForestClassifier, StackingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC

    base_learners = [
        ('lr', LogisticRegression(multi_class='multinomial')),
        ('rf', RandomForestClassifier(n_estimators=50)),
        ("svc", SVC()),
    ]
    return StackingClassifier(**params, estimators=base_learners)
def test_stacking():
    """IREP and RIPPER must work as base learners of a StackingClassifier.

    The data frame is one-hot encoded and split; each rule learner is then
    stacked with a second estimator, and the stack's test score is required
    to differ from that of a lone decision tree.
    """
    irep = IREP(random_state=42)
    rip = RIPPER(random_state=42)

    df = DF.copy()
    numeric_cols = df.select_dtypes("number").columns
    categorical_cols = [
        col for col in df.columns
        if col not in numeric_cols and col != CLASS_FEAT
    ]
    # One-hot encode categoricals, then re-attach numerics and the target.
    dum_df = pd.get_dummies(df[categorical_cols])
    for col in numeric_cols:
        dum_df[col] = df[col]
    dum_df[CLASS_FEAT] = df[CLASS_FEAT]

    sktrain, sktest = df_shuffled_split(dum_df, random_state=42)
    # Labels are taken from the same split as the features. The previous
    # code read them from unrelated ``train``/``test`` names.
    sktrain_x, sktrain_y = sktrain.drop(CLASS_FEAT, axis=1), sktrain[CLASS_FEAT]
    sktest_x, sktest_y = sktest.drop(CLASS_FEAT, axis=1), sktest[CLASS_FEAT]

    lone_tree = DecisionTreeClassifier(random_state=42)
    lone_tree.fit(sktrain_x, sktrain_y)
    lone_tree_score = lone_tree.score(sktest_x, sktest_y)

    # Despite its name, the partner estimator for IREP is an SVC.
    irep_tree = SVC(random_state=42)
    irep_stack_estimators = [("irep", irep), ("tree", irep_tree)]
    irep_stack = StackingClassifier(estimators=irep_stack_estimators,
                                    final_estimator=LogisticRegression())
    irep_stack.fit(sktrain_x, sktrain_y)
    irep_stack_score = irep_stack.score(sktest_x, sktest_y)
    assert irep_stack_score != lone_tree_score

    rip_tree = DecisionTreeClassifier(random_state=42)
    rip_stack_estimators = [("rip", rip), ("tree", rip_tree)]
    rip_stack = StackingClassifier(estimators=rip_stack_estimators,
                                   final_estimator=LogisticRegression())
    rip_stack.fit(sktrain_x, sktrain_y)
    rip_stack_score = rip_stack.score(sktest_x, sktest_y)
    assert rip_stack_score != lone_tree_score
def test_stacking_classifier_drop_binary_prob():
    """For a binary problem, one probability column per estimator is kept."""
    # Restrict the data to the first 100 samples (the first two classes).
    binary_X = scale(X_iris[:100])
    binary_y = y_iris[:100]

    clf = StackingClassifier(estimators=[
        ('lr', LogisticRegression()),
        ('rf', RandomForestClassifier()),
    ])
    clf.fit(binary_X, binary_y)

    # Two estimators, one retained column each.
    meta_features = clf.transform(binary_X)
    assert meta_features.shape[1] == 2
def get_stacking():
    """Return an unfitted five-model stack with an XGBoost meta-learner.

    Base models: logistic regression, KNN, decision tree, SVM and naive
    Bayes; the stack uses 5-fold cross-validation.
    """
    # Base models.
    base_learners = [
        ('lr', LogisticRegression()),
        ('knn', KNeighborsClassifier()),
        ('cart', DecisionTreeClassifier()),
        ('svm', SVC()),
        ('bayes', GaussianNB()),
    ]
    # Meta learner, with XGBoost's own logging silenced.
    meta_learner = XGBClassifier(verbosity=0)
    return StackingClassifier(estimators=base_learners,
                              final_estimator=meta_learner,
                              cv=5)
def perform_stacking(self):
    """Fit a stack of the registered classifiers and report its accuracy.

    The stack is trained on the stored training split, scored on the stored
    test split, its accuracy is printed and ``self.plot_conf_mat`` is called
    with the fitted model.

    Returns:
        The fitted ``StackingClassifier``.
    """
    eclfs = list(self.__classifiers.items())
    clf = StackingClassifier(estimators=eclfs,
                             final_estimator=LogisticRegression(),
                             cv=5,
                             verbose=1,
                             n_jobs=-1)
    clf.fit(self.__train_x, self.__train_y)

    # ``score`` yields one scalar, so its mean is the value itself and its
    # spread is zero. Calling .mean()/.std() on it only works when that
    # scalar is a NumPy type and fails on a plain Python float.
    score = float(clf.score(self.__test_x, self.__test_y))
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" %
          (score, 0.0, 'StackingClassifier'))

    self.plot_conf_mat(clf, 'StackingClassifier')
    return clf
def __init__(self,
             in_model_code,
             db,
             y_col="party",
             label_col="county_fips",
             where_clauses=None,
             data_view="master_data",
             year_col="year",
             year_test=2020):
    """Load the data for a model code and load or train its model.

    The data view is queried with the WHERE clauses stored for the model
    code, the configured drop-columns are removed, and the frame is split
    into ``self.x`` / ``self.y``. If a ``Model`` row already exists, its
    pickled estimator is loaded; otherwise a new stacking classifier is
    created, registered, trained and saved.

    ``label_col``, ``where_clauses``, ``year_col`` and ``year_test`` are
    accepted but not used in this method.

    Args:
        in_model_code: Model-code row identifying the configuration.
        db: Database session used for all queries.
        y_col: Name of the target column.
        data_view: Table or view to read the data from.
    """
    self.db = db
    self.mc = in_model_code
    self.drop_cols = db.query(ModelDropCol).filter_by(
        model_code_id=self.mc.id).all()

    clause_rows = self.db.query(ModelWhereClause).filter_by(
        model_code=self.mc).all()
    self.where = (" where " + " and ".join(row.sql for row in clause_rows)
                  if clause_rows else "")

    self.engine_string = database_string
    # NOTE(review): the SQL text is assembled by interpolation; the view
    # name and stored clauses must come from trusted configuration.
    self.query = f"select * from {data_view}{self.where}"
    raw_frame = pandas.read_sql_query(self.query, database_string)
    self.df = raw_frame.drop(columns=[dc.column for dc in self.drop_cols])

    self.y = self.df[y_col].to_numpy()
    self.x = self.df.drop(columns=y_col).to_numpy()

    self.model_obj = self.db.query(Model).filter_by(
        model_code=self.mc).first()

    if self.model_obj:
        # NOTE(review): unpickling runs arbitrary code if the stored blob
        # is not trusted.
        self.model = pickle.loads(self.model_obj.model_object)
        self.accuracy = self.model_obj.accuracy
        return

    # No stored model yet: build, register, train and persist a new one.
    linear_svc = make_pipeline(
        StandardScaler(),
        LinearSVC(random_state=42, dual=False, max_iter=1000))
    classifiers = [
        ("rf", RandomForestClassifier(n_estimators=10, random_state=42)),
        ("svr", linear_svc),
        ("knn", KNeighborsClassifier(n_neighbors=3)),
        ("nb", GaussianNB()),
    ]
    self.model = StackingClassifier(estimators=classifiers,
                                    final_estimator=LogisticRegression())
    self.accuracy = None
    self.model_obj = Model(model_code=self.mc, accuracy=self.accuracy)
    self.db.add(self.model_obj)
    self.train()
    self.save()
def _initClassifier(self):
    """
    Build the classifier named by ``self.classifierClass``.

    Parameters
    ----------

    Returns
    -------
    An unfitted classifier instance matching ``self.classifierClass``.

    Raises
    ------
    ValueError
        If ``self.classifierClass`` is not a recognised name.
    """
    kind = self.classifierClass

    if kind in ("random_forest", "random forest", "ensemble tree"):
        return RandomForestClassifier(n_estimators=200,
                                      oob_score=True,
                                      min_samples_split=2,
                                      n_jobs=self.n_jobs,
                                      random_state=42)
    if kind == "SVM":
        return SVC(gamma=2, C=1, probability=True)
    if kind == "GradientBoost":
        return GradientBoostingClassifier(n_estimators=200, random_state=42)
    if kind == "GaussianNB":
        return GaussianNB()
    if kind == "StackedClassifiers":
        base_learners = [
            ("rf", RandomForestClassifier(n_estimators=100, random_state=42)),
            ("NB", GaussianNB()),
            ("SVM", SVC(gamma=2, C=1, probability=True)),
        ]
        return StackingClassifier(base_learners)

    raise ValueError("Argument `classifierClass` is not known.")
def _get_stacker(self, mode, estimators, ensemble_config):
    """Return the stacking ensemble matching the configured train mode.

    Args:
        mode: Not used by this method.
        estimators: ``(name, estimator)`` pairs for the base layer.
        ensemble_config: Mapping whose ``'model'`` entry is passed to
            ``self.get_base_estimator`` to obtain the final estimator.

    Returns:
        A ``StackingClassifier`` when ``configs['fit']['train_mode']`` is
        ``'clf'`` or a ``StackingRegressor`` when it is ``'reg'``, both
        with ``n_jobs=-1``.

    Raises:
        ValueError: If the train mode is neither ``'clf'`` nor ``'reg'``.
            Previously this surfaced as an ``UnboundLocalError``.
    """
    train_mode = self.configs['fit']['train_mode']
    if train_mode == 'clf':
        stacker_cls = StackingClassifier
    elif train_mode == 'reg':
        stacker_cls = StackingRegressor
    else:
        raise ValueError(
            "Unsupported train_mode %r: expected 'clf' or 'reg'." %
            (train_mode,))

    return stacker_cls(
        estimators=estimators,
        final_estimator=self.get_base_estimator(ensemble_config['model']),
        n_jobs=-1)
def get_nlp_model():
    """Return an unfitted SGD + logistic-regression stack.

    The meta-learner is a separate logistic regression configured exactly
    like the base one.
    """

    def balanced_logreg():
        # Same settings for the base learner and the meta-learner.
        return LogisticRegression(max_iter=10000,
                                  C=0.1,
                                  class_weight='balanced')

    sgd = SGDClassifier(loss="modified_huber",
                        alpha=0.002,
                        penalty="l2",
                        max_iter=10000)
    base_learners = [
        ('modified_huber_SGD', sgd),
        ('LogisticRegression', balanced_logreg()),
    ]
    return StackingClassifier(estimators=base_learners,
                              final_estimator=balanced_logreg())
def Stacking(self):
    """Fit a stack of the ticked classifiers and display its accuracy.

    The base layer contains one estimator per ticked checkbox (random
    forest, KNN, SVM). Nothing happens unless at least two boxes are
    ticked. The test accuracy is written to ``self.accuracyEnsembleLBL``.

    The SVM + KNN selection previously stacked a random forest with the
    SVM (a copy-paste slip); it now stacks exactly the ticked models.

    Any exception is printed rather than raised, matching the original
    best-effort behaviour.
    """
    try:
        # (checkbox, estimator name, factory); order fixes the base layer.
        candidates = [
            (self.rfcStackingcheckBox, 'rf',
             lambda: RandomForestClassifier(n_estimators=10,
                                            random_state=42)),
            (self.knnStackingcheckBox, 'knn',
             lambda: KNeighborsClassifier(n_neighbors=5)),
            (self.svmStackingcheckBox, 'svm',
             lambda: SVC()),
        ]
        estimators = [(name, build())
                      for box, name, build in candidates
                      if box.isChecked()]
        if len(estimators) < 2:
            return

        clf = StackingClassifier(estimators=estimators,
                                 final_estimator=LogisticRegression())
        stackingAccuracy = clf.fit(self.X_train, self.y_train).score(
            self.X_test, self.y_test)
        self.accuracyEnsembleLBL.setText(str(stackingAccuracy))
    except Exception as a:
        print(a)
def train_model(model, X_train, y_train):
    '''
    Build the classifier selected by ``model`` and fit it on the data.

    Input:
        model: one of 'XG', 'ADA', 'DT', 'SVC', 'KN', 'BG', 'ET', 'RF',
            'ST', 'NSVC', 'LSVC', 'ST2', 'MLP', 'GB'
        X_train, y_train: training features and labels
    Output:
        the fitted classifier
    Raises:
        ValueError: if ``model`` is not a known code (previously this
            surfaced as an UnboundLocalError)
    '''

    def build_stack():
        # scikit-learn stack with its default final estimator.
        estimators = [
            ('MLP', MLPClassifier()),
            ('RF', RandomForestClassifier()),
            ('XG', XGBClassifier()),
            ('ADA', AdaBoostClassifier()),
        ]
        return StackingClassifier(estimators=estimators)

    def build_cv_stack():
        # StackingCVClassifier variant with an MLP meta-classifier.
        estimators = [
            XGBClassifier(),
            AdaBoostClassifier(),
            RandomForestClassifier(),
            MLPClassifier(),
        ]
        return StackingCVClassifier(classifiers=estimators,
                                    meta_classifier=MLPClassifier())

    # Factories, so that only the requested model is constructed.
    builders = {
        'XG': lambda: XGBClassifier(n_estimators=400, max_depth=6,
                                    learning_rate=0.05, subsample=0.9,
                                    colsample_bytree=0.65,
                                    min_child_weight=11),
        'ADA': lambda: AdaBoostClassifier(),
        'DT': lambda: DecisionTreeClassifier(),
        'SVC': lambda: SVC(),
        'KN': lambda: KNeighborsClassifier(n_neighbors=5, weights="uniform",
                                           algorithm="auto", leaf_size=30,
                                           p=2, metric="minkowski",
                                           metric_params=None),
        # NOTE(review): newer scikit-learn renamed ``base_estimator`` to
        # ``estimator``; confirm against the pinned version.
        'BG': lambda: BaggingClassifier(
            base_estimator=RandomForestClassifier()),
        'ET': lambda: ExtraTreesClassifier(),
        'RF': lambda: RandomForestClassifier(),
        'ST': build_stack,
        'NSVC': lambda: NuSVC(),
        'LSVC': lambda: LinearSVC(),
        'ST2': build_cv_stack,
        'MLP': lambda: MLPClassifier(learning_rate='adaptive',
                                     max_iter=1000),
        'GB': lambda: GradientBoostingClassifier(),
    }

    try:
        build = builders[model]
    except KeyError:
        raise ValueError(
            "Unknown model code %r; expected one of: %s" %
            (model, ', '.join(sorted(builders)))) from None

    clsfr = build()
    clsfr.fit(X_train, y_train)
    return clsfr
def test_stacking_classsifer(final_estimator):
    """The HTML repr of a stack must mention its final estimator."""
    clf = StackingClassifier(
        estimators=[
            ('mlp', MLPClassifier(alpha=0.001)),
            ('tree', DecisionTreeClassifier()),
        ],
        final_estimator=final_estimator,
    )

    html_output = estimator_html_repr(clf)
    assert str(clf) in html_output

    # If the default final estimator ever stops being LogisticRegression,
    # the expected text below has to be updated.
    if final_estimator is None:
        expected_text = "LogisticRegression("
    else:
        expected_text = final_estimator.__class__.__name__
    assert expected_text in html_output
def get_stacking():
    """Return an unfitted stack with a random-forest meta-learner.

    Base models: logistic regression, KNN, two class-weighted random
    forests and a decision tree; the stack uses 5-fold cross-validation.
    """
    # Base models.
    base_learners = [
        # Analysis showed this model underperformed.
        ('lr', LogisticRegression()),
        # Analysis showed this model underperformed.
        ('knn', KNeighborsClassifier()),
        ('rf_1', RandomForestClassifier(class_weight='balanced')),
        ('rf_2', RandomForestClassifier(class_weight='balanced_subsample')),
        ('cart', DecisionTreeClassifier()),
    ]
    # Meta learner.
    meta_learner = RandomForestClassifier()
    return StackingClassifier(estimators=base_learners,
                              final_estimator=meta_learner,
                              cv=5)
def get_stacked_models():
    """Return an unfitted five-model stack with a logistic-regression head.

    Base models: SVM, naive Bayes, KNN (6 neighbours), a seeded decision
    tree and a 500-tree random forest; 5-fold cross-validation.
    """
    base_learners = [
        ('SVM', SVC()),
        ('NB', GaussianNB()),
        ('KNN', KNeighborsClassifier(n_neighbors=6)),
        ('DecTree', DecisionTreeClassifier(random_state=1)),
        ('RF', RandomForestClassifier(n_estimators=500)),
    ]
    # Meta classifier.
    meta_learner = LogisticRegression()
    return StackingClassifier(estimators=base_learners,
                              final_estimator=meta_learner,
                              cv=5)
def hyperparam_tuned_ensemble_classifier():
    """Stacking ensemble of the tuned individual classifiers.

    Returns
    -------
    sklearn StackingClassifier object
        Unfitted ensemble combining the tuned logistic regression, random
        forest and support vector classifiers, with the default final
        estimator.
    """
    # (name, factory) pairs for the members of the ensemble.
    members = (
        ('Log Reg', hyperparam_tuned_log_regression),
        ('RForest', hyperparam_tuned_random_forest),
        ('SVM', hyperparam_tuned_support_vector),
    )
    estimators = [(label, build()) for label, build in members]
    return StackingClassifier(estimators=estimators)
def test_stacking_classifier_sparse_passthrough(fmt):
    """Passthrough on a sparse X keeps the features, sparsity and format."""
    sparse_X = sparse.coo_matrix(scale(X_iris)).asformat(fmt)
    X_train, X_test, y_train, _ = train_test_split(
        sparse_X, y_iris, random_state=42
    )

    clf = StackingClassifier(
        estimators=[('lr', LogisticRegression()), ('svc', LinearSVC())],
        final_estimator=RandomForestClassifier(n_estimators=10,
                                               random_state=42),
        cv=5,
        passthrough=True,
    )
    clf.fit(X_train, y_train)
    X_trans = clf.transform(X_test)

    # The last four columns are the original features, passed through.
    assert_allclose_dense_sparse(X_test, X_trans[:, -4:])
    assert sparse.issparse(X_trans)
    assert X_test.format == X_trans.format
def rank_stacking_classifer(X, Y):
    """Fit an RF / gradient-boosting / AdaBoost stack on ``(X, Y)``.

    Returns:
        The fitted ``StackingClassifier`` with a logistic-regression
        meta-learner.
    """
    base_learners = [
        ('rf', RandomForestClassifier(n_jobs=20)),
        ('gbdt', GradientBoostingClassifier()),
        # The label says "Regressor" but the estimator is a classifier; the
        # name is kept because it is part of the stack's parameter names.
        ('AdaBoostRegressor', AdaBoostClassifier()),
    ]
    stack = StackingClassifier(estimators=base_learners,
                               final_estimator=LogisticRegression())
    stack.fit(X, Y)
    return stack