def test_equivalence_blend():
    """[SequentialEnsemble] Test ensemble equivalence with BlendEnsemble."""
    # Build the same single-layer blend model through both APIs.
    reference = BlendEnsemble()
    sequential = SequentialEnsemble()
    reference.add(ECM, dtype=np.float64)
    sequential.add('blend', ECM, dtype=np.float64)
    # Predictions from the two ensembles must match exactly.
    expected = reference.fit(X, y).predict(X)
    actual = sequential.fit(X, y).predict(X)
    np.testing.assert_array_equal(actual, expected)
def test_run():
    """[Blend] 'fit' and 'predict' runs correctly."""
    # Reference: fit the meta learner directly on the precomputed
    # base-layer outputs (F for training, P for prediction).
    meta = OLS()
    meta.fit(F, y[10:])
    expected = meta.predict(P)
    # Same pipeline expressed through the ensemble API.
    ensemble = BlendEnsemble()
    ensemble.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
    ensemble.add(OLS(), meta=True, dtype=np.float64)
    ensemble.fit(X, y)
    np.testing.assert_array_equal(ensemble.predict(X), expected)
def build_ensemble(proba, **kwargs):
    """Return an ensemble."""
    ensemble = BlendEnsemble(**kwargs)
    base_learners = [
        RandomForestClassifier(random_state=seed),
        SVC(probability=proba),
    ]
    ensemble.add(base_learners, proba=proba)  # Specify 'proba' here
    ensemble.add_meta(LogisticRegression())
    return ensemble
def build_ensemble(proba, **kwargs):
    """Return an ensemble."""
    ensemble = BlendEnsemble(**kwargs)
    base_learners = [
        DecisionTreeClassifier(),
        LGBMClassifier(learning_rate=0.24188855846184307,
                       max_depth=19,
                       n_estimators=582),
    ]
    ensemble.add(base_learners, proba=proba)
    ensemble.add_meta(LogisticRegression())
    return ensemble
def add_blend(name, models, X_train, Y_train, X_test, Y_test):
    """Fit a BlendEnsemble over *models* with an SVC meta learner.

    Parameters
    ----------
    name : str
        Label stored under the "Ensemble" key of the result.
    models : list
        Base estimators passed to ``BlendEnsemble.add``.
    X_train, Y_train : array-like
        Training features and targets.
    X_test, Y_test : array-like
        Held-out features and targets used for scoring.

    Returns
    -------
    dict
        Keys: "Ensemble", "Meta_Classifier", "Accuracy_Score", "Runtime".
        Runtime covers fit + predict + scoring, in seconds.
    """
    # Fix: dropped the unused `acc_score_cv` local and the redundant
    # `None` pre-initializations from the original.
    ensemble = BlendEnsemble(scorer=accuracy_score, random_state=seed)
    ensemble.add(models)
    # Attach the final meta estimator
    ensemble.add_meta(SVC())

    start = time.time()
    ensemble.fit(X_train, Y_train)
    preds = ensemble.predict(X_test)
    # Fix: sklearn's convention is accuracy_score(y_true, y_pred);
    # accuracy is symmetric, so the value is unchanged.
    acc_score = accuracy_score(Y_test, preds)
    time_ = time.time() - start

    return {
        "Ensemble": name,
        "Meta_Classifier": "SVC",
        "Accuracy_Score": acc_score,
        "Runtime": time_,
    }
    else:
        # Unknown data option: fail loudly rather than load nothing.
        raise ValueError("Not valid data option.")
    # Last CSV column is the target; the remaining columns are features.
    X = np.loadtxt(out, delimiter=",")
    y = X[:, -1]
    X = X[:, :-1]
    return X, y


xtrain, ytrain = get_data('train')
xtest, ytest = get_data('test')

# Ensemble variants to compare; each gets the same layer configuration below.
estimators = {
    'subsemble': Subsemble(),
    'super_learner': SuperLearner(),
    'blend_ensemble': BlendEnsemble()
}

# NOTE(review): `base_learners` is defined here but never used below — the
# loop adds a different, freshly-constructed set of learners. Confirm which
# list is the intended base layer.
base_learners = [
    RandomForestClassifier(n_estimators=500,
                           max_depth=10,
                           min_samples_split=50,
                           max_features=0.6),
    LogisticRegression(C=1e5),
    GradientBoostingClassifier()
]

# Give every ensemble the same base layer and SVC meta estimator.
for clf in estimators.values():
    clf.add([RandomForestClassifier(), LogisticRegression(), MLPClassifier()])
    clf.add_meta(SVC())
# Treat the task as classification only when the targets are exactly {0, 1}.
classifier = np.all(np.unique(Y.to_numpy()) == [0, 1])
outputs = Y.shape[1]  # number of target columns

# separate the data into training and testing
if TIME_SERIES:
    # Time-ordered data: hold out the final 20% to avoid leakage.
    test_idx = X.index.values[-int(X.shape[0] / 5):]
else:
    # Otherwise sample a random 20% (seeded for reproducibility).
    np.random.seed(1)
    test_idx = np.random.choice(a=X.index.values,
                                size=int(X.shape[0] / 5),
                                replace=False)
train_idx = np.array(list(set(X.index.values) - set(test_idx)))

# set up the model
# NOTE(review): in mlens, successive `.add(...)` calls create stacked layers
# rather than one layer of parallel learners — confirm that a three-layer
# stack (KNN -> RF -> NB/Bayes) is intended, not a single layer.
if classifier:
    model = BlendEnsemble(test_size=0.5, random_state=42, n_jobs=1)
    model.add(KNeighborsClassifier())
    model.add(RandomForestClassifier())
    model.add(GaussianNB())
    model.add_meta(LogisticRegression(penalty="l1", solver="saga"))
else:
    model = BlendEnsemble(test_size=0.5, random_state=42, n_jobs=1)
    model.add(KNeighborsRegressor())
    model.add(RandomForestRegressor())
    model.add(BayesianRidge())
    model.add_meta(Lasso())

# train and predict
train_predict = pd.DataFrame()
test_predict = pd.DataFrame()
# One independent model fit per target column (loop body continues
# beyond this chunk).
for j in Y.columns: