Example #1
0
def test_equivalence_blend():
    """[SequentialEnsemble] Test ensemble equivalence with BlendEnsemble."""
    blend = BlendEnsemble()
    sequential = SequentialEnsemble()

    # Same learner (ECM) registered with each ensemble type.
    blend.add(ECM, dtype=np.float64)
    sequential.add('blend', ECM, dtype=np.float64)

    blend_pred = blend.fit(X, y).predict(X)
    seq_pred = sequential.fit(X, y).predict(X)

    # A sequential ensemble with a single 'blend' layer must reproduce
    # the dedicated BlendEnsemble exactly.
    np.testing.assert_array_equal(seq_pred, blend_pred)
Example #2
0
def test_run():
    """[Blend] 'fit' and 'predict' runs correctly."""
    # Reference prediction: fit the meta learner directly on the
    # precomputed base-layer output F against the held-out targets.
    meta_learner = OLS()
    meta_learner.fit(F, y[10:])
    expected = meta_learner.predict(P)

    ensemble = BlendEnsemble()
    ensemble.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
    ensemble.add(OLS(), meta=True, dtype=np.float64)

    ensemble.fit(X, y)

    # The full ensemble's prediction must match the manual two-stage fit.
    np.testing.assert_array_equal(ensemble.predict(X), expected)
Example #3
0
def build_ensemble(proba, **kwargs):
    """Return an ensemble."""
    base_learners = [
        RandomForestClassifier(random_state=seed),
        SVC(probability=proba),
    ]

    ens = BlendEnsemble(**kwargs)
    # Specify 'proba' here so the base layer emits class probabilities
    # (or labels) consistently with the SVC's own 'probability' flag.
    ens.add(base_learners, proba=proba)
    ens.add_meta(LogisticRegression())

    return ens
def build_ensemble(proba, **kwargs):
    """Return an ensemble."""
    base_learners = [
        DecisionTreeClassifier(),
        # Hyper-parameters presumably come from a prior tuning run.
        LGBMClassifier(
            learning_rate=0.24188855846184307,
            max_depth=19,
            n_estimators=582,
        ),
    ]

    ens = BlendEnsemble(**kwargs)
    ens.add(base_learners, proba=proba)
    ens.add_meta(LogisticRegression())
    return ens
def add_blend(name, models, X_train, Y_train, X_test, Y_test):
    """Fit a BlendEnsemble on the training split and score it on the test split.

    Parameters
    ----------
    name : str
        Label reported under the "Ensemble" key of the result.
    models : list
        Base estimators passed to ``BlendEnsemble.add``.
    X_train, Y_train : array-like
        Training features and targets.
    X_test, Y_test : array-like
        Held-out features and targets used for scoring.

    Returns
    -------
    dict
        Ensemble name, meta-classifier label, test accuracy, and the
        fit + predict runtime in seconds.
    """
    ensemble = BlendEnsemble(scorer=accuracy_score, random_state=seed)
    ensemble.add(models)
    # Attach the final meta estimator
    ensemble.add_meta(SVC())

    # Time only the fit/predict cycle; scoring is excluded so "Runtime"
    # reflects the ensemble's own cost.
    start = time.time()
    ensemble.fit(X_train, Y_train)
    preds = ensemble.predict(X_test)
    time_ = time.time() - start

    # sklearn convention is accuracy_score(y_true, y_pred); accuracy is
    # symmetric, so this yields the same value as the swapped order.
    acc_score = accuracy_score(Y_test, preds)

    return {
        "Ensemble": name,
        "Meta_Classifier": "SVC",
        "Accuracy_Score": acc_score,
        "Runtime": time_,
    }
Example #6
0
    else:
        # Unknown data option — fail loudly rather than silently defaulting.
        raise ValueError("Not valid data option.")

    # Load the CSV chosen above; the last column holds the target and the
    # remaining columns form the feature matrix.
    X = np.loadtxt(out, delimiter=",")
    y = X[:, -1]
    X = X[:, :-1]
    return X, y


# Load the train/test splits from disk via the helper above.
xtrain, ytrain = get_data('train')
xtest, ytest = get_data('test')

# Three mlens ensemble variants to be compared side by side.
estimators = {
    'subsemble': Subsemble(),
    'super_learner': SuperLearner(),
    'blend_ensemble': BlendEnsemble()
}

# NOTE(review): 'base_learners' is defined here but the loop below builds a
# fresh list instead — confirm whether it should be passed to clf.add().
base_learners = [
    RandomForestClassifier(n_estimators=500,
                           max_depth=10,
                           min_samples_split=50,
                           max_features=0.6),
    LogisticRegression(C=1e5),
    GradientBoostingClassifier()
]

# Give every ensemble the same base layer and the same SVC meta learner.
for clf in estimators.values():
    clf.add([RandomForestClassifier(), LogisticRegression(), MLPClassifier()])
    clf.add_meta(SVC())
Example #7
0
# Treat the task as classification only when every target value is 0/1.
# NOTE(review): comparing np.unique(...) against [0, 1] assumes exactly two
# unique values; with a different count the broadcast comparison may warn or
# raise depending on the NumPy version — confirm the inputs guarantee this.
classifier = np.all(np.unique(Y.to_numpy()) == [0, 1])
outputs = Y.shape[1]

# separate the data into training and testing
if TIME_SERIES:
    # Time-ordered data: hold out the final 20% of rows as the test set.
    test_idx = X.index.values[-int(X.shape[0] / 5):]
else:
    # Otherwise sample a random 20% (fixed seed for reproducibility).
    np.random.seed(1)
    test_idx = np.random.choice(a=X.index.values,
                                size=int(X.shape[0] / 5),
                                replace=False)
train_idx = np.array(list(set(X.index.values) - set(test_idx)))

# set up the model
# NOTE(review): in mlens, each successive add() call creates a NEW stacked
# layer; three calls build a 3-layer stack of single estimators. Confirm this
# is intended rather than add([est1, est2, est3]) for one parallel layer.
if classifier:
    model = BlendEnsemble(test_size=0.5, random_state=42, n_jobs=1)
    model.add(KNeighborsClassifier())
    model.add(RandomForestClassifier())
    model.add(GaussianNB())
    model.add_meta(LogisticRegression(penalty="l1", solver="saga"))
else:
    # Regression counterpart with a Lasso meta learner.
    model = BlendEnsemble(test_size=0.5, random_state=42, n_jobs=1)
    model.add(KNeighborsRegressor())
    model.add(RandomForestRegressor())
    model.add(BayesianRidge())
    model.add_meta(Lasso())

# train and predict
train_predict = pd.DataFrame()
test_predict = pd.DataFrame()
for j in Y.columns: