Code Example #1
import numpy as np
from mlens.ensemble import SuperLearner
from sklearn import linear_model
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.metrics import r2_score


def ensemble_predict(data, data2, data5, during):
    # Regression ensemble: accuracy_score only applies to class labels,
    # so score the base learners with r2_score instead.
    ensemble = SuperLearner(scorer=r2_score, random_state=45, verbose=2)
    ensemble.add([linear_model.LinearRegression()])
    ensemble.add_meta(GaussianProcessRegressor())

    # Train on data2: the target is the forward momentum column for the given horizon
    y = data2['prmom' + during + '_f']
    x = data2.drop(['prmom1d_f', 'prmom1w_f', 'prmom2w_f', 'prmom3w_f', 'uniqcode', 'date'], axis=1)
    x = x.fillna(0)
    ensemble.fit(np.array(x), np.array(y))

    # Predict on data5 and attach the predictions to data
    X = data5.drop(['prmom1d_f', 'prmom1w_f', 'prmom2w_f', 'prmom3w_f', 'uniqcode', 'date', 'pred'], axis=1)
    X = X.fillna(0)
    data['pred_ensemble'] = ensemble.predict(np.array(X))
    return data
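A minimal usage sketch with small synthetic DataFrames shaped like what the function expects (the column names follow the snippet; the data and feature columns f1/f2 are illustrative assumptions):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
frame = pd.DataFrame(rng.normal(size=(60, 4)),
                     columns=['prmom1d_f', 'prmom1w_f', 'prmom2w_f', 'prmom3w_f'])
frame['f1'] = rng.normal(size=60)
frame['f2'] = rng.normal(size=60)
frame['uniqcode'] = 'A'
frame['date'] = pd.date_range('2020-01-01', periods=60)

score = frame.copy()
score['pred'] = 0.0

out = ensemble_predict(score.copy(), frame, score, '1d')
print(out['pred_ensemble'].head())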
Code Example #2
import time

from mlens.ensemble import SuperLearner
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

seed = 2017  # fixed seed for reproducibility


def perform_ensemble_adaboost(X_train, y_train, X_test, y_test):

    # Class labels for the recognition task (kept for reference; not used below)
    all_objects = [
        "Vase", "Teapot", "Bottle", "Spoon", "Plate", "Mug", "Knife", "Fork",
        "Flask", "Bowl"
    ]

    ensemble = SuperLearner(folds=10,
                            random_state=seed,
                            verbose=2,
                            backend="multiprocessing",
                            scorer=accuracy_score)

    # First layer: a single linear SVM base learner (recorded accuracy: 95.50)
    layer_1 = [SVC(kernel='linear', C=8)]
    ensemble.add(layer_1)

    # Meta learner: AdaBoost over shallow decision trees
    ensemble.add_meta(
        AdaBoostClassifier(
            DecisionTreeClassifier(max_depth=8,
                                   min_samples_split=5,
                                   min_samples_leaf=8)))

    ensemble.fit(X_train, y_train)

    start = time.time()
    yhat = ensemble.predict(X_test)
    print("Prediction time: {:.2f} s, test accuracy: {:.2f} %".format(
        time.time() - start, accuracy_score(y_test, yhat) * 100))

    # 10-fold cross-validated accuracy of the full ensemble on the held-out set
    accuracies = cross_val_score(ensemble,
                                 X_test,
                                 y_test,
                                 cv=10,
                                 scoring="accuracy")

    print("Accuracy of Adaboost: {:.2f} %".format(accuracies.mean() * 100))
    print("Standard Deviation of Adaboost: {:.2f} %".format(accuracies.std() * 100))
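A hedged usage sketch on a stand-in dataset (load_digits substitutes for the original object-recognition features, which are not shown here):

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

digits = load_digits()
X_tr, X_te, y_tr, y_te = train_test_split(
    digits.data, digits.target, test_size=0.3, random_state=seed)
perform_ensemble_adaboost(X_tr, y_tr, X_te, y_te)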
Code Example #3
import time

from mlens.ensemble import SuperLearner
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

seed = 2017  # fixed seed for reproducibility


def add_superlearner(name, models, X_train, Y_train, X_test, Y_test):
    # Stack the supplied base models under an SVC meta estimator
    ensemble = SuperLearner(scorer=accuracy_score, random_state=seed)
    ensemble.add(models)
    # Attach the final meta estimator
    ensemble.add_meta(SVC())

    # Time the full fit/predict cycle
    start = time.time()
    ensemble.fit(X_train, Y_train)
    preds = ensemble.predict(X_test)
    acc_score = accuracy_score(Y_test, preds)
    time_ = time.time() - start

    return {
        "Ensemble": name,
        "Meta_Classifier": "SVC",
        "Accuracy_Score": acc_score,
        "Runtime": time_
    }
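A sketch of how the helper might be called, using stand-in data and base models (the names and models are illustrative):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X, y = make_classification(n_samples=400, random_state=seed)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=seed)
result = add_superlearner("logit+knn",
                          [LogisticRegression(), KNeighborsClassifier()],
                          X_tr, y_tr, X_te, y_te)
print(result)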
Code Example #4
# Base layer: a mix of classifiers and regressors whose predictions
# become features for the meta learner
ensemble.add([
    SVC(),
    LassoLarsIC(criterion='bic'),
    ElasticNet(random_state=0),
    BayesianRidge(),
    MLPClassifier(),
    BaggingClassifier(),
    neighbors.KNeighborsClassifier(),
    tree.DecisionTreeClassifier(),
    GradientBoostingClassifier(n_estimators=200)
])

# Attach the final meta estimator
ensemble.add_meta(LogisticRegression())

ensemble.fit(x_train, y_train)
preds = ensemble.predict(x_test)

# Per-learner fit statistics collected by mlens
ensemble_data = pd.DataFrame(ensemble.data)

# sklearn metrics take y_true first, then the predictions
auroc = roc_auc_score(y_test, preds)
acc = accuracy_score(y_test, preds)

p = precision_score(y_test, preds)
r = recall_score(y_test, preds)

fpr, tpr, thresholds = roc_curve(y_test, preds)

fig = plt.figure()
plt.plot(fpr, tpr)
plt.show()

ensemble_data.to_csv(
Code Example #5
fpr, tpr, thresholds = roc_curve(y_test, ans)
print('AUC:', '%.6f' % auc(fpr, tpr))

#-------------------------------------------------------------------------------------------------#
'''ensemble SL1'''
seed = 2018
np.random.seed(seed)
ensemble = SuperLearner(scorer=accuracy_score, random_state=seed, verbose=2)
ensemble.add([
    ExtraTreesClassifier(n_estimators=25, random_state=seed),
    KNeighborsClassifier(n_neighbors=2),
    AdaBoostClassifier(n_estimators=100)
])
ensemble.add_meta(SVC())
ensemble.fit(X_train, y_train)
ans = ensemble.predict(X_test)
FP, FN, TP, TN = conf_matrix(y_test, ans)
print('--------------------Super Learner--------------------')  #test 78.85%
print('Precision:', '%.6f' % precision_score(y_test, ans))
print('Recall:', '%.6f' % recall_score(y_test, ans))
fpr, tpr, thresholds = roc_curve(y_test, ans)
print('AUC:', '%.6f' % auc(fpr, tpr))
'''ensemble SL2'''
#seed = 2018
#np.random.seed(seed)
#ensemble = SuperLearner(scorer=accuracy_score, random_state=seed, verbose=2)
#ensemble.add([ExtraTreesClassifier(n_estimators=30,random_state=seed),AdaBoostClassifier(n_estimators=100)])
#ensemble.add_meta(SVC())
#ensemble.fit(X_train,y_train)
#ans = ensemble.predict(X_test)
#FP,FN,TP,TN = conf_matrix(y_test,ans)
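conf_matrix is a helper defined elsewhere in this project; a minimal sketch consistent with the four-value unpacking above (the FP, FN, TP, TN return order is an assumption inferred from the call site):

from sklearn.metrics import confusion_matrix

def conf_matrix(y_true, y_pred):
    # For binary labels, sklearn returns [[TN, FP], [FN, TP]]
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    return fp, fn, tp, tn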
Code Example #6
File: mlens_multiL.py  Project: jdc5884/hsi_atk
import numpy as np
from mlens.ensemble import SuperLearner
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

seed = 2017
np.random.seed(seed)

# Load and shuffle the iris data
data = load_iris()
idx = np.random.permutation(150)
X = data.data[idx]
y = data.target[idx]

# --- Multi-layer ensembles ---

ensemble = SuperLearner(scorer=accuracy_score, random_state=seed)

# Build the first layer
ensemble.add([RandomForestClassifier(random_state=seed), LogisticRegression()])

# Build the second layer
ensemble.add([LogisticRegression(), SVC()])

# Attach the final meta estimator
ensemble.add_meta(SVC())

# Train on the first half of the data, predict on the second half
ensemble.fit(X[:75], y[:75])
preds = ensemble.predict(X[75:])
print("Fit data:\n%r" % ensemble.data)
Code Example #7
import time

from sklearn.metrics import accuracy_score

# Start counting time for training
# (time.clock() was removed in Python 3.8; use perf_counter instead)
time_train_start = time.perf_counter()

# Fit ensemble
ensemble.fit(training_data, training_labels)

# Print training time
time_train_end = time.perf_counter()
print("Training finished, training time: %g seconds\n" %
      (time_train_end - time_train_start))

# Start counting time for testing
time_test_start = time.perf_counter()

# Predict
preds = ensemble.predict(test_data)

# Print testing time
time_test_end = time.perf_counter()
print("Testing finished, testing time: %g seconds\n" %
      (time_test_end - time_test_start))

print("Fit data:\n%r" % ensemble.data)

print("Prediction score: %.6f" % accuracy_score(test_labels, preds))

# Predict on the second test set (test_data_even)
preds_even = ensemble.predict(test_data_even)

print("Fit data:\n%r" % ensemble.data)
Code Example #8
                                 xtest,
                                 verbose=False)
    print("\nEnsemble (Stacking) ROC-AUC score: %.3f" %
          roc_auc_score(ytest, p))

    # Instantiate the ensemble with 10 folds
    ensemble = SuperLearner(folds=10,
                            random_state=SEED,
                            verbose=2,
                            backend="multiprocessing")

    # Add the base learners and the meta learner
    ensemble.add(list(base_learners.values()), proba=True)
    ensemble.add_meta(meta_learner, proba=True)

    # Train the ensemble
    ensemble.fit(xtrain, ytrain)

    # Predict the test set
    p_sl = ensemble.predict_proba(xtest)

    print("\nSuper Learner ROC-AUC score: %.3f" %
          roc_auc_score(ytest, p_sl[:, 1]))
    plot_roc_curve(ytest, p.reshape(-1, 1), P.mean(axis=1), ["Simple average"],
                   "Super Learner")

    print('-------------------------------------')
    print(test.head())
    y_pred = ensemble.predict(test.iloc[:, 1:].values)
    print(y_pred)
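The snippet relies on base_learners (a dict of models) and meta_learner defined earlier in its source file; a plausible stand-in, with the specific models being assumptions:

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

SEED = 42  # stand-in; the original seed is not shown

base_learners = {
    'random forest': RandomForestClassifier(n_estimators=100, random_state=SEED),
    'gbm': GradientBoostingClassifier(random_state=SEED),
    'naive bayes': GaussianNB(),
}
meta_learner = LogisticRegression()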
Code Example #9
print(yt)
print(yv)

# fillna returns a copy, so assign the result back (the bare calls were no-ops)
Xt = Xt.fillna(-1)
Xv = Xv.fillna(-1)
yt = yt.fillna(-1)
yv = yv.fillna(-1)

print(Xt)

# Try each candidate stack and keep the best-scoring combination
for clf in stacked_clf_list:
    ensemble = SuperLearner(scorer=accuracy_score, random_state=seed, folds=10)
    ensemble.add(clf[0])
    ensemble.add_meta(lr)
    ensemble.fit(Xt, yt)
    preds = ensemble.predict(Xv)
    accuracy = accuracy_score(yv, preds)

    if accuracy > best_combination[0]:
        best_combination[0] = accuracy
        best_combination[1] = clf[1]
        best_preds = ensemble.predict(X_test)

    print(f"Accuracy score: {accuracy} {clf[1]}")

print(f"\nBest stacking model is {best_combination[1]} with accuracy of: {best_combination[0]}")

print(best_preds)
preds = best_preds.tolist()
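stacked_clf_list, lr, and best_combination come from earlier in the source file; a minimal sketch of the shapes this loop expects (the candidate stacks are assumptions):

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

lr = LogisticRegression()  # shared meta learner
stacked_clf_list = [
    ([RandomForestClassifier(), SVC()], "RF + SVC"),
    ([RandomForestClassifier(), KNeighborsClassifier()], "RF + KNN"),
]
best_combination = [0.0, ""]  # [best accuracy so far, model label]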
Code Example #10
            # Allocate learners to this layer in proportion to its weight
            num_in_layer = int(layer_weights[j] / weights_total * num_to_slot)

            layerlist = []
            for k in range(num_in_layer):
                layerlist.append(eval_ind.pop())

            ens.add(layerlist)

        # Then add the meta model (lgbm: project alias, presumably lightgbm's LGBMClassifier)
        ens.add_meta(lgbm(n_estimators=1000, verbose=-1))

        try:
            ens.fit(X_train, y_train)
            # f1_score expects y_true first, then the predictions
            train_score = f1_score(y_train, ens.predict(X_train))
            test_score = f1_score(y_test, ens.predict(X_test))
            real_score = train_score * test_score
            print(' Training score is {}'.format(train_score))
            print(' Testing score is {}'.format(test_score))
            print(' Real score is {}'.format(real_score))
        except Exception:
            print(' There was an error with this one. Throwing it out')
            continue

        if real_score > highest_score:
            print(' New highest score found!')
            highest_score = real_score
            winning_model = ens
'''
for its in range(5):
Code Example #11
# Pull the tuned voting weights out of the search result, in model order
weight_dict = max_params['params']
final_weights = [weight_dict[model_idx] for model_idx in weight_dict]

# Replace the meta layer with a weighted voting classifier over the base voters
ens.add_meta(VotingClassifier(voters_zipped, weights=final_weights))

print()
print('Refitting the whole model with the new meta layer!')
ens.fit(X_train, y_train)
print()
print('Final predictions')

train_preds = ens.predict(X_train)
optim_preds = ens.predict(X_test)
#final_preds = ens.predict(X_holdout)

print()
print('Training score = {}'.format(error(train_preds, y_train)))
print('Test score = {}'.format(error(optim_preds, y_test)))
#print('Holdout score = {}'.format(error(final_preds, y_holdout)))

##### now we should save the model please
##### do several runs and save the best one
##### saving the features!
##### clustering as a feature???
##### randomizing the balance of the generated data for training initial pipelines
##### we shouldn't require at least 1 hidden layer
##### for training the voting models, what if we cv split the data coming in
Code Example #12
# Hold out a validation split, stratified on the target
val_train, val_test = train_test_split(train, test_size=0.3, random_state=SEED,
                                       stratify=train['Survived'])
val_Xtrain = val_train[val_train.columns[1:]]
val_ytrain = val_train[val_train.columns[:1]]
val_Xtest = val_test[val_test.columns[1:]]
val_ytest = val_test[val_test.columns[:1]]

# Instantiate the ensemble with 10 folds
super_learner = SuperLearner(folds=10, random_state=SEED, verbose=2,
                             backend='multiprocessing')

# Add the base learners and the meta learner
super_learner.add(list(base_learners().values()), proba=True)
super_learner.add_meta(LogisticRegression(), proba=True)

# Train the ensemble
super_learner.fit(val_Xtrain, val_ytrain)

# Predict the validation set; column 1 holds the positive-class probability
p_ens = super_learner.predict(val_Xtest)[:, 1]
p_ens_label = 1 * (p_ens >= 0.5)
print('The accuracy of super learner:',
      metrics.accuracy_score(val_ytest, p_ens_label))


# ### Producing the Submission file
#
# Finally, having trained and fit the base and meta learners, we can output the
# predictions in the proper format for submission to the Titanic competition:

# Generate submission file (P_ensemble holds the ensemble's predictions on the
# competition test set, produced elsewhere in the notebook)
Submission = pd.DataFrame({'PassengerId': PassengerId_test,
                           'Survived': P_ensemble})
Submission.to_csv("Submission.csv", index=False)
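base_learners is called as a function here (unlike the dict in Code Example #8); a plausible stand-in returning named models (the model choices are assumptions):

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

def base_learners():
    # Named base models; only the dict values are passed to add()
    return {
        'random forest': RandomForestClassifier(n_estimators=100, random_state=SEED),
        'logistic regression': LogisticRegression(),
        'naive bayes': GaussianNB(),
    }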
Code Example #13
# Intermediate layer, keep propagating, but add a preprocessing
# pipeline that selects a subset of the input
ensemble.add(estimators,
             preprocessing=[Subset([2, 3])],
             propagate_features=[0, 1])

##############################################################################
# In the above example, the first two features of the original input data
# will be propagated through both layers, but the second layer will not be
# trained on them. Instead, it will only see the predictions made by the base
# learners in the first layer.

ensemble.fit(X, y)
n = list(ensemble.layer_2.learners[0].learner
         )[0].estimator.feature_importances_.shape[0]
m = ensemble.predict(X).shape[1]
print("Num features seen by estimators in intermediate layer: %i" % n)
print("Num features in the output array of the intermediate layer: %i" % m)

##############################################################################
# .. _proba-tutorial:
#
# Probabilistic ensemble learning
# -------------------------------
#
# When the target to predict is a class label, it can often be beneficial to
# let higher-order layers or the meta learner learn from *class probabilities*,
# as opposed to the predicted class. Scikit-learn classifiers can return a
# matrix that, for each observation in the test set, gives the probability that
# the observation belongs to a given class. While we are ultimately
# interested in class membership, this information is much richer than just
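A minimal runnable sketch of the probabilistic setup the docs describe, passing proba=True so the meta learner trains on class probabilities rather than hard label predictions (the dataset and models here are illustrative):

from mlens.ensemble import SuperLearner
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

X, y = make_classification(n_samples=300, random_state=0)

proba_ens = SuperLearner(random_state=0)
# Base layer emits class probabilities instead of hard labels
proba_ens.add([RandomForestClassifier(random_state=0), LogisticRegression()],
              proba=True)
proba_ens.add_meta(LogisticRegression())
proba_ens.fit(X[:200], y[:200])
print("Accuracy: %.3f" % accuracy_score(y[200:], proba_ens.predict(X[200:])))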
Code Example #14
File: network1.py  Project: yikun-li/ML_Project
import numpy as np
from mlens.ensemble import SuperLearner
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# %% Preparing the dataset and the output label
dataset = np.loadtxt('../dataset/train.csv', dtype=str, delimiter=",")
dataset, outcome = prgm1.pre_processing(dataset)  # prgm1: project-local preprocessing module
partition = int(np.round(0.8 * dataset.shape[0]))
train_set = dataset[0:partition, :]
test_set = dataset[partition:, :]

# %% Training

test_outcome = np.array(outcome[partition:]).astype(int)
train_outcome = np.array(outcome[0:partition]).astype(int)

seed = 2017
np.random.seed(seed)
ensemble = SuperLearner(scorer=accuracy_score, random_state=seed, verbose=2)
# Build the first layer (gamma is an SVC parameter, not a fit() argument)
ensemble.add([RandomForestClassifier(random_state=seed), SVC(gamma="auto")])
# Attach the final meta estimator
ensemble.add_meta(LogisticRegression())
# Fit ensemble
ensemble.fit(train_set, train_outcome)
# Predict
preds = ensemble.predict(test_set)
print("Fit data:\n%r" % ensemble.data)
print("Prediction score: %.3f" % accuracy_score(test_outcome, preds))