Esempio n. 1
0
    async def do_run_async(self):
        """Compare three classifiers with a hard-voting ensemble built from them.

        Loads the train/test images and labels through the parent-class
        loaders, flattens every image into one feature row, then fits an
        ``SGDClassifier``, a ``RandomForestClassifier``, a
        ``KNeighborsClassifier`` and a hard ``VotingClassifier`` wrapping those
        three. Each model's accuracy on the test set is printed. Finally the
        ensemble's ``voting`` attribute is switched to ``'soft'``.
        """
        training_set = super().load_train_images()
        training_labels = super().load_train_labels()

        test_set = super().load_test_images()
        test_labels = super().load_test_labels()

        # The classifiers expect 2D input (samples, features), so every image
        # is flattened into a single row. The row count is taken from the data
        # and the feature count is inferred (-1), so this also works for
        # subsets or image sizes other than the full 60000/10000 x 28x28 set.
        training_set_tr = training_set.reshape((training_set.shape[0], -1))
        test_set_tr = test_set.reshape((test_set.shape[0], -1))

        # Hard voting: the class with the highest number of votes is output.
        sgd_clf = SGDClassifier()
        rnd_clf = RandomForestClassifier()
        # Note: training this is very slow on the MNIST data set.
        k_clf = KNeighborsClassifier()

        voting_clf = VotingClassifier(
            estimators=[('sgd', sgd_clf), ('rf', rnd_clf), ('k', k_clf)],
            voting='hard',
        )

        # Fit and score every model on the same data so the accuracies are
        # directly comparable; the ensemble is expected to beat each member.
        for clf in (sgd_clf, rnd_clf, k_clf, voting_clf):
            clf.fit(training_set_tr, training_labels)
            predictions = clf.predict(test_set_tr)
            print(type(clf).__name__, accuracy_score(test_labels, predictions))

        # Soft voting: the class with the highest probability averaged across
        # all classifiers is output, so every member must offer predict_proba.
        # NOTE(review): SGDClassifier with its default loss does not provide
        # predict_proba, so predicting with this ensemble in soft mode is
        # expected to fail unless the SGD member is reconfigured - confirm
        # before relying on the soft-voting ensemble.
        voting_clf.voting = 'soft'
Esempio n. 2
0
]

# (name, estimator) pairs in the form VotingClassifier expects.
named_estimators = [
	("forest_clf", forest_clf),
	("extra_trees_clf", extra_trees_clf),
	("svc_clf", svc_clf),
	("mlp_clf", mlp_clf)
]

# Build the ensemble without an explicit `voting` argument, fit it on the
# training split and score it on the validation split.
voting_clf = VotingClassifier(named_estimators)
voting_clf.fit(X_train, y_train)
voting_clf.score(X_validation, y_validation)

# The classifier above was built without a `voting` argument (scikit-learn's
# default is hard voting). Switching to soft voting is done by changing the
# `voting` attribute; the model is not refitted here.
# NOTE(review): soft voting needs every fitted member to expose predict_proba;
# the estimator definitions are outside this fragment - confirm (e.g. that
# svc_clf was created with probability=True).
voting_clf.voting = "soft"
voting_clf.score(X_test, y_test)

# Stacking ensemble: one row per validation sample, one column per base
# estimator, each cell holding that estimator's predicted class.
# NOTE(review): `estimators` is defined above this fragment; assumed to be the
# already-fitted base models - confirm.
X_validation_predictions = np.empty((len(X_validation), len(estimators)), dtype=np.float32)

for index, estimator in enumerate(estimators):
	X_validation_predictions[:, index] = estimator.predict(X_validation)

# The blender learns to map the base models' predictions to the true class.
# oob_score=True makes the out-of-bag score available after fitting.
random_forest_blender = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)
random_forest_blender.fit(X_validation_predictions, y_validation)  # train on the output of the previous predictions
print(random_forest_blender.oob_score_)

# Allocate the same prediction-matrix layout for the test set.
X_test_predictions = np.empty((len(X_test), len(estimators)), dtype=np.float32)
However, it did not update the list of _trained_ estimators:

voting_clf.estimators_

So we can either fit the `VotingClassifier` again, or just remove the SVM from the list of trained estimators:

del voting_clf.estimators_[2]

Now let's evaluate the `VotingClassifier` again:

voting_clf.score(X_val, y_val)

A bit better! The SVM was hurting performance. Now let's try using a soft voting classifier. We do not actually need to retrain the classifier; we can just set `voting` to `"soft"`:

voting_clf.voting = "soft"

voting_clf.score(X_val, y_val)

Nope, hard voting wins in this case.

_Once you have found one, try it on the test set. How much better does it perform compared to the individual classifiers?_

voting_clf.voting = "hard"
voting_clf.score(X_test, y_test)

[estimator.score(X_test, y_test) for estimator in voting_clf.estimators_]

The voting classifier only very slightly reduced the error rate of the best model in this case.

## 9. Stacking Ensemble
# Score the voting ensemble on the validation set.
voting.score(X_val,y_val)

# Score each fitted member of the ensemble on the validation set.
[estimator.score(X_val,y_val) for estimator in voting.estimators_]

voting.set_params(svm=None) # set the ensemble's 'svm' parameter to None

# Inspect the list of fitted estimators after the parameter change.
voting.estimators_

# Alternatively, remove a fitted estimator from the trained list by position.
# NOTE(review): index 1 is presumably the SVM, removed because it was hurting
# the voting model - confirm against the estimator order used to build `voting`.
del voting.estimators_[1]

# Re-score the ensemble on the validation set after the removal.
voting.score(X_val, y_val)

# Switch to soft voting, then back to hard voting. Nothing is evaluated in
# between, so the ensemble ends up on 'hard' voting.
voting.voting = 'soft'

voting.voting = 'hard'
    
# Score each remaining fitted member on the test set.

[estimator.score(X_test,y_test) for estimator in voting.estimators_]

'''
Exercise: Run the individual classifiers from the previous exercise to make 
predictions on the validation set, and create a new training set with the 
resulting predictions: each training instance is a vector containing the set 
of predictions from all your classifiers for an image, and the target is the 
image's class. Train a classifier on this new training set.
'''
#making empty array for estimator prediction as datatype float32
Esempio n. 5
0
# Positional split: the leading `train_ratio` fraction of the rows is the
# training set and the remainder is the test set. The boundary is computed
# once so all four slices are guaranteed to agree.
split_index = int(len(X) * train_ratio)
X_train = X[:split_index, :]
y_train = y[:split_index]
X_test = X[split_index:, :]
y_test = y[split_index:]

log_clf = LogisticRegression(multi_class='multinomial', solver='sag')
rnd_clf = RandomForestClassifier()
# probability=True so the SVM exposes predict_proba, which soft voting needs.
svm_clf = SVC(probability=True)

estimators = [('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)]

# Hard voting: the ensemble outputs the class predicted by most members.
print('\n-------Hard Voting-------')
voting_clf = VotingClassifier(estimators=estimators, voting='hard')

# Accuracy of each individual classifier and of the ensemble.
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

# Soft voting: the ensemble averages the members' class probabilities.
print('\n-------Soft Voting-------')
voting_clf.voting = 'soft'
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    # The columns of predict_proba follow clf.classes_, so the arg-max column
    # index must be mapped back to its label. Using the raw index is only
    # correct when the labels happen to be exactly 0..k-1.
    y_pred = clf.classes_[np.argmax(clf.predict_proba(X_test), axis=1)]
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))