class VotingEnsemble(BaseClassifier):
    """
    Hard-voting ensemble of eight scikit-learn classifiers.

    The ensemble combines gradient boosting, k-nearest neighbours, nearest
    centroid, Gaussian naive Bayes, a random forest and three SVMs (linear,
    polynomial and RBF kernels) through a ``VotingClassifier`` configured
    with ``voting='hard'``.

    Labels are converted to integer ids before they reach the model. The
    label -> id mapping is learned in :meth:`train` and reused in
    :meth:`predict`, so both calls agree on what each id means.
    """

    def __init__(self, feature_length, num_classes, x=10):
        """
        Builds the untrained ensemble.

        :param feature_length: Forwarded to ``BaseClassifier.__init__``
        :param num_classes: Forwarded to ``BaseClassifier.__init__`` and
            stored on the instance
        :param x: Unused by this class; kept so existing callers that pass it
            keep working
        """
        super().__init__(feature_length, num_classes)
        self.model = self._build_model()
        self.num_classes = num_classes
        # Sorted unique labels seen by the last train() call; None until then.
        self._classes = None

    def _build_model(self):
        """
        Creates a fresh, unfitted voting ensemble with the fixed estimator set.

        :return: A new ``VotingClassifier`` instance
        """
        return VotingClassifier(
            estimators=[
                ('gba', GradientBoostingClassifier(n_estimators=100,
                                                   learning_rate=1.0,
                                                   max_depth=1,
                                                   random_state=0)),
                ('knn', KNeighborsClassifier(metric='manhattan',
                                             weights='distance',
                                             n_neighbors=3)),
                ('Nc', NearestCentroid(metric='manhattan')),
                ('nvb', GaussianNB()),
                ('rf', RandomForestClassifier(n_estimators=10,
                                              criterion='entropy')),
                ('svmlin', svm.SVC(kernel='linear')),
                ('svmpol', svm.SVC(kernel='poly')),
                ('svmrbf', svm.SVC(kernel='rbf')),
            ],
            voting='hard')

    def train(self, features, labels):
        """
        Using a set of features and labels, trains the classifier and returns
        the training accuracy.

        The label encoding learned here is remembered and reused by
        :meth:`predict`.

        :param features: An MxN matrix of features to use in prediction
        :param labels: An M row list of labels to train to predict
        :return: Prediction accuracy, as a float between 0 and 1
        """
        # Same ids as labels_to_categorical(), but the classes are kept so
        # that predict() can encode its labels consistently.
        classes, label_ids = unique(labels, return_inverse=True)
        self._classes = classes
        self.model.fit(features, label_ids)
        return self.model.score(features, label_ids)

    # NOTE: when persisting the model, use the same serialization library as
    # the machine learning price-predictor.

    def predict(self, features, labels):
        """
        Using a set of features and labels, predicts the labels from the
        features, and returns the accuracy of predicted vs actual labels.

        :param features: An MxN matrix of features to use in prediction
        :param labels: An M row list of labels to test prediction accuracy on
        :return: Prediction accuracy, as a float between 0 and 1
        """
        # score() runs the prediction itself, so no separate predict() call
        # is needed here.
        return self.model.score(features, self._encode_labels(labels))

    def _encode_labels(self, labels):
        """
        Encodes labels with the label -> id mapping learned in train().

        Labels that were not present during training are encoded as -1. The
        model is only ever fitted on ids >= 0, so such samples always count
        as misclassified.

        :param labels: Labels to encode
        :return: Integer ids, one per label
        """
        classes = getattr(self, '_classes', None)
        if classes is None:
            # Not trained yet: keep the original stateless behaviour and let
            # the model report the unfitted state itself.
            return self.labels_to_categorical(labels)

        trained_id = {label: idx for idx, label in enumerate(classes.tolist())}
        # Deduplicate first so each distinct label is looked up only once;
        # `inverse` then expands the remapped ids back to one id per sample.
        seen, inverse = unique(labels, return_inverse=True)
        remap = [trained_id.get(label, -1) for label in seen.tolist()]
        return [remap[i] for i in inverse.ravel().tolist()]

    def get_prediction(self, features):
        """
        Predicts the integer class id of each row of ``features``.

        :param features: An MxN matrix of features to use in prediction
        :return: Whatever the underlying model's ``predict`` returns; the ids
            are the ones the model was fitted on in train()
        """
        return self.model.predict(features)

    def reset(self):
        """
        Resets the trained weights / parameters to initial state by replacing
        the model with a fresh, unfitted ensemble and forgetting the learned
        label encoding.

        :return:
        """
        self.model = self._build_model()
        self._classes = None

    def labels_to_categorical(self, labels):
        """
        Converts arbitrary labels to integer ids.

        The ids are positions in the sorted set of unique values of *this*
        ``labels`` argument, so two calls with different label sets may assign
        different ids to the same label.

        :param labels: Labels to convert
        :return: Integer ids, one per label
        """
        _, IDs = unique(labels, return_inverse=True)
        return IDs
('l2', pipe2), ('l3', pipe3), ('l4', pipe4), ('l5', pipe5), ], n_jobs=4) cls.fit(cars_train_X, cars_train_y) # uncomment the 3 lines below if needed to see the accuracy and std-dev of the training set # scores = cross_val_score(cls, cars_train_X, cars_train_y, cv=5, verbose=True) # print(scores) # print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) # this reaches about 30% acc # create the predictions and dump to a file for plotting the heatmap y_pred = cls.predict(cars_test_X) with open('5subset_linearsvm_voting.sav', 'wb') as f: pkl.dump((y_pred, cars_test_y), f) y_true = cars_test_y preds = {} for i in range(y_true.shape[0]): if y_true[i] == y_pred[i]: if y_true[i] not in preds: preds[y_true[i]] = 1 else: preds[y_true[i]] += 1 for y in y_true: if y not in preds: preds[y] = 0