from numpy import unique
from sklearn import svm
from sklearn.ensemble import (GradientBoostingClassifier, RandomForestClassifier,
                              VotingClassifier)
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid


class VotingEnsemble(BaseClassifier):  # BaseClassifier is defined elsewhere in the project
    def __init__(self, feature_length, num_classes, x=10):
        super().__init__(feature_length, num_classes)
        # Hard-voting ensemble over eight heterogeneous estimators: each casts
        # one vote and the majority class wins.
        self.model = VotingClassifier(estimators=[
            ('gba', GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                               max_depth=1, random_state=0)),
            ('knn', KNeighborsClassifier(metric='manhattan', weights='distance',
                                         n_neighbors=3)),
            ('Nc', NearestCentroid(metric='manhattan')),
            ('nvb', GaussianNB()),
            ('rf', RandomForestClassifier(n_estimators=10, criterion='entropy')),
            ('svmlin', svm.SVC(kernel='linear')),
            ('svmpol', svm.SVC(kernel='poly')),
            ('svmrbf', svm.SVC(kernel='rbf'))
        ], voting='hard')
        self.num_classes = num_classes

    def train(self, features, labels):
        """
        Using a set of features and labels, trains the classifier and returns
        the training accuracy.
        :param features: An MxN matrix of features to use in prediction
        :param labels: An M row list of labels to train to predict
        :return: Prediction accuracy, as a float between 0 and 1
        """
        labels = self.labels_to_categorical(labels)
        self.model.fit(features, labels)
        accuracy = self.model.score(features, labels)
        return accuracy

    # Make sure the model is saved with the same library used in the
    # machine-learning price-predictor.
    def predict(self, features, labels):
        """
        Using a set of features and labels, predicts the labels from the
        features and returns the accuracy of predicted vs. actual labels.
        :param features: An MxN matrix of features to use in prediction
        :param labels: An M row list of labels to test prediction accuracy on
        :return: Prediction accuracy, as a float between 0 and 1
        """
        labels = self.labels_to_categorical(labels)
        # score() calls predict() internally, so a separate predict() call
        # here would be redundant.
        accuracy = self.model.score(features, labels)
        return accuracy

    def get_prediction(self, features):
        return self.model.predict(features)

    def reset(self):
        """
        Resets the trained weights / parameters to their initial state.
        """
        pass

    def labels_to_categorical(self, labels):
        # Map arbitrary label values to contiguous integer IDs.
        _, ids = unique(labels, return_inverse=True)
        return ids
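
# A minimal usage sketch for VotingEnsemble, assuming BaseClassifier accepts
# (feature_length, num_classes). The random data and the 5-class setup are
# hypothetical placeholders, not part of the original project.
import numpy as np

X = np.random.rand(120, 8)                                 # M=120 samples, N=8 features
y = np.random.choice(['a', 'b', 'c', 'd', 'e'], size=120)  # string labels are fine: they are
                                                           # mapped to integer IDs internally
ensemble = VotingEnsemble(feature_length=8, num_classes=5)
print('train accuracy:', ensemble.train(X, y))
print('eval accuracy: ', ensemble.predict(X, y))           # in practice, pass held-out data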
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score

# This excerpt begins mid-statement: the construction of gbt is truncated in
# the source and only its closing arguments survive.
                                 max_depth=4, random_state=2018)
rf = RandomForestClassifier(1000, criterion='gini', n_jobs=-1, random_state=2018)
lor = LogisticRegression(solver='newton-cg', multi_class='multinomial', max_iter=1000)

# Soft-voting ensemble: averages predicted class probabilities, weighting the
# gradient-boosted model most heavily.
clf = VotingClassifier([('gbt', gbt), ('rf', rf), ('lor', lor)],
                       voting='soft', weights=[3, 2, 1], n_jobs=-1)

if args.s:
    # Fit on the full training set, checkpoint the model, and write a submission.
    clf.fit(X_train, y_train)
    joblib.dump(clf, 'checkpoint/voting.pkl')
    X_test = pd.read_csv("data/test_python.csv", encoding='utf-8')
    pred = clf.predict_proba(X_test)
    # Probability columns are reordered to (high, medium, low) to match the
    # submission header.
    np.savetxt('submission/submission.csv',
               np.c_[X_test['listing_id'], pred[:, [2, 1, 0]]],
               delimiter=',', header='listing_id,high,medium,low',
               fmt='%d,%.16f,%.16f,%.16f', comments='')
else:
    # Otherwise, estimate log loss with stratified 3-fold cross-validation.
    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=2018)
    scores = cross_val_score(clf, X_train, y_train, scoring='neg_log_loss',
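
# A plausible completion of the truncated call above (an assumption, not the
# original code): pass the StratifiedKFold object and report the mean log loss.
scores = cross_val_score(clf, X_train, y_train, scoring='neg_log_loss',
                         cv=cv, n_jobs=-1)
print('CV log loss: %.5f (+/- %.5f)' % (-scores.mean(), scores.std() * 2))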
import pickle as pkl

from mlxtend.feature_selection import ColumnSelector  # presumably mlxtend's column selector
from sklearn.ensemble import VotingClassifier
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

# Build one linear SVM per feature subset: each pipeline first selects its own
# columns (split1..split5), then fits a LinearSVC on them.
pipe1 = make_pipeline(ColumnSelector(cols=split1), LinearSVC())
pipe2 = make_pipeline(ColumnSelector(cols=split2), LinearSVC())
pipe3 = make_pipeline(ColumnSelector(cols=split3), LinearSVC())
pipe4 = make_pipeline(ColumnSelector(cols=split4), LinearSVC())
pipe5 = make_pipeline(ColumnSelector(cols=split5), LinearSVC())

# Create the ensemble with the VotingClassifier
cls = VotingClassifier([
    ('l1', pipe1),
    ('l2', pipe2),
    ('l3', pipe3),
    ('l4', pipe4),
    ('l5', pipe5),
], n_jobs=4)
cls.fit(cars_train_X, cars_train_y)

# Uncomment the 3 lines below if needed to see the accuracy and std-dev on the
# training set; this reaches about 30% accuracy.
# scores = cross_val_score(cls, cars_train_X, cars_train_y, cv=5, verbose=True)
# print(scores)
# print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

# Create the predictions and dump them to a file for plotting the heatmap.
y_pred = cls.predict(cars_test_X)
with open('5subset_linearsvm_voting.sav', 'wb') as f:
    pkl.dump((y_pred, cars_test_y), f)

y_true = cars_test_y
preds = {}
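
# Hedged sketch of the plotting step the comment above alludes to: reload the
# dumped (y_pred, y_true) pair and draw a confusion-matrix heatmap. The file
# name matches the dump above; the matplotlib styling is an assumption.
import pickle as pkl

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

with open('5subset_linearsvm_voting.sav', 'rb') as f:
    y_pred, y_true = pkl.load(f)

cm = confusion_matrix(y_true, y_pred)
plt.imshow(cm, interpolation='nearest', cmap='viridis')
plt.xlabel('predicted class')
plt.ylabel('true class')
plt.colorbar()
plt.title('Voting ensemble over 5 feature subsets')
plt.show()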