def gradient_boosting_classify(my_train_data, my_train_label, my_test_data, estimators):
    """Cross-validate, fit and apply a gradient boosting classifier.

    The classifier is scored with ``cv=5`` cross-validation on the training
    set and the mean score, plus/minus twice its standard deviation, is
    printed. It is then fitted on the complete training set, used to predict
    labels for ``my_test_data``, and the predictions are passed to
    ``data_storer.save_data`` under the name
    ``gradient_boosting_<estimators>.csv``.

    Args:
        my_train_data: Training samples handed to the estimator.
        my_train_label: Labels belonging to ``my_train_data``.
        my_test_data: Samples whose labels are predicted.
        estimators: Forwarded as ``n_estimators``; also embedded in the
            printed report and in the output file name.
    """
    model = GradientBoostingClassifier(n_estimators=estimators)

    # NOTE(review): `cross_validation` is presumably scikit-learn's legacy
    # module, superseded by `sklearn.model_selection` - confirm against the
    # file's imports.
    cv_scores = cross_validation.cross_val_score(
        model, my_train_data, my_train_label, cv=5
    )
    report = "gradient boosting(%d) accuracy: %0.3f (+/- %0.3f)" % (
        estimators,
        cv_scores.mean(),
        cv_scores.std() * 2,
    )
    print(report)

    # Cross-validation only scores the model; refit on all training data
    # before predicting the test set.
    model.fit(my_train_data, my_train_label)
    predictions = model.predict(my_test_data)
    data_storer.save_data(predictions, "gradient_boosting_%d.csv" % estimators)
def multinomial_nb_classify(my_train_data, my_train_label, my_test_data):
    """Cross-validate, fit and apply a multinomial naive Bayes classifier.

    A ``MultinomialNB`` model with ``alpha=0.1`` is scored with ``cv=5``
    cross-validation on the training set and the mean score, plus/minus
    twice its standard deviation, is printed. The model is then fitted on
    the complete training set, used to predict labels for ``my_test_data``,
    and the predictions are passed to ``data_storer.save_data`` under the
    name ``multinomial_nb.csv``.

    Args:
        my_train_data: Training samples handed to the estimator.
        my_train_label: Labels belonging to ``my_train_data``.
        my_test_data: Samples whose labels are predicted.
    """
    model = MultinomialNB(alpha=0.1)

    # NOTE(review): `cross_validation` is presumably scikit-learn's legacy
    # module, superseded by `sklearn.model_selection` - confirm against the
    # file's imports.
    cv_scores = cross_validation.cross_val_score(
        model, my_train_data, my_train_label, cv=5
    )
    # The report previously misspelled the algorithm as "native bayes".
    print(
        "multinomial naive bayes accuracy: %0.3f (+/- %0.3f)"
        % (cv_scores.mean(), cv_scores.std() * 2)
    )

    # Cross-validation only scores the model; refit on all training data
    # before predicting the test set.
    model.fit(my_train_data, my_train_label)
    predictions = model.predict(my_test_data)
    data_storer.save_data(predictions, "multinomial_nb.csv")
def random_forest_classify(my_train_data, my_train_label, my_test_data, estimators):
    """Cross-validate, fit and apply a random forest classifier.

    The forest is scored with ``cv=5`` cross-validation on the training set
    and the mean score, plus/minus twice its standard deviation, is printed.
    It is then fitted on the complete training set, used to predict labels
    for ``my_test_data``, and the predictions are passed to
    ``data_storer.save_data`` under the name ``random_forest_<estimators>.csv``.

    Args:
        my_train_data: Training samples handed to the estimator.
        my_train_label: Labels belonging to ``my_train_data``.
        my_test_data: Samples whose labels are predicted.
        estimators: Forwarded as ``n_estimators``; also embedded in the
            printed report and in the output file name.
    """
    forest = RandomForestClassifier(n_estimators=estimators)

    # NOTE(review): `cross_validation` is presumably scikit-learn's legacy
    # module, superseded by `sklearn.model_selection` - confirm against the
    # file's imports.
    fold_scores = cross_validation.cross_val_score(
        forest, my_train_data, my_train_label, cv=5
    )
    summary = (estimators, fold_scores.mean(), fold_scores.std() * 2)
    print("random forest(%d) accuracy: %0.3f (+/- %0.3f)" % summary)

    # Refit on the full training set, then label the test samples.
    forest.fit(my_train_data, my_train_label)
    predicted = forest.predict(my_test_data)

    output_name = "random_forest_%d.csv" % estimators
    data_storer.save_data(predicted, output_name)
def knn_classify(my_train_data, my_train_label, my_test_data, neighbors):
    """Cross-validate, fit and apply a k-nearest-neighbours classifier.

    The classifier is scored with ``cv=5`` cross-validation on the training
    set and the mean score, plus/minus twice its standard deviation, is
    printed. It is then fitted on the complete training set, used to predict
    labels for ``my_test_data``, and the predictions are passed to
    ``data_storer.save_data`` under the name ``knn_<neighbors>.csv``.

    Args:
        my_train_data: Training samples handed to the estimator.
        my_train_label: Labels belonging to ``my_train_data``.
        my_test_data: Samples whose labels are predicted.
        neighbors: Forwarded as ``n_neighbors``; also embedded in the
            printed report and in the output file name.
    """
    knn = KNeighborsClassifier(n_neighbors=neighbors)

    # NOTE(review): `cross_validation` is presumably scikit-learn's legacy
    # module, superseded by `sklearn.model_selection` - confirm against the
    # file's imports.
    cv_scores = cross_validation.cross_val_score(
        knn, my_train_data, my_train_label, cv=5
    )
    message = "knn(%d) accuracy: %0.3f (+/- %0.3f)" % (
        neighbors,
        cv_scores.mean(),
        cv_scores.std() * 2,
    )
    print(message)

    # Refit on the full training set, then label the test samples.
    knn.fit(my_train_data, my_train_label)
    test_predictions = knn.predict(my_test_data)
    data_storer.save_data(test_predictions, "knn_%d.csv" % neighbors)
def gaussian_nb_classify(my_train_data, my_train_label, my_test_data):
    """Cross-validate, fit and apply a Gaussian naive Bayes classifier.

    A default ``GaussianNB`` model is scored with ``cv=5`` cross-validation
    on the training set and the mean score, plus/minus twice its standard
    deviation, is printed. The model is then fitted on the complete training
    set, used to predict labels for ``my_test_data``, and the predictions
    are passed to ``data_storer.save_data`` under the name
    ``gaussian_nb.csv``.

    Args:
        my_train_data: Training samples handed to the estimator.
        my_train_label: Labels belonging to ``my_train_data``.
        my_test_data: Samples whose labels are predicted.
    """
    model = GaussianNB()

    # NOTE(review): `cross_validation` is presumably scikit-learn's legacy
    # module, superseded by `sklearn.model_selection` - confirm against the
    # file's imports.
    cv_scores = cross_validation.cross_val_score(
        model, my_train_data, my_train_label, cv=5
    )
    # The report previously misspelled the algorithm as "native bayes".
    print(
        "Gaussian naive bayes accuracy: %0.3f (+/- %0.3f)"
        % (cv_scores.mean(), cv_scores.std() * 2)
    )

    # Cross-validation only scores the model; refit on all training data
    # before predicting the test set.
    model.fit(my_train_data, my_train_label)
    predictions = model.predict(my_test_data)
    data_storer.save_data(predictions, "gaussian_nb.csv")
def svc_classify(my_train_data, my_train_label, my_test_data, svc_c):
    """Cross-validate, fit and apply a support vector classifier.

    An ``svm.SVC`` with ``C=svc_c`` is scored with ``cv=5`` cross-validation
    on the training set and the mean score, plus/minus twice its standard
    deviation, is printed. The model is then fitted on the complete training
    set, used to predict labels for ``my_test_data``, and the predictions
    are passed to ``data_storer.save_data`` under the name
    ``svc_<svc_c>.csv`` (``svc_c`` formatted with one decimal place).

    Args:
        my_train_data: Training samples handed to the estimator.
        my_train_label: Labels belonging to ``my_train_data``.
        my_test_data: Samples whose labels are predicted.
        svc_c: Forwarded as the ``C`` argument of ``svm.SVC``; also embedded
            in the printed report and in the output file name.
    """
    # Alternative kept for reference: svm.SVC(C=svc_c, kernel='poly')
    svc_model = svm.SVC(C=svc_c)

    # NOTE(review): `cross_validation` is presumably scikit-learn's legacy
    # module, superseded by `sklearn.model_selection` - confirm against the
    # file's imports.
    fold_scores = cross_validation.cross_val_score(
        svc_model, my_train_data, my_train_label, cv=5
    )
    report_values = (svc_c, fold_scores.mean(), fold_scores.std() * 2)
    print("svc(C=%.1f) accuracy: %0.3f (+/- %0.3f)" % report_values)

    # Refit on the full training set, then label the test samples.
    svc_model.fit(my_train_data, my_train_label)
    predicted_labels = svc_model.predict(my_test_data)
    data_storer.save_data(predicted_labels, "svc_%.1f.csv" % svc_c)