import csv
import os

import readDatasets
import nbClassifier
import rfClassifier
import svmClassifier
import MyMethodClassifier


def run_all_classifiers(stop_words, use_pipeline, dynamic_datasets_path):
    data = readDatasets.read_dataset(dynamic_datasets_path)
    train_data = data[0]
    test_data = data[1]

    print('Running the classifiers...\n')
    nbScores = nbClassifier.nb_classifier(stop_words, train_data, test_data, use_pipeline)
    rfScores = rfClassifier.rf_classifier(stop_words, train_data, test_data, use_pipeline)
    svmScores = svmClassifier.svm_classifier(stop_words, train_data, test_data, use_pipeline)
    # knnScores = knnClassifier.knn_classifier(stop_words, train_data, test_data)
    mymethodScores = MyMethodClassifier.my_method_classifier(
        stop_words, train_data, test_data, dynamic_datasets_path)

    # Open the output CSV file and write the scores returned by the classifiers.
    print('Writing classifiers\' scores to the outputCsvFile...\n')
    location_10_fold = os.path.join(dynamic_datasets_path, 'Resources', 'datasets',
                                    'EvaluationMetric_10fold.csv')
    with open(location_10_fold, mode='w', encoding='utf8') as csvfile:
        csvWriter = csv.writer(csvfile, delimiter='\t', quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)

        # Write the headers (first row & column).
        csvWriter.writerow(['Statistic_Measure', 'Naive_Bayes', 'Random_Forest',
                            'SVM', 'KNN', 'My_Method'])

        # Write the scores, one row per metric. The 'knn' placeholder stays
        # until knnClassifier is re-enabled above.
        for i, metric in enumerate(['Accuracy', 'Precision', 'Recall', 'F-Measure']):
            csvWriter.writerow([metric,
                                '{:.3}'.format(nbScores[i]),
                                '{:.3}'.format(rfScores[i]),
                                '{:.3}'.format(svmScores[i]),
                                'knn',  # knnScores[i] once KNN is re-enabled
                                '{:.3}'.format(mymethodScores[i])])

    print('Finished writing to the outputCsvFile!')
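# ---------------------------------------------------------------------------
# Hypothetical sketch (not part of this repo): how a classifier module could
# produce the (accuracy, precision, recall, f-measure) tuple that
# run_all_classifiers unpacks above, using scikit-learn's 10-fold
# cross_validate. The MultinomialNB estimator and the 'Content' column name
# are placeholder assumptions, not the actual nbClassifier internals.
# ---------------------------------------------------------------------------
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import cross_validate
from sklearn.naive_bayes import MultinomialNB


def example_scores(train_data, stop_words):
    # Vectorize the text column and cross-validate over the training labels.
    X = CountVectorizer(stop_words=stop_words).fit_transform(train_data['Content'])
    y = train_data['Category']
    cv = cross_validate(MultinomialNB(), X, y, cv=10,
                        scoring=('accuracy', 'precision_macro',
                                 'recall_macro', 'f1_macro'))
    # Average each metric over the 10 folds, in the order the CSV rows expect.
    return (cv['test_accuracy'].mean(),
            cv['test_precision_macro'].mean(),
            cv['test_recall_macro'].mean(),
            cv['test_f1_macro'].mean())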
    clf.fit(vectorTrain, train_data['Category'])
    y_pred = clf.predict(vectorTest)
    # print("Train Accuracy :: ", accuracy_score(train_data['Category'], clf.predict(vectorTrain)))
    # print("Test Accuracy :: ", accuracy_score(train_data['Category'], y_pred))
    # 10-fold alternative (note: y must be the training labels, not vectorTest):
    # y_pred = cross_val_predict(clf, X=vectorTrain, y=train_data['Category'], cv=10,
    #                            n_jobs=multiprocessing.cpu_count())

    writePredictionsToCsv.write_predictions_to_csv(y_pred, test_data, dynamic_datasets_path)

    # Best GridSearch params:
    # print(clf.best_params_)

    print("Elapsed time of successional-run: ", time.time() - start_time_successional)
    print('MyMethodClassifier finished!\n')
    return scores


# Run MyMethodClassifier directly:
if __name__ == '__main__':
    dynamic_datasets_path = '..'
    data = readDatasets.read_dataset(dynamic_datasets_path)
    trainData = data[0]
    testData = data[1]
    my_method_classifier(stopWords.get_stop_words(), trainData, testData, dynamic_datasets_path)
    exit()
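# ---------------------------------------------------------------------------
# Standalone sketch: the original commented-out cross_val_predict call passed
# vectorTest as y, which would not run; y must be the training labels. This
# self-contained toy shows the corrected 10-fold call. The random matrix and
# cyclic labels are placeholders for vectorTrain / train_data['Category'].
# ---------------------------------------------------------------------------
import multiprocessing

import numpy as np
from sklearn.model_selection import cross_val_predict
from sklearn.naive_bayes import MultinomialNB

rng = np.random.default_rng(0)
vector_train = rng.random((100, 20))  # stand-in for vectorTrain
labels = np.arange(100) % 4           # stand-in for train_data['Category']

# Out-of-fold predictions over the training set, 10 folds in parallel:
y_pred_cv = cross_val_predict(MultinomialNB(), X=vector_train, y=labels,
                              cv=10, n_jobs=multiprocessing.cpu_count())
print(y_pred_cv[:10])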
    # GridSearch over the number of trees (kept for reference):
    # parameters = {'n_estimators': [130, 110, 100, 80, 50, 30, 20, 10]}
    # svr = RandomForestClassifier()
    # clf = GridSearchCV(svr, parameters)
    # clf.fit(vectorTrain, train_y)
    # y_pred = clf.predict(vectorTest)
    #
    # print("Train Accuracy :: ", accuracy_score(train_y, clf.predict(vectorTrain)))
    # print("Test Accuracy :: ", accuracy_score(test_y, y_pred))

    # Best GridSearch params:
    # print(clf.best_params_)

    print("Elapsed time of successional-run: ", time.time() - start_time_successional)
    print('rfClassifier finished!\n')
    return scores


# Run rfClassifier directly:
if __name__ == '__main__':
    data = readDatasets.read_dataset('..')  # same relative path as the other entry points
    trainData = data[0]
    testData = data[1]
    usePipeline = False
    rf_classifier(stopWords.get_stop_words(), trainData, testData, usePipeline)
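# ---------------------------------------------------------------------------
# Standalone, runnable version of the GridSearch block commented out above.
# Synthetic data stands in for vectorTrain / train_y; the parameter grid
# mirrors the commented one.
# ---------------------------------------------------------------------------
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split

rng = np.random.default_rng(0)
X = rng.random((200, 30))  # placeholder feature matrix
y = np.arange(200) % 5     # placeholder labels, 5 balanced classes

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Search over the same n_estimators grid as the commented block:
parameters = {'n_estimators': [130, 110, 100, 80, 50, 30, 20, 10]}
clf = GridSearchCV(RandomForestClassifier(), parameters)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print("Train Accuracy ::", accuracy_score(y_train, clf.predict(X_train)))
print("Test Accuracy ::", accuracy_score(y_test, y_pred))
print("Best GridSearch params ::", clf.best_params_)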