from myWordcloud import my_wordcloud
from classifiers import run_all_classifiers
from supportFuncs import stopWords


# Run everything:
if __name__ == '__main__':
    stop_words = stopWords.get_stop_words()
    usePipeline = False  # The pipeline currently does not run cross-validation; use the regular path.
    my_wordcloud(stop_words)
    run_all_classifiers(stop_words, usePipeline)
    clf.fit(vectorTrain, train_data['Category'])
    y_pred = clf.predict(vectorTest)

    # print("Train Accuracy :: ", accuracy_score(train_data['Category'], clf.predict(vectorTrain)))
    # print("Test Accuracy :: ", accuracy_score(train_data['Category'], y_pred))
    # y_pred = cross_val_predict(clf, X=vectorTrain, y=vectorTest, cv=10, n_jobs=multiprocessing.cpu_count())

    writePredictionsToCsv.write_predictions_to_csv(y_pred, test_data, dynamic_datasets_path)

    # Best GridSearch params
    # print(clf.best_params_)

    print("Elapsed time of successional-run: ", time.time() - start_time_successional)

    print('MyMethodClassifier finished!\n')
    return scores


# Run myMethodClassifier directly:
if __name__ == '__main__':
    dynamic_datasets_path = '..'
    data = readDatasets.read_dataset(dynamic_datasets_path)
    trainData = data[0]
    testData = data[1]
    my_method_classifier(stopWords.get_stop_words(), trainData, testData, dynamic_datasets_path)
    exit()
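# Illustrative standalone sketch (not project code): the commented-out
# cross_val_predict call above passes the test matrix as y, but cross_val_predict
# expects the training labels there. The snippet below shows 10-fold
# cross-validated predictions, with toy data standing in for vectorTrain /
# train_data['Category'] and MultinomialNB standing in for the project's clf.
import multiprocessing

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_predict
from sklearn.naive_bayes import MultinomialNB

docs = ["stocks rally on earnings", "parliament passes new bill",
        "striker scores twice", "markets fall sharply",
        "minister resigns amid scandal", "goalkeeper saves penalty"] * 5
labels = ["Business", "Politics", "Football"] * 10

vectorTrain = CountVectorizer().fit_transform(docs)
clf = MultinomialNB()

# Each prediction comes from a fold whose model never saw that document.
cv_pred = cross_val_predict(clf, X=vectorTrain, y=labels, cv=10,
                            n_jobs=multiprocessing.cpu_count())
print("10-fold CV accuracy:", accuracy_score(labels, cv_pred))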
    # GridSearch (find the best parameters)
    # parameters = {'n_neighbors': [5, 10], 'weights': ['uniform', 'distance'],
    #               'algorithm': ['ball_tree', 'kd_tree', 'brute'], 'p': [1, 2]}
    # svr = KNeighborsClassifier()
    # clf = GridSearchCV(svr, parameters)

    clf.fit(vectorTrain, train_y)
    y_pred = clf.predict(vectorTest)

    # print("Train Accuracy :: ", accuracy_score(train_y, clf.predict(vectorTrain)))
    print("Test Accuracy :: ", accuracy_score(test_y, y_pred))

    # Best GridSearch params
    # print(clf.best_params_)

    print("Elapsed time of successional-run: ", time.time() - start_time_successional)

    print('knnClassifier finished!\n')
    return scores


# Run knnClassifier directly:
if __name__ == '__main__':
    dynamic_datasets_path = '..'
    data = readDatasets.read_dataset(dynamic_datasets_path)
    trainData = data[0]
    testData = data[1]
    usePipeline = False
    knn(stopWords.get_stop_words(), trainData, testData, usePipeline)
    exit()
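# Illustrative standalone sketch (not project code): if the grid search above is
# re-enabled, GridSearchCV wraps KNeighborsClassifier with the same parameter
# grid and exposes the winning settings via best_params_. Toy data stands in
# for the project's vectorTrain / vectorTest matrices.
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier

X, y = make_classification(n_samples=300, n_features=20, n_informative=5,
                           n_classes=3, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

parameters = {'n_neighbors': [5, 10], 'weights': ['uniform', 'distance'],
              'algorithm': ['ball_tree', 'kd_tree', 'brute'], 'p': [1, 2]}
clf = GridSearchCV(KNeighborsClassifier(), parameters, cv=5)
clf.fit(X_train, y_train)

print("Best params:", clf.best_params_)
print("Test Accuracy :: ", accuracy_score(y_test, clf.predict(X_test)))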
    # GridSearch
    # parameters = {'n_estimators': [130, 110, 100, 80, 50, 30, 20, 10]}
    # svr = RandomForestClassifier()
    # clf = GridSearchCV(svr, parameters)
    # clf.fit(vectorTrain, train_y)
    # y_pred = clf.predict(vectorTest)
    #
    # print("Train Accuracy :: ", accuracy_score(train_y, clf.predict(vectorTrain)))
    # print("Test Accuracy :: ", accuracy_score(test_y, y_pred))

    # Best GridSearch params
    # print(clf.best_params_)

    print("Elapsed time of successional-run: ", time.time() - start_time_successional)

    print('rfClassifier finished!\n')
    return scores


# Run rfClassifier directly:
if __name__ == '__main__':
    data = readDatasets.read_dataset()
    trainData = data[0]
    testData = data[1]
    usePipeline = False
    rf_classifier(stopWords.get_stop_words(), trainData, testData, usePipeline)
        # print(neighbors)
        result = getResponse(neighbors)
        # print(result)
        predictions.append(result)
        # print('> predicted=' + repr(result) + ', actual=' + repr(train_data['Category'][8586 + x]))
        if result == train_data['Category'][8586 + x]:
            count += 1

    # print('Test', test_x[1:2], 'Pred', predictions[0])
    # accuracies.append(getAccuracy(test_data, predictions))
    # print('Accuracy: ' + repr(accuracy) + '%')

    print("Got right", count, "out of", 100)

    # Final accuracy after crossValidation
    # print(accuracies)
    # print(np.mean(accuracies))

    print("Elapsed time of successional-run: ", time.time() - start_time_successional)


# Run knnClassifier directly:
if __name__ == '__main__':
    dynamic_datasets_path = '..'
    data = readDatasets.read_dataset(dynamic_datasets_path)
    trainData = data[0]
    testData = data[1]
    knn_classifier(stopWords.get_stop_words(), trainData, testData)
    exit()
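# Illustrative sketch (not project code): getResponse(neighbors) is defined
# elsewhere in this file; in the classic hand-rolled k-NN pattern it is a
# majority vote over the neighbours' labels. The helper below is hypothetical
# and assumes each neighbour carries its category label as its last element.
from collections import Counter


def get_response_sketch(neighbors):
    """Return the category that appears most often among the neighbours."""
    votes = Counter(neighbor[-1] for neighbor in neighbors)
    return votes.most_common(1)[0][0]


# Hypothetical usage:
print(get_response_sketch([('doc a', 'Football'),
                           ('doc b', 'Football'),
                           ('doc c', 'Film')]))  # -> 'Football'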
    y = np.array(accuraccies)
    x_new = np.linspace(x.min(), x.max(), 500)
    f = interp1d(x, y, kind='quadratic')
    y_smooth = f(x_new)

    plt.xlabel('n_components')
    plt.ylabel('Accuracy')
    plt.title('RandomForestClassifier Accuracy Graph')
    plt.plot(x_new, y_smooth, color='r')
    plt.scatter(x, y)
    plt.savefig(
        os.path.join(dynamic_datasets_path, 'Resources', 'images', 'RandomForestClassifier Accuracy Graph.png'))
    plt.show()

    print('rfClassifier finished!\n')
    return scores


# Run rfClassifier directly:
if __name__ == '__main__':
    dynamic_datasets_path = '..'
    data = readDatasets.read_dataset(dynamic_datasets_path)
    trainData = data[0]
    testData = data[1]
    usePipeline = False
    rf_classifier_with_graph(stopWords.get_stop_words(), trainData, testData, usePipeline)
    exit()
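# Illustrative standalone sketch (not project code): the x / accuraccies arrays
# plotted above are filled earlier in the file. The usual pattern is one
# accuracy per candidate n_components value; the loop below assumes TruncatedSVD
# is the dimensionality-reduction step, uses toy data in place of the project's
# vectorized articles, and applies the same quadratic smoothing.
import matplotlib
matplotlib.use('Agg')  # render off-screen
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d
from sklearn.datasets import make_classification
from sklearn.decomposition import TruncatedSVD
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=400, n_features=100, n_informative=20,
                           n_classes=4, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

components = [2, 5, 10, 20, 40, 60]
accuracies = []
for n in components:
    svd = TruncatedSVD(n_components=n, random_state=0)
    rf = RandomForestClassifier(n_estimators=100, random_state=0)
    rf.fit(svd.fit_transform(X_train), y_train)
    accuracies.append(accuracy_score(y_test, rf.predict(svd.transform(X_test))))

x = np.array(components)
acc = np.array(accuracies)
x_new = np.linspace(x.min(), x.max(), 500)
y_smooth = interp1d(x, acc, kind='quadratic')(x_new)

plt.plot(x_new, y_smooth, color='r')
plt.scatter(x, acc)
plt.xlabel('n_components')
plt.ylabel('Accuracy')
plt.savefig('rf_accuracy_vs_n_components.png')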
    with open('Resources/csv/train_set.csv', mode='r', encoding="utf8") as csvfile:
        csvReader = csv.DictReader(csvfile, delimiter='\t', quotechar='|')
        for row in csvReader:
            category = row["Category"]
            if category == 'Business':
                businessStr += row["Content"]
            elif category == 'Politics':
                politicsStr += row["Content"]
            elif category == 'Football':
                footballStr += row["Content"]
            elif category == 'Film':
                filmStr += row["Content"]
            elif category == 'Technology':
                technologyStr += row["Content"]

    show_wordcloud(stop_words, businessStr, 'Business')
    show_wordcloud(stop_words, politicsStr, 'Politics')
    show_wordcloud(stop_words, footballStr, 'Football')
    show_wordcloud(stop_words, filmStr, 'Film')
    show_wordcloud(stop_words, technologyStr, 'Technology')

    print('myWordcloud finished!\n')


# Run myWordcloud directly:
if __name__ == '__main__':
    my_wordcloud(stopWords.get_stop_words())
    # Write the scores.
    csvWriter.writerow(['Accuracy'] + ['{:.3}'.format(nbScores[0])] + ['{:.3}'.format(rfScores[0])] +
                       ['{:.3}'.format(svmScores[0])] + ['knn'] + ['{:.3}'.format(mymethodScores[0])])
    # + [knnScores[0]] + [mymethodScores[0]])
    csvWriter.writerow(['Precision'] + ['{:.3}'.format(nbScores[1])] + ['{:.3}'.format(rfScores[1])] +
                       ['{:.3}'.format(svmScores[1])] + ['knn'] + ['{:.3}'.format(mymethodScores[1])])
    # + [knnScores[1]] + [mymethodScores[1]])
    csvWriter.writerow(['Recall'] + ['{:.3}'.format(nbScores[2])] + ['{:.3}'.format(rfScores[2])] +
                       ['{:.3}'.format(svmScores[2])] + ['knn'] + ['{:.3}'.format(mymethodScores[2])])
    # + [knnScores[2]] + [mymethodScores[2]])
    csvWriter.writerow(['F-Measure'] + ['{:.3}'.format(nbScores[3])] + ['{:.3}'.format(rfScores[3])] +
                       ['{:.3}'.format(svmScores[3])] + ['knn'] + ['{:.3}'.format(mymethodScores[3])])
    # + [knnScores[3]] + [mymethodScores[3]])

    print('Finished writing to the outputCsvFile!')


# Run all classifiers:
if __name__ == '__main__':
    usePipeline = False
    run_all_classifiers(stopWords.get_stop_words(), usePipeline)
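# Illustrative sketch (not project code): each *Scores tuple written above holds
# accuracy, precision, recall and F-measure, in that order. The helper below is
# hypothetical and shows one way to build such a tuple with scikit-learn,
# assuming macro-averaging across the five categories.
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


def build_scores(y_true, y_pred):
    """Return (accuracy, precision, recall, f_measure) with macro averaging."""
    precision, recall, f_measure, _ = precision_recall_fscore_support(
        y_true, y_pred, average='macro')
    return accuracy_score(y_true, y_pred), precision, recall, f_measure


# Hypothetical usage with dummy labels:
print(build_scores(['Business', 'Film', 'Film', 'Politics'],
                   ['Business', 'Film', 'Politics', 'Politics']))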
        csvReader = csv.DictReader(csvfile, delimiter='\t', quotechar='|')
        for row in csvReader:
            category = row["Category"]
            if category == 'Business':
                businessStr += row["Content"]
            elif category == 'Politics':
                politicsStr += row["Content"]
            elif category == 'Football':
                footballStr += row["Content"]
            elif category == 'Film':
                filmStr += row["Content"]
            elif category == 'Technology':
                technologyStr += row["Content"]

    show_wordcloud(stop_words, businessStr, 'Business', dynamic_datasets_path)
    show_wordcloud(stop_words, politicsStr, 'Politics', dynamic_datasets_path)
    show_wordcloud(stop_words, footballStr, 'Football', dynamic_datasets_path)
    show_wordcloud(stop_words, filmStr, 'Film', dynamic_datasets_path)
    show_wordcloud(stop_words, technologyStr, 'Technology', dynamic_datasets_path)

    print('myWordcloud finished!\n')


# Run myWordcloud directly:
if __name__ == '__main__':
    dynamic_datasets_path = ''
    my_wordcloud(stopWords.get_stop_words(), dynamic_datasets_path)
    exit()
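# Illustrative sketch (not project code): show_wordcloud is defined earlier in
# the file and is not shown in this excerpt. The hypothetical helper below shows
# one way such a function could be written with the `wordcloud` package,
# assuming stop_words is an iterable of words to exclude.
from wordcloud import WordCloud


def show_wordcloud_sketch(stop_words, text, category, output_dir='.'):
    """Build a word cloud from one category's concatenated articles and save it as a PNG."""
    cloud = WordCloud(stopwords=set(stop_words), background_color='white',
                      width=800, height=400).generate(text)
    cloud.to_file('{}/{} wordcloud.png'.format(output_dir, category))


# Hypothetical usage:
show_wordcloud_sketch(['the', 'and', 'of'],
                      'markets stocks shares earnings profits markets', 'Business')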