def GET(self):
    """Fetch tweets for a keyword and render them with the requested classifier.

    The raw query string is read from ``web.ctx`` and three parameters are
    recognised:

    * ``keyword`` -- search term handed to ``TwitterData.getTwitterData``.
    * ``method``  -- ``baseline``, ``naivebayes``, ``maxentropy`` or ``svm``.
    * ``time``    -- passed through to ``getTwitterData``; defaults to
      ``'daily'``.

    Returns:
        The classifier's ``getHTML()`` output on success, otherwise
        ``html.getDefaultHTML()``: called with no arguments when there is
        no query string or no keyword, with ``error=2`` when the method is
        missing or unknown, and with ``error=1`` when no tweets came back.
    """
    query = web.ctx.get('query')
    html = html_helper.HTMLHelper()
    twitterData = get_twitter_data.TwitterData()

    if not query:
        return html.getDefaultHTML()

    if query[0] == '?':
        query = query[1:]
    arr = query.split('&')
    logging.warning(arr)

    # Match parameter names exactly and tolerate tokens without '=': a
    # substring test would treat "time=keyword" as the keyword, and
    # indexing the split result would raise on a bare token.
    params = {'time': 'daily'}
    for item in arr:
        name, sep, value = item.partition('=')
        if sep and name in ('keyword', 'method', 'time'):
            params[name] = value
    keyword = params.get('keyword')
    method = params.get('method')
    time = params['time']

    # A missing method is None here and is rejected like any unknown one.
    if method not in ('baseline', 'naivebayes', 'maxentropy', 'svm'):
        return html.getDefaultHTML(error=2)
    if not keyword:
        # Nothing to search for: show the plain landing page.
        return html.getDefaultHTML()

    tweets = twitterData.getTwitterData(keyword, time)
    if not tweets:
        return html.getDefaultHTML(error=1)

    # The trained classifiers are all built with trainingRequired=0.
    trainingRequired = 0
    if method == 'baseline':
        classifier = baseline_classifier.BaselineClassifier(tweets, keyword, time)
    elif method == 'naivebayes':
        # NOTE(review): this loads the dump under data/test/ -- confirm it
        # is intended rather than 'data/naivebayes_trained_model.pickle'.
        classifier = naive_bayes_classifier.NaiveBayesClassifier(
            tweets, keyword, time,
            'data/training_neatfile_2.csv',
            'data/test/naivebayes_test_model.pickle',
            trainingRequired)
    elif method == 'maxentropy':
        classifier = max_entropy_classifier.MaxEntClassifier(
            tweets, keyword, time,
            'data/training_neatfile.csv',
            'data/maxent_trained_model.pickle',
            trainingRequired)
    else:  # 'svm' -- the only remaining validated value
        classifier = libsvm_classifier.SVMClassifier(
            tweets, keyword, time,
            'data/training_neatfile.csv',
            'data/svm_trained_model.pickle',
            trainingRequired)

    classifier.classify()
    return classifier.getHTML()
def search():
    """Classify tweets matching the requested keyword and return the result page.

    Reads ``keyword``, ``method`` and ``time`` from ``request.args`` and the
    OAuth token/secret from ``session``.

    Returns:
        The classifier's ``getHTML()`` output on success, otherwise
        ``html.getDefaultHTML()``: called with no arguments when no keyword
        was supplied, with ``error=2`` when ``method`` is not one of
        ``baseline`` / ``naivebayes`` / ``maxentropy``, and with ``error=1``
        when no tweets came back.

    Raises:
        KeyError: if ``oauth_token`` or ``oauth_secret`` is absent from
            ``session``.
    """
    keyword = request.args.get('keyword')
    method = request.args.get('method')
    # Fall back to 'daily' when the parameter is absent or empty; the
    # previous code assigned the default and then overwrote it with None.
    time = request.args.get('time') or 'daily'

    html = html_helper.HTMLHelper()
    twitterData = get_twitter_data.TwitterData()

    if not keyword:
        return html.getDefaultHTML()
    # A missing method is None here and is rejected like any unknown one.
    if method not in ('baseline', 'naivebayes', 'maxentropy'):
        return html.getDefaultHTML(error=2)

    # NOTE(review): presumably these keys are set by a login step outside
    # this function -- confirm what should happen when they are missing.
    ACCESS_TOKEN = session['oauth_token']
    ACCESS_TOKEN_SECRET = session['oauth_secret']
    tweets = twitterData.getTwitterData(keyword, time, CONSUMER_KEY, CONSUMER_SECRET,
                                        ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    # Debug trace kept from the original; formatted so it emits the same
    # text under both Python 2 and Python 3.
    print("%s -------------------------" % (tweets,))

    if not tweets:
        return html.getDefaultHTML(error=1)

    # The trained classifiers are both built with trainingRequired=0.
    trainingRequired = 0
    if method == 'baseline':
        classifier = baseline_classifier.BaselineClassifier(tweets, keyword, time)
    elif method == 'naivebayes':
        classifier = naive_bayes_classifier.NaiveBayesClassifier(
            tweets, keyword, time,
            'data/training_neatfile_2.csv',
            'data/naivebayes_trained_model.pickle',
            trainingRequired)
    else:  # 'maxentropy' -- the only remaining validated value
        classifier = max_entropy_classifier.MaxEntClassifier(
            tweets, keyword, time,
            'data/training_neatfile.csv',
            'data/maxent_trained_model.pickle',
            trainingRequired)

    classifier.classify()
    return classifier.getHTML()
def recommendations():
    """Train a naive Bayes model on ``spam.txt`` and print its predictions.

    ``spam.txt`` is read as UTF-8 and split on ``'\\n'``. In each line the
    text before the first tab is used as the label and the text after it
    as the message. The fitted model is then asked to predict labels for
    two hard-coded sample messages, and the prediction is printed.

    Returns:
        None. The prediction is only written to stdout.

    Raises:
        OSError: if ``spam.txt`` cannot be opened.
    """
    classified = [
        "what's up?",
        "Your son is in hospital, recall us please $100 call award",
    ]

    # The context manager closes the handle even if reading fails.
    with open('spam.txt', encoding='utf-8', newline='') as f:
        text = f.read()

    X_train = []
    y_train = []
    for message in text.split('\n'):
        label, sep, body = message.partition('\t')
        if not sep:
            # Blank line (e.g. after a trailing newline) or a row with no
            # tab separator: nothing to train on, and indexing the split
            # result here used to raise IndexError.
            continue
        X_train.append(body)
        y_train.append(label)

    X_test = list(classified)

    # NOTE(review): the meaning of 0.05 is defined by NaiveBayesClassifier
    # (presumably a smoothing parameter -- confirm against the class).
    model = naive_bayes_classifier.NaiveBayesClassifier(0.05)
    model.fit(X_train, y_train)
    y = model.predict(X_test)
    print(y)
def GET(self):
    """Fetch tweets for a keyword and render them with the naive Bayes classifier.

    The raw query string is read from ``web.ctx`` and two parameters are
    recognised:

    * ``keyword`` -- search term handed to ``TwitterData.getTwitterData``.
    * ``time``    -- passed through to ``getTwitterData``; defaults to
      ``'daily'``.

    Returns:
        The classifier's ``getHTML()`` output on success, otherwise
        ``html.getDefaultHTML()``: called with no arguments when there is
        no query string or no keyword, and with ``error=1`` when no tweets
        came back.
    """
    query = web.ctx.get('query')
    html = html_helper.HTMLHelper()
    twitterData = get_twitter_data.TwitterData()

    if not query:
        return html.getDefaultHTML()

    if query[0] == '?':
        query = query[1:]
    arr = query.split('&')
    logging.warning(arr)

    # Match parameter names exactly and tolerate tokens without '=': a
    # substring test would treat "time=keyword" as the keyword, and
    # indexing the split result would raise on a bare token.
    params = {'time': 'daily'}
    for item in arr:
        name, sep, value = item.partition('=')
        if sep and name in ('keyword', 'time'):
            params[name] = value
    keyword = params.get('keyword')
    time = params['time']

    if not keyword:
        # Previously an unbound local; without a keyword there is nothing
        # to search for, so show the plain landing page.
        return html.getDefaultHTML()

    tweets = twitterData.getTwitterData(keyword, time)
    if not tweets:
        return html.getDefaultHTML(error=1)

    trainingDataFile = 'fulltrainingdataset-csv.csv'
    classifierDumpFile = 'my_final_classifier2.pickle'
    trainingRequired = 0
    nb = naive_bayes_classifier.NaiveBayesClassifier(
        tweets, keyword, time,
        trainingDataFile, classifierDumpFile, trainingRequired)
    nb.classify()
    return nb.getHTML()
print "Please choose the algorithm to test, syntax = python analyze.py (svm|naivebayes|maxent)" exit() algorithm = sys.argv[1] if (algorithm == 'baseline'): bc = baseline_classifier.BaselineClassifier(tweets, keyword, time) bc.classify() val = bc.getHTML() elif (algorithm == 'naivebayes'): #trainingDataFile = 'data/training_trimmed.csv' trainingDataFile = 'data/full_training_dataset.csv' classifierDumpFile = 'data/test/naivebayes_test_model.pickle' trainingRequired = 1 print 'Started to instantiate Naive Bayes Classifier' sys.stdout.flush() nb = naive_bayes_classifier.NaiveBayesClassifier(tweets, keyword, time,\ trainingDataFile, classifierDumpFile, trainingRequired) #nb.classify() print 'Computing Accuracy' sys.stdout.flush() nb.accuracy() elif (algorithm == 'maxent'): #trainingDataFile = 'data/training_trimmed.csv' trainingDataFile = 'data/full_training_dataset.csv' classifierDumpFile = 'data/test/maxent_test_model.pickle' trainingRequired = 1 print 'Started to instantiate Max Entropy Classifier' sys.stdout.flush() maxent = max_entropy_classifier.MaxEntClassifier(tweets, keyword, time,\ trainingDataFile, classifierDumpFile, trainingRequired) #maxent.analyzeTweets() #maxent.classify()
def app():
    """Run the interactive console menu for the sentiment classifiers.

    Each pass of the loop asks for an algorithm (naive Bayes, maximum
    entropy or SVM) and a mode (accuracy or classify). In classify mode it
    also asks whether training is required and for a keyword, fetches
    tweets for that keyword through ``twitterClient`` and classifies them.
    In accuracy mode it calls the classifier's ``accuracy()``.

    Entering ``0`` at any menu leaves the application. Any other invalid
    entry prints a message and restarts the pass from the algorithm menu.
    The function also returns if no tweets could be fetched.
    """

    def _ask(question, options):
        """Print a numbered menu; return the chosen int, or None if invalid.

        ``0`` (exit) is always offered in addition to ``options``.
        """
        print(question)
        print("0 - Exit from application")
        for number, label in enumerate(options, 1):
            print("%d - %s" % (number, label))
        answer = raw_input("Enter Value : ")
        if answer.isdigit() and 0 <= int(answer) <= len(options):
            return int(answer)
        print("Wrong Selection! Read : " + answer)
        return None

    training_data_file = 'data-set/Airline-Sentiment.csv'
    # Menu number -> (label printed, classifier class, dump-file prefix).
    classifiers = {
        1: ("naive bayes", naive_bayes_classifier.NaiveBayesClassifier, 'nb'),
        2: ("Max Entropy", max_entropy_classifier.MaxEntClassifier, 'maxent'),
        3: ("Support Vector Machine", libsvm_classifier.SVMClassifier, 'svm'),
    }

    while True:
        print("\n\n\n-------------------------- 0 --------------------------")
        method = _ask("Please select a algorithm:",
                      ("Naive Bayes Algorithm",
                       "Maximum Entropy Algorithm",
                       "Support Vector Machine (SVM)"))
        if method is None:
            continue
        if method == 0:
            break

        mode = _ask("\nCalculate Accuracy or Classify?", ("Accuracy", "Classify"))
        if mode is None:
            continue
        if mode == 0:
            break
        classify_enabled = mode - 1  # 0 = accuracy, 1 = classify

        if classify_enabled:
            training = _ask("\nIs training required?", ("Not required", "Required"))
            if training is None:
                continue
            if training == 0:
                break
            training_required = training - 1  # 0 = not required, 1 = required

            # The keyword is only used to fetch tweets, so it is only
            # requested in classify mode.
            print("\nEnter a keyword for test")
            keyword = raw_input("Enter Keyword : ")
            print("Selected Keyword: " + keyword)
        print("-------------------------- = --------------------------\n\n\n")

        if classify_enabled:
            tweets = twitterClient.getTwitterData(keyword)
            if not tweets:
                print("Tweet couldn't be fetched")
                return

        label, classifier_cls, prefix = classifiers[method]
        print("Method: " + label)
        if classify_enabled:
            classifier_dump_file = 'data-set/' + prefix + '_trained_model.pickle'
            classifier = classifier_cls(training_data_file, classifier_dump_file)
            classifier.classify(tweets, training_required)
        else:
            # Accuracy runs use a separate "_acc" dump file.
            classifier_dump_file = 'data-set/' + prefix + '_trained_model_acc.pickle'
            classifier = classifier_cls(training_data_file, classifier_dump_file)
            classifier.accuracy()