def GET(self):
        query = web.ctx.get('query')
        html = html_helper.HTMLHelper()
        twitterData = get_twitter_data.TwitterData()
        if query:
            if(query[0] == '?'):
                query = query[1:]
            arr = query.split('&')
            logging.warning(arr)
            
            #default values (avoid a NameError when a parameter is missing)
            keyword = None
            method = None
            time = 'daily'

            for item in arr:
                if 'keyword' in item:
                    keyword = item.split('=')[1]
                elif 'method' in item:
                    method = item.split('=')[1]
                elif 'time' in item:
                    time = item.split('=')[1]
            #end loop
                            
            if method not in ('baseline', 'naivebayes', 'maxentropy', 'svm'):
                return html.getDefaultHTML(error=2)
            
            tweets = twitterData.getTwitterData(keyword, time)
            if(tweets):
                if(method == 'baseline'):
                    bc = baseline_classifier.BaselineClassifier(tweets, keyword, time)
                    bc.classify()
                    return bc.getHTML()
                elif(method == 'naivebayes'):
                    trainingDataFile = 'data/training_neatfile_2.csv'               
                    #classifierDumpFile = 'data/naivebayes_trained_model.pickle'
                    classifierDumpFile = 'data/test/naivebayes_test_model.pickle'
                    trainingRequired = 0
                    nb = naive_bayes_classifier.NaiveBayesClassifier(tweets, keyword, time, \
                                                  trainingDataFile, classifierDumpFile, trainingRequired)
                    nb.classify()
                    return nb.getHTML()
                elif(method == 'maxentropy'):
                    trainingDataFile = 'data/training_neatfile.csv'                
                    classifierDumpFile = 'data/maxent_trained_model.pickle'
                    trainingRequired = 0
                    maxent = max_entropy_classifier.MaxEntClassifier(tweets, keyword, time, \
                                                  trainingDataFile, classifierDumpFile, trainingRequired)
                    maxent.classify()
                    return maxent.getHTML()
                elif(method == 'svm'):
                    trainingDataFile = 'data/training_neatfile.csv'                
                    classifierDumpFile = 'data/svm_trained_model.pickle'
                    trainingRequired = 0
                    sc = libsvm_classifier.SVMClassifier(tweets, keyword, time, \
                                                  trainingDataFile, classifierDumpFile, trainingRequired)
                    sc.classify()
                    return sc.getHTML()
            else:
                return html.getDefaultHTML(error=1)
        else:
            return html.getDefaultHTML()
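# A minimal sketch (not part of the handler above) of the same query-string parsing
# done with the standard library instead of manual split('&')/split('='); the
# parameter names 'keyword', 'method', 'time' and the 'daily' default are taken
# from the code above, everything else here is illustrative.
try:
    from urllib.parse import parse_qs   # Python 3
except ImportError:
    from urlparse import parse_qs       # Python 2

def parse_query(query):
    """Return (keyword, method, time) from a raw query string such as
    'keyword=apple&method=naivebayes&time=daily'."""
    params = parse_qs(query.lstrip('?'))
    keyword = params.get('keyword', [None])[0]
    method = params.get('method', [None])[0]
    time = params.get('time', ['daily'])[0]
    return keyword, method, time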
Example no. 2
def search():
	
	keyword = request.args.get('keyword')
	method = request.args.get('method')
	time = request.args.get('time', 'daily')

	html = html_helper.HTMLHelper()
	#print html
	twitterData = get_twitter_data.TwitterData()
	#print twitterData
	if keyword:	                    
		if(method != 'baseline' and method != 'naivebayes' and method != 'maxentropy'):
		    return html.getDefaultHTML(error=2)
		ACCESS_TOKEN =  session['oauth_token']
		ACCESS_TOKEN_SECRET = session['oauth_secret']
		tweets = twitterData.getTwitterData(keyword, time, CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
		print tweets, "-------------------------"
		if(tweets):
		    if(method == 'baseline'):
		        bc = baseline_classifier.BaselineClassifier(tweets, keyword, time)
		        bc.classify()
		        return bc.getHTML()
		    elif(method == 'naivebayes'):
		        trainingDataFile = 'data/training_neatfile_2.csv'               
		        classifierDumpFile = 'data/naivebayes_trained_model.pickle'
		        #classifierDumpFile = 'data/test/naivebayes_test_model.pickle'
		        trainingRequired = 0
		        nb = naive_bayes_classifier.NaiveBayesClassifier(tweets, keyword, time, \
		                                      trainingDataFile, classifierDumpFile, trainingRequired)
		        nb.classify()
		        return nb.getHTML()
		    elif(method == 'maxentropy'):
		        trainingDataFile = 'data/training_neatfile.csv'                
		        classifierDumpFile = 'data/maxent_trained_model.pickle'
		        trainingRequired = 0
		        maxent = max_entropy_classifier.MaxEntClassifier(tweets, keyword, time, \
		                                      trainingDataFile, classifierDumpFile, trainingRequired)
		        maxent.classify()
		        return maxent.getHTML()
		else:
		    return html.getDefaultHTML(error=1)
	else:
		return html.getDefaultHTML()
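# A minimal sketch, assuming the search() view above is registered on a Flask app
# and that a separate 'login' view (hypothetical name) performs the Twitter OAuth
# dance and stores 'oauth_token'/'oauth_secret' in the session; this guard sends
# the user there before search() tries to read those keys.
from flask import Flask, redirect, request, session, url_for

app = Flask(__name__)
app.secret_key = 'change-me'                    # sessions require a secret key

app.add_url_rule('/search', 'search', search)   # expose the view defined above

@app.before_request
def require_twitter_auth():
    if request.endpoint == 'search' and 'oauth_token' not in session:
        return redirect(url_for('login'))       # 'login' endpoint is assumed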
with open('data/tweets_current.pickle', 'rb') as infile:
    tweets = pickle.load(infile)

#algorithm = 'baseline'
#algorithm = 'naivebayes'
#algorithm = 'maxent'
#algorithm = 'svm'

if (len(sys.argv) < 2):
    print "Please choose the algorithm to test, syntax = python analyze.py (svm|naivebayes|maxent)"
    exit()

algorithm = sys.argv[1]
if (algorithm == 'baseline'):
    bc = baseline_classifier.BaselineClassifier(tweets, keyword, time)
    bc.classify()
    val = bc.getHTML()
elif (algorithm == 'naivebayes'):
    #trainingDataFile = 'data/training_trimmed.csv'
    trainingDataFile = 'data/full_training_dataset.csv'
    classifierDumpFile = 'data/test/naivebayes_test_model.pickle'
    trainingRequired = 1
    print 'Started to instantiate Naive Bayes Classifier'
    sys.stdout.flush()
    nb = naive_bayes_classifier.NaiveBayesClassifier(tweets, keyword, time,\
                                  trainingDataFile, classifierDumpFile, trainingRequired)
    #nb.classify()
    print 'Computing Accuracy'
    sys.stdout.flush()
    nb.accuracy()
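# A minimal sketch (file name taken from the loader above; the structure of
# 'tweets' is whatever getTwitterData returns) of how the pickle consumed by this
# script would be produced, using binary mode on both ends.
import pickle

def dump_tweets(tweets, path='data/tweets_current.pickle'):
    """Serialise fetched tweets so the analysis script can reload them later."""
    with open(path, 'wb') as outfile:
        pickle.dump(tweets, outfile)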
Example no. 4
        print(
            "\n\tBaseline classifier has already been applied. Please move to the next steps :)\n"
        )
    else:
        print("\tSystem exiting now. Visit again !!!\n")
        sys.exit()
elif (inp == '1'):
    if (os.path.isfile("sentiment_tweet_list.txt")):
        print(
            "\tBaseline classifier has already been applied. Please move to the next steps :)\n"
        )
    else:
        print("\tworking on it\n")
        df = pd.read_excel("Stemmed_DATA.xlsx", sheet_name="Sheet1")
        list_tweets = df['Stemming tweet']
        bc = baseline_classifier.BaselineClassifier(list_tweets)
        bc.classify()
        bc.writeOutput('sentiment_tweet_list.txt', 'w')
else:
    print("\tWrong choice. System exiting now.\n")
    sys.exit()

print("Split the data into Train and Test dataset.\n")
inp = input("Press 0/1 to continue ... ")
if (inp == '0'):
    if (os.path.isfile("Data_to_split.xlsx")
            and os.path.isfile("train_data.xlsx")
            and os.path.isfile("test_data.xlsx")):
        print("\tAll required files already exist. :)\n")
    else:
        print("\tSystem exiting now. Visit again and then press 1 !!!\n")