def GET(self):
        query = web.ctx.get('query')
        html = html_helper.HTMLHelper()
        twitterData = get_twitter_data.TwitterData()
        if query:
            if(query[0] == '?'):
                query = query[1:]
            arr = query.split('&')
            logging.warning(arr)
            
            #default values (avoid a NameError when a parameter is missing)
            keyword = ''
            method = ''
            time = 'daily'

            for item in arr:
                if 'keyword' in item:
                    keyword = item.split('=')[1]
                elif 'method' in item:
                    method = item.split('=')[1]
                elif 'time' in item:
                    time = item.split('=')[1]
            #end loop
                            
            if method not in ('baseline', 'naivebayes', 'maxentropy', 'svm'):
                return html.getDefaultHTML(error=2)
            
            tweets = twitterData.getTwitterData(keyword, time)
            if(tweets):
                if(method == 'baseline'):
                    bc = baseline_classifier.BaselineClassifier(tweets, keyword, time)
                    bc.classify()
                    return bc.getHTML()
                elif(method == 'naivebayes'):
                    trainingDataFile = 'data/training_neatfile_2.csv'               
                    #classifierDumpFile = 'data/naivebayes_trained_model.pickle'
                    classifierDumpFile = 'data/test/naivebayes_test_model.pickle'
                    trainingRequired = 0
                    nb = naive_bayes_classifier.NaiveBayesClassifier(tweets, keyword, time, \
                                                  trainingDataFile, classifierDumpFile, trainingRequired)
                    nb.classify()
                    return nb.getHTML()
                elif(method == 'maxentropy'):
                    trainingDataFile = 'data/training_neatfile.csv'                
                    classifierDumpFile = 'data/maxent_trained_model.pickle'
                    trainingRequired = 0
                    maxent = max_entropy_classifier.MaxEntClassifier(tweets, keyword, time, \
                                                  trainingDataFile, classifierDumpFile, trainingRequired)
                    maxent.classify()
                    return maxent.getHTML()
                elif(method == 'svm'):
                    trainingDataFile = 'data/training_neatfile.csv'                
                    classifierDumpFile = 'data/svm_trained_model.pickle'
                    trainingRequired = 0
                    sc = libsvm_classifier.SVMClassifier(tweets, keyword, time, \
                                                  trainingDataFile, classifierDumpFile, trainingRequired)
                    sc.classify()
                    return sc.getHTML()
            else:
                return html.getDefaultHTML(error=1)
        else:
            return html.getDefaultHTML()
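For comparison, here is a minimal sketch (not part of the original handler) of the same parameter extraction done with the standard library's parse_qs instead of splitting on '&' and '=' by hand; the sample query string and the default values are illustrative only.

try:
    from urllib.parse import parse_qs   # Python 3
except ImportError:
    from urlparse import parse_qs       # Python 2, which web.py code like the above usually targets

params = parse_qs('keyword=apple&method=naivebayes&time=daily')
keyword = params.get('keyword', [''])[0]        # 'apple'
method = params.get('method', ['baseline'])[0]  # 'naivebayes'
time = params.get('time', ['daily'])[0]         # 'daily'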
Example #2
def search():
	
	keyword = request.args.get('keyword')
	method = request.args.get('method')
	time = request.args.get('time') or 'daily'   # fall back to the daily window if not supplied

	html = html_helper.HTMLHelper()
	#print html
	twitterData = get_twitter_data.TwitterData()
	#print twitterData
	if keyword:	                    
		if method not in ('baseline', 'naivebayes', 'maxentropy'):
		    return html.getDefaultHTML(error=2)
		ACCESS_TOKEN =  session['oauth_token']
		ACCESS_TOKEN_SECRET = session['oauth_secret']
		tweets = twitterData.getTwitterData(keyword, time,CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
		print tweets, "-------------------------"
		if(tweets):
		    if(method == 'baseline'):
		        bc = baseline_classifier.BaselineClassifier(tweets, keyword, time)
		        bc.classify()
		        return bc.getHTML()
		    elif(method == 'naivebayes'):
		        trainingDataFile = 'data/training_neatfile_2.csv'               
		        classifierDumpFile = 'data/naivebayes_trained_model.pickle'
		        #classifierDumpFile = 'data/test/naivebayes_test_model.pickle'
		        trainingRequired = 0
		        nb = naive_bayes_classifier.NaiveBayesClassifier(tweets, keyword, time, \
		                                      trainingDataFile, classifierDumpFile, trainingRequired)
		        nb.classify()
		        return nb.getHTML()
		    elif(method == 'maxentropy'):
		        trainingDataFile = 'data/training_neatfile.csv'                
		        classifierDumpFile = 'data/maxent_trained_model.pickle'
		        trainingRequired = 0
		        maxent = max_entropy_classifier.MaxEntClassifier(tweets, keyword, time, \
		                                      trainingDataFile, classifierDumpFile, trainingRequired)
		        maxent.classify()
		        return maxent.getHTML()
		else:
		    return html.getDefaultHTML(error=1)
	else:
		return html.getDefaultHTML()
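A minimal wiring sketch (assumed, not taken from the original source) of how a view like search() is typically registered in Flask; the route path, secret key, and credential values below are placeholders.

from flask import Flask, request, session

app = Flask(__name__)
app.secret_key = 'replace-me'            # required for session['oauth_token'] / session['oauth_secret']

CONSUMER_KEY = 'your-consumer-key'       # assumed module-level Twitter app credentials
CONSUMER_SECRET = 'your-consumer-secret'

app.add_url_rule('/search', 'search', search)   # equivalent to decorating search() with @app.route('/search')

if __name__ == '__main__':
    app.run(debug=True)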
Example #3
def recommendations():
    classified = [
        "what's up?",
        "Your son is in hospital, recall us please $100 call award"
    ]
    # each line of spam.txt is expected to look like "<label>\t<message>"
    with open('spam.txt', encoding='utf-8', newline='') as f:
        text = f.read()
    X_train = []
    y_train = []
    X_test = []
    lines = text.split('\n')
    for message in lines:
        buff = message.split('\t', 1)
        if len(buff) < 2:
            # skip blank or malformed lines (e.g. the trailing newline)
            continue
        X_train.append(buff[1])
        y_train.append(buff[0])
    for message in classified:
        X_test.append(message)

    model = naive_bayes_classifier.NaiveBayesClassifier(0.05)
    model.fit(X_train, y_train)
    y = model.predict(X_test)

    print(y)
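The loop above expects each line of spam.txt to be "<label>\t<message>". A tiny illustrative writer for such a file (the sample messages are made up) might look like this:

sample_lines = [
    "ham\twhat's up?",
    "spam\tYou have won a $1000 prize, call now!",
]
with open('spam.txt', 'w', encoding='utf-8', newline='') as f:
    f.write('\n'.join(sample_lines) + '\n')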
Example #4
    def GET(self):
        query = web.ctx.get('query')
        html = html_helper.HTMLHelper()
        twitterData = get_twitter_data.TwitterData()
        if query:
            if (query[0] == '?'):
                query = query[1:]
            arr = query.split('&')
            logging.warning(arr)

            #default values (avoid a NameError when the keyword parameter is missing)
            keyword = ''
            time = 'daily'

            for item in arr:
                if 'keyword' in item:
                    keyword = item.split('=')[1]
                elif 'time' in item:
                    time = item.split('=')[1]
            #end loop

            tweets = twitterData.getTwitterData(keyword, time)
            if (tweets):
                trainingDataFile = 'fulltrainingdataset-csv.csv'
                #classifierDumpFile = 'data/naivebayes_trained_model.pickle'
                classifierDumpFile = 'my_final_classifier2.pickle'
                trainingRequired = 0
                nb = naive_bayes_classifier.NaiveBayesClassifier(tweets, keyword, time, \
                                              trainingDataFile, classifierDumpFile, trainingRequired)
                nb.classify()
                #nb.main()#added for testing
                #nb.accuracy()
                return nb.getHTML()
            else:
                return html.getDefaultHTML(error=1)
        else:
            return html.getDefaultHTML()
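A minimal web.py wiring sketch (assumed, not part of the original snippet) showing how a handler class containing a GET() method like the ones above is usually mounted; the class name and URL pattern are placeholders.

import web

# 'SentimentHandler' stands in for the class that defines the GET() method above
urls = ('/', 'SentimentHandler')

if __name__ == '__main__':
    app = web.application(urls, globals())
    app.run()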
    print "Please choose the algorithm to test, syntax = python analyze.py (svm|naivebayes|maxent)"
    exit()

algorithm = sys.argv[1]
if (algorithm == 'baseline'):
    bc = baseline_classifier.BaselineClassifier(tweets, keyword, time)
    bc.classify()
    val = bc.getHTML()
elif (algorithm == 'naivebayes'):
    #trainingDataFile = 'data/training_trimmed.csv'
    trainingDataFile = 'data/full_training_dataset.csv'
    classifierDumpFile = 'data/test/naivebayes_test_model.pickle'
    trainingRequired = 1
    print 'Started to instantiate Naive Bayes Classifier'
    sys.stdout.flush()
    nb = naive_bayes_classifier.NaiveBayesClassifier(tweets, keyword, time,\
                                  trainingDataFile, classifierDumpFile, trainingRequired)
    #nb.classify()
    print 'Computing Accuracy'
    sys.stdout.flush()
    nb.accuracy()
elif (algorithm == 'maxent'):
    #trainingDataFile = 'data/training_trimmed.csv'
    trainingDataFile = 'data/full_training_dataset.csv'
    classifierDumpFile = 'data/test/maxent_test_model.pickle'
    trainingRequired = 1
    print 'Started to instantiate Max Entropy Classifier'
    sys.stdout.flush()
    maxent = max_entropy_classifier.MaxEntClassifier(tweets, keyword, time,\
                                  trainingDataFile, classifierDumpFile, trainingRequired)
    #maxent.analyzeTweets()
    #maxent.classify()
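# (Sketch, not in the original fragment.) The usage string above also lists 'svm';
# a branch analogous to the naivebayes/maxent ones, reusing the SVMClassifier call
# shown in the first example, would plausibly look like this. The dump-file path
# is an assumption.
elif (algorithm == 'svm'):
    trainingDataFile = 'data/full_training_dataset.csv'
    classifierDumpFile = 'data/test/svm_test_model.pickle'
    trainingRequired = 1
    print 'Started to instantiate SVM Classifier'
    sys.stdout.flush()
    sc = libsvm_classifier.SVMClassifier(tweets, keyword, time,\
                                  trainingDataFile, classifierDumpFile, trainingRequired)
    sc.accuracy()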
Example #6
def app():

    # defaults; each value is overwritten by the menu selections below
    keyword = 'apple'
    method = 'svm'
    training_required = 1
    classify_enabled = 1

    while True:
        print("\n\n\n-------------------------- 0 --------------------------")
        print("Please select an algorithm:")
        print("0 - Exit from application")
        print("1 - Naive Bayes Algorithm")
        print("2 - Maximum Entropy Algorithm")
        print("3 - Support Vector Machine (SVM)")

        input_data = raw_input("Enter Value : ")
        if input_data.isdigit() and 0 < int(input_data) <= 3:
            method = int(input_data)
        elif input_data.isdigit() and int(input_data) == 0:
            break
        else:
            print("Wrong Selection! Read : " + input_data)
            continue

        print("\nCalculate Accuracy or Classify?")
        print("0 - Exit from application")
        print("1 - Accuracy")
        print("2 - Classify")
        input_data = raw_input("Enter Value : ")
        if input_data.isdigit() and 0 < int(input_data) <= 2:
            classify_enabled = int(input_data) - 1
        elif input_data.isdigit() and int(input_data) == 0:
            break
        else:
            print("Wrong Selection! Read : " + input_data)
            continue

        if classify_enabled == 1:
            print("\nIs training required?")
            print("0 - Exit from application")
            print("1 - Not required")
            print("2 - Required")
            input_data = raw_input("Enter Value : ")
            if input_data.isdigit() and 0 < int(input_data) <= 2:
                training_required = int(input_data) - 1
            elif input_data.isdigit() and int(input_data) == 0:
                break
            else:
                print("Wrong Selection! Read : " + input_data)
                continue

            print("\nEnter a keyword for test")
            input_data = raw_input("Enter Keyword : ")
            keyword = input_data

            print("Selected Keyword: " + keyword)

        print("-------------------------- = --------------------------\n\n\n")

        training_data_file = 'data-set/Airline-Sentiment.csv'

        if classify_enabled:
            tweets = twitterClient.getTwitterData(keyword)
            if not tweets:
                print("Tweet couldn't be fetched")
                return

        if method == 1:
            print("Method: naive bayes")
            if classify_enabled:

                classifier_dump_file = 'data-set/nb_trained_model.pickle'
                nb = naive_bayes_classifier.NaiveBayesClassifier(
                    training_data_file, classifier_dump_file)
                nb.classify(tweets, training_required)

            else:
                classifier_dump_file = 'data-set/nb_trained_model_acc.pickle'
                nb = naive_bayes_classifier.NaiveBayesClassifier(
                    training_data_file, classifier_dump_file)
                nb.accuracy()

        elif method == 2:
            print("Method: Max Entropy")
            if classify_enabled:

                classifier_dump_file = 'data-set/maxent_trained_model.pickle'
                maxent = max_entropy_classifier.MaxEntClassifier(
                    training_data_file, classifier_dump_file)
                maxent.classify(tweets, training_required)

            else:
                classifier_dump_file = 'data-set/maxent_trained_model_acc.pickle'
                maxent = max_entropy_classifier.MaxEntClassifier(
                    training_data_file, classifier_dump_file)
                maxent.accuracy()

        elif method == 3:
            print("Method: Support Vector Machine")
            if classify_enabled:

                classifier_dump_file = 'data-set/svm_trained_model.pickle'
                sc = libsvm_classifier.SVMClassifier(training_data_file,
                                                     classifier_dump_file)
                sc.classify(tweets, training_required)

            else:
                classifier_dump_file = 'data-set/svm_trained_model_acc.pickle'
                sc = libsvm_classifier.SVMClassifier(training_data_file,
                                                     classifier_dump_file)
                sc.accuracy()