def main_function():
    """Train a Naive Bayes classifier and print its accuracy on the test set.

    Opens the MySQL database configured under ``DATABASES["date_cutoff"]``,
    turns the training and test tweets into feature sets with
    ``classify.process_tweets``, trains a ``NaiveBayesClassifier`` and prints
    the value returned by ``accuracy(classifier, test_feature_set)``.

    The connection is closed before returning, including when a step raises.
    """
    settings = DATABASES["date_cutoff"]
    conn = MySQLdb.connect(
        host=settings["HOST"],
        user=settings["USER"],
        passwd=settings["PASSWORD"],
        db=settings["NAME"],
    )
    try:
        training_tweets = get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)
        classifier = NaiveBayesClassifier.train(training_feature_set)

        test_tweets = get_test_tweets(conn)
        test_feature_set = classify.process_tweets(test_tweets)
        classifier_accuracy = accuracy(classifier, test_feature_set)
    finally:
        # Always release the connection, also on error paths.
        conn.close()

    # Nothing in this function counts per-label guesses, so the table is
    # printed with all zeros; the line is kept so the output is unchanged.
    count_table = {"+": 0, "-": 0, "I": 0, "O": 0}
    print(count_table)
    print("classifier accuracy: " + repr(classifier_accuracy))
def main_function():
    """Train a Naive Bayes classifier and print its accuracy on the test set.

    Opens the MySQL database configured under ``DATABASES['default']``,
    turns the training and test tweets into feature sets with
    ``classify.process_tweets``, trains a ``NaiveBayesClassifier`` and prints
    the value returned by ``accuracy(classifier, test_feature_set)``.

    The connection is closed before returning, including when a step raises.
    """
    settings = DATABASES['default']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)

        # NOTE(review): megam is configured although only Naive Bayes is
        # trained below; presumably left over from a MaxentClassifier
        # variant -- confirm before removing the call.
        config_megam('/opt/packages')
        classifier = NaiveBayesClassifier.train(training_feature_set)

        test_tweets = classify.get_test_tweets(conn)
        test_feature_set = classify.process_tweets(test_tweets)
        classifier_accuracy = accuracy(classifier, test_feature_set)
    finally:
        # Always release the connection, also on error paths.
        conn.close()

    print("classifier accuracy: " + repr(classifier_accuracy))
def main_function():
    """Train a Naive Bayes classifier and print its accuracy on the test set.

    Opens the MySQL database configured under ``DATABASES['date_cutoff']``,
    turns the training and test tweets into feature sets with
    ``classify.process_tweets``, trains a ``NaiveBayesClassifier`` and prints
    the value returned by ``accuracy(classifier, test_feature_set)``.

    The connection is closed before returning, including when a step raises.
    """
    settings = DATABASES['date_cutoff']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    try:
        training_tweets = get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)
        classifier = NaiveBayesClassifier.train(training_feature_set)

        test_tweets = get_test_tweets(conn)
        test_feature_set = classify.process_tweets(test_tweets)
        classifier_accuracy = accuracy(classifier, test_feature_set)
    finally:
        # Always release the connection, also on error paths.
        conn.close()

    # Nothing in this function counts per-label guesses, so the table is
    # printed with all zeros; the line is kept so the output is unchanged.
    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    print(count_table)
    print("classifier accuracy: " + repr(classifier_accuracy))
def main_function():
    """Classify pending tweets with a megam-trained MaxEnt model.

    Opens the MySQL database configured under ``DATABASES['default']``,
    trains a ``MaxentClassifier`` on the training tweets, then for every row
    returned by ``classify.get_tweets_to_classify`` stores the predicted
    label via ``update_tweet_polarity`` and tallies it. Finally runs
    ``fix_manual_tweets`` and prints the per-label tally.

    Every database call uses the connection opened here (the function
    defines no other connection), and that connection is closed before
    returning, including when a step raises.
    """
    settings = DATABASES['default']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)

        config_megam('/opt/packages')
        classifier = MaxentClassifier.train(training_feature_set,
                                            algorithm="megam", trace=0)

        count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
        for tweet in classify.get_tweets_to_classify(conn):
            # The first column is what the helpers below take as the
            # tweet's identifier.
            tweet_id = tweet[0]
            text = classify.get_tweet_text(conn, tweet_id)[0][0]
            guess = classifier.classify(classify.process_tweet(text))
            update_tweet_polarity(tweet_id, guess, conn)
            count_table[guess] += 1

        # For the tweets where polarity was determined manually, copy from
        # majority_vote to auto_vote.
        fix_manual_tweets(conn)
    finally:
        # Always release the connection, also on error paths.
        conn.close()

    print(count_table)
def main_function():
    """Classify pending tweets with Naive Bayes and store the guesses.

    Opens the MySQL database configured under ``DATABASES['ensemble']``,
    trains a ``NaiveBayesClassifier`` on the training tweets, then for every
    row returned by ``classify.get_tweets_to_classify`` stores the predicted
    label via ``classify.update_tweet_polarity`` and tallies it. Afterwards
    runs ``classify.Statements.UPDATE_MANUAL_CLASSIFIED`` and prints the
    per-label tally.

    No confusion matrix is built here, so none is printed. The connection
    is closed before returning, including when a step raises.
    """
    settings = DATABASES['ensemble']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)
        classifier = NaiveBayesClassifier.train(training_feature_set)

        count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
        for tweet in classify.get_tweets_to_classify(conn):
            # The first column is what the helpers below take as the
            # tweet's identifier.
            tweet_id = tweet[0]
            text = classify.get_tweet_text(conn, tweet_id)[0][0]
            guess = classifier.classify(classify.process_tweet(text))
            classify.update_tweet_polarity(tweet_id, guess, conn)
            count_table[guess] += 1

        classify.run_sql(conn, classify.Statements.UPDATE_MANUAL_CLASSIFIED)
    finally:
        # Always release the connection, also on error paths.
        conn.close()

    print(count_table)
def main_function():
    """Classify pending tweets with Naive Bayes and store the guesses.

    Opens the MySQL database configured under ``DATABASES['ensemble']``,
    trains a ``NaiveBayesClassifier`` on the training tweets, then for every
    row returned by ``classify.get_tweets_to_classify`` stores the predicted
    label via ``classify.update_tweet_polarity`` and tallies it. Afterwards
    runs ``classify.Statements.UPDATE_MANUAL_CLASSIFIED`` and prints the
    per-label tally.

    No confusion matrix is built here, so none is printed. The connection
    is closed before returning, including when a step raises.
    """
    settings = DATABASES['ensemble']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)
        classifier = NaiveBayesClassifier.train(training_feature_set)

        count_table = {'+':0, '-':0, 'I':0, 'O':0}
        for tweet in classify.get_tweets_to_classify(conn):
            # The first column is what the helpers below take as the
            # tweet's identifier.
            tweet_id = tweet[0]
            text = classify.get_tweet_text(conn, tweet_id)[0][0]
            guess = classifier.classify(classify.process_tweet(text))
            classify.update_tweet_polarity(tweet_id, guess, conn)
            count_table[guess] += 1

        classify.run_sql(conn, classify.Statements.UPDATE_MANUAL_CLASSIFIED)
    finally:
        # Always release the connection, also on error paths.
        conn.close()

    print(count_table)
def main_function():
    """Classify pending tweets with a megam-trained MaxEnt model.

    Opens the MySQL database configured under ``DATABASES['default']``,
    trains a ``MaxentClassifier`` on the training tweets, then for every row
    returned by ``classify.get_tweets_to_classify`` stores the predicted
    label via ``update_tweet_polarity`` and tallies it. Finally runs
    ``fix_manual_tweets`` and prints the per-label tally.

    Every database call uses the connection opened here (the function
    defines no other connection), and that connection is closed before
    returning, including when a step raises.
    """
    settings = DATABASES['default']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)

        config_megam('/opt/packages')
        classifier = MaxentClassifier.train(training_feature_set,
                                            algorithm="megam", trace=0)

        count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
        for tweet in classify.get_tweets_to_classify(conn):
            # The first column is what the helpers below take as the
            # tweet's identifier.
            tweet_id = tweet[0]
            text = classify.get_tweet_text(conn, tweet_id)[0][0]
            guess = classifier.classify(classify.process_tweet(text))
            update_tweet_polarity(tweet_id, guess, conn)
            count_table[guess] += 1

        # For the tweets where polarity was determined manually, copy from
        # majority_vote to auto_vote.
        fix_manual_tweets(conn)
    finally:
        # Always release the connection, also on error paths.
        conn.close()

    print(count_table)
def main_function():
    """Label the test tweets with two classifiers and print a confusion matrix.

    Steps, all against the database configured under
    ``DATABASES['default']``:

    1. Train a ``NaiveBayesClassifier``, store its guess for every test
       tweet via ``classify.update_tweet_polarity`` and print the tally.
    2. Train a megam ``MaxentClassifier``, store its guess for every test
       tweet via ``update_tweet_polarity_ensemble`` and print the tally.
    3. For every test tweet read the ``CHECK_CONSENSUS`` value (guess) and
       the ``CHECK_MAJORITY`` value (actual) and count the pair in
       ``full_matrix[actual][guess]``; rows where either is ``None`` are
       skipped. The matrix is printed at the end.

    The connection is closed before returning, including when a step raises.
    """
    labels = ('+', '-', 'I', 'O')
    settings = DATABASES['default']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)
        test_tweets_pending = None  # placeholder so the order below mirrors the steps

        # --- Step 1: Naive Bayes -------------------------------------------
        bayes_classifier = NaiveBayesClassifier.train(training_feature_set)
        count_table = dict.fromkeys(labels, 0)
        test_tweets = classify.get_test_tweets(conn)
        for tweet in test_tweets:
            text = classify.get_tweet_text(conn, tweet[0])[0][0]
            guess = bayes_classifier.classify(classify.process_tweet(text))
            classify.update_tweet_polarity(tweet[0], guess, conn)
            count_table[guess] += 1
        print("Naive Bayes")
        print(count_table)

        # --- Step 2: Maximum Entropy (megam) -------------------------------
        count_table = dict.fromkeys(labels, 0)
        config_megam('/opt/packages')
        max_ent_classifier = MaxentClassifier.train(training_feature_set,
                                                    algorithm="megam",
                                                    trace=0)
        for tweet in test_tweets:
            text = classify.get_tweet_text(conn, tweet[0])[0][0]
            guess = max_ent_classifier.classify(classify.process_tweet(text))
            update_tweet_polarity_ensemble(tweet[0], guess, conn)
            count_table[guess] += 1
        print("Maximum Entropy")
        print(count_table)

        # --- Step 3: accuracy matrix, indexed [actual][guess] --------------
        full_matrix = dict((actual, dict.fromkeys(labels, 0))
                           for actual in labels)
        for tweet in test_tweets:
            guess = classify.run_sql(
                conn, classify.Statements.CHECK_CONSENSUS % tweet[0])[0][0]
            actual = classify.run_sql(
                conn, classify.Statements.CHECK_MAJORITY % tweet[0])[0][0]
            if guess is not None and actual is not None:
                full_matrix[actual][guess] += 1
    finally:
        # Always release the connection, also on error paths.
        conn.close()

    print(full_matrix)
def main_function():
    """Train a Naive Bayes classifier and print its accuracy on the test set.

    Opens the MySQL database configured under ``DATABASES['default']``,
    turns the training and test tweets into feature sets with
    ``classify.process_tweets``, trains a ``NaiveBayesClassifier`` and prints
    the value returned by ``accuracy(classifier, test_feature_set)``.

    The connection is closed before returning, including when a step raises.
    """
    settings = DATABASES['default']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)

        # NOTE(review): megam is configured although only Naive Bayes is
        # trained below; presumably left over from a MaxentClassifier
        # variant -- confirm before removing the call.
        config_megam('/opt/packages')
        classifier = NaiveBayesClassifier.train(training_feature_set)

        test_tweets = classify.get_test_tweets(conn)
        test_feature_set = classify.process_tweets(test_tweets)
        classifier_accuracy = accuracy(classifier, test_feature_set)
    finally:
        # Always release the connection, also on error paths.
        conn.close()

    print("classifier accuracy: " + repr(classifier_accuracy))
def main_function():
    """Label pending tweets with Naive Bayes and MaxEnt and print both tallies.

    Connects to the local ``twitter_heart`` MySQL database, trains both
    classifiers on the same training feature set and, for every row returned
    by ``classify.get_tweets_to_classify``:

    * stores the Naive Bayes guess via ``classify.update_tweet_polarity``;
    * stores the MaxEnt guess via ``update_max_ent_polarity``.

    A per-label tally is printed after each pass, and ``fix_manual_tweets``
    runs once after the MaxEnt pass. The connection is closed before
    returning, including when a step raises.
    """
    # NOTE(review): credentials are hard-coded here; consider reading them
    # from configuration instead of keeping them in source.
    conn_analysis = MySQLdb.connect(host="localhost", user="******",
                                    passwd="tanzania", db="twitter_heart")
    try:
        training_tweets = classify.get_training_tweets(conn_analysis)
        training_feature_set = classify.process_tweets(training_tweets)
        tweets = classify.get_tweets_to_classify(conn_analysis)

        # --- Naive Bayes pass ----------------------------------------------
        bayes_classifier = NaiveBayesClassifier.train(training_feature_set)
        count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
        for tweet in tweets:
            text = classify.get_tweet_text(conn_analysis, tweet[0])[0][0]
            guess = bayes_classifier.classify(classify.process_tweet(text))
            classify.update_tweet_polarity(tweet[0], guess, conn_analysis)
            count_table[guess] += 1
        print("Naive Bayes")
        print(count_table)

        # --- Maximum Entropy (megam) pass ----------------------------------
        count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
        config_megam('/opt/packages')
        max_ent_classifier = MaxentClassifier.train(training_feature_set,
                                                    algorithm="megam",
                                                    trace=0)
        for tweet in tweets:
            text = classify.get_tweet_text(conn_analysis, tweet[0])[0][0]
            guess = max_ent_classifier.classify(classify.process_tweet(text))
            update_max_ent_polarity(tweet[0], guess, conn_analysis)
            count_table[guess] += 1

        # For the tweets where polarity was determined manually, copy from
        # majority_vote to auto_vote.
        fix_manual_tweets(conn_analysis)
    finally:
        # Always release the connection, also on error paths.
        conn_analysis.close()

    print("Maximum Entropy")
    print(count_table)
def main_function():
    """Label pending tweets with Naive Bayes and MaxEnt and print both tallies.

    Connects to the local ``twitter_heart`` MySQL database, trains both
    classifiers on the same training feature set and, for every row returned
    by ``classify.get_tweets_to_classify``:

    * stores the Naive Bayes guess via ``classify.update_tweet_polarity``;
    * stores the MaxEnt guess via ``update_max_ent_polarity``.

    A per-label tally is printed after each pass, and ``fix_manual_tweets``
    runs once after the MaxEnt pass. The connection is closed before
    returning, including when a step raises.
    """
    # NOTE(review): credentials are hard-coded here; consider reading them
    # from configuration instead of keeping them in source.
    conn_analysis = MySQLdb.connect(host="localhost", user="******",
                                    passwd="tanzania", db="twitter_heart")
    try:
        training_tweets = classify.get_training_tweets(conn_analysis)
        training_feature_set = classify.process_tweets(training_tweets)
        tweets = classify.get_tweets_to_classify(conn_analysis)

        # --- Naive Bayes pass ----------------------------------------------
        bayes_classifier = NaiveBayesClassifier.train(training_feature_set)
        count_table = {'+':0, '-':0, 'I':0, 'O':0}
        for tweet in tweets:
            text = classify.get_tweet_text(conn_analysis, tweet[0])[0][0]
            guess = bayes_classifier.classify(classify.process_tweet(text))
            classify.update_tweet_polarity(tweet[0], guess, conn_analysis)
            count_table[guess] += 1
        print("Naive Bayes")
        print(count_table)

        # --- Maximum Entropy (megam) pass ----------------------------------
        count_table = {'+':0, '-':0, 'I':0, 'O':0}
        config_megam('/opt/packages')
        max_ent_classifier = MaxentClassifier.train(training_feature_set,
                                                    algorithm="megam",
                                                    trace=0)
        for tweet in tweets:
            text = classify.get_tweet_text(conn_analysis, tweet[0])[0][0]
            guess = max_ent_classifier.classify(classify.process_tweet(text))
            update_max_ent_polarity(tweet[0], guess, conn_analysis)
            count_table[guess] += 1

        # For the tweets where polarity was determined manually, copy from
        # majority_vote to auto_vote.
        fix_manual_tweets(conn_analysis)
    finally:
        # Always release the connection, also on error paths.
        conn_analysis.close()

    print("Maximum Entropy")
    print(count_table)
def main_function():
    """Label the test tweets with two classifiers and print a confusion matrix.

    Steps, all against the database configured under
    ``DATABASES['ensemble']``:

    1. Train a ``NaiveBayesClassifier``, store its guess for every test
       tweet via ``classify.update_tweet_polarity`` and print the tally.
    2. Train a megam ``MaxentClassifier``, store its guess for every test
       tweet via ``update_tweet_polarity_ensemble`` and print the tally.
    3. For every test tweet read the ``CHECK_CONSENSUS`` value (guess) and
       the ``CHECK_MAJORITY`` value (actual) and count the pair in
       ``full_matrix[actual][guess]``; rows where either is ``None`` are
       skipped. The matrix is printed at the end.

    The connection is closed before returning, including when a step raises.
    """
    labels = ('+', '-', 'I', 'O')
    settings = DATABASES['ensemble']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)

        # --- Step 1: Naive Bayes -------------------------------------------
        bayes_classifier = NaiveBayesClassifier.train(training_feature_set)
        count_table = dict.fromkeys(labels, 0)
        test_tweets = classify.get_test_tweets(conn)
        for tweet in test_tweets:
            text = classify.get_tweet_text(conn, tweet[0])[0][0]
            guess = bayes_classifier.classify(classify.process_tweet(text))
            classify.update_tweet_polarity(tweet[0], guess, conn)
            count_table[guess] += 1
        print("Naive Bayes")
        print(count_table)

        # --- Step 2: Maximum Entropy (megam) -------------------------------
        count_table = dict.fromkeys(labels, 0)
        config_megam('/opt/packages')
        max_ent_classifier = MaxentClassifier.train(training_feature_set,
                                                    algorithm="megam",
                                                    trace=0)
        for tweet in test_tweets:
            text = classify.get_tweet_text(conn, tweet[0])[0][0]
            guess = max_ent_classifier.classify(classify.process_tweet(text))
            update_tweet_polarity_ensemble(tweet[0], guess, conn)
            count_table[guess] += 1
        print("Maximum Entropy")
        print(count_table)

        # --- Step 3: accuracy matrix, indexed [actual][guess] --------------
        full_matrix = dict((actual, dict.fromkeys(labels, 0))
                           for actual in labels)
        for tweet in test_tweets:
            guess = classify.run_sql(
                conn, classify.Statements.CHECK_CONSENSUS % tweet[0])[0][0]
            actual = classify.run_sql(
                conn, classify.Statements.CHECK_MAJORITY % tweet[0])[0][0]
            if guess is not None and actual is not None:
                full_matrix[actual][guess] += 1
    finally:
        # Always release the connection, also on error paths.
        conn.close()

    print(full_matrix)