def main_function():
    """Train a Naive Bayes classifier and store a guess for every pending tweet.

    Steps, in order:

    1. Connect to the database configured under ``DATABASES['ensemble']``.
    2. Train ``NaiveBayesClassifier`` on the feature set that
       ``classify.process_tweets`` builds from the training tweets.
    3. For each row returned by ``classify.get_tweets_to_classify``, fetch
       the tweet text, classify it and persist the guess through
       ``classify.update_tweet_polarity``.
    4. Run the ``UPDATE_MANUAL_CLASSIFIED`` statement.
    5. Print how many tweets received each label.

    The connection is closed before returning, including when one of the
    steps raises.
    """
    db_settings = DATABASES['ensemble']
    conn = MySQLdb.connect(host=db_settings['HOST'],
                           user=db_settings['USER'],
                           passwd=db_settings['PASSWORD'],
                           db=db_settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)
        classifier = NaiveBayesClassifier.train(training_feature_set)

        # One counter per label; a guess outside these four labels raises
        # KeyError, exactly as before.
        count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}

        for tweet in classify.get_tweets_to_classify(conn):
            tweet_id = tweet[0]
            text = classify.get_tweet_text(conn, tweet_id)[0][0]
            guess = classifier.classify(classify.process_tweet(text))
            classify.update_tweet_polarity(tweet_id, guess, conn)
            count_table[guess] += 1

        classify.run_sql(conn, classify.Statements.UPDATE_MANUAL_CLASSIFIED)

        # A parenthesised single argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(count_table)
        # The former trailing ``print full_matrix`` was dropped: this function
        # never builds ``full_matrix``, so the statement could only fail.
    finally:
        conn.close()
def main_function():
    """Classify all pending tweets with a freshly trained Naive Bayes model.

    Connects to the database configured under ``DATABASES['ensemble']``,
    trains ``NaiveBayesClassifier`` on the processed training tweets, then
    walks the rows from ``classify.get_tweets_to_classify``: each tweet's
    text is fetched, classified, and the guess is written back with
    ``classify.update_tweet_polarity``.  Afterwards the
    ``UPDATE_MANUAL_CLASSIFIED`` statement is executed and the per-label
    tally of guesses is printed.

    The database connection is always closed on the way out.
    """
    settings = DATABASES['ensemble']
    conn = MySQLdb.connect(
        host=settings['HOST'],
        user=settings['USER'],
        passwd=settings['PASSWORD'],
        db=settings['NAME'],
    )
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)
        classifier = NaiveBayesClassifier.train(training_feature_set)

        # Tally of guesses per label.  Unknown labels still raise KeyError.
        count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}

        tweets = classify.get_tweets_to_classify(conn)
        for tweet in tweets:
            tweet_id = tweet[0]
            text = classify.get_tweet_text(conn, tweet_id)[0][0]
            features = classify.process_tweet(text)
            guess = classifier.classify(features)
            classify.update_tweet_polarity(tweet_id, guess, conn)
            count_table[guess] += 1

        classify.run_sql(conn, classify.Statements.UPDATE_MANUAL_CLASSIFIED)

        # Single-argument print(...) yields the same output on Python 2 and 3.
        print(count_table)
        # ``full_matrix`` is never assigned in this function, so the old
        # ``print full_matrix`` at this point has been removed.
    finally:
        conn.close()
def main_function():
    """Run Naive Bayes and MaxEnt over the test tweets and print a summary.

    Steps, in order:

    1. Connect to the database configured under ``DATABASES['default']``.
    2. Train ``NaiveBayesClassifier`` on the processed training tweets,
       classify every test tweet, store each guess through
       ``classify.update_tweet_polarity`` and print the label tally.
    3. Configure megam, train ``MaxentClassifier`` on the same feature set,
       classify the same test tweets, store each guess through
       ``update_tweet_polarity_ensemble`` and print the label tally.
    4. Build and print a matrix indexed as ``full_matrix[actual][guess]``
       from the ``CHECK_MAJORITY`` and ``CHECK_CONSENSUS`` statements,
       skipping tweets for which either value is ``None``.

    The connection is closed before returning, including on error.
    """
    db_settings = DATABASES['default']
    conn = MySQLdb.connect(host=db_settings['HOST'],
                           user=db_settings['USER'],
                           passwd=db_settings['PASSWORD'],
                           db=db_settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)

        bayes_classifier = NaiveBayesClassifier.train(training_feature_set)
        test_tweets = classify.get_test_tweets(conn)

        def tally_guesses(classifier, store_guess):
            """Classify each test tweet, persist the guess, return label counts."""
            count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
            for tweet in test_tweets:
                tweet_id = tweet[0]
                text = classify.get_tweet_text(conn, tweet_id)[0][0]
                guess = classifier.classify(classify.process_tweet(text))
                store_guess(tweet_id, guess, conn)
                count_table[guess] += 1
            return count_table

        # The header is printed only after the pass has finished, as before.
        bayes_counts = tally_guesses(bayes_classifier,
                                     classify.update_tweet_polarity)
        print("Naive Bayes")
        print(bayes_counts)

        config_megam('/opt/packages')
        max_ent_classifier = MaxentClassifier.train(training_feature_set,
                                                    algorithm="megam",
                                                    trace=0)
        max_ent_counts = tally_guesses(max_ent_classifier,
                                       update_tweet_polarity_ensemble)
        print("Maximum Entropy")
        print(max_ent_counts)

        # Generate the accuracy matrix: outer key is the actual label,
        # inner key is the guessed label.
        full_matrix = {
            '+': {'+': 0, '-': 0, 'I': 0, 'O': 0},
            '-': {'+': 0, '-': 0, 'I': 0, 'O': 0},
            'I': {'+': 0, '-': 0, 'I': 0, 'O': 0},
            'O': {'+': 0, '-': 0, 'I': 0, 'O': 0},
        }
        for tweet in test_tweets:
            # NOTE(review): these statements are built with ``%`` string
            # formatting; the id comes from a database row here, but a
            # parameterised query would be safer if run_sql supports one.
            result = classify.run_sql(
                conn, classify.Statements.CHECK_CONSENSUS % tweet[0])
            guess = result[0][0]
            actual_result = classify.run_sql(
                conn, classify.Statements.CHECK_MAJORITY % tweet[0])
            actual = actual_result[0][0]
            if guess is not None and actual is not None:
                full_matrix[actual][guess] += 1

        print(full_matrix)
    finally:
        conn.close()
def fix_manual_tweets(conn):
    """Execute the ``UPDATE_MANUAL_CLASSIFIED`` statement on *conn*.

    Whatever ``classify.run_sql`` returns is discarded, so this function
    always returns ``None``.
    """
    statement = classify.Statements.UPDATE_MANUAL_CLASSIFIED
    classify.run_sql(conn, statement)
def main_function():
    """Compare Naive Bayes and MaxEnt guesses on the test tweets.

    Connects to the database configured under ``DATABASES['ensemble']`` and
    trains both classifiers on the same processed training tweets.  Each
    classifier labels every test tweet; Naive Bayes guesses are stored
    through ``classify.update_tweet_polarity`` and MaxEnt guesses through
    ``update_tweet_polarity_ensemble``.  After each pass the classifier name
    and its per-label tally are printed.

    Finally a matrix indexed as ``full_matrix[actual][guess]`` is filled from
    the ``CHECK_MAJORITY`` (actual) and ``CHECK_CONSENSUS`` (guess)
    statements and printed.  Tweets where either value is ``None`` are left
    out of the matrix.

    The database connection is always closed on the way out.
    """
    settings = DATABASES['ensemble']
    conn = MySQLdb.connect(
        host=settings['HOST'],
        user=settings['USER'],
        passwd=settings['PASSWORD'],
        db=settings['NAME'],
    )
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)

        bayes_classifier = NaiveBayesClassifier.train(training_feature_set)
        test_tweets = classify.get_test_tweets(conn)

        def run_classifier(classifier, save_polarity):
            """Label every test tweet, save each guess, return the tally."""
            tally = {'+': 0, '-': 0, 'I': 0, 'O': 0}
            for tweet in test_tweets:
                tweet_id = tweet[0]
                text = classify.get_tweet_text(conn, tweet_id)[0][0]
                features = classify.process_tweet(text)
                guess = classifier.classify(features)
                save_polarity(tweet_id, guess, conn)
                tally[guess] += 1
            return tally

        # Finish the whole pass before printing, matching the old output
        # order (header first, then counts, both after the loop).
        count_table = run_classifier(bayes_classifier,
                                     classify.update_tweet_polarity)
        print("Naive Bayes")
        print(count_table)

        config_megam('/opt/packages')
        max_ent_classifier = MaxentClassifier.train(
            training_feature_set, algorithm="megam", trace=0)
        count_table = run_classifier(max_ent_classifier,
                                     update_tweet_polarity_ensemble)
        print("Maximum Entropy")
        print(count_table)

        # Generate the accuracy matrix (outer key: actual, inner key: guess).
        full_matrix = {
            '+': {'+': 0, '-': 0, 'I': 0, 'O': 0},
            '-': {'+': 0, '-': 0, 'I': 0, 'O': 0},
            'I': {'+': 0, '-': 0, 'I': 0, 'O': 0},
            'O': {'+': 0, '-': 0, 'I': 0, 'O': 0},
        }
        for tweet in test_tweets:
            tweet_id = tweet[0]
            # NOTE(review): SQL text is assembled with ``%`` formatting; the
            # id originates from a database row, but confirm whether run_sql
            # can take bound parameters instead.
            guess = classify.run_sql(
                conn, classify.Statements.CHECK_CONSENSUS % tweet_id)[0][0]
            actual = classify.run_sql(
                conn, classify.Statements.CHECK_MAJORITY % tweet_id)[0][0]
            if guess is None or actual is None:
                continue
            full_matrix[actual][guess] += 1

        print(full_matrix)
    finally:
        conn.close()
def get_test_tweets(conn):
    """Return the result of running the ``TEST_TWEETS`` statement on *conn*.

    The value is passed through unchanged from ``classify.run_sql``.
    """
    test_rows = classify.run_sql(conn, classify.Statements.TEST_TWEETS)
    return test_rows
def get_training_tweets(conn):
    """Return the result of running the ``TRAINING_TWEETS`` statement on *conn*.

    The value is passed through unchanged from ``classify.run_sql``.
    """
    training_rows = classify.run_sql(conn, classify.Statements.TRAINING_TWEETS)
    return training_rows