def main_function():
    conn = MySQLdb.connect(
        host=DATABASES["date_cutoff"]["HOST"],
        user=DATABASES["date_cutoff"]["USER"],
        passwd=DATABASES["date_cutoff"]["PASSWORD"],
        db=DATABASES["date_cutoff"]["NAME"],
    )

    training_tweets = get_training_tweets(conn)
    training_feature_set = classify.process_tweets(training_tweets)

    classifier = NaiveBayesClassifier.train(training_feature_set)

    error_dict = {"+": 0, "-": 0, "I": 0, "O": 0}
    count_dict = {"+": 0, "-": 0, "I": 0, "O": 0}
    guess_dict = {"+": 0, "-": 0, "I": 0, "O": 0}

    full_matrix = {
        "+": {"+": 0, "-": 0, "I": 0, "O": 0},
        "-": {"+": 0, "-": 0, "I": 0, "O": 0},
        "I": {"+": 0, "-": 0, "I": 0, "O": 0},
        "O": {"+": 0, "-": 0, "I": 0, "O": 0},
    }

    count_table = {"+": 0, "-": 0, "I": 0, "O": 0}

    test_tweets = get_test_tweets(conn)
    test_feature_set = classify.process_tweets(test_tweets)

    classifier_accuracy = accuracy(classifier, test_feature_set)

    print count_table
    print "classifier accuracy: " + repr(classifier_accuracy)
def main_function():
    conn = MySQLdb.connect(host=DATABASES['default']['HOST'],
                           user=DATABASES['default']['USER'],
                           passwd=DATABASES['default']['PASSWORD'],
                           db=DATABASES['default']['NAME'])

    training_tweets = classify.get_training_tweets(conn)
    training_feature_set = classify.process_tweets(training_tweets)

    config_megam('/opt/packages')
    #classifier = MaxentClassifier.train(training_feature_set, algorithm="megam", trace=0)
    classifier = NaiveBayesClassifier.train(training_feature_set)
    #classifier.show_most_informative_features(50, show='pos')
    #classifier.show_most_informative_features(50, show='neg')

    #classifier.explain(training_feature_set[0][0])
    #print training_feature_set[0]

    error_dict = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    count_dict = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    guess_dict = {'+': 0, '-': 0, 'I': 0, 'O': 0}

    full_matrix = {
        '+': {
            '+': 0,
            '-': 0,
            'I': 0,
            'O': 0
        },
        '-': {
            '+': 0,
            '-': 0,
            'I': 0,
            'O': 0
        },
        'I': {
            '+': 0,
            '-': 0,
            'I': 0,
            'O': 0
        },
        'O': {
            '+': 0,
            '-': 0,
            'I': 0,
            'O': 0
        }
    }

    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}

    test_tweets = classify.get_test_tweets(conn)
    test_feature_set = classify.process_tweets(test_tweets)

    classifier_accuracy = accuracy(classifier, test_feature_set)

    #print count_table
    print "classifier accuracy: " + repr(classifier_accuracy)
def main_function():
    conn = MySQLdb.connect(host=DATABASES['ensemble']['HOST'],
                           user=DATABASES['ensemble']['USER'],
                           passwd=DATABASES['ensemble']['PASSWORD'],
                           db=DATABASES['ensemble']['NAME'])

    training_tweets = classify.get_training_tweets(conn)
    training_feature_set = classify.process_tweets(training_tweets)
    classifier = NaiveBayesClassifier.train(training_feature_set)

    error_dict = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    count_dict = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    guess_dict = {'+': 0, '-': 0, 'I': 0, 'O': 0}

    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    tweets = classify.get_tweets_to_classify(conn)

    for tweet in tweets:
        text = classify.get_tweet_text(conn, tweet[0])[0][0]
        guess = classifier.classify(classify.process_tweet(text))
        classify.update_tweet_polarity(tweet[0], guess, conn)
        count_table[guess] += 1

    #fix_manual_tweets(conn_analysis)
    classify.run_sql(conn, classify.Statements.UPDATE_MANUAL_CLASSIFIED)

    print count_table
    #print full_matrix  # full_matrix is not populated in this variant
def main_function():
    conn_analysis = MySQLdb.connect(host=DATABASES['default']['HOST'],
                                    user=DATABASES['default']['USER'],
                                    passwd=DATABASES['default']['PASSWORD'],
                                    db=DATABASES['default']['NAME'])

    training_tweets = classify.get_training_tweets(conn_analysis)
    training_feature_set = classify.process_tweets(training_tweets)

    config_megam('/opt/packages')
    classifier = MaxentClassifier.train(training_feature_set,
                                        algorithm="megam",
                                        trace=0)

    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    tweets = classify.get_tweets_to_classify(conn_analysis)

    for tweet in tweets:
        text = classify.get_tweet_text(conn_analysis, tweet[0])[0][0]
        guess = classifier.classify(classify.process_tweet(text))
        update_tweet_polarity(tweet[0], guess, conn_analysis)
        count_table[guess] += 1

    #For the tweets where polarity was determined manually, copy from
    #majority_vote to auto_vote
    fix_manual_tweets(conn_analysis)

    print count_table
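classify.process_tweet itself does not appear in this listing. NaiveBayesClassifier and MaxentClassifier only need a {feature_name: value} dict per example, so it is presumably something along these lines; this is a hypothetical stand-in, not the project's actual implementation:

def process_tweet(text):
    # Hypothetical feature extractor: bare word-presence features, the simplest
    # common choice for NLTK classifiers. The real classify.process_tweet may
    # lowercase differently, strip URLs and @handles, add bigrams, etc.
    words = text.lower().split()
    return dict(("contains(%s)" % word, True) for word in words)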
def main_function():
	conn = MySQLdb.connect(host=DATABASES['default']['HOST'], 
			user=DATABASES['default']['USER'], 
			passwd=DATABASES['default']['PASSWORD'], 
			db=DATABASES['default']['NAME'])

	training_tweets = classify.get_training_tweets(conn)
	training_feature_set = classify.process_tweets(training_tweets)

	bayes_classifier = NaiveBayesClassifier.train(training_feature_set)

	count_table = {'+':0, '-':0, 'I':0, 'O':0}  

	test_tweets = classify.get_test_tweets(conn)

	for tweet in test_tweets:
		text = classify.get_tweet_text(conn, tweet[0])[0][0]
		guess = bayes_classifier.classify(classify.process_tweet(text))
		classify.update_tweet_polarity(tweet[0], guess, conn)
		count_table[guess] += 1

	print "Naive Bayes"
	print count_table

	count_table = {'+':0, '-':0, 'I':0, 'O':0}  
	config_megam('/opt/packages')
	max_ent_classifier = MaxentClassifier.train(training_feature_set, algorithm="megam", trace=0)

	for tweet in test_tweets:
		text = classify.get_tweet_text(conn, tweet[0])[0][0]
		guess = max_ent_classifier.classify(classify.process_tweet(text))
		update_tweet_polarity_ensemble(tweet[0], guess, conn)
		count_table[guess] += 1

	print "Maximum Entropy"
	print count_table

	#generate the confusion matrix (rows = actual label, columns = guess)
	full_matrix = {'+':{'+':0, '-':0, 'I':0, 'O':0}, 
				'-':{'+':0, '-':0, 'I':0, 'O':0}, 
				'I':{'+':0, '-':0, 'I':0, 'O':0}, 
				'O':{'+':0, '-':0, 'I':0, 'O':0}}

	for tweet in test_tweets:
		result = classify.run_sql(conn, classify.Statements.CHECK_CONSENSUS % tweet[0])
		guess = result[0][0]

		actual_result = classify.run_sql(conn, classify.Statements.CHECK_MAJORITY % tweet[0])
		actual = actual_result[0][0]

		if guess is not None and actual is not None:
			full_matrix[actual][guess] += 1

	print full_matrix
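full_matrix above is a confusion matrix keyed as full_matrix[actual][guess], so per-class precision and recall fall straight out of its column and row totals. A small helper sketch, plain dict arithmetic with no project code assumed:

def precision_recall(matrix):
    # matrix[actual][guess] -> count; returns {label: (precision, recall)}
    labels = matrix.keys()
    stats = {}
    for label in labels:
        true_pos = matrix[label][label]
        predicted = sum(matrix[a][label] for a in labels)    # column total: how often 'label' was guessed
        actual = sum(matrix[label][g] for g in labels)       # row total: how often 'label' was the true answer
        precision = float(true_pos) / predicted if predicted else 0.0
        recall = float(true_pos) / actual if actual else 0.0
        stats[label] = (precision, recall)
    return stats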
def main_function():
    conn_analysis = MySQLdb.connect(host="localhost",
                                    user="******",
                                    passwd="tanzania",
                                    db="twitter_heart")

    training_tweets = classify.get_training_tweets(conn_analysis)
    training_feature_set = classify.process_tweets(training_tweets)

    tweets = classify.get_tweets_to_classify(conn_analysis)

    bayes_classifier = NaiveBayesClassifier.train(training_feature_set)
    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}

    for tweet in tweets:
        text = classify.get_tweet_text(conn_analysis, tweet[0])[0][0]
        guess = bayes_classifier.classify(classify.process_tweet(text))
        classify.update_tweet_polarity(tweet[0], guess, conn_analysis)
        count_table[guess] += 1

    print "Naive Bayes"
    print count_table

    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    config_megam('/opt/packages')
    max_ent_classifier = MaxentClassifier.train(training_feature_set,
                                                algorithm="megam",
                                                trace=0)

    for tweet in tweets:
        text = classify.get_tweet_text(conn_analysis, tweet[0])[0][0]
        guess = max_ent_classifier.classify(classify.process_tweet(text))
        update_max_ent_polarity(tweet[0], guess, conn_analysis)
        count_table[guess] += 1

    #For the tweets where polarity was determined manually, copy from
    #majority_vote to auto_vote
    fix_manual_tweets(conn_analysis)

    print "Maximum Entropy"
    print count_table
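algorithm="megam" makes MaxentClassifier.train shell out to the external megam optimiser, which config_megam('/opt/packages') has to be able to locate on disk. On a machine without that binary, NLTK's built-in pure-Python optimisers can stand in (much slower to train). A sketch of a guarded fallback, assuming config_megam raises LookupError when it cannot find megam:

try:
    config_megam('/opt/packages')
    max_ent_classifier = MaxentClassifier.train(training_feature_set,
                                                algorithm="megam", trace=0)
except LookupError:
    # megam binary not found; fall back to the built-in IIS optimiser
    max_ent_classifier = MaxentClassifier.train(training_feature_set,
                                                algorithm="IIS", trace=0)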
def main_function():
    conn = MySQLdb.connect(host=DATABASES['ensemble']['HOST'],
                           user=DATABASES['ensemble']['USER'],
                           passwd=DATABASES['ensemble']['PASSWORD'],
                           db=DATABASES['ensemble']['NAME'])

    training_tweets = classify.get_training_tweets(conn)
    training_feature_set = classify.process_tweets(training_tweets)

    bayes_classifier = NaiveBayesClassifier.train(training_feature_set)

    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}

    test_tweets = classify.get_test_tweets(conn)

    for tweet in test_tweets:
        text = classify.get_tweet_text(conn, tweet[0])[0][0]
        guess = bayes_classifier.classify(classify.process_tweet(text))
        classify.update_tweet_polarity(tweet[0], guess, conn)
        count_table[guess] += 1

    print "Naive Bayes"
    print count_table

    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    config_megam('/opt/packages')
    max_ent_classifier = MaxentClassifier.train(training_feature_set,
                                                algorithm="megam",
                                                trace=0)

    for tweet in test_tweets:
        text = classify.get_tweet_text(conn, tweet[0])[0][0]
        guess = max_ent_classifier.classify(classify.process_tweet(text))
        update_tweet_polarity_ensemble(tweet[0], guess, conn)
        count_table[guess] += 1

    print "Maximum Entropy"
    print count_table

    #generate the confusion matrix (rows = actual label, columns = guess)
    full_matrix = {
        '+': {
            '+': 0,
            '-': 0,
            'I': 0,
            'O': 0
        },
        '-': {
            '+': 0,
            '-': 0,
            'I': 0,
            'O': 0
        },
        'I': {
            '+': 0,
            '-': 0,
            'I': 0,
            'O': 0
        },
        'O': {
            '+': 0,
            '-': 0,
            'I': 0,
            'O': 0
        }
    }

    for tweet in test_tweets:
        result = classify.run_sql(
            conn, classify.Statements.CHECK_CONSENSUS % tweet[0])
        guess = result[0][0]

        actual_result = classify.run_sql(
            conn, classify.Statements.CHECK_MAJORITY % tweet[0])
        actual = actual_result[0][0]

        if guess is not None and actual is not None:
            full_matrix[actual][guess] += 1

    print full_matrix
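The Statements templates (CHECK_CONSENSUS, CHECK_MAJORITY, UPDATE_MANUAL_CLASSIFIED) are not shown here, but they are clearly expanded with Python's % operator before being passed to classify.run_sql. If the interpolated value ever came from untrusted input, MySQLdb's parameterised execute() would be the safer pattern; a hypothetical sketch, with the table and column names invented purely for illustration:

def run_sql_params(conn, sql, params=()):
    # Hypothetical variant of classify.run_sql that lets MySQLdb do the quoting.
    cursor = conn.cursor()
    cursor.execute(sql, params)
    return cursor.fetchall()

# e.g. instead of classify.Statements.CHECK_CONSENSUS % tweet[0]:
result = run_sql_params(conn,
                        "SELECT consensus_vote FROM tweets WHERE id = %s",  # invented schema
                        (tweet[0],))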