def main():
    """
    Main script.

    Opens the tweet database, trains a single NBClassifier on a sample of
    tweets drawn with the 'r' flag and tests it against the 'reps' set, then
    keeps training the *same* classifier on a second sample and tests it
    against the 'sens' set.

    NOTE: as a future extension, consider one trainer for each congressional
    body. How do senators, reps tweet?
    """
    db = sqlite3.connect("data/tweets")
    try:
        classifier = NBClassifier()

        # Train the classifier using sample of legislators' tweets.
        # 30% sample; 188331 is presumably the total number of
        # representatives' tweets in the database -- TODO confirm.
        r_limit = int(0.3 * 188331)
        for row in partition_sample(r_limit, db, 'r'):
            classifier.train(row)

        # Now attempt to classify tweets in test set.
        test_classifier(classifier, db, 'reps')

        # Same thing for senators. The sample size must come from s_limit;
        # previously r_limit was passed here and s_limit was never used.
        # 29463 is presumably the senators' tweet count -- TODO confirm.
        s_limit = int(0.3 * 29463)
        # NOTE(review): the 'd' flag is passed through unchanged. The first
        # sample uses 'r'; confirm 'd' is the flag partition_sample expects
        # for the senators' sample.
        for row in partition_sample(s_limit, db, 'd'):
            classifier.train(row)

        test_classifier(classifier, db, 'sens')
    finally:
        # Release the sqlite connection even if training or testing raises.
        db.close()
def main():
    """
    Main script.

    Opens the tweet database and, for each congressional body in turn
    (representatives first, then senators), trains a fresh NBClassifier on a
    sample of that body's tweets, prints its 20 most common features, and
    runs test_classifier against the matching test group.

    How do senators, reps tweet?
    """
    db = sqlite3.connect("data/tweets")

    # One entry per congressional body:
    #   (label shown in the report header,
    #    flag handed to partition_sample,
    #    training sample size,
    #    group name handed to test_classifier)
    # Sample sizes are 20% of a hard-coded count; 110000 and 29463 are
    # presumably the tweet totals for each body -- TODO confirm.
    runs = (
        ("REP", 'r', int(0.2 * 110000), 'reps'),
        ("SEN", 's', int(0.2 * 29463), 'sens'),
    )

    try:
        for label, flag, limit, group in runs:
            classifier = NBClassifier()

            # Train the classifier using sample of legislators' tweets.
            for row in partition_sample(limit, db, flag):
                classifier.train(row)

            # Print 20 most common features in classifier, along with
            # class info. Single-argument print(...) emits the same text
            # whether print is a statement or a function.
            print("\nMOST COMMON %s FEATURES:\n" % label)
            classifier.print_common_features(n=20)

            # Test the classifier.
            test_classifier(classifier, db, group)
    finally:
        # Release the sqlite connection even if training or testing raises.
        db.close()