Ejemplo n.º 1
0
def main():
    """
    Main script.

    Opens the tweet database, trains one shared NBClassifier on a sample
    of rows, tests it against the 'reps' set, then trains the same
    classifier on a second sample and tests it against the 'sens' set.
    Because a single classifier instance is reused, the second test runs
    on a model that has seen both training samples.

    NOTE: as a future extension, consider one trainer for each congressional
    body. How do senators, reps tweet?
    """
    DB = sqlite3.connect("data/tweets")
    try:
        CLASSIFIER = NBClassifier()

        # Train the classifier using a sample of legislators' tweets.
        # The limit is 30% of a hard-coded count (presumably the number of
        # representative tweets in the database -- TODO confirm).
        r_limit = int(0.3 * 188331)
        for row in partition_sample(r_limit, DB, 'r'):
            CLASSIFIER.train(row)

        # Now attempt to classify tweets in the test set.
        test_classifier(CLASSIFIER, DB, 'reps')

        # Same thing for senators. The sample size must come from s_limit;
        # previously r_limit was passed here and s_limit was never used.
        s_limit = int(0.3 * 29463)
        # NOTE(review): the group code 'd' is kept as found; confirm it is
        # the code partition_sample expects for the senators' sample.
        for row in partition_sample(s_limit, DB, 'd'):
            CLASSIFIER.train(row)

        test_classifier(CLASSIFIER, DB, 'sens')
    finally:
        # Release the database handle even if training or testing raises.
        DB.close()
Ejemplo n.º 2
0
def main():
    """
    Main script.

    Opens the tweet database and, for each congressional body in turn,
    trains a dedicated NBClassifier on a sample of rows, prints its 20 most
    common features, and runs test_classifier against the matching set
    ('reps' first, then 'sens').

    NOTE: as a future extension, consider one trainer for each congressional
    body. How do senators, reps tweet?
    """
    DB = sqlite3.connect("data/tweets")
    try:
        # --- Representatives ---
        REP_CLASSIFIER = NBClassifier()

        # Train the classifier using a sample of legislators' tweets.
        # The limit is 20% of a hard-coded count (presumably an approximate
        # number of representative tweets -- TODO confirm).
        r_limit = int(0.2 * 110000)
        for row in partition_sample(r_limit, DB, 'r'):
            REP_CLASSIFIER.train(row)

        # Print the 20 most common features in the classifier.
        # The parenthesized form prints the same single string under
        # Python 2's print statement and Python 3's print function.
        print("\nMOST COMMON REP FEATURES:\n")
        REP_CLASSIFIER.print_common_features(n=20)

        # Test the classifier.
        test_classifier(REP_CLASSIFIER, DB, 'reps')

        # --- Senators ---
        SEN_CLASSIFIER = NBClassifier()
        s_limit = int(0.2 * 29463)
        for row in partition_sample(s_limit, DB, 's'):
            SEN_CLASSIFIER.train(row)

        print("\nMOST COMMON SEN FEATURES:\n")
        SEN_CLASSIFIER.print_common_features(n=20)

        # Test the classifier.
        test_classifier(SEN_CLASSIFIER, DB, 'sens')
    finally:
        # Release the database handle even if training or testing raises.
        DB.close()