예제 #1
0
def train_ngram_classifiers(mode="unigram", selection="ngramrank", word=True, pos=False, rank=1500):
    """Train the n-gram classifiers that are still missing and evaluate them.

    The already-stored classifiers are looked up under ``pickles/target``;
    ``get_ngram_classifiers`` is then asked for classifiers given that
    existing set. When it returns a non-empty mapping, the mapping is
    written out with ``write_classifier_dict`` and evaluated on test data
    through a ``Vote`` object. When it returns nothing, a message is printed
    and no training/evaluation work is done.

    Relies on a ``keys`` name defined outside this function (not visible in
    this block).

    Args:
        mode: Forwarded to ``get_existing_classifiers``,
            ``write_classifier_dict`` and ``evaluate_classifiers``.
        selection: Forwarded to every helper that accepts it.
        word: Forwarded to ``get_ngram_classifiers``.
        pos: Forwarded to ``get_ngram_classifiers``.
        rank: Forwarded to ``get_ngram_classifiers``.

    Returns:
        The value obtained from ``get_existing_classifiers``. This function
        does not itself add the newly trained classifiers to that value.
    """
    existing_classifiers = get_existing_classifiers(
        sub="pickles/target", selection=selection, mode=mode)
    classifier_dict = get_ngram_classifiers(
        keys, existing_classifiers,
        word=word, pos=pos, selection=selection, rank=rank)

    if not classifier_dict:
        # Nothing new came back, so there is nothing to write or evaluate.
        print("already trained {0}".format(existing_classifiers))
        return existing_classifiers

    # At this point the classifiers are trained.
    # TODO: need some logic going in --> are we using already classified
    # stuff or making a new mode?
    # list(...) keeps the printed form a plain list on both Python 2 and 3.
    print("evaluating classifier alpha_results for {0}\n".format(list(classifier_dict)))
    write_classifier_dict(keys=keys, classifier_dict=classifier_dict,
                          selection=selection, mode=mode)

    # The first classifier's test keys are used for the whole vote;
    # presumably every classifier shares the same split -- TODO confirm.
    # next(iter(...)) picks the same first value as ``.values()[0]`` but
    # also works where ``values()`` is a non-indexable view.
    first_classifier = next(iter(classifier_dict.values()))
    test_keys = first_classifier.test_keys
    test_tweets, test_instances = get_test_data(test_keys)

    v = Vote(tweets=test_tweets, instances=test_instances,
             classifiers=classifier_dict, selection=selection)
    evaluate_classifiers(v, test_keys, classifier_dict, mode=mode, selection=selection)
    return existing_classifiers
예제 #2
0
def train_misc_classifiers(selection="default", mode="misc"):
    """Train the misc classifiers that are still missing and evaluate them.

    The already-stored classifiers are looked up under ``pickles/target``
    and printed. ``get_misc_classifiers`` is then asked for classifiers
    given that existing set. When it returns a non-empty mapping, the
    mapping is written out with ``write_classifier_dict`` and evaluated on
    test data through a ``Vote`` object. When it returns nothing, a message
    is printed and no training/evaluation work is done.

    Relies on a ``keys`` name defined outside this function (not visible in
    this block).

    Args:
        selection: Forwarded to every helper that accepts it.
        mode: Forwarded to ``get_existing_classifiers``,
            ``write_classifier_dict`` and ``evaluate_classifiers``.

    Returns:
        The value obtained from ``get_existing_classifiers``. This function
        does not itself add the newly trained classifiers to that value.
    """
    # TODO: ask Rich about evaluating votes on training keys instead of
    # testing keys. Only the test split is loaded here because nothing in
    # this function consumes training data.
    existing_classifiers = get_existing_classifiers(
        sub="pickles/target", selection=selection, mode=mode)
    print("existing {0}".format(existing_classifiers))

    classifier_dict = get_misc_classifiers(
        keys, existing_class=existing_classifiers, selection=selection)

    if not classifier_dict:
        # Nothing new came back, so there is nothing to write or evaluate.
        print("already trained {0}".format(existing_classifiers))
        return existing_classifiers

    # list(...) keeps the printed form a plain list on both Python 2 and 3.
    print("evaluating classifier alpha_results for {0}\n".format(list(classifier_dict)))
    write_classifier_dict(keys=keys, classifier_dict=classifier_dict,
                          selection=selection, mode=mode)

    # The first classifier's test keys are used for the whole vote;
    # presumably every classifier shares the same split -- TODO confirm.
    # next(iter(...)) picks the same first value as ``.values()[0]`` but
    # also works where ``values()`` is a non-indexable view.
    first_classifier = next(iter(classifier_dict.values()))
    test_keys = first_classifier.test_keys
    test_tweets, test_instances = get_test_data(test_keys)

    v = Vote(tweets=test_tweets, instances=test_instances,
             classifiers=classifier_dict, selection=selection)
    evaluate_classifiers(v, test_keys, classifier_dict, mode=mode, selection=selection)
    return existing_classifiers