def train_ngram_classifiers(mode="unigram", selection="ngramrank", word=True,
                            pos=False, rank=1500):
    """Build, persist and evaluate n-gram classifiers.

    Steps, in order:

    1. ``get_existing_classifiers`` is queried with ``sub="pickles/target"``
       and the given ``selection`` / ``mode``.
    2. ``get_ngram_classifiers`` is called with the module-level ``keys``,
       the result of step 1, and ``word`` / ``pos`` / ``selection`` /
       ``rank`` forwarded unchanged.
    3. If step 2 yields a non-empty mapping, it is written out with
       ``write_classifier_dict`` and evaluated with ``evaluate_classifiers``
       through a ``Vote`` built from the test data of its first classifier's
       ``test_keys``. Otherwise an "already trained" message is printed.

    Returns:
        The value obtained from ``get_existing_classifiers`` in step 1.
    """
    existing = get_existing_classifiers(
        sub="pickles/target", selection=selection, mode=mode)
    fresh = get_ngram_classifiers(
        keys, existing, word=word, pos=pos, selection=selection, rank=rank)

    # Guard clause: nothing came back, so there is nothing to write/evaluate.
    if not fresh:
        print("already trained {0}".format(existing))
        return existing

    print("evaluating classifier alpha_results for {0}\n".format(fresh.keys()))
    write_classifier_dict(
        keys=keys, classifier_dict=fresh, selection=selection, mode=mode)

    # The test keys are read from the first classifier in the mapping only.
    test_keys = list(fresh.values())[0].test_keys
    test_tweets, test_instances = get_test_data(test_keys)
    vote = Vote(tweets=test_tweets, instances=test_instances,
                classifiers=fresh, selection=selection)
    evaluate_classifiers(vote, test_keys, fresh, mode=mode,
                         selection=selection)

    # At this point the classifiers are trained.
    # TODO: add logic to decide whether we reuse already-classified material
    # or build a new mode.

    # NOTE(review): the source was whitespace-collapsed; this return is placed
    # at function level (both branches return) -- confirm against history.
    return existing
def train_misc_classifiers(selection="default", mode="misc"):
    """Build, persist and evaluate the miscellaneous classifiers.

    Steps, in order:

    1. ``get_existing_classifiers`` is queried with ``sub="pickles/target"``
       and the given ``selection`` / ``mode``; the result is printed.
    2. ``get_misc_classifiers`` is called with the module-level ``keys``,
       the result of step 1 (as ``existing_class``) and ``selection``.
    3. If step 2 yields a non-empty mapping, it is written out with
       ``write_classifier_dict`` and evaluated with ``evaluate_classifiers``
       through a ``Vote`` built from the test data of its first classifier's
       ``test_keys``. Otherwise an "already trained" message is printed.

    Returns:
        The value obtained from ``get_existing_classifiers`` in step 1.
    """
    # TODO: ask Rich about evaluating votes on training keys instead of
    # testing keys.
    existing_classifiers = get_existing_classifiers(
        sub="pickles/target", selection=selection, mode=mode)
    print("existing {0}".format(existing_classifiers))

    classifier_dict = get_misc_classifiers(
        keys, existing_class=existing_classifiers, selection=selection)

    if classifier_dict:
        print("evaluating classifier alpha_results for {0}\n".format(
            classifier_dict.keys()))
        write_classifier_dict(keys=keys, classifier_dict=classifier_dict,
                              selection=selection, mode=mode)

        # Both key sets are read from the first classifier in the mapping;
        # fetch it once instead of rebuilding the values list twice.
        first_classifier = list(classifier_dict.values())[0]
        test_keys = first_classifier.test_keys
        train_keys = first_classifier.train_keys

        test_tweets, test_instances = get_test_data(test_keys)
        # NOTE(review): the training tweets/instances are loaded but never
        # used below (see the TODO above). The call is kept in case
        # get_test_data has side effects -- confirm and drop if it does not.
        train_tweets, train_instances = get_test_data(train_keys)

        v = Vote(tweets=test_tweets, instances=test_instances,
                 classifiers=classifier_dict, selection=selection)
        evaluate_classifiers(v, test_keys, classifier_dict, mode=mode,
                             selection=selection)
    else:
        # Message typo fixed ("tained" -> "trained") to match the wording
        # used by train_ngram_classifiers.
        print("already trained {0}".format(existing_classifiers))

    # NOTE(review): the source was whitespace-collapsed; this return is placed
    # at function level (both branches return) -- confirm against history.
    return existing_classifiers