def evaluate_classifiers(v,test_keys,classifier_dict,selection="r",mode="unigram"):
    """Evaluate each classifier on its own and write one result file per classifier.

    For every classifier id in ``classifier_dict`` the voter ``v`` is asked to
    score with that classifier only, its vote dictionaries are rebuilt, and one
    result row is collected per key in ``test_keys``. The rows are handed to
    ``evaluate`` together with the output path
    ``cresults/alpha_results/target/<mode>/<selection>/<cid>.txt``, after which
    ``v.reset()`` is called before moving on to the next classifier.

    Each result row is the tuple::

        (actual, pos_score, neg_score, diff, score_vote,
         pos_votes, neg_votes, count_vote)

    Args:
        v: voter object. This function uses ``score_tweets_bycid``,
            ``build_vote_dicts``, ``summarize_weighted_results``, ``reset``,
            ``basic_result_dict`` and ``instances`` on it.
        test_keys: keys of the instances to evaluate. It is iterated once per
            classifier, so it must be re-iterable (e.g. a list).
        classifier_dict: mapping keyed by classifier id; only the keys are
            used here.
        selection: path component of the output location (and passed to
            ``checkDir``/``createDir``).
        mode: path component of the output location (and passed to
            ``checkDir``/``createDir``).

    Returns:
        None. Output is produced through ``evaluate``.
    """
    print("creating alpha results from classifiers...")
    if not checkDir(sub="alpha_results/target/",selection=selection,mode=mode):
        createDir(sub="alpha_results/target/",selection=selection,mode=mode)

    for cid in classifier_dict:
        print("evaulating cid={0}".format(cid))
        outpath = "cresults/alpha_results/target/{0}/{1}/{2}.txt".format(mode,selection,cid)

        v.score_tweets_bycid(cid)
        # just need to change here to do combinations of classifiers
        v.build_vote_dicts()
        basic = v.basic_result_dict
        summarized = v.summarize_weighted_results()

        # Fresh dict per classifier so rows collected for a previously
        # evaluated classifier can never carry over into this one's output.
        results = {}
        for key in test_keys:
            votes = basic[key]
            pos_votes = votes.count("positive")
            neg_votes = votes.count("negative")
            actual = v.instances[key].label

            scores = summarized[key]
            pos_score = scores["positive"]
            neg_score = scores["negative"]
            diff = pos_score - neg_score

            # TODO: make the decision threshold (beta) configurable so it can
            # be optimized; for now the comparison is a plain ">".
            # Ties (equal scores / equal vote counts) fall to "negative".
            score_vote = "positive" if pos_score > neg_score else "negative"
            count_vote = "positive" if pos_votes > neg_votes else "negative"

            results[key] = (actual,pos_score,neg_score,diff,
                            score_vote,pos_votes,neg_votes,count_vote)

        evaluate(results,fname=outpath)
        v.reset()
# Example #2
# 0
def write_classifier_dict(keys,classifier_dict,selection,mode):
    """Pickle every classifier in ``classifier_dict`` to its own file.

    Each classifier is dumped with ``cPickle`` to
    ``cresults/pickles/target/<mode>/<selection>/<cid>.pkl``. Before writing,
    ``checkDir`` is consulted for the ``pickles/target/`` sub-directory and
    ``createDir`` is called when it returns a falsy value.

    Args:
        keys: not used by this function; kept for the existing call signature.
        classifier_dict: mapping of classifier id -> classifier object.
        selection: path component of the output location.
        mode: path component of the output location.

    Returns:
        None.
    """
    print("writing classifier dict")

    # Same keyword arguments go to both directory helpers.
    dir_kwargs = dict(mode=mode, sub="pickles/target/", selection=selection)
    directory_ok = checkDir(**dir_kwargs)
    if not directory_ok:
        createDir(**dir_kwargs)

    path_template = "cresults/pickles/target/{0}/{1}/{2}.pkl"
    for cid, classifier in classifier_dict.items():
        print("pickling cid={0}".format(cid))
        print(classifier)
        pickle_path = path_template.format(mode, selection, cid)
        with open(pickle_path, 'wb') as pickle_file:
            cPickle.dump(classifier, pickle_file)