def evaluate_classifiers(v, test_keys, classifier_dict, selection="r", mode="unigram"):
    """Score every classifier on its own and hand the per-key rows to ``evaluate``.

    For each classifier id in ``classifier_dict`` the scorer ``v`` is asked to
    score by that id and rebuild its vote dictionaries. One result row is then
    assembled per test key and the rows are passed to ``evaluate`` together
    with a per-classifier output path.

    Args:
        v: scoring object; must provide ``score_tweets_bycid``,
            ``build_vote_dicts``, ``summarize_weighted_results``, ``reset``,
            ``basic_result_dict``, ``weighted_result_dict`` and ``instances``.
        test_keys: iterable of keys to build result rows for.
        classifier_dict: container iterated for classifier ids.
        selection: path component, also forwarded to ``checkDir``/``createDir``.
        mode: path component, also forwarded to ``checkDir``/``createDir``.

    Each row is the tuple ``(actual, pos_score, neg_score, diff, score_vote,
    pos_votes, neg_votes, count_vote)``.
    """
    # NOTE(review): the original indentation was lost; `evaluate` and
    # `v.reset()` are assumed to run once per classifier because the output
    # path is per-classifier -- confirm against the original layout.
    print("creating alpha results from classifiers...")
    results = {}

    target_dir = dict(sub="alpha_results/target/", selection=selection, mode=mode)
    if not checkDir(**target_dir):
        createDir(**target_dir)

    for cid in classifier_dict:
        print("evaulating cid={0}".format(cid))
        outpath = "cresults/alpha_results/target/{0}/{1}/{2}.txt".format(
            mode, selection, cid)

        # Scoring a single cid; this call is the place to change in order to
        # evaluate combinations of classifiers instead.
        v.score_tweets_bycid(cid)
        v.build_vote_dicts()

        vote_lists = v.basic_result_dict
        weighted_votes = v.weighted_result_dict  # read as before, not used below
        score_summary = v.summarize_weighted_results()

        for key in test_keys:
            key_votes = vote_lists[key]
            n_pos = key_votes.count("positive")
            n_neg = key_votes.count("negative")
            true_label = v.instances[key].label

            key_scores = score_summary[key]
            pos_total = key_scores["positive"]
            neg_total = key_scores["negative"]
            margin = pos_total - neg_total

            # TODO: a tunable "beta" threshold was planned for this decision.
            if pos_total > neg_total:
                by_score = "positive"
            else:
                by_score = "negative"

            # Ties are not handled specially: they fall through to "negative".
            if n_pos > n_neg:
                by_count = "positive"
            else:
                by_count = "negative"

            results[key] = (true_label, pos_total, neg_total, margin,
                            by_score, n_pos, n_neg, by_count)

        evaluate(results, fname=outpath)
        v.reset()
def write_classifier_dict(keys, classifier_dict, selection, mode):
    """Pickle each classifier in ``classifier_dict`` to its own ``.pkl`` file.

    Files are written to ``cresults/pickles/target/<mode>/<selection>/<cid>.pkl``.
    The target directory is checked with ``checkDir`` and created with
    ``createDir`` when that check is falsy.

    Args:
        keys: accepted for interface compatibility; not used in this function.
        classifier_dict: mapping of classifier id -> classifier object.
        selection: path component, also forwarded to ``checkDir``/``createDir``.
        mode: path component, also forwarded to ``checkDir``/``createDir``.
    """
    print("writing classifier dict")

    pickle_dir = dict(mode=mode, sub="pickles/target/", selection=selection)
    dir_ready = checkDir(mode=mode, sub='pickles/target/', selection=selection)
    if not dir_ready:
        createDir(**pickle_dir)

    for cid, classifier in classifier_dict.items():
        print("pickling cid={0}".format(cid))
        print(classifier)
        outpath = "cresults/pickles/target/{0}/{1}/{2}.pkl".format(
            mode, selection, cid)
        # Binary mode so the pickle bytes are written untouched.
        with open(outpath, 'wb') as handle:
            cPickle.dump(classifier, handle)