def main():
    """Print one LaTeX-style table row per phrase with its labelled precision.

    For every phrase in ``cf.phrases`` the function looks for
    ``../training/<phrase with spaces replaced by underscores>.manual_good``.
    Phrases without that file are skipped.  For the remaining phrases,
    ``a.get_human_sets`` is called with the ``.manual_good`` and
    ``.pickedclaims`` paths; precision is ``len(good) / len(all)`` and is
    printed as ``<phrase> & <integer percent>`` followed by a
    backslash-escaped percent sign.

    Phrases whose full claim collection is empty are skipped as well, since
    precision is undefined for them (previously this raised
    ZeroDivisionError and aborted the whole report).
    """
    for phrase in cf.phrases:
        basename = "../training/" + phrase.replace(" ", "_")
        humangood = basename + ".manual_good"
        allfile = basename + ".pickedclaims"
        if not os.path.exists(humangood):
            # Nothing was hand-labelled for this phrase.
            continue

        # NOTE(review): get_human_sets is defined outside this block; it is
        # assumed to return three sized collections (good, bad, all) --
        # confirm against its definition.
        good, _bad, everything = a.get_human_sets(humangood, allfile)

        total = len(everything)
        if total == 0:
            # No claims at all: precision is undefined, so skip the row
            # instead of dividing by zero.
            continue

        # float() keeps this a true division under Python 2 as well.
        precision = float(len(good)) / total

        # A single parenthesised string prints identically with the Python 2
        # print statement and the Python 3 print function.  "\\%%" renders
        # as a backslash followed by a percent sign.
        print("%s & %d\\%%" % (phrase, int(100 * precision)))
def get_training_data(phrases):
    """Collect hand-labelled claims for ``phrases`` as shuffled pairs.

    For each phrase, the file
    ``../training/<phrase with spaces replaced by underscores>.manual_good``
    is looked up; phrases without it contribute nothing.  Otherwise
    ``a.get_human_sets`` is called with the ``.manual_good`` and
    ``.pickedclaims`` paths, and every claim from its first result is
    recorded as ``(claim, True)`` and every claim from its second result as
    ``(claim, False)``.

    Returns the list of ``(claim, label)`` tuples after shuffling it in
    place with ``random.shuffle``.
    """
    labelled = []
    for phrase in phrases:
        stem = "../training/" + phrase.replace(" ", "_")
        good_path = stem + ".manual_good"
        all_path = stem + ".pickedclaims"

        if not os.path.exists(good_path):
            # No manual labels for this phrase.
            continue

        # The third element of the result is not needed here.
        accepted, rejected, _everything = a.get_human_sets(good_path, all_path)
        labelled.extend([(claim, True) for claim in accepted])
        labelled.extend([(claim, False) for claim in rejected])

    random.shuffle(labelled)
    return labelled