def saveClassifiers(): import psyco map_fn = "%s/data/directions/direction_floor_3/direction_floor_3_small_filled.cmf" % TKLIB_HOME cluster_fn = "%s/data/directions/direction_floor_3/skels/direction_floor_3_skel.pck" % TKLIB_HOME gtruth_tag_fn = "%s/data/directions/direction_floor_3/tags/df3_small_tags.tag" % TKLIB_HOME assignment_fns = ["%s/nlp/data/aaai_2010_smv/stefie10/assignment1.1.yaml" % TKLIB_HOME, "%s/nlp/data/aaai_2010_smv/stefie10/assignment1.2.yaml" % TKLIB_HOME, "%s/nlp/data/aaai_2010_smv/stefie10/assignment2.1.yaml" % TKLIB_HOME, "%s/nlp/data/aaai_2010_smv/tkollar/assignment3.1.yaml" % TKLIB_HOME, ] tagFile = tag_util.tag_file(gtruth_tag_fn, map_fn) tagFile.get_map() tagFile.get_tag_names() #print cluster_fn #raw_input() skeleton = carmen_map_skeletonizer.load(cluster_fn, map_fn) assignments = [Assignment.load(fn, tagFile, skeleton)for fn in assignment_fns] #classifiers = makeClassifiers(assignment) result = [] def run(): classifiers = makeClassifiers(assignments) result.append(classifiers) start = time.time() cProfile.runctx("run()", globals(), locals(), "profile.out") end = time.time() print "took", (end - start)/60., "minutes" classifiers = result[0] fname = "%s/nlp/data/engines.verbs.floor3.stefie10.pck" % TKLIB_HOME cPickle.dump(classifiers, open(fname, "w")) print "wrote", fname #testingAssignment = Assignment.load("%s/nlp/data/aaai_2010_smv/stefie10/assignment1.1.yaml" % TKLIB_HOME, tagFile, skeleton) #testingAssignment = Assignment.load("%s/nlp/data/aaai_2010_smv/tkollar/assignment3.1.yaml" % TKLIB_HOME, tagFile, skeleton) testingAssignment = Assignment.load("%s/nlp/data/aaai_2010_smv/stefie10/assignment4.1.yaml" % TKLIB_HOME, tagFile, skeleton) for name, c in classifiers.iteritems(): engine = c.engine testing = makeTable(engine, [testingAssignment]) results = orngTest.testOnData([c.classifier], testing) mpl.figure() line, = orangeGui.rocCurve(results, engine.name, stepSize=0.001, marker="x", plotArgs=dict(color="k")) 
mpl.title(engine.name.capitalize(), fontsize=30) mpl.xlabel("TP") mpl.ylabel("FP") mpl.xticks([0, 1], fontsize=20) mpl.yticks([0, 1], fontsize=20) line.set_label(engine.name.upper()) mpl.savefig("roc.%s.png" % engine.name) orangeUtils.displayResults(results) #mpl.legend(loc="lower right") #mpl.title("Classifiers for Verbs") mpl.show()
def doRun(title, newDomain, learner, training, testing, marker): newTraining = orangeUtils.convertTable(training, newDomain) newTesting = orangeUtils.convertTable(testing, newDomain) classifier = learner(newTraining) results = orngTest.testOnData([classifier], testing, storeClassifiers=1) print "title", title displayResults(results) line, = rocCurve(results, title, stepSize=0.001, marker=marker) line.set_label(title) lines.append(line) return results
def smallRocCurve(): trainer = Trainer() keys = None keys = None #keys = ["towards"] for i, key in enumerate(trainer.annotationEngines): if keys != None and not key in keys: continue print "*****************************************************" print key engine = trainer.engineMap[key] mpl.figure(figsize=(8, 8)) print "training" table = trainer.makeTable(engine) cv_indices = orange.MakeRandomIndices2(table, p0=0.75) training = table.select(cv_indices, 0, negate=True) testing = table.select(cv_indices, 0, negate=False) classifier = orangePickle.PickleableClassifier(training, orngBayes.BayesLearner) #orange.LogRegLearner) results = orngTest.testOnData([classifier], testing) displayResults(results) line = rocCurve(results, "", stepSize=0.001, marker=".", plotArgs=dict(linewidth=5)) line[0].set_label(engine.name()) mpl.xlabel("FP", fontsize=25) mpl.ylabel("TP", fontsize=25) mpl.xticks([0, 1], fontsize=20) mpl.yticks([0, 1], fontsize=20) ax = mpl.gca() ax.set_aspect(1. / ax.get_data_ratio()) mpl.title(engine.name().capitalize(), fontsize=30) #mpl.legend(loc='lower right', prop=FontProperties(size=25)) mpl.savefig("roc.%s.png" % engine.name()) mpl.show()
def main(): from sys import argv map_fn = argv[1] gtruth_tag_fn = argv[2] cluster_fn = argv[3] assignment_fns = argv[4:] tagFile = tag_util.tag_file(gtruth_tag_fn, map_fn) tagFile.get_map() tagFile.get_tag_names() skeleton = carmen_map_skeletonizer.load(cluster_fn, map_fn) assignments = [Assignment.load(assignment_fn, tagFile, skeleton) for assignment_fn in assignment_fns] engineMap = dict((x.name, x) for x in [bring.Engine(), follow.Engine(), meet.Engine(), avoid.Engine(), #wander.Engine(), #go.Engine(), ]) for engine in engineMap.values(): verb = engine.name if verb != "follow" and False: continue def run(): return makeTable(engine, assignments) #cProfile.runctx("run()", globals(), locals(), "profile.out") #return table = run() print "verb", verb, len(table) cv_indices = orange.MakeRandomIndicesCV(table, 2) humanLabeledTraining = table.select(cv_indices, 0) training = orange.ExampleTable(humanLabeledTraining.domain) training.extend(humanLabeledTraining) generatedTraining = makeSubsetExamples(engine, humanLabeledTraining) training.extend(generatedTraining) print "Using", len(generatedTraining), "subset examples" testing = table.select(cv_indices, 1) #testFeatureSubsets(engine, training, testing) #classifier = orngBayes.BayesLearner(training) classifier = RandomForestLearner(training) results = orngTest.testOnData([classifier], testing) print "results", results tuples = list(zip(testing, results.results)) tuples.sort(key=lambda x: x[0]["description"]) for e, r in tuples: # print e["description"], e["hasApproach"], e["hasFollow"], if r.actualClass == r.classes[0]: print "correct", e["description"], e["entry"].value.id else: print "incorrect", e["description"], e["entry"].value.id mpl.figure(figsize=(6,6)) mpl.subplots_adjust(bottom=0.13) line, = orangeGui.rocCurve(results, engine.name, stepSize=0.001, plotArgs={"color":"black"}) orangeUtils.displayResults(results) mpl.xlabel("FP", fontsize=32) mpl.ylabel("TP", fontsize=32) mpl.xticks((0, 1), fontsize=20) 
mpl.yticks((0, 1), fontsize=20) line.set_label(engine.name) mpl.title(engine.name.capitalize(), fontsize=32) mpl.savefig("roc_%s.png" % engine.name) mpl.savefig("roc_%s.ps" % engine.name) mpl.show()
def nway(): engine_to_examples = {} trainer = Trainer() classes = set() for i, key in enumerate(trainer.annotationEngines): engine = trainer.engineMap[key] table = trainer.makeTable(engine) for ex in table: if ex["farAway"].value: cls = "null" else: cls = ex["sourceEngineName"].value geometry = ex["geometry"].value engine_to_examples.setdefault(cls, []) classes.add(cls) examples = [ trainer.engineMap[key].makeExample(expectInsane=True, **geometry) for key in trainer.annotationEngines if not len(geometry["figure"]) == 0 ] engine_to_examples[cls].append(examples) if i >= 1: #break pass variables = [] for ex in examples: for attr in ex.domain: if attr.name == "class": continue new_attr = orange.FloatVariable(attr.name) variables.append(new_attr) domain = orange.Domain(variables, orange.EnumVariable("class", values=list(classes))) table = orange.ExampleTable(domain) for engine_name, example_lists in engine_to_examples.iteritems(): for example_list in example_lists: ex = orange.Example(domain) for engine_ex in example_list: for attr in engine_ex.domain: ex[attr.name] = engine_ex[attr.name] ex["class"] = engine_name table.append(ex) print "domain", domain cv_indices = orange.MakeRandomIndices2(table, p0=0.75) training = table.select(cv_indices, 0, negate=True) testing = table.select(cv_indices, 0, negate=False) #classifier = orngBayes.BayesLearner(training) classifier = orangePickle.PickleableClassifier(training, orngBayes.BayesLearner) results = orngTest.testOnData([classifier], testing) print orngStat.CA(results) cm = orngStat.confusionMatrices(results)[0] classes = list(domain.classVar.values) print " ", " ".join([c.rjust(12) for c in classes + ["", ""]]) for className, classConfusions in zip(classes, cm): #format = ("%s" + ("\t%i" * len(classes))) values = (className, ) + tuple(classConfusions) print " ".join([str(c).rjust(12) for c in values]) #print format % values for name in classes: classIndex = classes.index(name) mpl.figure() rocCurve(results, "", classIndex, 
stepSize=0.001, plotArgs=dict(linewidth=5, markersize=10)) mpl.title(name, size=30) mpl.xlabel("FP", fontsize=30) mpl.ylabel("TP", fontsize=30) mpl.xticks([0, 1], fontsize=17) mpl.yticks([0, 1], fontsize=17) fname = "nway.pck" print "saving", fname with open(fname, "w") as f: pickle.dump(classifier, f, protocol=2) mpl.show()
def ablateFeaturesForCls(engineCls):
    """Feature-ablation study for one engine class.

    Plots, on a single figure, an ROC curve for each individual feature
    of `engineCls` plus one curve for the complete feature set, then
    saves the figure as roc.ablate.<engine>.png.
    """
    mpl.figure()
    trainer = Trainer()
    engine = engineCls()
    trainer.configureClassifier(engine)

    marker_pool = ['.', ',', 'v', '^', '<', '>', '1', '2', '3', '4',
                   's', 'p', '*', 'h', 'H',
                   ]
    color_pool = ["b", "g", "r", "c", "m", "y"]

    # One sub-engine per feature in the master list, each restricted to
    # that single feature.
    ablations = []
    for feature in sorted(engine.masterList):
        single = engineCls()
        single.setFeatureList([feature])
        ablations.append((feature, single))

    markers = marker_pool[:len(ablations)]
    colors = color_pool[:len(ablations)]
    # The complete engine is plotted last as a black "o" curve.
    ablations.append(("all", engineCls()))
    markers.append("o")
    colors.append("k")

    for idx, (label, candidate) in enumerate(ablations):
        table = trainer.configureClassifier(candidate)
        cv_indices = orange.MakeRandomIndices2(table, p0=0.75)
        training = table.select(cv_indices, 0, negate=True)
        testing = table.select(cv_indices, 0, negate=False)
        classifier = orngBayes.BayesLearner(training)
        results = orngTest.testOnData([classifier], testing)
        displayResults(results)
        plotted = rocCurve(results, "", stepSize=0.001,
                           marker=markers[idx % len(markers)],
                           plotArgs=dict(linewidth=5, markersize=10,
                                         color=colors[idx % len(colors)]))
        plotted[0].set_label(label)

    mpl.title(engine.name(), size=30)
    mpl.xlabel("FP", fontsize=30)
    mpl.ylabel("TP", fontsize=30)
    mpl.xticks([0, 1], fontsize=17)
    mpl.yticks([0, 1], fontsize=17)
    mpl.subplots_adjust(bottom=0.14, top=0.91)
    mpl.legend(loc="lower right", prop=dict(size=17))
    mpl.savefig("roc.ablate.%s.png" % engine.name())