# Example 1
def saveClassifiers():
    """Train verb classifiers from a fixed set of annotation assignments,
    pickle them to disk, then evaluate each one on a held-out assignment
    and save per-engine ROC curve plots.

    Side effects: reads map/tag/assignment files under TKLIB_HOME, writes
    profile.out, the classifier pickle, and one roc.<name>.png per engine.
    """
    import psyco  # NOTE(review): imported but never activated (no psyco.full()) — appears to be dead
    # Map, skeleton, and ground-truth tag files for floor 3.
    map_fn = "%s/data/directions/direction_floor_3/direction_floor_3_small_filled.cmf" % TKLIB_HOME
    cluster_fn = "%s/data/directions/direction_floor_3/skels/direction_floor_3_skel.pck" % TKLIB_HOME
    gtruth_tag_fn = "%s/data/directions/direction_floor_3/tags/df3_small_tags.tag" % TKLIB_HOME
    
    # Training data: annotation assignments from two annotators.
    assignment_fns = ["%s/nlp/data/aaai_2010_smv/stefie10/assignment1.1.yaml" % TKLIB_HOME,
                      "%s/nlp/data/aaai_2010_smv/stefie10/assignment1.2.yaml" % TKLIB_HOME,
                      "%s/nlp/data/aaai_2010_smv/stefie10/assignment2.1.yaml" % TKLIB_HOME,
                      "%s/nlp/data/aaai_2010_smv/tkollar/assignment3.1.yaml" % TKLIB_HOME,
                      ]

    tagFile = tag_util.tag_file(gtruth_tag_fn, map_fn)
    tagFile.get_map()
    tagFile.get_tag_names()
    
    skeleton = carmen_map_skeletonizer.load(cluster_fn, map_fn)
    assignments = [Assignment.load(fn, tagFile, skeleton)for  fn in assignment_fns]
    # Collect the classifiers via a closure so the result survives the
    # cProfile.runctx call below (runctx only returns profiling data).
    result = []
    def run():
        classifiers = makeClassifiers(assignments)
        result.append(classifiers)
        

    # Profile the (slow) training step and report wall-clock time.
    start = time.time()
    cProfile.runctx("run()", globals(), locals(), "profile.out")
    end = time.time()
    print "took", (end - start)/60., "minutes"
    classifiers = result[0]    
    fname = "%s/nlp/data/engines.verbs.floor3.stefie10.pck" % TKLIB_HOME
    cPickle.dump(classifiers, open(fname, "w"))
    print "wrote", fname
    
    # Held-out assignment for evaluation (not in the training set above).
    testingAssignment = Assignment.load("%s/nlp/data/aaai_2010_smv/stefie10/assignment4.1.yaml" % TKLIB_HOME, tagFile, skeleton)    
    
    # One ROC plot per verb engine, evaluated on the held-out assignment.
    for name, c in classifiers.iteritems():
        engine = c.engine
        testing = makeTable(engine, [testingAssignment])
        results = orngTest.testOnData([c.classifier], testing)
        mpl.figure()
        line, = orangeGui.rocCurve(results, engine.name, stepSize=0.001, marker="x", plotArgs=dict(color="k"))
        
        mpl.title(engine.name.capitalize(), fontsize=30)
        mpl.xlabel("TP")  # NOTE(review): axis labels look swapped vs. the other plots in this file (usually x=FP, y=TP) — confirm
        mpl.ylabel("FP")
        mpl.xticks([0, 1], fontsize=20)
        mpl.yticks([0, 1], fontsize=20)
        line.set_label(engine.name.upper())
        mpl.savefig("roc.%s.png" % engine.name)
        orangeUtils.displayResults(results)
    mpl.show()
# Example 2
    def doRun(title, newDomain, learner, training, testing, marker):
        newTraining = orangeUtils.convertTable(training, newDomain)
        newTesting = orangeUtils.convertTable(testing, newDomain)

        classifier = learner(newTraining)
        results = orngTest.testOnData([classifier], testing, storeClassifiers=1)
        
        print "title", title
        displayResults(results)    
        line, = rocCurve(results, title, stepSize=0.001, marker=marker)
        line.set_label(title)
        lines.append(line)

        return results
# Example 3
def smallRocCurve():

    trainer = Trainer()

    keys = None
    keys = None
    #keys = ["towards"]
    for i, key in enumerate(trainer.annotationEngines):
        if keys != None and not key in keys:
            continue

        print "*****************************************************"
        print key
        engine = trainer.engineMap[key]
        mpl.figure(figsize=(8, 8))
        print "training"
        table = trainer.makeTable(engine)
        cv_indices = orange.MakeRandomIndices2(table, p0=0.75)

        training = table.select(cv_indices, 0, negate=True)
        testing = table.select(cv_indices, 0, negate=False)

        classifier = orangePickle.PickleableClassifier(training,
                                                       orngBayes.BayesLearner)
        #orange.LogRegLearner)
        results = orngTest.testOnData([classifier], testing)

        displayResults(results)

        line = rocCurve(results,
                        "",
                        stepSize=0.001,
                        marker=".",
                        plotArgs=dict(linewidth=5))

        line[0].set_label(engine.name())
        mpl.xlabel("FP", fontsize=25)
        mpl.ylabel("TP", fontsize=25)
        mpl.xticks([0, 1], fontsize=20)
        mpl.yticks([0, 1], fontsize=20)
        ax = mpl.gca()
        ax.set_aspect(1. / ax.get_data_ratio())
        mpl.title(engine.name().capitalize(), fontsize=30)
        #mpl.legend(loc='lower right', prop=FontProperties(size=25))
        mpl.savefig("roc.%s.png" % engine.name())

    mpl.show()
# Example 4
def main():
    from sys import argv
        
    map_fn = argv[1]
    gtruth_tag_fn = argv[2]
    cluster_fn = argv[3]
    assignment_fns = argv[4:]
    tagFile = tag_util.tag_file(gtruth_tag_fn, map_fn)
    tagFile.get_map()
    tagFile.get_tag_names()
    
    
    skeleton = carmen_map_skeletonizer.load(cluster_fn, map_fn)

    assignments = [Assignment.load(assignment_fn, tagFile, skeleton) for assignment_fn 
                   in assignment_fns]
    
    engineMap = dict((x.name, x) for x in
                     [bring.Engine(), 
                      follow.Engine(), 
                      meet.Engine(), 
                      avoid.Engine(), 
                      #wander.Engine(), 
                      #go.Engine(),
                      ])
    
    
    for engine in engineMap.values():
        verb = engine.name
        if verb != "follow" and False:
            continue
        

        def run():
            return makeTable(engine, assignments)
        #cProfile.runctx("run()", globals(), locals(), "profile.out")
        #return
        table = run()
        print "verb", verb, len(table)  
    
        cv_indices = orange.MakeRandomIndicesCV(table, 2)
        humanLabeledTraining = table.select(cv_indices, 0)
        training = orange.ExampleTable(humanLabeledTraining.domain)
        training.extend(humanLabeledTraining)
        generatedTraining = makeSubsetExamples(engine, humanLabeledTraining)
        training.extend(generatedTraining)
        
        print "Using", len(generatedTraining), "subset examples"
        
        testing = table.select(cv_indices, 1)
        
        #testFeatureSubsets(engine, training, testing)
        
        #classifier  = orngBayes.BayesLearner(training)
        classifier  = RandomForestLearner(training)
        results = orngTest.testOnData([classifier], testing)
        print "results", results
        tuples = list(zip(testing, results.results))
        tuples.sort(key=lambda x: x[0]["description"])
        for e, r in tuples:
#            print e["description"], e["hasApproach"], e["hasFollow"],
            if r.actualClass == r.classes[0]:
                print "correct", e["description"], e["entry"].value.id 
            else:
                print "incorrect", e["description"], e["entry"].value.id 

        mpl.figure(figsize=(6,6))
        mpl.subplots_adjust(bottom=0.13)
        line, = orangeGui.rocCurve(results, engine.name, stepSize=0.001,
                                   plotArgs={"color":"black"})

        orangeUtils.displayResults(results)
        mpl.xlabel("FP", fontsize=32)
        mpl.ylabel("TP", fontsize=32)
        mpl.xticks((0, 1), fontsize=20)
        mpl.yticks((0, 1), fontsize=20)
        line.set_label(engine.name)
        mpl.title(engine.name.capitalize(), fontsize=32)
        mpl.savefig("roc_%s.png" % engine.name)
        mpl.savefig("roc_%s.ps" % engine.name)
    mpl.show()
# Example 5
def ablateFeaturesForCls(engineCls):
    """Feature-ablation study for one engine class: train and plot an ROC
    curve per single-feature sub-engine, plus one for the full feature set,
    all on the same figure, then save it as roc.ablate.<name>.png.
    """
    mpl.figure()
    trainer = Trainer()
    engine = engineCls()
    trainer.configureClassifier(engine)

    # Distinct marker/color per curve; "o"/black is reserved for "all".
    marker_pool = list(".,v^<>1234sp*hH")
    color_pool = list("bgrcmy")

    # One sub-engine restricted to each feature in the master list.
    ablated = []
    for feature_name in sorted(engine.masterList):
        single = engineCls()
        single.setFeatureList([feature_name])
        ablated.append((feature_name, single))

    marker_pool = marker_pool[:len(ablated)]
    color_pool = color_pool[:len(ablated)]
    ablated.append(("all", engineCls()))
    marker_pool.append("o")
    color_pool.append("k")

    for idx, (label, sub_engine) in enumerate(ablated):
        table = trainer.configureClassifier(sub_engine)
        # 75/25 random training/testing split.
        split = orange.MakeRandomIndices2(table, p0=0.75)
        training = table.select(split, 0, negate=True)
        testing = table.select(split, 0, negate=False)

        classifier = orngBayes.BayesLearner(training)
        results = orngTest.testOnData([classifier], testing)
        displayResults(results)

        curve = rocCurve(
            results,
            "",
            stepSize=0.001,
            marker=marker_pool[idx % len(marker_pool)],
            plotArgs=dict(linewidth=5,
                          markersize=10,
                          color=color_pool[idx % len(color_pool)]),
        )
        curve[0].set_label(label)

    mpl.title(engine.name(), size=30)
    mpl.xlabel("FP", fontsize=30)
    mpl.ylabel("TP", fontsize=30)
    mpl.xticks([0, 1], fontsize=17)
    mpl.yticks([0, 1], fontsize=17)
    mpl.subplots_adjust(bottom=0.14, top=0.91)
    mpl.legend(loc="lower right", prop=dict(size=17))
    mpl.savefig("roc.ablate.%s.png" % engine.name())