Example #1
import random  # stdlib; Corpus, recipesToDataset, train_lccrf, and pickle_util are repo-local

def main(forceFixedTestSet=False):
    recipes = list(Corpus("data/"))
    recipes.sort(key=lambda ar: ar.name)
    test_set_names = []
    forced_test_set_recipes = []
    other_recipes = [r for r in recipes if r.name not in test_set_names]
    
    assert len(other_recipes) + len(forced_test_set_recipes) == len(recipes)
    assert len(forced_test_set_recipes) == len(test_set_names)
    training = []
    testing = []
    
    while len(other_recipes) > 0:
        if len(testing) < 15:
            # randrange(0, n) samples [0, n); the original randrange(0, n - 1)
            # could never select the last remaining recipe.
            randVal = random.randrange(0, len(other_recipes))
            testing.append(other_recipes.pop(randVal))
        else:
            training.append(other_recipes.pop(0))

    #testing.extend(forced_test_set_recipes)

    training_dataset = recipesToDataset(training)
    pickle_util.save("training.pck", training_dataset)

    testing_dataset = recipesToDataset(testing)
    pickle_util.save("testing.pck", testing_dataset)


    train_lccrf(training_dataset, "kitchenModel.pck", sigma=1.5)    

    print "training on", len(training)
    print "testing on", len(testing)
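The loop above pops one random index at a time to build the 15-recipe test set. The same split can be drawn in one step with random.sample; below is a minimal stdlib-only sketch (split_recipes and its parameters are illustrative names, not from the repo):

import random

def split_recipes(recipes, test_size=15, seed=None):
    # sample() draws test_size distinct recipes in one call, so there is
    # no need to pop random indices from a shrinking list.
    rng = random.Random(seed)
    testing = rng.sample(recipes, min(test_size, len(recipes)))
    chosen = set(id(r) for r in testing)
    training = [r for r in recipes if id(r) not in chosen]
    return training, testing

Passing a fixed seed makes the split reproducible across runs, which Example #8 below otherwise achieves by hard-coding the test-set names.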
Example #2
import time  # stdlib; pool_27, pickle_util, Evaluator, and evaluate are repo-local

def main():
    """
    Evaluates object groundings for the NIST/BOLT evaluation.  Takes a
    special input file annotated with correctness for noun phrases in
    the test corpus.
    """
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option("--cost-function-class", dest="cost_function_class")
    parser.add_option("--model", dest="model_fname")
    parser.add_option("--training_fname", dest="training_fname")
    parser.add_option("--corpus-fname", dest="corpus_fname")
    parser.add_option("--state-type", dest="state_type")

    (options, args) = parser.parse_args()
    print "training", options.training_fname
    training_set = pickle_util.load(options.training_fname)

    print "Training on", len(training_set.observations), "examples"

    global evaluator
    evaluator = Evaluator(options.model_fname, options.cost_function_class,
                          training_set, options.corpus_fname,
                          options.state_type)
    results = []

    args = []  # note: rebinds (shadows) the positional args from parse_args above
    for i in range(1, 100, 10):
        fraction = float(i) / 100
        args.append((fraction, ))

    pool = pool_27.Pool(processes=2, maxtasksperchild=1)

    def callback(result):
        num_examples, cms = result
        print "***** finished results", num_examples
        results.append((num_examples, cms))

    #args = args[2:3]
    for arg in args:
        print "apply"
        pool.apply_async(evaluate, arg, callback=callback)
        #pool.apply_sync(evaluate, arg, callback=callback)

    # Busy-wait until every callback has fired; callbacks run in the parent
    # process, so appending to `results` here is safe.
    while len(results) < len(args):
        time.sleep(1)

    print "closing"
    pool.close()
    print "joining"
    pool.join()

    fname = "confusion_matrices.pck"
    print "saving", fname
    pickle_util.save(fname, results)
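Example #2 fans tasks out with apply_async, gathers results in a callback, and polls until every task has reported back. The same pattern works with the stdlib multiprocessing.Pool, which supports maxtasksperchild from Python 2.7 on (pool_27 is presumably a compatibility wrapper for older interpreters). A self-contained sketch; evaluate_fraction stands in for the repo's evaluate:

import time
import multiprocessing

def evaluate_fraction(fraction):
    # Stand-in for the real evaluation; returns (input, result).
    return fraction, fraction ** 2

def run_pool():
    args = [(i / 100.0,) for i in range(1, 100, 10)]
    results = []

    pool = multiprocessing.Pool(processes=2, maxtasksperchild=1)
    for arg in args:
        # The callback runs in the parent process as each task finishes,
        # so appending to `results` is safe.
        pool.apply_async(evaluate_fraction, arg, callback=results.append)

    while len(results) < len(args):  # poll until every callback has fired
        time.sleep(0.1)
    pool.close()
    pool.join()
    return results

if __name__ == "__main__":
    print run_pool()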
Example #3
    def save(model, fname):
        python_fname, model.java_fname = fnames_from_root(fname)

        # The JVM-side CRF cannot be pickled, so persist it separately.
        model.java_crf.saveModel(model.java_fname)
        tmp = model.java_crf
        model.loadWeights()
        print 'loading', len(model.featureNames), 'features'

        # Detach the unpicklable Java object, pickle the Python wrapper,
        # then restore it so the live model keeps working.
        model.java_crf = None
        pickle_util.save(python_fname, model)
        model.java_crf = tmp
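The detach-pickle-restore dance in save() is exactly what pickle's __getstate__/__setstate__ hooks automate. A sketch of the same idea done declaratively; java_crf and featureNames come from the example above, everything else is illustrative:

import pickle

class CrfModel(object):
    def __init__(self):
        self.featureNames = []
        self.java_crf = object()  # stand-in for the unpicklable JVM handle

    def __getstate__(self):
        # Called by pickle: return a copy of the state without the
        # unpicklable attribute.
        state = self.__dict__.copy()
        state['java_crf'] = None
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)  # java_crf comes back as None

model = CrfModel()
data = pickle.dumps(model)  # succeeds; no JVM object in the stream

With this in place, the live object never has to be mutated around the pickle_util.save() call.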
Example #4
    def saveNodeResults(self):
        entries = self.nodeResultsModel.entries
        for e in entries:
            # Clear fields that should not go into the saved pickle.
            e.ggg = None
            e.annotation = None
            #e.results = [e.results[0]] + random.sample(e.results[1:], 50)
            for r in e.results:
                r.end_ggg.remove_cost_entries()

        fname = "node_results.pck"
        print "saving", fname, "..."
        pickle_util.save(fname, entries)
        print "done"
Example #5
def main():
    from optparse import OptionParser
    parser = OptionParser()

    parser.add_option("--dfactor", dest="dfactor", default=36)
    (options, args) = parser.parse_args()

    original_dataset_fname = args[0]
    convert_dataset_fname = args[1]
    save_fname = args[2]
    original_dataset = pickle_util.load(original_dataset_fname)
    convert_dataset = pickle_util.load(convert_dataset_fname)
    obs = convert_dataset.observations
    discrete_dataset = original_dataset.to_discrete_dataset(
        dataset=obs, dfactor=int(options.dfactor))
    print "saving to...", save_fname
    print "...with", len(discrete_dataset.fnames), "features"
    pickle_util.save(save_fname, discrete_dataset)
    print "done"
Example #6
    def resultFunction(fname, check=True):
        if fname.endswith(".pck"):
            return pickle_util.load(fname)
        else:
            # Cache the parsed annotations as a pickle under /tmp, keyed by
            # user and by the source path with slashes flattened to "_".
            pickleFname = "/tmp/%s_%s.pck" % (getuser(), fname.replace(
                "/", "_"))
            #os.path.basename(fname))

            # Reuse the cache only if it is newer than the source file.
            if (os.path.exists(pickleFname) and
                    os.path.getmtime(pickleFname) > os.path.getmtime(fname)):
                try:
                    print "loading", pickleFname, "...",
                    obj = pickle_util.load(pickleFname)
                    print "done"
                    return obj
                except Exception:
                    print "ignoring stale cache; reloading from yaml"
                    traceback.print_exc()
            print "loading", fname, "then will dump", pickleFname, "..."
            annotations = loadFunction(fname, check=check)
            pickle_util.save(pickleFname, annotations)
            print 'done'
            return annotations
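The mtime-gated cache in resultFunction is a reusable pattern: reuse a /tmp pickle only while it is newer than its source, otherwise rebuild it. A compact stdlib-only sketch of the pattern as a decorator (pickle_cached is an illustrative name, not from the repo):

import os
import pickle
from getpass import getuser

def pickle_cached(loader):
    # Wrap loader(fname, ...) so its result is pickled under /tmp and
    # reused while the cache is newer than the source file.
    def wrapper(fname, **kwargs):
        cache = "/tmp/%s_%s.pck" % (getuser(), fname.replace("/", "_"))
        if (os.path.exists(cache) and
                os.path.getmtime(cache) > os.path.getmtime(fname)):
            try:
                with open(cache, "rb") as f:
                    return pickle.load(f)
            except Exception:
                pass  # stale or corrupt cache; fall through and rebuild
        obj = loader(fname, **kwargs)
        with open(cache, "wb") as f:
            pickle.dump(obj, f)
        return obj
    return wrapper

Wrapping the example's loadFunction with pickle_cached would reproduce the else branch of resultFunction above.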
Example #7
    def saveState(self):
        fname = QFileDialog.getSaveFileName(self)
        if fname:  # empty when the user cancels the dialog
            pickle_util.save(fname, self.state)
Example #8
def main(forceFixedTestSet=False):
    forceFixedTestSet = True  # hard-coded override: the fixed test set is always used
    recipes = list(Corpus("data/"))
    recipes.sort(key=lambda ar: ar.name)
    test_set_names = ("Afghan Biscuits Daniela", "Easy Oatmeal Cookies",
                      "Quick'N Easy Sugar Cookies", "Quick and Easy Meatloaf")
    #Used for testing for paper
    if forceFixedTestSet:
        #2/15
        test_set_names = ("Quick and Easy Meatloaf", "Simply Brownie Recipe", "Quick'N Easy Sugar Cookies",
                      "Cake Mix Cookies", "Yellow Cake #1", "Easy Oatmeal Cookies", "Afghan Biscuits Daniela",
                      "Easy Platz (Coffee Cake)", "Easy Bread Pudding", "Deep Dish Brownies",
                      "Fudgy Fudge Brownies", "Simple Cocoa Brownies", "Chocolate Fudge Cookies",
                      "Pan Fudge Cake", "Sugar Cookies")
        #5/15
        test_set_names_better = ('Afghan Biscuits', 'Almond Crescent Cookies', 'Brownies #1',
             'CAKE MIX COOKIES', 'Chewiest Brownies #2', 'Chocolate Fudge Cookies',
             'Cracked Sugar Cookies I', 'Easy Bread Pudding', 'Easy Sugar Cookies',
             'Flourless Peanut Butter Cookies', 'Fudge Crinkles',
             'Healthy and Easy Turkey Meatloaf', 'Pan Fudge Cake', "INCOMPLETE")
                                 
        #4/15
        test_set_average = ('Cake Mix Cookies VII', 'Chocolate Afghans',
                            'Chocolate Chippers 1', 'Chocolate Fudge Cookies',
                            'Chocolate Peanut Butter Pudding Cookies',
                            'Easiest Brownies Ever', 'Easy Bread Pudding',
                            'Easy Oatmeal Cookies', 'Easy Sugar Cookies',
                            'Fudge Crinkles', 'Fudgy Fudge Brownies',
                            'Intensely Chocolate Cocoa Brownies', 'Peach Cobbler #1',
                            'Simple Brownie Recipe', 'Sugar and Spice Cookies')
        
        test_set_names = test_set_average
        
    forced_test_set_recipes = [r for r in recipes if r.name in test_set_names]
    other_recipes = [r for r in recipes if r.name not in test_set_names]
    
    assert len(other_recipes) + len(forced_test_set_recipes) == len(recipes)
    assert len(forced_test_set_recipes) == len(test_set_names)
    training = []
    testing = []

    for i, recipe in enumerate(other_recipes):
        if i % 4 in (0, 1, 2):
            training.append(recipe)
        else:
            if forceFixedTestSet:
                # With a fixed test set, every remaining recipe trains.
                training.append(recipe)
            else:
                testing.append(recipe)

    testing.extend(forced_test_set_recipes)


    training_dataset = recipesToDataset(training)
    pickle_util.save("training.pck", training_dataset)

    testing_dataset = recipesToDataset(testing)
    pickle_util.save("testing.pck", testing_dataset)


    train_lccrf(training_dataset, "kitchenModel.pck", sigma=1.5)    

    print "training on", len(training)
    print "testing on", len(testing)
Example #9
def main():
    # stdlib imports (assumed at module level in the original source):
    import random
    from math import ceil
    from optparse import OptionParser
    from os.path import basename

    parser = OptionParser()

    parser.add_option("--outfile_training",
                      dest="training_fname",
                      help="Training Output Filename")
    parser.add_option("--outfile_test",
                      dest="testing_fname",
                      help="Test Output Filename")
    parser.add_option(
        "--infile_positive",
        dest="positive_fnames",
        action="append",
        default=[],
        help="Positive Filename; default to True if isGroundingCorrect is None"
    )
    parser.add_option(
        "--infile_negative",
        dest="negative_fnames",
        action="append",
        default=[],
        help="Negative Filename; default to False if isGroundingCorrect is None"
    )
    parser.add_option(
        "--infile_labeled",
        dest="labeled_fnames",
        action="append",
        default=[],
        help="Labeled examples; skip if isGroundingCorrect is None")

    parser.add_option("--infile_unlabeled",
                      dest="unlabeled_fnames",
                      action="append",
                      default=[],
                      help="Unlabeled Filename")

    parser.add_option("--feature_extractor",
                      dest="feature_extractor",
                      help="Feature Extractor Class")

    parser.add_option("--split",
                      dest="split",
                      type="string",
                      help="'random' to split randomly; 'scenario' to split " +
                      "by scenario.")

    parser.add_option(
        "--training_examples",
        dest="training_examples",
        action="append",
        help="Examples that are in the training set; others go in the "
             "test set.  Can be passed more than once.")

    (options, args) = parser.parse_args()

    try:
        from g3.feature_extractor.esdc_features import EsdcFeatures
        from g3.feature_extractor.esdc_flattened_features import EsdcFlattenedFeatures
        from g3.feature_extractor.grounded_features import GGGFeatures
        from g3.feature_extractor.rl_features import RLFeatures
        from g3.feature_extractor.bolt_features import BoltFeatures
        from g3.feature_extractor.ikea_features import IkeaFeatures
        from g3.feature_extractor.sr_features import SrFeatures
        #feature_extractor = semantic_map.esdc_semantic_map2.esdc_semantic_map()
        # Resolve the extractor class named on the command line.
        feature_extractor_cls = eval(options.feature_extractor)
        feature_extractor = feature_extractor_cls()
    except:
        print "error doing", options.feature_extractor
        raise

    observations = list()

    for positive_fname in options.positive_fnames:
        corpus = annotationIo.load(positive_fname)
        new_examples = generate_examples(basename(positive_fname),
                                         corpus,
                                         feature_extractor,
                                         default_class_value=True)
        if len(new_examples) == 0:
            raise ValueError("No examples from " + repr(positive_fname))
        observations.extend(new_examples)

    for negative_fname in options.negative_fnames:
        corpus = annotationIo.load(negative_fname)
        new_examples = generate_examples(basename(negative_fname),
                                         corpus,
                                         feature_extractor,
                                         default_class_value=False)
        if len(new_examples) == 0:
            raise ValueError("No examples from " + repr(negative_fname))

        observations.extend(new_examples)

    for labeled_fname in options.labeled_fnames:
        corpus = annotationIo.load(labeled_fname, check=False)
        new_examples = generate_examples(basename(labeled_fname),
                                         corpus,
                                         feature_extractor,
                                         default_class_value=None)
        if len(new_examples) == 0:
            raise ValueError("No examples from " + repr(labeled_fname))
        observations.extend(new_examples)

    for unlabeled_fname in options.unlabeled_fnames:
        corpus = annotationIo.load(unlabeled_fname)
        new_examples = generate_examples(basename(unlabeled_fname),
                                         corpus,
                                         feature_extractor,
                                         default_class_value=None,
                                         force_default_class_value=True)
        if len(new_examples) == 0:
            raise ValueError("No examples from " + repr(unlabeled_fname))
        observations.extend(new_examples)

    if options.split == "scenario":
        mturkCorpus = readCorpus.Corpus(
            "%s/data/corpusCommandsForVideoSmallFilesOnly/" % SLU_HOME)
        scenario_names = list(
            set(
                mturkCorpus.assignmentForId(
                    obs.annotation.assignmentId.split("_")[0]).scenario.name
                for obs in observations))
        random.shuffle(scenario_names)

        n_training_scenarios = int(ceil(len(scenario_names) * 0.7))

        training_scenarios = scenario_names[:n_training_scenarios]
        testing_scenarios = scenario_names[n_training_scenarios:]

        training = [
            o for o in observations if mturkCorpus.assignmentForId(
                o.annotation.assignmentId.split("_")[0]).scenario.name in
            training_scenarios
        ]

        testing = [
            o for o in observations if mturkCorpus.assignmentForId(
                o.annotation.assignmentId.split("_")[0]).scenario.name in
            testing_scenarios
        ]
    elif options.split == "annotation":
        # Split the examples, grouped by annotation.  If the spatial
        # relations corpus is included, that data goes in the training
        # set only.
        training = []
        testing = []
        sr_ids = []
        ids = []

        for o in observations:
            aid = o.annotation.id
            if ((aid not in ids) and ("sr_" not in aid)):
                ids.append(aid)
            elif "sr_" in aid:
                sr_ids.append(aid)

        random.shuffle(ids)
        n_training_ids = int(ceil(len(ids) * 0.7))

        training_ids = ids[:n_training_ids]
        testing_ids = ids[n_training_ids:]

        training = [
            o for o in observations if o.annotation.id in training_ids
            or o.annotation.assignmentId in sr_ids
        ]
        testing = [o for o in observations if o.annotation.id in testing_ids]
    elif options.split == "random":
        random.shuffle(observations)
        n_training = int(ceil(len(observations) * 0.7))
        training = observations[0:n_training]
        testing = observations[n_training:]
    elif options.split == "labeled_annotation":
        training_ids = set()
        training = []
        testing = []
        for training_fname in options.training_examples:
            ds = pickle_util.load(training_fname)
            for ex in ds.observations:
                training_ids.add(ex.annotation.id)
                training_ids.add(ex.annotation.id.split("_")[0])
        print "training", training_ids
        for example in observations:
            if example.annotation.id in training_ids:
                training.append(example)
            else:
                aid = example.annotation.id.split("_")[0]
                if aid in training_ids:
                    training.append(example)
                else:
                    print "skipping", example.annotation.id, aid
                    testing.append(example)
        print "labeled training", len(training)
        print "labeled testing", len(testing)
    elif options.split == "labeled_file":
        training = []
        testing = []
        for example in observations:
            if "training" in example.annotation.fname:
                training.append(example)
            elif "testing" in example.annotation.fname:
                testing.append(example)
            else:
                training.append(example)

    elif options.split == "labeled":
        training_ids = set()
        training = []
        testing = []
        for training_fname in options.training_examples:
            ds = pickle_util.load(training_fname)
            for ex in ds.observations:
                print "id", ex.id
                training_ids.add(ex.id)

        for example in observations:
            print "example", example.id
            if example.id in training_ids:
                training.append(example)
            else:
                testing.append(example)

    else:
        raise ValueError("Unexpected split type: " + repr(options.split))

    training_dataset = ContinuousDataset(training, feature_extractor_cls)
    testing_dataset = ContinuousDataset(testing, feature_extractor_cls)

    print "saving", len(training), "examples to:", options.training_fname
    pickle_util.save(options.training_fname, training_dataset)

    print "saving", len(testing), "examples to:", options.testing_fname
    pickle_util.save(options.testing_fname, testing_dataset)
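Each split mode above reduces to the same shape: pick a grouping key, shuffle the distinct groups, and send roughly 70% of the groups to training, so that related examples never straddle the train/test boundary. A generic stdlib-only sketch; key_fn stands in for the scenario and annotation lookups used above:

import random
from math import ceil

def grouped_split(observations, key_fn, train_fraction=0.7, seed=None):
    # Split whole groups, never individual examples, to avoid leaking
    # near-duplicates across the train/test boundary.
    keys = list(set(key_fn(o) for o in observations))
    random.Random(seed).shuffle(keys)
    n_train = int(ceil(len(keys) * train_fraction))
    train_keys = set(keys[:n_train])
    training = [o for o in observations if key_fn(o) in train_keys]
    testing = [o for o in observations if key_fn(o) not in train_keys]
    return training, testing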
Example #10
    def save(model, fname):
        pickle_util.save(fname, model)
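pickle_util itself never appears in these examples, only its call sites. Judging by those call sites it is a thin wrapper over the standard pickle module with the filename as the first argument to save; a plausible minimal sketch (the repo's real version may differ, e.g. in protocol choice or error handling):

import pickle

def save(fname, obj):
    # Filename first, object second: the argument order used throughout
    # the examples above.
    with open(fname, "wb") as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load(fname):
    with open(fname, "rb") as f:
        return pickle.load(f)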