Example #1
0
 def train_test(self, model_type, compare=True):
     """Run a train round and a load round for one model type and compare scores.

     Two rounds are executed against the same model file
     ``test_files/models/<model_type>``: in the "train" round the passages
     from ``self.load_passages()`` are passed to ``Parser.train``; in the
     "load" round ``None`` is passed instead (presumably so the previously
     saved model is reused -- confirm against ``Parser.train``). After each
     round the passages are parsed with ``evaluate=True`` and the average F1
     of the aggregated scores is recorded.

     :param model_type: forwarded to ``Parser`` as ``model_type`` and used
         as the model file name
     :param compare: when true, assert that the scores of the two rounds
         are almost equal
     """
     scores = []
     p = None
     for mode in "train", "load":
         print("-- %sing %s" % (mode, model_type))
         p = Parser(model_file="test_files/models/%s" % model_type, model_type=model_type)
         p.train(self.load_passages() if mode == "train" else None, iterations=200)
         # parse(..., evaluate=True) is unpacked as pairs; only the second item (the score) is kept.
         score = evaluation.Scores.aggregate([s for _, s in p.parse(self.load_passages(), evaluate=True)])
         scores.append(score.average_f1())
         print()
     print("-- average labeled f1: %.3f, %.3f\n" % tuple(scores))
     if compare:
         self.assertAlmostEqual(*scores)
     # Consume the result explicitly: every other use of parse() in this test
     # iterates over what it returns, so if parse() is lazy, an unconsumed call
     # would never actually parse the text and this check would be a no-op.
     list(p.parse(convert.to_text(self.load_passages()[0])))
     self.assertFalse(list(p.parse(())))  # parsing nothing returns nothing
Example #2
0
def test_empty_features(empty_features_config, model_type):
    """Smoke-test training and parsing with the given empty-features config.

    Uses only the first entry of ``FORMATS``. Any existing model file at the
    target path is removed first, the classifier in the config is set to
    ``model_type``, and the same passages serve as training and dev data.
    The test passes if training and evaluated parsing both run to completion.
    """
    first_format = FORMATS[0]
    model_path = "test_files/models/%s_%s_empty_features" % (first_format, model_type)
    remove_existing(model_path)
    empty_features_config.update({"classifier": model_type})
    corpus = [load_passage(path) for path in passage_files(first_format)]
    parser = Parser(model_files=model_path, config=empty_features_config)
    # Both calls are iterated to the end purely for their side effects;
    # the produced items themselves are not inspected.
    for _ in parser.train(corpus, dev=corpus, test=True, iterations=2):
        pass
    for _ in parser.parse(corpus, evaluate=True):
        pass
Example #3
0
def test_copy_shared(config, model_type):
    """Train twice on one model file with ``copy_shared`` set to the first format.

    The first round trains on passages of ``FORMATS[0]`` only, the second on
    passages of all ``FORMATS``. The config starts with ``lstm_layers=0``;
    after each round the ``ucca`` hyperparameters are updated to
    ``lstm_layers=1``, so the second round runs with that changed setting.
    """
    model_path = "test_files/models/%s_%s_copy_shared" % ("_".join(FORMATS), model_type)
    remove_existing(model_path)
    config.update({
        "classifier": model_type,
        "lstm_layers": 0,
        "copy_shared": [FORMATS[0]],
    })
    format_rounds = ((FORMATS[0],), FORMATS)
    for round_formats in format_rounds:
        parser = Parser(model_files=model_path, config=config)
        corpus = [load_passage(path) for path in passage_files(*round_formats)]
        # Iterate training to completion; the yielded items are not inspected.
        for _ in parser.train(corpus, dev=corpus, test=True, iterations=2):
            pass
        config.update_hyperparams(ucca={"lstm_layers": 1})
Example #4
0
def test_parser(config, model_type, formats, default_setting, text=True):
    """Run a train round and a load round and check that they agree.

    For each of the two rounds ("train", then "load") a fresh ``Parser`` is
    built on the same model file. The "train" round receives the passages as
    training data; the "load" round receives ``None`` instead (presumably so
    the saved model is reused -- confirm against ``Parser.train``). Each
    round then checks that the model is finalized, that node dropout is off,
    and, when applicable, that the model's "W" parameter matches the
    weight-decayed initial value held by the feature extractor.

    The passages are parsed directly and, if ``text`` is true, also after a
    round trip through ``convert.to_text`` / ``convert.from_text``; both must
    give the same number of results. Evaluation is skipped when "amr" is
    among ``formats``. Finally the parameters of both rounds must be equal
    and, when evaluating, the two average F1 scores must agree within the
    tolerance passed to ``pytest.approx``.
    """
    model_path = "test_files/models/%s_%s%s" % (
        "_".join(formats), model_type, default_setting.suffix())
    remove_existing(model_path)
    config.update(default_setting.dict())
    corpus = [load_passage(path) for path in passage_files(*formats)]
    evaluate = "amr" not in formats
    f1_per_round = []
    params_per_round = []

    for mode in ("train", "load"):
        print("-- %sing %s" % (mode, model_type))
        config.update({"classifier": model_type, "copy_shared": None})
        parser = Parser(model_files=model_path, config=config)
        parser.save_init = True
        training_set = corpus if mode == "train" else None
        list(parser.train(training_set, dev=corpus, test=True, iterations=2))

        assert parser.model.is_finalized, "Model should be finalized after %sing" % mode
        assert not getattr(parser.model.feature_extractor, "node_dropout", 0), \
            parser.model.feature_extractor.node_dropout

        round_params = parser.model.all_params()
        params_per_round.append(round_params)
        model_w = round_params.get("W")
        extractor_w = parser.model.feature_extractor.params.get("W")
        # Only compare against the stored initial value when both sides exist
        # and the config says word vectors are not being updated.
        init_is_comparable = (
            model_w is not None
            and extractor_w
            and extractor_w.init is not None
            and not config.args.update_word_vectors
        )
        if init_is_comparable:
            assert_allclose(model_w, weight_decay(parser.model) * extractor_w.init, rtol=1e-6)

        results = list(parser.parse(corpus, evaluate=evaluate))
        text_results = results
        if text:
            print("Converting to text and parsing...")
            text_passages = []
            for passage in corpus:
                for text_block in convert.to_text(passage, sentences=False):
                    text_passages.extend(
                        convert.from_text(text_block, passage.ID,
                                          extra_format=passage.extra.get("format")))
            text_results = list(parser.parse(text_passages))
            assert len(results) == len(text_results)

        if evaluate:
            # With evaluate=True each result is indexed as a pair; position 1 holds the score.
            passage_scores = tuple(zip(*results))[1]
            f1_per_round.append(Scores(passage_scores).average_f1())
            if text:
                for _, (predicted, passage_score) in zip(text_results, results):
                    print("  %s F1=%.3f" % (predicted.ID, passage_score.average_f1()))

        assert not list(parser.parse(()))  # parsing nothing returns nothing
        print()

    assert_all_params_equal(*params_per_round)
    if evaluate:
        print("-- average f1: %.3f, %.3f\n" % tuple(f1_per_round))
        assert f1_per_round[0] == pytest.approx(f1_per_round[1], 0.1)
Example #5
0
def test_extra_classifiers(config, model_type, default_setting):
    """Check model state after a train round and a load round on the first format.

    Each round builds a fresh ``Parser`` on the same model file. The "train"
    round is given the passages as training data and the "load" round is
    given ``None`` (presumably so the saved model is reused -- confirm
    against ``Parser.train``). After each round the model must be finalized
    and the feature extractor must not report a truthy ``node_dropout``.
    """
    model_path = "test_files/models/%s_%s%s" % (
        FORMATS[0], model_type, default_setting.suffix())
    remove_existing(model_path)
    config.update(default_setting.dict())
    corpus = [load_passage(path) for path in passage_files(FORMATS[0])]
    for mode in ("train", "load"):
        print("-- %sing %s" % (mode, model_type))
        config.update({"classifier": model_type, "copy_shared": None})
        parser = Parser(model_files=model_path, config=config)
        training_set = corpus if mode == "train" else None
        # Iterate training to completion; the yielded items are not inspected.
        for _ in parser.train(training_set, dev=corpus, test=True, iterations=2):
            pass
        assert parser.model.is_finalized, "Model should be finalized after %sing" % mode
        assert not getattr(parser.model.feature_extractor, "node_dropout", 0), \
            parser.model.feature_extractor.node_dropout