def parser(vocab): vocab.strings.add("ROOT") config = { "learn_tokens": False, "min_action_freq": 30, "update_with_oracle_cut_size": 100, } cfg = {"model": DEFAULT_PARSER_MODEL} model = registry.resolve(cfg, validate=True)["model"] parser = DependencyParser(vocab, model, **config) parser.cfg["token_vector_width"] = 4 parser.cfg["hidden_width"] = 32 # parser.add_label('right') parser.add_label("left") parser.initialize(lambda: [_parser_example(parser)]) sgd = Adam(0.001) for i in range(10): losses = {} doc = Doc(vocab, words=["a", "b", "c", "d"]) example = Example.from_dict(doc, { "heads": [1, 1, 3, 3], "deps": ["left", "ROOT", "left", "ROOT"] }) parser.update([example], sgd=sgd, losses=losses) return parser
def test_issue3830_with_subtok():
    """Test that the parser does have subtok label if learn_tokens=True."""
    config = {
        "learn_tokens": True,
    }
    model = registry.resolve({"model": DEFAULT_PARSER_MODEL}, validate=True)["model"]
    parser = DependencyParser(Vocab(), model, **config)
    parser.add_label("nsubj")
    assert "subtok" not in parser.labels
    parser.initialize(lambda: [_parser_example(parser)])
    assert "subtok" in parser.labels
def parser(en_vocab):
    config = {
        "learn_tokens": False,
        "min_action_freq": 30,
        "update_with_oracle_cut_size": 100,
        "beam_width": 1,
        "beam_update_prob": 1.0,
        "beam_density": 0.0,
    }
    cfg = {"model": DEFAULT_PARSER_MODEL}
    model = registry.resolve(cfg, validate=True)["model"]
    parser = DependencyParser(en_vocab, model, **config)
    parser.add_label("nsubj")
    return parser
def test_issue3830_no_subtok():
    """Test that the parser doesn't have subtok label if not learn_tokens."""
    config = {
        "learn_tokens": False,
        "min_action_freq": 30,
        "update_with_oracle_cut_size": 100,
    }
    model = registry.resolve({"model": DEFAULT_PARSER_MODEL}, validate=True)["model"]
    parser = DependencyParser(Vocab(), model, **config)
    parser.add_label("nsubj")
    assert "subtok" not in parser.labels
    parser.initialize(lambda: [_parser_example(parser)])
    assert "subtok" not in parser.labels
def parser(vocab):
    # Legacy variant of the fixture above, targeting the spaCy v2-style API
    # (GoldParse, begin_training, and the thinc v7 Adam(NumpyOps(), ...) optimizer).
    parser = DependencyParser(vocab)
    parser.cfg["token_vector_width"] = 4
    parser.cfg["hidden_width"] = 32
    # parser.add_label('right')
    parser.add_label("left")
    parser.begin_training([], **parser.cfg)
    sgd = Adam(NumpyOps(), 0.001)
    for i in range(10):
        losses = {}
        doc = Doc(vocab, words=["a", "b", "c", "d"])
        gold = GoldParse(doc, heads=[1, 1, 3, 3], deps=["left", "ROOT", "left", "ROOT"])
        parser.update([doc], [gold], sgd=sgd, losses=losses)
    return parser
def parser(en_vocab):
    parser = DependencyParser(en_vocab)
    parser.add_label("nsubj")
    parser.model, cfg = parser.Model(parser.moves.n_moves)
    parser.cfg.update(cfg)
    return parser
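# Hypothetical usage sketch (not part of the original file): a test consuming
# the v3-style `parser(en_vocab)` fixture defined above, assuming the `parser`
# functions are registered as pytest fixtures (e.g. with @pytest.fixture).
def test_parser_fixture_has_nsubj_label(parser):
    # The fixture adds "nsubj" via parser.add_label before returning.
    assert "nsubj" in parser.labels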