def test_serialize_custom_trainable_pipe():
    """Check to_bytes/to_disk behaviour of custom TrainablePipe subclasses.

    Two deliberately incomplete subclasses (one that sets nothing in
    ``__init__``, one that sets ``model`` to ``None``) are expected to raise
    ``ValueError`` when serialized. A subclass that stores both ``vocab`` and
    ``model`` must round-trip through bytes and through disk to the same
    byte string.
    """

    class BadCustomPipe1(TrainablePipe):
        # Sets neither ``vocab`` nor ``model``.
        def __init__(self, vocab):
            pass

    class BadCustomPipe2(TrainablePipe):
        # Sets ``vocab`` but leaves ``model`` as None.
        def __init__(self, vocab):
            self.vocab = vocab
            self.model = None

    class CustomPipe(TrainablePipe):
        def __init__(self, vocab, model):
            self.vocab = vocab
            self.model = model

    # Both incomplete pipes must refuse to serialize, to bytes and to disk.
    for broken_cls in (BadCustomPipe1, BadCustomPipe2):
        broken = broken_cls(Vocab())
        with pytest.raises(ValueError):
            broken.to_bytes()
        with make_tempdir() as tmp_dir:
            with pytest.raises(ValueError):
                broken.to_disk(tmp_dir)

    # A complete pipe round-trips through bytes...
    reference = CustomPipe(Vocab(), Linear())
    reference_bytes = reference.to_bytes()
    restored = CustomPipe(Vocab(), Linear()).from_bytes(reference_bytes)
    assert restored.to_bytes() == reference_bytes

    # ...and through a directory on disk.
    with make_tempdir() as tmp_dir:
        reference.to_disk(tmp_dir)
        restored = CustomPipe(Vocab(), Linear()).from_disk(tmp_dir)
    assert restored.to_bytes() == reference_bytes
# NOTE(review): a second function with this exact name is defined right after
# this one; if both really live in the same module, the later definition
# replaces this one and this body is never collected — confirm.
def test_serialize_parser_strings(Parser):
    """Check that a parser label reaches the target vocab's strings.

    A label is added to one parser, the parser is serialized to bytes with
    ``"vocab"`` excluded, and the bytes are loaded into a second parser built
    on a fresh ``Vocab``. The label must then be present in the second
    parser's ``vocab.strings``.
    """
    label = "FunnyLabel"

    source_vocab = Vocab()
    assert label not in source_vocab.strings
    model = registry.resolve({"model": DEFAULT_PARSER_MODEL}, validate=True)["model"]
    source_parser = Parser(source_vocab, model)
    source_parser.add_label(label)
    assert label in source_parser.vocab.strings

    target_vocab = Vocab()
    assert label not in target_vocab.strings
    target_parser = Parser(target_vocab, model)
    # Serialize only after the target parser exists, as the two share ``model``.
    payload = source_parser.to_bytes(exclude=["vocab"])
    target_parser = target_parser.from_bytes(payload)
    assert label in target_parser.vocab.strings
def test_serialize_parser_strings(Parser):
    """Check that a parser label reaches the target vocab's strings.

    Both parsers are constructed with the same explicit keyword settings. A
    label is added to the first parser, which is serialized to bytes with
    ``"vocab"`` excluded; after loading those bytes into the second parser
    (built on a fresh ``Vocab``), the label must be present in the second
    parser's ``vocab.strings``.
    """
    label = "FunnyLabel"

    source_vocab = Vocab()
    assert label not in source_vocab.strings

    # Keyword arguments passed to both Parser constructors.
    parser_kwargs = {
        "learn_tokens": False,
        "min_action_freq": 0,
        "update_with_oracle_cut_size": 100,
        "beam_width": 1,
        "beam_update_prob": 1.0,
        "beam_density": 0.0,
    }
    model = registry.resolve({"model": DEFAULT_PARSER_MODEL}, validate=True)["model"]

    source_parser = Parser(source_vocab, model, **parser_kwargs)
    source_parser.add_label(label)
    assert label in source_parser.vocab.strings

    target_vocab = Vocab()
    assert label not in target_vocab.strings
    target_parser = Parser(target_vocab, model, **parser_kwargs)
    # Serialize only after the target parser exists, as the two share ``model``.
    payload = source_parser.to_bytes(exclude=["vocab"])
    target_parser = target_parser.from_bytes(payload)
    assert label in target_parser.vocab.strings
def test_issue4725_1():
    """Ensure the pickling of the NER goes well.

    Creates an "ner" pipe with a non-default ``update_with_oracle_cut_size``,
    pickles it to a temporary file, and checks the setting is still present
    in ``cfg`` both on the original pipe after dumping and on the unpickled
    copy.
    """
    expected_cut_size = 111

    nlp = English(vocab=Vocab(vectors_name="test_vocab_add_vector"))
    ner = nlp.create_pipe(
        "ner", config={"update_with_oracle_cut_size": expected_cut_size}
    )

    with make_tempdir() as tmp_path:
        pickle_path = tmp_path / "ner.pkl"

        with pickle_path.open("wb") as file_:
            pickle.dump(ner, file_)
            # Dumping must not disturb the original pipe's config.
            assert ner.cfg["update_with_oracle_cut_size"] == expected_cut_size

        with pickle_path.open("rb") as file_:
            ner2 = pickle.load(file_)
            assert ner2.cfg["update_with_oracle_cut_size"] == expected_cut_size