def test_serialize_config_language_specific():
    """Check config round-tripping for a factory registered on one language.

    A factory is registered on ``English`` only, added to a pipeline under a
    custom component name, saved to disk and loaded again. The reloaded
    pipeline must report the same factory and settings, and rebuilding the
    same config for another language code is expected to raise.
    """
    factory_name = "test_serialize_config_language_specific"

    @English.factory(factory_name, default_config={"foo": 20})
    def custom_factory(nlp: Language, name: str, foo: int):
        return lambda doc: doc

    # Registered on English only: the base class must not see the factory.
    assert not Language().has_factory(factory_name)
    english_nlp = English()
    assert english_nlp.has_factory(factory_name)

    # Override the default "foo" and give the component its own name.
    english_nlp.add_pipe(factory_name, config={"foo": 100}, name="bar")
    original_cfg = english_nlp.config["components"]["bar"]
    assert original_cfg["foo"] == 100
    assert original_cfg["factory"] == factory_name

    # Round trip through a temporary directory.
    with make_tempdir() as tmp_dir:
        english_nlp.to_disk(tmp_dir)
        reloaded_nlp = spacy.load(tmp_dir)

    assert reloaded_nlp.has_factory(factory_name)
    assert reloaded_nlp.pipe_names == ["bar"]
    assert reloaded_nlp.get_pipe_meta("bar").factory == factory_name
    reloaded_cfg = reloaded_nlp.config["components"]["bar"]
    assert reloaded_cfg["foo"] == 100
    assert reloaded_cfg["factory"] == factory_name

    # Re-parse the serialized config and point it at a different language.
    german_config = Config().from_str(reloaded_nlp.config.to_str())
    german_config["nlp"]["lang"] = "de"
    with pytest.raises(ValueError):
        # German doesn't have a factory, only English does
        load_model_from_config(german_config)
def test_pipe_factories_language_specific():
    """Check that language subclasses can own factories and fall back to
    the ones registered on the base class.

    One component name is registered on both ``Language`` and ``English``;
    a second name is registered on ``German`` only.
    """
    shared_name = "specific_component1"
    german_only_name = "specific_component2"
    Language.component(shared_name, func=lambda: "base")
    English.component(shared_name, func=lambda: "en")
    German.component(german_only_name, func=lambda: "de")

    # (language class, sees shared_name?, sees german_only_name?)
    expected_availability = (
        (Language, True, False),
        (English, True, False),
        (German, True, True),
    )
    for lang_cls, sees_shared, sees_german_only in expected_availability:
        assert bool(lang_cls.has_factory(shared_name)) == sees_shared
        assert bool(lang_cls.has_factory(german_only_name)) == sees_german_only

    # Base class: only its own registration is reachable.
    base_nlp = Language()
    assert base_nlp.create_pipe(shared_name)() == "base"
    with pytest.raises(ValueError):
        base_nlp.create_pipe(german_only_name)

    # English: its own registration wins over the base one.
    english_nlp = English()
    assert english_nlp.create_pipe(shared_name)() == "en"
    with pytest.raises(ValueError):
        english_nlp.create_pipe(german_only_name)

    # German: falls back to the base for the shared name, uses its own
    # registration for the second one.
    german_nlp = German()
    assert german_nlp.create_pipe(shared_name)() == "base"
    assert german_nlp.create_pipe(german_only_name)() == "de"
def test_component_decorator_assigns():
    """Check the ``assigns``/``requires`` metadata set via the component
    decorator, including for a second instance added under another name.

    Three components are registered with different ``requires`` and
    ``assigns`` lists. The test then checks the pipeline analysis output,
    the factory bookkeeping for a renamed instance ("c4", created from
    "c1"), and that the assembled pipeline can process a text.
    """

    @Language.component("c1", assigns=["token.tag", "doc.tensor"])
    def test_component1(doc):
        return doc

    @Language.component(
        "c2", requires=["token.tag", "token.pos"], assigns=["token.lemma", "doc.tensor"]
    )
    def test_component2(doc):
        return doc

    @Language.component(
        "c3", requires=["token.lemma"], assigns=["token._.custom_lemma"]
    )
    def test_component3(doc):
        return doc

    assert Language.has_factory("c1")
    assert Language.has_factory("c2")
    assert Language.has_factory("c3")

    nlp = Language()
    nlp.add_pipe("c1")
    nlp.add_pipe("c2")
    # "c2" requires token.pos, which no component in the pipeline assigns.
    problems = nlp.analyze_pipes()["problems"]
    assert problems["c2"] == ["token.pos"]
    nlp.add_pipe("c3")
    assert get_attr_info(nlp, "doc.tensor")["assigns"] == ["c1", "c2"]

    # Second instance of the "c1" factory under the component name "c4".
    nlp.add_pipe("c1", name="c4")
    test_component1_meta = nlp.get_pipe_meta("c1")
    assert test_component1_meta.factory == "c1"
    # Look the meta up by the *component* name "c4"; the original fetched
    # "c1" here, so the renamed instance's meta was never inspected.
    test_component4_meta = nlp.get_pipe_meta("c4")
    assert test_component4_meta.factory == "c1"
    assert nlp.pipe_names == ["c1", "c2", "c3", "c4"]
    # "c4" is only a component name, not a registered factory.
    assert not Language.has_factory("c4")
    assert nlp.pipe_factories["c1"] == "c1"
    assert nlp.pipe_factories["c4"] == "c1"
    assert get_attr_info(nlp, "doc.tensor")["assigns"] == ["c1", "c2", "c4"]
    assert get_attr_info(nlp, "token.pos")["requires"] == ["c2"]
    assert nlp("hello world")
def test_component_factories_class_func():
    """Check that a factory function can build a class-based component and
    is handed the nlp object together with the configured settings."""

    class PassThroughComponent:
        def __call__(self, doc):
            return doc

    # The mock stands in for the component constructor so the arguments
    # forwarded by the factory can be inspected afterwards.
    constructor_spy = Mock(return_value=PassThroughComponent())

    def c5_factory(nlp, foo: str = "bar", name="c5"):
        return constructor_spy(nlp, foo=foo)

    Language.factory("c5", func=c5_factory)
    assert Language.has_factory("c5")

    pipeline = Language()
    pipeline.add_pipe("c5", config={"foo": "bar"})
    assert pipeline("hello world")
    constructor_spy.assert_called_once_with(pipeline, foo="bar")