def test_language_factories_scores():
    """Check how factory ``default_score_weights`` end up in the training config.

    Two factories are registered with their own default score weights. The
    test asserts that the factory meta stores those weights unchanged, that
    adding both pipes yields the expected combined ``score_weights`` in
    ``nlp.config["training"]``, and that values overridden in a copied config
    (a number or ``None``) produce the expected weights after
    ``English.from_config``.
    """
    name = "test_language_factories_scores"
    first_factory = f"{name}1"
    second_factory = f"{name}2"

    def func(nlp, name):
        # The component itself is irrelevant here: it hands the doc back as-is.
        return lambda doc: doc

    weights1 = {"a1": 0.5, "a2": 0.5}
    weights2 = {"b1": 0.2, "b2": 0.7, "b3": 0.1}
    Language.factory(first_factory, default_score_weights=weights1, func=func)
    Language.factory(second_factory, default_score_weights=weights2, func=func)
    assert Language.get_factory_meta(first_factory).default_score_weights == weights1
    assert Language.get_factory_meta(second_factory).default_score_weights == weights2

    nlp = Language()
    # Start from an empty mapping so only the two pipes added below contribute.
    nlp._config["training"]["score_weights"] = {}
    nlp.add_pipe(first_factory)
    nlp.add_pipe(second_factory)
    combined = nlp.config["training"]["score_weights"]
    assert combined == {"a1": 0.25, "a2": 0.25, "b1": 0.1, "b2": 0.35, "b3": 0.05}

    # Test with custom defaults
    config = nlp.config.copy()
    overrides = config["training"]["score_weights"]
    overrides["a1"] = 0.0
    overrides["b3"] = 1.3
    nlp = English.from_config(config)
    resolved = nlp.config["training"]["score_weights"]
    assert resolved == {"a1": 0.0, "a2": 0.12, "b1": 0.05, "b2": 0.17, "b3": 0.65}

    # Test with null values
    config = nlp.config.copy()
    config["training"]["score_weights"]["a1"] = None
    nlp = English.from_config(config)
    resolved = nlp.config["training"]["score_weights"]
    assert resolved == {"a1": None, "a2": 0.12, "b1": 0.05, "b2": 0.17, "b3": 0.66}
def test_pipe_factory_meta_config_cleanup():
    """Verify per-component meta and config bookkeeping on the pipeline.

    Covers adding pipes (with and without a custom name), renaming, removing
    and replacing them, and checks that the meta/config entries follow the
    component name and are dropped once the pipe is removed.
    """
    nlp = Language()
    nlp.add_pipe("ner", name="ner_component")
    nlp.add_pipe("textcat")
    # (factory name, pipe name) pairs that must be fully registered.
    registered = (("ner", "ner_component"), ("textcat", "textcat"))
    for factory_name, pipe_name in registered:
        assert nlp.get_factory_meta(factory_name)
        assert nlp.get_pipe_meta(pipe_name)
        assert nlp.get_pipe_config(pipe_name)

    nlp.rename_pipe("textcat", "tc")
    assert nlp.get_pipe_meta("tc")
    assert nlp.get_pipe_config("tc")

    # "ner" is the factory name, not the name the pipe was added under.
    with pytest.raises(ValueError):
        nlp.remove_pipe("ner")
    nlp.remove_pipe("ner_component")
    for store in (nlp._pipe_meta, nlp._pipe_configs):
        assert "ner_component" not in store

    # The pipe was renamed to "tc", so the old name can no longer be replaced.
    with pytest.raises(ValueError):
        nlp.replace_pipe("textcat", "parser")
    nlp.replace_pipe("tc", "parser")
    assert nlp.get_factory_meta("parser")
    assert nlp.get_pipe_meta("tc").factory == "parser"
def test_pipe_function_component():
    """Register a plain function via ``Language.component`` and add it by name.

    Passing the function object itself to ``add_pipe`` must raise
    ``ValueError``; adding it by its registered name must expose it through
    the pipe names, pipe factories, factory meta and pipe meta, and both
    ``get_pipe`` and ``create_pipe`` must hand back the original function.
    """
    name = "test_component"

    @Language.component(name)
    def passthrough(doc: Doc) -> Doc:
        return doc

    assert name in registry.factories

    nlp = Language()
    with pytest.raises(ValueError):
        nlp.add_pipe(passthrough)
    nlp.add_pipe(name)

    assert name in nlp.pipe_names
    assert nlp.pipe_factories[name] == name
    assert Language.get_factory_meta(name)
    assert nlp.get_pipe_meta(name)
    assert nlp.get_pipe(name) == passthrough
    assert nlp.create_pipe(name) == passthrough
def test_pipe_class_component_init():
    """Exercise class-based components registered through ``Language.factory``.

    ``Component1`` is registered by decorating the class directly, while
    ``Component2`` is produced by a decorated factory function. For both, an
    already-constructed instance passed to ``add_pipe`` must raise
    ``ValueError``, and adding by name must yield instances of the expected
    class that hold a ``Language`` object in ``.nlp``.
    """
    name1 = "test_class_component1"
    name2 = "test_class_component2"

    @Language.factory(name1)
    class Component1:
        def __init__(self, nlp: Language, name: str):
            self.nlp = nlp

        def __call__(self, doc: Doc) -> Doc:
            return doc

    class Component2:
        def __init__(self, nlp: Language, name: str):
            self.nlp = nlp

        def __call__(self, doc: Doc) -> Doc:
            return doc

    @Language.factory(name2)
    def make_component2(nlp: Language, name=name2):
        return Component2(nlp, name)

    nlp = Language()
    expected_classes = {name1: Component1, name2: Component2}
    for factory_name, component_cls in expected_classes.items():
        assert factory_name in registry.factories
        with pytest.raises(ValueError):
            nlp.add_pipe(component_cls(nlp, factory_name))
        nlp.add_pipe(factory_name)

        assert factory_name in nlp.pipe_names
        assert nlp.pipe_factories[factory_name] == factory_name
        assert Language.get_factory_meta(factory_name)
        assert nlp.get_pipe_meta(factory_name)

        added = nlp.get_pipe(factory_name)
        assert isinstance(added, component_cls)
        assert isinstance(added.nlp, Language)

        created = nlp.create_pipe(factory_name)
        assert isinstance(created, component_cls)
        assert isinstance(created.nlp, Language)
def test_add_meta_cat(self):
    """Add the meta-cat component and check its registered factory name.

    After ``add_meta_cat`` runs, the factory meta looked up under the
    component's ``name`` must report that same name as its ``factory``.
    """
    PipeTests.undertest.add_meta_cat(PipeTests.meta_cat, "cat_name")
    component_name = PipeTests.meta_cat.name
    factory_meta = Language.get_factory_meta(component_name)
    self.assertEqual(component_name, factory_meta.factory)
def test_add_linker(self):
    """Add the linker component and check its registered factory name.

    After ``add_linker`` runs, the factory meta looked up under the linker's
    ``name`` must report that same name as its ``factory``.
    """
    PipeTests.undertest.add_linker(PipeTests.linker)
    component_name = PipeTests.linker.name
    factory_meta = Language.get_factory_meta(component_name)
    self.assertEqual(component_name, factory_meta.factory)
def test_add_token_normalizer(self):
    """Add the token normalizer and check it is registered as a factory.

    The factory meta stored under ``"token_normalizer"`` must name
    ``"token_normalizer"`` as its ``factory``.
    """
    PipeTests.undertest.add_token_normalizer(
        PipeTests.config,
        spell_checker=PipeTests.spell_checker,
    )
    factory_meta = Language.get_factory_meta("token_normalizer")
    self.assertEqual("token_normalizer", factory_meta.factory)
def test_add_tagger(self):
    """Add the ``tag_skip_and_punct`` tagger and check its factory meta.

    The factory meta registered under the tagger function's ``__name__`` must
    report that name as its ``factory`` and carry ``PipeTests.config`` under
    the ``"config"`` key of its default config.
    """
    PipeTests.undertest.add_tagger(
        tagger=tag_skip_and_punct,
        name="tag_skip_and_punct",
        additional_fields=["is_punct"],
    )
    tagger_name = tag_skip_and_punct.__name__
    factory_meta = Language.get_factory_meta(tagger_name)
    self.assertEqual(tagger_name, factory_meta.factory)
    self.assertEqual(PipeTests.config, factory_meta.default_config["config"])