Exemple #1
0
def test_language_factories_scores():
    """Check how factory ``default_score_weights`` surface in the training config.

    Two factories are registered with their own default score weights. The test
    asserts the weights stored on each factory's meta, the combined
    ``score_weights`` once both pipes are added, and the resulting weights
    after reloading from a config with overridden and ``None`` entries.
    """
    base_name = "test_language_factories_scores"
    first_factory = f"{base_name}1"
    second_factory = f"{base_name}2"

    def make_identity(nlp, name):
        # Factory producing a component that returns the doc unchanged.
        return lambda doc: doc

    first_weights = {"a1": 0.5, "a2": 0.5}
    second_weights = {"b1": 0.2, "b2": 0.7, "b3": 0.1}
    Language.factory(
        first_factory, default_score_weights=first_weights, func=make_identity
    )
    Language.factory(
        second_factory, default_score_weights=second_weights, func=make_identity
    )
    first_meta = Language.get_factory_meta(first_factory)
    assert first_meta.default_score_weights == first_weights
    second_meta = Language.get_factory_meta(second_factory)
    assert second_meta.default_score_weights == second_weights

    # Start from empty score weights so only the two new pipes contribute.
    nlp = Language()
    nlp._config["training"]["score_weights"] = {}
    for factory_name in (first_factory, second_factory):
        nlp.add_pipe(factory_name)
    training_cfg = nlp.config["training"]
    combined = {"a1": 0.25, "a2": 0.25, "b1": 0.1, "b2": 0.35, "b3": 0.05}
    assert training_cfg["score_weights"] == combined

    # Test with custom defaults
    custom_config = nlp.config.copy()
    custom_weights = custom_config["training"]["score_weights"]
    custom_weights["a1"] = 0.0
    custom_weights["b3"] = 1.3
    nlp = English.from_config(custom_config)
    with_custom = {"a1": 0.0, "a2": 0.12, "b1": 0.05, "b2": 0.17, "b3": 0.65}
    assert nlp.config["training"]["score_weights"] == with_custom

    # Test with null values
    null_config = nlp.config.copy()
    null_config["training"]["score_weights"]["a1"] = None
    nlp = English.from_config(null_config)
    with_null = {"a1": None, "a2": 0.12, "b1": 0.05, "b2": 0.17, "b3": 0.66}
    assert nlp.config["training"]["score_weights"] == with_null
def test_pipe_factory_meta_config_cleanup():
    """Verify per-pipe meta and config bookkeeping across pipeline edits.

    Covers adding pipes (with and without a custom name), renaming, removing
    and replacing them, including the ValueError raised when a pipe is
    addressed by a name that is not in the pipeline.
    """
    nlp = Language()
    nlp.add_pipe("ner", name="ner_component")
    nlp.add_pipe("textcat")

    # Each entry is (factory name, name of the pipe in the pipeline).
    for factory_name, pipe_name in (("ner", "ner_component"), ("textcat", "textcat")):
        assert nlp.get_factory_meta(factory_name)
        assert nlp.get_pipe_meta(pipe_name)
        assert nlp.get_pipe_config(pipe_name)

    # After renaming, meta and config must be reachable under the new name.
    nlp.rename_pipe("textcat", "tc")
    for lookup in (nlp.get_pipe_meta, nlp.get_pipe_config):
        assert lookup("tc")

    # "ner" is the factory name, not the pipe name, so removal must fail.
    with pytest.raises(ValueError):
        nlp.remove_pipe("ner")
    nlp.remove_pipe("ner_component")
    for internal_table in (nlp._pipe_meta, nlp._pipe_configs):
        assert "ner_component" not in internal_table

    # "textcat" was renamed to "tc", so the old name can no longer be replaced.
    with pytest.raises(ValueError):
        nlp.replace_pipe("textcat", "parser")
    nlp.replace_pipe("tc", "parser")
    assert nlp.get_factory_meta("parser")
    replaced_meta = nlp.get_pipe_meta("tc")
    assert replaced_meta.factory == "parser"
def test_pipe_function_component():
    """Register a plain function via ``Language.component`` and add it by name.

    Asserts that the function is registered as a factory, that passing the
    callable itself to ``add_pipe`` raises ValueError, and that both
    ``get_pipe`` and ``create_pipe`` hand back the registered function.
    """
    component_name = "test_component"

    @Language.component(component_name)
    def component(doc: Doc) -> Doc:
        return doc

    assert component_name in registry.factories

    nlp = Language()
    # Only the registered string name is accepted, not the callable.
    with pytest.raises(ValueError):
        nlp.add_pipe(component)
    nlp.add_pipe(component_name)

    assert component_name in nlp.pipe_names
    assert nlp.pipe_factories[component_name] == component_name
    assert Language.get_factory_meta(component_name)
    assert nlp.get_pipe_meta(component_name)

    from_pipeline = nlp.get_pipe(component_name)
    assert from_pipeline == component
    freshly_created = nlp.create_pipe(component_name)
    assert freshly_created == component
def test_pipe_class_component_init():
    """Exercise class-based components registered via ``Language.factory``.

    One class is decorated directly, the other is wrapped in a decorated
    factory function. For both, the test checks registration, the ValueError
    on adding an instance instead of a name, and that ``get_pipe`` and
    ``create_pipe`` return instances holding a ``Language`` object.
    """
    name1 = "test_class_component1"
    name2 = "test_class_component2"

    @Language.factory(name1)
    class Component1:
        def __init__(self, nlp: Language, name: str):
            self.nlp = nlp

        def __call__(self, doc: Doc) -> Doc:
            return doc

    class Component2:
        def __init__(self, nlp: Language, name: str):
            self.nlp = nlp

        def __call__(self, doc: Doc) -> Doc:
            return doc

    @Language.factory(name2)
    def factory(nlp: Language, name=name2):
        return Component2(nlp, name)

    nlp = Language()
    registered = ((name1, Component1), (name2, Component2))
    for pipe_name, component_cls in registered:
        assert pipe_name in registry.factories
        # An already constructed instance must be rejected by add_pipe.
        with pytest.raises(ValueError):
            nlp.add_pipe(component_cls(nlp, pipe_name))
        nlp.add_pipe(pipe_name)
        assert pipe_name in nlp.pipe_names
        assert nlp.pipe_factories[pipe_name] == pipe_name
        assert Language.get_factory_meta(pipe_name)
        assert nlp.get_pipe_meta(pipe_name)
        # The pipe in the pipeline and a newly created one get the same checks.
        for obtain in (nlp.get_pipe, nlp.create_pipe):
            instance = obtain(pipe_name)
            assert isinstance(instance, component_cls)
            assert isinstance(instance.nlp, Language)
Exemple #5
0
    def test_add_meta_cat(self):
        """Add the shared meta cat and check the factory registered under its name."""
        PipeTests.undertest.add_meta_cat(PipeTests.meta_cat, "cat_name")

        component_name = PipeTests.meta_cat.name
        factory_meta = Language.get_factory_meta(component_name)
        self.assertEqual(component_name, factory_meta.factory)
Exemple #6
0
    def test_add_linker(self):
        """Add the shared linker and check the factory registered under its name."""
        PipeTests.undertest.add_linker(PipeTests.linker)

        component_name = PipeTests.linker.name
        factory_meta = Language.get_factory_meta(component_name)
        self.assertEqual(component_name, factory_meta.factory)
Exemple #7
0
    def test_add_token_normalizer(self):
        """Add a token normalizer and check that its factory is registered."""
        PipeTests.undertest.add_token_normalizer(
            PipeTests.config,
            spell_checker=PipeTests.spell_checker,
        )

        factory_meta = Language.get_factory_meta("token_normalizer")
        self.assertEqual("token_normalizer", factory_meta.factory)
Exemple #8
0
    def test_add_tagger(self):
        """Add a tagger function and check its registered factory and default config."""
        PipeTests.undertest.add_tagger(
            tagger=tag_skip_and_punct,
            name="tag_skip_and_punct",
            additional_fields=["is_punct"],
        )

        tagger_name = tag_skip_and_punct.__name__
        factory_meta = Language.get_factory_meta(tagger_name)
        self.assertEqual(tagger_name, factory_meta.factory)
        self.assertEqual(PipeTests.config, factory_meta.default_config["config"])