def add_tagger(self, tagger, name, additional_fields=None):
    r''' Add any kind of a tagger for tokens.

    Args:
        tagger (`object/function`):
            Any object/function that takes a spacy doc as an input, does
            something and returns the same doc.
        name (`str`):
            Name for this component in the pipeline.
        additional_fields (`List[str]`):
            Fields to be added to the `_` properties of a token.
    '''
    # Avoid a shared mutable default argument
    additional_fields = additional_fields or []

    component_factory_name = spacy.util.get_object_name(tagger)
    Language.factory(name=component_factory_name, default_config={"config": self.config}, func=tagger)
    self.nlp.add_pipe(component_factory_name, name='tag_' + name, first=True)

    # Add custom fields needed for this use case
    Token.set_extension('to_skip', default=False, force=True)

    # Add any additional fields that are required
    for field in additional_fields:
        Token.set_extension(field, default=False, force=True)
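# --- Usage sketch (not from the source above) ---------------------------------
# Illustrates how add_tagger is meant to be called. Because the tagger is
# registered through Language.factory with default_config={"config": ...},
# spaCy invokes it as a factory with (nlp, name, config) and expects the
# per-doc callable back. `skip_stopword_tagger` and `pipeline` (the object
# exposing add_tagger) are hypothetical names.
def skip_stopword_tagger(nlp, name, config):
    def tag(doc):
        # Mark stop words so downstream components can skip them via the
        # `to_skip` extension registered by add_tagger
        for token in doc:
            if token.is_stop:
                token._.to_skip = True
        return doc
    return tag

pipeline.add_tagger(skip_stopword_tagger, name='skip_stopwords')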
def test_language_factories_scores():
    name = "test_language_factories_scores"
    func = lambda nlp, name: lambda doc: doc
    weights1 = {"a1": 0.5, "a2": 0.5}
    weights2 = {"b1": 0.2, "b2": 0.7, "b3": 0.1}
    Language.factory(f"{name}1", default_score_weights=weights1, func=func)
    Language.factory(f"{name}2", default_score_weights=weights2, func=func)
    meta1 = Language.get_factory_meta(f"{name}1")
    assert meta1.default_score_weights == weights1
    meta2 = Language.get_factory_meta(f"{name}2")
    assert meta2.default_score_weights == weights2
    nlp = Language()
    nlp._config["training"]["score_weights"] = {}
    nlp.add_pipe(f"{name}1")
    nlp.add_pipe(f"{name}2")
    cfg = nlp.config["training"]
    expected_weights = {"a1": 0.25, "a2": 0.25, "b1": 0.1, "b2": 0.35, "b3": 0.05}
    assert cfg["score_weights"] == expected_weights
    # Test with custom defaults
    config = nlp.config.copy()
    config["training"]["score_weights"]["a1"] = 0.0
    config["training"]["score_weights"]["b3"] = 1.3
    nlp = English.from_config(config)
    score_weights = nlp.config["training"]["score_weights"]
    expected = {"a1": 0.0, "a2": 0.12, "b1": 0.05, "b2": 0.17, "b3": 0.65}
    assert score_weights == expected
    # Test with null values
    config = nlp.config.copy()
    config["training"]["score_weights"]["a1"] = None
    nlp = English.from_config(config)
    score_weights = nlp.config["training"]["score_weights"]
    expected = {"a1": None, "a2": 0.12, "b1": 0.05, "b2": 0.17, "b3": 0.66}
    assert score_weights == expected
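# Worked arithmetic behind the expected values above (derived from the asserts
# themselves): the default_score_weights of all components are pooled and
# normalized to sum to 1.0. weights1 and weights2 each sum to 1.0, so the
# pooled total is 2.0 and e.g. a1 = 0.5 / 2.0 = 0.25, b2 = 0.7 / 2.0 = 0.35.
# After the overrides (a1=0.0, b3=1.3) the pooled values
# 0.0 + 0.25 + 0.1 + 0.35 + 1.3 again sum to 2.0, giving a2 = 0.25 / 2.0
# ≈ 0.12 and b3 = 1.3 / 2.0 = 0.65. Setting a1=None excludes it from
# normalization; the remaining rounded weights sum to 0.99, so renormalizing
# yields b3 = 0.65 / 0.99 ≈ 0.66 while the smaller weights keep their
# rounded values.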
def __init__(self):
    # Load spacy model
    self.spacy_obj = spacy.load(
        "en_core_web_sm",
        disable=["tok2vec", "tagger", "attribute_ruler", "lemmatizer"])

    # Language detector factory function
    def get_lang_detector(nlp, name):
        return LanguageDetector(seed=42)

    # Create instance for language detection
    Language.factory("language_detector", func=get_lang_detector)
    self.spacy_obj.add_pipe('language_detector', last=True)
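# --- Usage sketch (not from the source above) ---------------------------------
# Assumes the class above was instantiated as `detector` and that
# LanguageDetector comes from spacy_langdetect, which stores its result on the
# Doc's `_.language` attribute.
doc = detector.spacy_obj("This is clearly an English sentence.")
print(doc._.language)  # e.g. {'language': 'en', 'score': 0.99...}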
def test_pipe_factories_config_excludes_nlp():
    """Test that the extra values we temporarily add to component config
    blocks/functions are removed and not copied around.
    """
    name = "test_pipe_factories_config_excludes_nlp"
    func = lambda nlp, name: lambda doc: doc
    Language.factory(name, func=func)
    config = {
        "nlp": {"lang": "en", "pipeline": [name]},
        "components": {name: {"factory": name}},
    }
    nlp = English.from_config(config)
    assert nlp.pipe_names == [name]
    pipe_cfg = nlp.get_pipe_config(name)
    assert pipe_cfg == {"factory": name}
    assert nlp._pipe_configs[name] == {"factory": name}
def register_benepar_component_factory():
    # Starting with spaCy 3.0, nlp.add_pipe no longer directly accepts
    # BeneparComponent instances. We must instead register a component factory.
    import spacy

    if spacy.__version__.startswith("2"):
        return

    from spacy.language import Language

    Language.factory(
        "benepar",
        default_config={
            "subbatch_max_tokens": 500,
            "disable_tagger": False,
        },
        func=create_benepar_component,
    )
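# --- Usage sketch (not from the source above) ---------------------------------
# Once the factory is registered, benepar is added by name. The "model" config
# key and the `parse_string` extension follow benepar's documented spaCy 3
# usage; the exact model name is an assumption.
import spacy

nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("benepar", config={"model": "benepar_en3"})
doc = nlp("The quick brown fox jumps over the lazy dog.")
sent = list(doc.sents)[0]
print(sent._.parse_string)  # bracketed constituency parse of the sentence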
def test_pipe_factories_decorator_idempotent(i, func, func2):
    """Check that decorator can be run multiple times if the function is the
    same. This is especially relevant for live reloading because we don't want
    spaCy to raise an error if a module registering components is reloaded.
    """
    name = f"test_pipe_factories_decorator_idempotent_{i}"
    for i in range(5):
        Language.factory(name, func=func)
    nlp = Language()
    nlp.add_pipe(name)
    Language.factory(name, func=func)
    # Make sure it also works for the component decorator, which creates the
    # factory function
    name2 = f"{name}2"
    for i in range(5):
        Language.component(name2, func=func2)
    nlp = Language()
    nlp.add_pipe(name2)
    Language.component(name2, func=func2)
def test_component_factories_class_func():
    """Test that class components can implement a from_nlp classmethod that
    gives them access to the nlp object and config via the factory."""

    class TestComponent5:
        def __call__(self, doc):
            return doc

    mock = Mock()
    mock.return_value = TestComponent5()

    def test_component5_factory(nlp, foo: str = "bar", name="c5"):
        return mock(nlp, foo=foo)

    Language.factory("c5", func=test_component5_factory)
    assert Language.has_factory("c5")
    nlp = Language()
    nlp.add_pipe("c5", config={"foo": "bar"})
    assert nlp("hello world")
    mock.assert_called_once_with(nlp, foo="bar")
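# --- Sketch (not from the source above) ----------------------------------------
# The docstring mentions a from_nlp classmethod, but the test itself goes
# through a mocked factory. A minimal, hypothetical shape of such a class
# component, wired up via Language.factory:
class ComponentWithFromNlp:
    def __init__(self, nlp, foo: str = "bar"):
        self.foo = foo

    @classmethod
    def from_nlp(cls, nlp, foo: str = "bar"):
        # Gives the class access to the nlp object at construction time
        return cls(nlp, foo=foo)

    def __call__(self, doc):
        return doc

Language.factory(
    "c5_from_nlp",
    func=lambda nlp, name, foo="bar": ComponentWithFromNlp.from_nlp(nlp, foo=foo),
)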
    'APRESENTAR': 0.0,
    'FALAR SOBRE SEMEAR': 0.0,
    'FALAR SOBRE ADA': 0.0,
    'MÚSICA': 0.0,
    'SOLETRAR': 0.0,
    'DANÇAR': 1.0
})
baseDeDadosFinal.append([texto, dic.copy()])

"""
# Sanity check on the dictionary
print(len(baseDeDadosFinal))
print(baseDeDadosFinal[233][0])
print(baseDeDadosFinal[233][1])
"""

"""
Language.factory("language_detector", func=get_lang_detector)
nlp.add_pipe('language_detector')
"""

# Create the classifier
modelo = spacy.blank('pt')
print(modelo.pipe_names)

"""
categorias = modelo.create_pipe("textcat")
categorias.add_label("ACORDAR")
categorias.add_label("DORMIR")
categorias.add_label("GIRAR")
categorias.add_label("LEVANTAR BRAÇOS")
categorias.add_label("PIADA")
categorias.add_label("FALAR O NOME")
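# --- Sketch (not from the source above) ----------------------------------------
# The commented-out block uses spaCy 2's create_pipe API. Under spaCy 3, the
# equivalent (an assumption about the intended pipeline) is add_pipe; the
# label set is taken from the commented code and the dictionary above:
categorias = modelo.add_pipe("textcat")
for rotulo in ["ACORDAR", "DORMIR", "GIRAR", "LEVANTAR BRAÇOS", "PIADA", "FALAR O NOME"]:
    categorias.add_label(rotulo)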
    def spacy_factory(
        nlp: object,
        name: str,
        callbacks: dict,
        sets: dict,
        map_doc: str,
        sort_length: bool,
        rules: str,
    ) -> SpacyCore:
        map_fn = registry.get(*map_doc.split("."))
        callbacks = {
            key: registry.get(*value.split("."))
            for key, value in callbacks.items()
        }
        core = SpacyCore(callbacks, sets, map_fn, sort_length)
        core.load(rules)
        return core

    Language.factory(
        "hmrb",
        default_config={
            "callbacks": {},
            "sets": {},
            "map_doc": _default_map,
            "sort_length": False,
            "rules": "",
        },
        func=spacy_factory,
    )
except (ImportError, AttributeError):
    logging.debug("disabling support for spaCy 3.0+")
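# --- Usage sketch (not from the source above) ---------------------------------
# Shape-only illustration of configuring the "hmrb" component; the config keys
# mirror the default_config above, but a real hmrb grammar string and
# registered callback paths must replace the placeholders before this runs.
import spacy

nlp = spacy.load("en_core_web_sm")
nlp.add_pipe(
    "hmrb",
    config={
        "rules": "...",   # placeholder for an hmrb grammar string
        "callbacks": {},  # e.g. {"label": "registry.path.to.callback"}
        "sets": {},
    },
)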
def test_initialize_arguments():
    name = "test_initialize_arguments"

    class CustomTokenizer:
        def __init__(self, tokenizer):
            self.tokenizer = tokenizer
            self.from_initialize = None

        def __call__(self, text):
            return self.tokenizer(text)

        def initialize(self, get_examples, nlp, custom: int):
            self.from_initialize = custom

    class Component:
        def __init__(self):
            self.from_initialize = None

        def initialize(self, get_examples, nlp, custom1: str, custom2: StrictBool = False):
            self.from_initialize = (custom1, custom2)

    Language.factory(name, func=lambda nlp, name: Component())
    nlp = English()
    nlp.tokenizer = CustomTokenizer(nlp.tokenizer)
    example = Example.from_dict(nlp("x"), {})
    get_examples = lambda: [example]
    nlp.add_pipe(name)
    # The settings here will typically come from the [initialize] block
    init_cfg = {"tokenizer": {"custom": 1}, "components": {name: {}}}
    nlp.config["initialize"].update(init_cfg)
    with pytest.raises(ConfigValidationError) as e:
        # Empty config for component, no required custom1 argument
        nlp.initialize(get_examples)
    errors = e.value.errors
    assert len(errors) == 1
    assert errors[0]["loc"] == ("custom1",)
    assert errors[0]["type"] == "value_error.missing"
    init_cfg = {
        "tokenizer": {"custom": 1},
        "components": {name: {"custom1": "x", "custom2": 1}},
    }
    nlp.config["initialize"].update(init_cfg)
    with pytest.raises(ConfigValidationError) as e:
        # Wrong type for custom2
        nlp.initialize(get_examples)
    errors = e.value.errors
    assert len(errors) == 1
    assert errors[0]["loc"] == ("custom2",)
    assert errors[0]["type"] == "value_error.strictbool"
    init_cfg = {
        "tokenizer": {"custom": 1},
        "components": {name: {"custom1": "x"}},
    }
    nlp.config["initialize"].update(init_cfg)
    nlp.initialize(get_examples)
    assert nlp.tokenizer.from_initialize == 1
    pipe = nlp.get_pipe(name)
    assert pipe.from_initialize == ("x", False)
for key, value in MAPPING_TO_FUNCTION.items():

    def create_term_extraction_component(nlp: Language, name: str, force, args,
                                         kwargs, local_value=value):
        # Bind `value` at definition time via a default argument to avoid the
        # cell-variable-defined-in-loop pitfall (PYL-W0640)
        return TermExtractionPipeline(nlp, local_value, force, *args, **kwargs)

    Language.factory(
        key,
        func=copy.copy(create_term_extraction_component),
        default_config={"force": True, "args": [], "kwargs": {}},
    )


class TermExtractionPipeline:
    """This is for adding PyATE as a spaCy pipeline component."""

    def __init__(self,
                 nlp,
                 func: Callable[..., pd.Series] = combo_basic,
                 force: bool = True,
                 *args,
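# --- Usage sketch (not from the source above) ---------------------------------
# Each key of MAPPING_TO_FUNCTION becomes a factory name; "combo_basic" is
# assumed to be one of them (it is the default `func` above). PyATE's
# convention is to expose the resulting pandas Series on the Doc under the
# pipe's name.
import spacy

nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("combo_basic")
doc = nlp("Term extraction finds domain-specific terms in running text.")
print(doc._.combo_basic.sort_values(ascending=False).head())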
import spacy
from spacy.matcher import Matcher
from spacy_langdetect import LanguageDetector
from spacy.language import Language
import pandas as pd
import difflib
from pdfminer.high_level import extract_text  # assumed source of extract_text used below

nlp = spacy.load('en_core_web_md')
matcher = Matcher(nlp.vocab)


def create_lang_detector(nlp, name):
    return LanguageDetector()


Language.factory("language_detector", func=create_lang_detector)
nlp.add_pipe("language_detector", last=True)


def text_from_pdf(pdf_file):
    return extract_text(pdf_file)


def extract_names(text):
    doc = nlp(text)
    if doc._.language['language'] == 'en':
        nlp_new = spacy.load('en_core_web_md')
    else:
        nlp_new = spacy.load('pl_core_news_md')
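# --- Usage sketch (not from the source above) ---------------------------------
# Call shape for the helpers above; "resume.pdf" is a hypothetical path, and
# extract_names is truncated above, so only the intended flow is shown.
text = text_from_pdf("resume.pdf")
extract_names(text)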