コード例 #1
0
ファイル: test_serialize_config.py プロジェクト: EricM2/venv
def test_config_optional_sections():
    """Check that the optional "pretraining" section fills in as an empty dict.

    The optional block must not default to None: that would (rightly) cause
    an error, because it would produce a top-level key that is not a section
    (dict). The serialize/parse roundtrip at the end is also how
    Config.interpolate works under the hood.
    """
    parsed = Config().from_str(nlp_config_string)
    merged = DEFAULT_CONFIG.merge(parsed)
    # The section is absent before the config is filled against the schema.
    assert "pretraining" not in merged

    filled = registry.fill(merged, schema=ConfigSchema, validate=False)

    # Roundtrip through the string representation and inspect the result.
    serialized = filled.to_str()
    reloaded = Config().from_str(serialized)
    assert reloaded["pretraining"] == {}
コード例 #2
0
    def __init__(
        self,
        udpipe_model: UDPipeModel,
        meta: Optional[Dict] = None,
        **kwargs
    ):
        """Set up a Language object backed by a loaded UDPipe model.

        The language code is prefixed with "udpipe_" (e.g. "udpipe_en"
        rather than "en") in order to avoid any potential conflicts with
        spaCy's built-in languages. Using entry points, this enables
        serializing and deserializing the language class: a
        "lang": "udpipe_en" entry in the meta.json will automatically
        instantiate this class if this package is available.

        udpipe_model: The loaded UDPipe model.
        meta: spaCy model metadata. When None, the UDPipe model's own
            `_meta` is used; otherwise a shallow copy of the given
            mapping is stored.
        kwargs: Optional config parameters. The keys read here are
            "ignore_tag_map" (default False; when truthy, the defaults'
            tag map is replaced with an empty dict) and "max_length"
            (default 10**6).
        """
        self.udpipe = udpipe_model
        lang_code = udpipe_model._lang
        self.Defaults = get_defaults(lang=lang_code)
        self.lang = f"udpipe_{lang_code}"
        if kwargs.get("ignore_tag_map", False):
            self.Defaults.tag_map = {}  # workaround for ValueError: [E167]

        if not SPACY_V3:
            # Older spaCy: the vocab comes from the Defaults object.
            self.vocab = self.Defaults.create_vocab()
            self.pipeline = []
        else:
            # spaCy v3: imported lazily because these names are only
            # needed on this branch.
            from spacy.vocab import create_vocab
            from spacy.language import DEFAULT_CONFIG

            self.vocab = create_vocab(lang_code, self.Defaults)
            self.batch_size = 1000
            self._components = []
            self._disabled = set()
            self._config = DEFAULT_CONFIG.merge(self.default_config)

        # The tokenizer needs the vocab, so it is built after the branch above.
        self.tokenizer = UDPipeTokenizer(model=self.udpipe, vocab=self.vocab)
        self.max_length = kwargs.get("max_length", 10**6)
        self._meta = dict(meta) if meta is not None else self.udpipe._meta
        self._path = None
        self._optimizer = None