def __init__(
    self,
    config: Config,
    *,
    rank: int = 0,
    num_workers: int = 1,
    use_gpu: int = 0,
    ray=None,
):
    if ray is None:
        # Avoid importing ray in the module. This allows a stub ray to be
        # passed in for tests, and speeds up the CLI.
        import ray  # type: ignore
    self.ray = ray
    self.rank = rank
    self.num_workers = num_workers
    self.gpu_id = self._resolve_gpu(use_gpu)
    self.nlp = init_nlp(Config(config), use_gpu=self.gpu_id)
    config = self.nlp.config.interpolate()
    self.T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
    # Resolve the corpus readers referenced by dot-name in the training section.
    dot_names = [self.T["train_corpus"], self.T["dev_corpus"]]
    self.train_corpus, self.dev_corpus = resolve_dot_names(config, dot_names)
    self.before_to_disk = create_before_to_disk_callback(self.T["before_to_disk"])
    allocator = self.T["gpu_allocator"]
    if use_gpu >= 0 and allocator:
        set_gpu_allocator(allocator)
    self._evaluation_callback = lambda: {}
    self._results = []
    self._has_evaluation_callback = False
    self.thread = None
    self.proxy = None
    # Bookkeeping for how many gradients were applied vs. discarded.
    self.n_grads_used = 0
    self.n_grads_discarded = 0
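# The constructor above calls self._resolve_gpu(), which is not part of this
# excerpt. Below is a hypothetical sketch of such a helper, assuming Ray pins
# each worker to a single device via CUDA_VISIBLE_DEVICES; the name and
# behaviour are illustrative, not the actual implementation. It would live on
# the same worker class as __init__ above.
import os

from thinc.api import require_gpu


def _resolve_gpu(self, use_gpu: int) -> int:
    # With CUDA_VISIBLE_DEVICES restricted to one device, the worker activates
    # local device 0 and returns the global id for logging; -1 means CPU.
    if use_gpu >= 0:
        gpu_id = int(os.environ.get("CUDA_VISIBLE_DEVICES", -1))
        if gpu_id >= 0:
            require_gpu(0)
        return gpu_id
    return -1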
def test_init_nlp(config_string):
    @spacy.registry.readers.register("toy_tagger_data.v1")
    def read_tagger_data():
        # Build tiny train/dev corpus readers from the module-level
        # TRAIN_TAGGER_DATA fixture.
        def parse_data(nlp, index):
            ex = TRAIN_TAGGER_DATA[index]
            yield Example.from_dict(nlp.make_doc(ex[0]), ex[1])

        return {
            "train": partial(parse_data, index=0),
            "dev": partial(parse_data, index=1),
        }

    config = spacy.util.load_config_from_str(config_string, interpolate=False)
    config = Config(DEFAULT_CONFIG, section_order=CONFIG_SECTION_ORDER).merge(config)
    # init_nlp expects a GPU id: -1 selects the CPU.
    nlp = init_nlp(config, use_gpu=-1)
    assert nlp is not None
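# TRAIN_TAGGER_DATA is a module-level fixture that is not shown in this
# excerpt. An illustrative (assumed) shape, matching how parse_data() above
# indexes into it and builds Example objects from (text, annotations) pairs:
TRAIN_TAGGER_DATA = [
    ("I like green eggs", {"tags": ["N", "V", "J", "N"]}),
    ("Eat blue ham", {"tags": ["V", "J", "N"]}),
]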
def test_optimizer_config():
    cfg = Config().from_bytes(OPTIMIZER_CFG.encode("utf8"))
    # Resolving the config constructs the optimizer; invalid settings in the
    # [optimizer] block raise here.
    optimizer = registry.resolve(cfg, validate=True)["optimizer"]
    assert optimizer is not None
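# OPTIMIZER_CFG is defined elsewhere in the test module. An illustrative
# (assumed) value that the test above would parse: a thinc [optimizer] block
# with a learn-rate schedule. The exact settings here are an assumption.
OPTIMIZER_CFG = """
[optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
use_averages = true

[optimizer.learn_rate]
@schedules = "warmup_linear.v1"
initial_rate = 0.001
warmup_steps = 250
total_steps = 20000
"""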
def test_read_config():
    byte_string = EXAMPLE_CONFIG.encode("utf8")
    cfg = Config().from_bytes(byte_string)
    assert cfg["optimizer"]["learn_rate"]["start"] == 0.1
    assert cfg["pipeline"]["parser"]["factory"] == "parser"
    assert cfg["pipeline"]["parser"]["model"]["tok2vec"]["width"] == 128
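# EXAMPLE_CONFIG is also a module-level fixture not shown here. An
# illustrative (assumed) excerpt consistent with the assertions in
# test_read_config() above; only the asserted keys are sketched, the real
# string contains much more.
EXAMPLE_CONFIG_EXCERPT = """
[optimizer]

[optimizer.learn_rate]
start = 0.1

[pipeline]

[pipeline.parser]
factory = "parser"

[pipeline.parser.model]

[pipeline.parser.model.tok2vec]
width = 128
"""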