def __init__(
    self,
    config: Config,
    *,
    rank: int = 0,
    num_workers: int = 1,
    use_gpu: int = 0,
    ray=None,
):
    """Build the pipeline, resolve the corpora and reset worker state.

    Args:
        config: Training configuration. It is wrapped in ``Config`` and
            handed to ``init_nlp``; the interpolated config of the
            resulting pipeline is used for everything afterwards.
        rank: Stored as ``self.rank`` (presumably this worker's index
            among ``num_workers`` -- confirm against the caller).
        num_workers: Stored as ``self.num_workers``.
        use_gpu: Requested GPU id. It is passed through
            ``self._resolve_gpu`` to obtain ``self.gpu_id``; a negative
            value skips the GPU allocator setup.
        ray: Optional stand-in for the ``ray`` module (e.g. a test
            double). When ``None``, the real module is imported lazily.
    """
    if ray is None:
        # Avoid importing ray in the module. This allows a test-ray to
        # be passed in, and speeds up the CLI.
        import ray  # type: ignore

    # Keep a handle on whichever ray object is in use, imported or
    # injected. Assigned before _resolve_gpu runs in case that helper
    # consults self.ray (not visible from here -- confirm).
    self.ray = ray
    self.rank = rank
    self.num_workers = num_workers
    self.gpu_id = self._resolve_gpu(use_gpu)
    self.nlp = init_nlp(Config(config), use_gpu=self.gpu_id)

    # From here on, work with the interpolated config of the built pipeline.
    config = self.nlp.config.interpolate()
    self.T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
    dot_names = [self.T["train_corpus"], self.T["dev_corpus"]]
    self.train_corpus, self.dev_corpus = resolve_dot_names(config, dot_names)
    self.before_to_disk = create_before_to_disk_callback(self.T["before_to_disk"])

    allocator = self.T["gpu_allocator"]
    if use_gpu >= 0 and allocator:
        set_gpu_allocator(allocator)

    # Evaluation defaults to a no-op callback returning an empty dict.
    self._evaluation_callback = lambda: {}
    self._results = []
    self._has_evaluation_callback = False
    self.thread = None
    self.proxy = None
    self.n_grads_used = 0
    self.n_grads_discarded = 0
def test_issue6908(component_name):
    """Test initializing textcat with labels in a list"""
    with make_tempdir() as tmp_dir:
        corpus_path = tmp_dir / "train.spacy"

        # Serialize a single categorized doc as the training corpus.
        blank_nlp = spacy.blank("en")
        sample = blank_nlp.make_doc("Some text")
        sample.cats = {"label1": 0, "label2": 1}
        payload = DocBin(docs=[sample]).to_bytes()
        with corpus_path.open("wb") as handle:
            handle.write(payload)

        # Fill in the component name and corpus location, then initialize.
        config_text = CONFIG_ISSUE_6908.replace(
            "TEXTCAT_PLACEHOLDER", component_name
        ).replace("TRAIN_PLACEHOLDER", corpus_path.as_posix())
        init_nlp(load_config_from_str(config_text))
def test_init_nlp(config_string):
    """Check that ``init_nlp`` builds a pipeline from ``config_string``.

    A toy reader is registered under ``toy_tagger_data.v1`` so that a
    config can reference it for its train/dev data. The parsed config is
    merged over ``DEFAULT_CONFIG`` before initialization.
    """

    @spacy.registry.readers.register("toy_tagger_data.v1")
    def read_tagger_data():
        def parse_data(nlp, index):
            # Yield the single example stored at ``index``.
            ex = TRAIN_TAGGER_DATA[index]
            yield Example.from_dict(nlp.make_doc(ex[0]), ex[1])

        return {
            "train": partial(parse_data, index=0),
            "dev": partial(parse_data, index=1),
        }

    config = spacy.util.load_config_from_str(config_string, interpolate=False)
    config = Config(DEFAULT_CONFIG, section_order=CONFIG_SECTION_ORDER).merge(config)
    # use_gpu is an integer GPU id, not a flag: False equals 0 and would
    # request GPU 0. -1 asks for CPU explicitly.
    nlp = init_nlp(config, use_gpu=-1)
    assert nlp is not None