Esempio n. 1
0
    def __init__(
        self,
        config: Config,
        *,
        rank: int = 0,
        num_workers: int = 1,
        use_gpu: int = 0,
        ray=None,
    ):
        if ray is None:
            # Avoid importing ray in the module. This allows a test-ray to
            # be passed in, and speeds up the CLI.
            import ray  # type: ignore

            self.ray = ray
        self.rank = rank
        self.num_workers = num_workers
        self.gpu_id = self._resolve_gpu(use_gpu)
        self.nlp = init_nlp(Config(config), use_gpu=self.gpu_id)
        config = self.nlp.config.interpolate()
        self.T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
        dot_names = [self.T["train_corpus"], self.T["dev_corpus"]]
        self.train_corpus, self.dev_corpus = resolve_dot_names(config, dot_names)
        self.before_to_disk = create_before_to_disk_callback(self.T["before_to_disk"])
        allocator = self.T["gpu_allocator"]
        if use_gpu >= 0 and allocator:
            set_gpu_allocator(allocator)
        self._evaluation_callback = lambda: {}
        self._results = []
        self._has_evaluation_callback = False
        self.thread = None
        self.proxy = None
        self.n_grads_used = 0
        self.n_grads_discarded = 0
Esempio n. 2
0
def test_issue6908(component_name):
    """Test intializing textcat with labels in a list"""

    def create_data(out_file):
        nlp = spacy.blank("en")
        doc = nlp.make_doc("Some text")
        doc.cats = {"label1": 0, "label2": 1}
        out_data = DocBin(docs=[doc]).to_bytes()
        with out_file.open("wb") as file_:
            file_.write(out_data)

    with make_tempdir() as tmp_path:
        train_path = tmp_path / "train.spacy"
        create_data(train_path)
        config_str = CONFIG_ISSUE_6908.replace("TEXTCAT_PLACEHOLDER", component_name)
        config_str = config_str.replace("TRAIN_PLACEHOLDER", train_path.as_posix())
        config = load_config_from_str(config_str)
        init_nlp(config)
def test_init_nlp(config_string):
    @spacy.registry.readers.register("toy_tagger_data.v1")
    def read_tagger_data():
        def parse_data(nlp, index):
            ex = TRAIN_TAGGER_DATA[index]
            yield Example.from_dict(nlp.make_doc(ex[0]), ex[1])

        return {
            "train": partial(parse_data, index=0),
            "dev": partial(parse_data, index=1),
        }

    config = spacy.util.load_config_from_str(config_string, interpolate=False)
    config = Config(DEFAULT_CONFIG,
                    section_order=CONFIG_SECTION_ORDER).merge(config)
    nlp = init_nlp(config, use_gpu=False)
    assert nlp is not None