def train_ner(output_dir: str, train_data_path: str, dev_data_path: str, test_data_path: str, run_test: bool = None, model: str = None, n_iter: int = 10, meta_overrides: str = None):
    """Train an NER model, or evaluate an existing one when ``run_test`` is set.

    All three TSV files are always read with ``read_ner_from_tsv`` and
    ``output_dir`` is always created, regardless of the mode.

    Args:
        output_dir: Directory that receives the outputs; created if missing.
        train_data_path: Path of the training TSV file.
        dev_data_path: Path of the development TSV file.
        test_data_path: Path of the test TSV file.
        run_test: When truthy, load ``model`` with ``spacy.load`` and write
            ``dev_metrics.json`` and ``test_metrics.json`` into ``output_dir``
            instead of training.
        model: Passed to ``spacy.load`` in test mode, otherwise to ``train``.
        n_iter: Forwarded to ``train``.
        meta_overrides: Forwarded to ``train``.
    """
    seed = util.env_opt("seed", 0)
    util.fix_random_seed(seed)

    training_examples = read_ner_from_tsv(train_data_path)
    dev_examples = read_ner_from_tsv(dev_data_path)
    test_examples = read_ner_from_tsv(test_data_path)
    os.makedirs(output_dir, exist_ok=True)

    if not run_test:
        train(model, training_examples, dev_examples, test_examples, output_dir, n_iter, meta_overrides)
        return

    nlp = spacy.load(model)
    print("Loaded model '%s'" % model)
    # Evaluate dev first, then test, each into its own metrics file.
    evaluation_runs = (
        (dev_examples, "dev_metrics.json"),
        (test_examples, "test_metrics.json"),
    )
    for examples, metrics_filename in evaluation_runs:
        evaluate_ner(nlp, examples, dump_path=os.path.join(output_dir, metrics_filename))
def main(model_path: str, dataset: str, output_path: str, code: Optional[str], med_mentions_folder_path: Optional[str]):
    """Evaluate a spaCy model's NER on a dataset and dump the metrics.

    Args:
        model_path: Passed to ``spacy.load``.
        dataset: Either ``medmentions-train``, ``medmentions-dev`` or
            ``medmentions-test`` (any string starting with ``medmentions`` is
            treated this way), or a path handed to ``read_ner_from_tsv``.
        output_path: Passed to ``evaluate_ner`` as ``dump_path``.
        code: Optional path to a Python file that is executed before the
            model is loaded.
        med_mentions_folder_path: Forwarded to ``read_full_med_mentions`` when
            a MedMentions split is requested.

    Raises:
        ImportError: If no import spec can be built for ``code``.
        Exception: If a MedMentions dataset string names no split or an
            unrecognized one.
    """
    if code is not None:
        # need to import code before loading a spacy model
        # The module is loaded from the ``code`` path itself; the previous
        # call referenced the undefined names ``name`` and ``loc``.
        spec = importlib.util.spec_from_file_location("python_code", str(code))
        if spec is None or spec.loader is None:
            raise ImportError(f"Could not import code from {code}")
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

    nlp = spacy.load(model_path)

    if dataset.startswith("medmentions"):
        train_data, dev_data, test_data = read_full_med_mentions(
            med_mentions_folder_path, None, False)
        dataset_parts = dataset.split("-")
        if len(dataset_parts) < 2:
            # Without this guard a bare "medmentions" fails with an IndexError.
            raise Exception(
                f"Dataset {dataset} does not name a split; expected e.g. medmentions-train")
        data_split = dataset_parts[1]
        splits = {"train": train_data, "dev": dev_data, "test": test_data}
        if data_split not in splits:
            raise Exception(f"Unrecognized split {data_split}")
        data = splits[data_split]
    else:
        data = read_ner_from_tsv(dataset)

    evaluate_ner(nlp, data, dump_path=output_path)
def specialized_ner_reader(file_path: str):
    """Return a corpus function over the NER examples stored at ``file_path``.

    The file is read once, up front, with ``read_ner_from_tsv``. The returned
    callable takes a ``Language`` object and lazily yields one ``Example``
    per record on every call.
    """
    records = read_ner_from_tsv(file_path)

    def corpus(nlp: Language):
        def to_example(record):
            # Element 0 of a record is the text, element 1 the annotation dict.
            doc = nlp.make_doc(record[0])
            with warnings.catch_warnings():
                # UserWarnings raised while building the Example are muted;
                # the filter is restored as soon as the block exits.
                warnings.simplefilter("ignore", category=UserWarning)
                return Example.from_dict(doc, record[1])

        for record in records:
            yield to_example(record)

    return corpus
def test_read_ner_from_tsv(self):
    """Check the text and entity spans parsed from the ``self.ner_tsv`` fixture."""
    # One (text, annotations) pair per example, in file order.
    expected = [
        ('Intraocular pressure in genetically distinct mice : an update and strain survey',
         {'entities': [(24, 35, 'SO'), (45, 49, 'TAXON')]}),
        ('Abstract',
         {'entities': []}),
        ('Background',
         {'entities': []}),
        ('Little is known about genetic factors affecting intraocular pressure ( IOP ) in mice and other mammals .',
         {'entities': [(22, 29, 'SO'), (80, 84, 'TAXON'), (95, 102, 'TAXON')]}),
    ]

    data = read_ner_from_tsv(self.ner_tsv)
    assert len(data) == 4

    for index, (text, annotations) in enumerate(expected):
        example = data[index]
        assert example[0] == text
        assert example[1] == annotations