Ejemplo n.º 1
0
def train_ner(output_dir: str,
              train_data_path: str,
              dev_data_path: str,
              test_data_path: str,
              run_test: bool = None,
              model: str = None,
              n_iter: int = 10,
              meta_overrides: str = None):
    """Train an NER model, or evaluate an existing one on the dev/test splits.

    The random seed is fixed first, then all three TSV files are read and
    ``output_dir`` is created if missing, regardless of the mode.

    Args:
        output_dir: Directory created if needed; in evaluation mode the files
            ``dev_metrics.json`` and ``test_metrics.json`` are dumped there,
            in training mode it is forwarded to ``train``.
        train_data_path: TSV file read with ``read_ner_from_tsv``.
        dev_data_path: TSV file read with ``read_ner_from_tsv``.
        test_data_path: TSV file read with ``read_ner_from_tsv``.
        run_test: When truthy, skip training and only evaluate ``model``.
        model: Passed to ``spacy.load`` in evaluation mode, otherwise
            forwarded to ``train``.
        n_iter: Forwarded to ``train``.
        meta_overrides: Forwarded to ``train``.
    """
    seed = util.env_opt("seed", 0)
    util.fix_random_seed(seed)

    # All three splits are always loaded, in this order, even when only
    # evaluating.
    train_examples, dev_examples, test_examples = [
        read_ner_from_tsv(path)
        for path in (train_data_path, dev_data_path, test_data_path)
    ]
    os.makedirs(output_dir, exist_ok=True)

    if not run_test:
        train(model, train_examples, dev_examples, test_examples, output_dir,
              n_iter, meta_overrides)
        return

    nlp = spacy.load(model)
    print("Loaded model '%s'" % model)
    evaluation_runs = (
        (dev_examples, "dev_metrics.json"),
        (test_examples, "test_metrics.json"),
    )
    for examples, metrics_file in evaluation_runs:
        evaluate_ner(nlp,
                     examples,
                     dump_path=os.path.join(output_dir, metrics_file))
Ejemplo n.º 2
0
def main(model_path: str, dataset: str, output_path: str, code: Optional[str],
         med_mentions_folder_path: Optional[str]):
    """Evaluate a spaCy NER model on a dataset and dump the metrics.

    Args:
        model_path: Name or path handed to ``spacy.load``.
        dataset: Either ``medmentions-<split>`` (``<split>`` being ``train``,
            ``dev`` or ``test``) or a path read with ``read_ner_from_tsv``.
        output_path: Passed to ``evaluate_ner`` as ``dump_path``.
        code: Optional path to a Python file that is executed before the
            model is loaded.
        med_mentions_folder_path: Passed to ``read_full_med_mentions``; only
            used when ``dataset`` starts with ``medmentions``.

    Raises:
        ImportError: If ``code`` is given but no import spec/loader can be
            built for that path.
        IndexError: If ``dataset`` starts with ``medmentions`` but contains
            no ``-`` separated split name.
        Exception: If the MedMentions split name is not recognized.
    """
    if code is not None:
        # need to import code before loading a spacy model
        # The module is loaded straight from the given file path under a
        # fixed module name; it is executed for its import-time side effects.
        spec = importlib.util.spec_from_file_location("python_code", str(code))
        if spec is None or spec.loader is None:
            raise ImportError(f"Cannot import code from {code}")
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

    nlp = spacy.load(model_path)
    if dataset.startswith("medmentions"):
        train_data, dev_data, test_data = read_full_med_mentions(
            med_mentions_folder_path, None, False)
        data_split = dataset.split("-")[1]
        splits = {"train": train_data, "dev": dev_data, "test": test_data}
        if data_split not in splits:
            raise Exception(f"Unrecognized split {data_split}")
        data = splits[data_split]
    else:
        data = read_ner_from_tsv(dataset)

    evaluate_ner(nlp, data, dump_path=output_path)
Ejemplo n.º 3
0
def specialized_ner_reader(file_path: str):
    """Build a corpus callable backed by the NER examples in ``file_path``.

    The file is read once, immediately, with ``read_ner_from_tsv``. The
    returned function takes an ``nlp`` object and yields one ``Example`` per
    entry read from the file.
    """
    raw_examples = read_ner_from_tsv(file_path)

    def _build_example(nlp, raw_example):
        # Index 0 is passed to ``nlp.make_doc`` and index 1 is the dict given
        # to ``Example.from_dict``.
        doc = nlp.make_doc(raw_example[0])
        # UserWarnings raised while building the Example are suppressed; the
        # filter is restored as soon as the ``with`` block exits.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            return Example.from_dict(doc, raw_example[1])

    def corpus(nlp: Language):
        for raw_example in raw_examples:
            yield _build_example(nlp, raw_example)

    return corpus
Ejemplo n.º 4
0
    def test_read_ner_from_tsv(self):
        """Check that ``read_ner_from_tsv`` returns the expected (text, annotations) entries."""
        # Expected entries, in file order: index 0 is the text, index 1 the
        # annotation dict with (start, end, label) entity tuples.
        expected = [
            ('Intraocular pressure in genetically distinct mice : an update and strain survey',
             {'entities': [(24, 35, 'SO'), (45, 49, 'TAXON')]}),
            ('Abstract',
             {'entities': []}),
            ('Background',
             {'entities': []}),
            ('Little is known about genetic factors affecting intraocular pressure ( IOP ) in mice and other mammals .',
             {'entities': [(22, 29, 'SO'), (80, 84, 'TAXON'), (95, 102, 'TAXON')]}),
        ]

        data = read_ner_from_tsv(self.ner_tsv)
        assert len(data) == len(expected)
        for index, (text, annotations) in enumerate(expected):
            example = data[index]
            assert example[0] == text
            assert example[1] == annotations