Example 1
    @classmethod
    def setUpClass(cls):

        if importlib.util.find_spec('medacy_dataset_end') is None:
            raise ImportError(
                "medacy_dataset_end was not automatically installed for testing. See testing instructions for details."
            )
        cls.training_directory = tempfile.mkdtemp()  # set up train directory
        cls.prediction_directory = tempfile.mkdtemp()  # set up predict directory
        dataset, entities = Dataset.load_external('medacy_dataset_end')
        cls.entities = entities
        cls.ann_files = []

        # fill directory of training files
        for data_file in dataset.get_data_files():
            file_name, raw_text, ann_text = (data_file.file_name,
                                             data_file.raw_path,
                                             data_file.ann_path)
            cls.ann_files.append(file_name + '.ann')
            with open(os.path.join(cls.training_directory, "%s.txt" % file_name), 'w') as f:
                f.write(raw_text)
            with open(os.path.join(cls.training_directory, "%s.ann" % file_name), 'w') as f:
                f.write(ann_text)

            # place only text files into the prediction directory
            with open(os.path.join(cls.prediction_directory, "%s.txt" % file_name), 'w') as f:
                f.write(raw_text)
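
A note on cleanup: the snippet above never removes the temporary train and predict directories. A minimal companion tearDownClass, assuming the same class attributes and that shutil is imported at module level, might look like this:

    @classmethod
    def tearDownClass(cls):
        # Remove the temporary directories created in setUpClass.
        # Assumes `import shutil` at module level and the attribute names used above.
        shutil.rmtree(cls.training_directory, ignore_errors=True)
        shutil.rmtree(cls.prediction_directory, ignore_errors=True)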
Example 2
    @classmethod
    def setUpClass(cls):

        if importlib.util.find_spec('medacy_dataset_end') is None:
            raise ImportError(
                "medacy_dataset_end was not automatically installed for testing. See testing instructions for details."
            )

        cls.train_dataset, cls.entities = Dataset.load_external(
            'medacy_dataset_end')
        cls.train_dataset.set_data_limit(1)

        cls.test_dataset, _ = Dataset.load_external('medacy_dataset_end')
        cls.test_dataset.set_data_limit(2)

        cls.prediction_directory = tempfile.mkdtemp()  # directory to store predictions
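
The importlib.util.find_spec guard used across these fixtures only checks whether the optional dataset package is installed; it never imports it. As a standalone, standard-library-only illustration, the same guard can also skip the whole test class instead of raising ImportError:

import importlib.util
import unittest


class OptionalDatasetTest(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        # Skip every test in this class when the optional package is absent.
        if importlib.util.find_spec('medacy_dataset_end') is None:
            raise unittest.SkipTest("medacy_dataset_end is not installed")

    def test_placeholder(self):
        self.assertTrue(True)


if __name__ == '__main__':
    unittest.main()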
Example 3
    @classmethod
    def setUpClass(cls):
        """Loads END dataset and writes files to temp directory"""
        cls.test_dir = tempfile.mkdtemp()  # set up temp directory
        cls.dataset, _, meta_data = Dataset.load_external('medacy_dataset_end')
        cls.entities = meta_data['entities']
        cls.ann_files = []
        # collect the annotation file names from the dataset
        for data_file in cls.dataset.get_data_files():
            file_name, raw_text, ann_text = (data_file.file_name,
                                             data_file.raw_path,
                                             data_file.ann_path)
            cls.ann_files.append(file_name + '.ann')

        with open(join(cls.test_dir, "broken_ann_file.ann"), 'w') as f:
            f.write("This is clearly not a valid ann file")

        cls.ann_file_path_one = join(cls.test_dir, "ann1.ann")
        with open(cls.ann_file_path_one, "w+") as f:
            f.write(ann_text_one)

        cls.ann_file_path_two = join(cls.test_dir, "ann2.ann")
        with open(cls.ann_file_path_two, "w+") as f:
            f.write(ann_text_two)

        cls.ann_file_path_modified = join(cls.test_dir, "ann_mod.ann")
        with open(cls.ann_file_path_modified, "w+") as f:
            f.write(ann_text_one_modified)

        cls.ann_file_path_source = join(cls.test_dir, "ann_source.txt")
        with open(cls.ann_file_path_source, "w+") as f:
            f.write(ann_text_one_source)
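
The fixtures above include an original annotation file, a modified copy, and a deliberately broken one, but the tests that consume them are not shown. A hypothetical test method (the name and assertion are illustrative, assuming ann_text_one_modified actually differs from ann_text_one) could read:

    def test_modified_fixture_differs_from_original(self):
        # Hypothetical check: the modified fixture should not be identical to the original.
        with open(self.ann_file_path_one) as f:
            original = f.read()
        with open(self.ann_file_path_modified) as f:
            modified = f.read()
        self.assertNotEqual(original, modified)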
Example 4
    @classmethod
    def setUpClass(cls):

        if importlib.util.find_spec('medacy_dataset_end') is None:
            raise ImportError(
                "medacy_dataset_end was not automatically installed for testing. See testing instructions for details."
            )

        cls.dataset, cls.entities = Dataset.load_external('medacy_dataset_end')
Example 5
    @classmethod
    def setUpClass(cls):
        """Loads END dataset and writes files to temp directory"""
        cls.test_dir = tempfile.mkdtemp()  # set up temp directory
        cls.dataset, cls.entities = Dataset.load_external('medacy_dataset_end')
        cls.ann_files = []
        # collect the annotation file names from the dataset
        for data_file in cls.dataset.get_data_files():
            file_name, raw_text, ann_text = (data_file.file_name, data_file.raw_path, data_file.ann_path)
            cls.ann_files.append(file_name + '.ann')

        with open(join(cls.test_dir, "broken_ann_file.ann"), 'w') as f:
            f.write("This is clearly not a valid ann file")
Example 6
# This script demonstrates using medaCy for a full model training/prediction/cross-validation use case.
# > python training_predicting.py model_name
# Builds a model named model_name with the pipeline and parameters defined below. The script places the model in
# its own directory along with the model's build log and model/pipeline parameters, so results stay easily referenceable at run time.
# Once a sufficient model is produced, consider wrapping it into a medaCy-compatible model as described in the example guide.

from medacy.model import Model
from medacy.pipelines import SystematicReviewPipeline
from medacy.data import Dataset
from medacy.pipeline_components import MetaMap
import logging, datetime, time, os, sys

train_dataset, evaluation_dataset, entities = Dataset.load_external(
    'medacy_dataset_tac_2018')

if len(sys.argv) < 2:
    print("Usage: python training_predicting.py model_name")
    exit(1)

# For rapid model prototyping, this will train and predict simply by running the script with a model name as a parameter.
model_name = sys.argv[1]  # name for the model; use underscores
model_notes = "notes about the current model"  # notes about the current model, stored in a model information file by this script

model_directory = "/home/username/named_entity_recognition/challenges/challenge_n/models/%s" % model_name.replace(" ", '_')

if model_name == "" or os.path.isdir(model_directory):
    print("Model name is empty or the model directory already exists; aborting")
    exit(1)
else:
    os.mkdir(model_directory)
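
The header above promises a build log and model/pipeline parameters stored alongside the model, but the snippet stops at creating the directory. A sketch of that bookkeeping, using only the modules the script already imports (the file names are illustrative, not a medaCy convention):

# Record the model notes and route logging into a build log inside the model directory.
with open(os.path.join(model_directory, "model_notes.txt"), 'w') as f:
    f.write("%s\n%s\n" % (model_name, model_notes))

logging.basicConfig(
    filename=os.path.join(model_directory, "build_%s.log" % model_name),
    level=logging.DEBUG)
logging.info("Build started at %s", datetime.datetime.now())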