def setUpClass(cls):
    """Builds temp training and prediction directories from the external END dataset.

    Raises ImportError (skipping the suite) when medacy_dataset_end is not installed.
    The training directory receives .txt/.ann pairs; the prediction directory
    receives only the .txt files.
    """
    if importlib.util.find_spec('medacy_dataset_end') is None:
        raise ImportError(
            "medacy_dataset_end was not automatically installed for testing. See testing instructions for details."
        )

    # Two scratch workspaces for this test class.
    cls.training_directory = tempfile.mkdtemp()
    cls.prediction_directory = tempfile.mkdtemp()

    end_dataset, end_entities = Dataset.load_external('medacy_dataset_end')
    cls.entities = end_entities
    cls.ann_files = []

    for entry in end_dataset.get_data_files():
        # NOTE(review): these come from *_path attributes — confirm they hold file
        # contents rather than filesystem paths before relying on the written files.
        file_name = entry.file_name
        raw_text = entry.raw_path
        ann_text = entry.ann_path
        cls.ann_files.append(file_name + '.ann')

        txt_target = os.path.join(cls.training_directory, "%s.txt" % file_name)
        ann_target = os.path.join(cls.training_directory, "%s.ann" % file_name)
        pred_target = os.path.join(cls.prediction_directory, "%s.txt" % file_name)

        with open(txt_target, 'w') as handle:
            handle.write(raw_text)
        with open(ann_target, 'w') as handle:
            handle.write(ann_text)
        # Prediction directory gets text files only.
        with open(pred_target, 'w') as handle:
            handle.write(raw_text)
def setUpClass(cls):
    """Loads size-limited train/test END datasets and a temp prediction directory.

    Raises ImportError (skipping the suite) when medacy_dataset_end is not installed.
    """
    if importlib.util.find_spec('medacy_dataset_end') is None:
        raise ImportError(
            "medacy_dataset_end was not automatically installed for testing. See testing instructions for details."
        )

    # Directory to store predictions.
    cls.prediction_directory = tempfile.mkdtemp()

    # Train on a single document to keep the test fast...
    cls.train_dataset, cls.entities = Dataset.load_external('medacy_dataset_end')
    cls.train_dataset.set_data_limit(1)

    # ...and evaluate against two.
    cls.test_dataset, _ = Dataset.load_external('medacy_dataset_end')
    cls.test_dataset.set_data_limit(2)
def setUpClass(cls):
    """Loads END dataset and writes annotation fixture files to a temp directory.

    Creates:
      - one deliberately broken .ann file for negative-path tests
      - two distinct valid .ann fixtures (ann1/ann2)
      - a modified copy of the first fixture and its source .txt
    """
    cls.test_dir = tempfile.mkdtemp()  # set up temp directory
    cls.dataset, _, meta_data = Dataset.load_external('medacy_dataset_end')
    cls.entities = meta_data['entities']
    cls.ann_files = []

    # Fill the list of expected annotation file names for the dataset.
    for data_file in cls.dataset.get_data_files():
        file_name, raw_text, ann_text = (data_file.file_name,
                                         data_file.raw_path,
                                         data_file.ann_path)
        cls.ann_files.append(file_name + '.ann')

    # An intentionally invalid annotation file for error-handling tests.
    with open(join(cls.test_dir, "broken_ann_file.ann"), 'w') as f:
        f.write("This is clearly not a valid ann file")

    cls.ann_file_path_one = join(cls.test_dir, "ann1.ann")
    with open(cls.ann_file_path_one, "w+") as f:
        f.write(ann_text_one)

    # BUG FIX: the second fixture previously reused the "ann1.ann" filename and
    # wrote ann_text_two into ann_file_path_one, clobbering the first fixture
    # and leaving both path attributes aliasing the same file.
    cls.ann_file_path_two = join(cls.test_dir, "ann2.ann")
    with open(cls.ann_file_path_two, "w+") as f:
        f.write(ann_text_two)

    cls.ann_file_path_modified = join(cls.test_dir, "ann_mod.ann")
    with open(cls.ann_file_path_modified, "w+") as f:
        f.write(ann_text_one_modified)

    cls.ann_file_path_source = join(cls.test_dir, "ann_source.txt")
    with open(cls.ann_file_path_source, "w+") as f:
        f.write(ann_text_one_source)
def setUpClass(cls):
    """Loads the external END dataset, failing fast when it is not installed."""
    spec = importlib.util.find_spec('medacy_dataset_end')
    if spec is None:
        raise ImportError(
            "medacy_dataset_end was not automatically installed for testing. See testing instructions for details."
        )
    cls.dataset, cls.entities = Dataset.load_external('medacy_dataset_end')
def setUpClass(cls):
    """
    Loads END dataset and writes files to temp directory
    :return:
    """
    cls.test_dir = tempfile.mkdtemp()  # set up temp directory
    cls.dataset, cls.entities = Dataset.load_external('medacy_dataset_end')

    # Record the expected annotation file name for every dataset document.
    cls.ann_files = []
    for entry in cls.dataset.get_data_files():
        name, _raw, _ann = entry.file_name, entry.raw_path, entry.ann_path
        cls.ann_files.append(name + '.ann')

    # An intentionally invalid annotation file for error-handling tests.
    with open(join(cls.test_dir, "broken_ann_file.ann"), 'w') as handle:
        handle.write("This is clearly not a valid ann file")
# This script demonstrates utilizing medaCy for a full model training/predictive/cross validation use-case.
# > python training_predicting.py model_name
# Will build a model named model_name with the pipeline and parameters defined below. This script places the model in
# its own directory along with the model's build log and model/pipeline parameters to keep results easily referenceable during run time.
# Once a sufficient model is produced, consider wrapping it up into a medaCy compatible model as defined in the example guide.
import datetime
import logging
import os
import sys
import time

from medacy.model import Model
from medacy.pipelines import SystematicReviewPipeline
from medacy.data import Dataset
from medacy.pipeline_components import MetaMap

train_dataset, evaluation_dataset, entities = Dataset.load_external(
    'medacy_dataset_tac_2018')

# For rapid model prototyping, train and predict by simply running the script with a model name as a parameter.
# BUG FIX: sys.argv[1] is never None — a missing argument raises IndexError
# before the old `is None` check ever ran; test the argument count instead.
if len(sys.argv) < 2:
    exit(0)

model_name = sys.argv[1]  # name for the model, use underscores
model_notes = "notes about the current model"  # stored in a model information file by this script

model_directory = "/home/username/named_entity_recognition/challenges/challenge_n/models/%s" % model_name.replace(
    " ", '_')

# BUG FIX: `model_name is ""` compared object identity against a literal
# (implementation-defined, effectively always False); use equality.
if model_name == "" or os.path.isdir(model_directory):
    print("Model directory already exists, aborting")
    exit(0)
else:
    os.mkdir(model_directory)