Example #1
import os

from pkg_resources import resource_filename

# import paths follow the medaCy layout used in Example #6
from medacy.ner.model import Model
from medacy.ner.pipelines import ClinicalPipeline


def load():
    # entity types the packaged clinical-notes model predicts
    entities = [
        'Drug', 'Form', 'Route', 'ADE', 'Reason', 'Frequency', 'Duration',
        'Dosage', 'Strength'
    ]
    pipeline = ClinicalPipeline(entities=entities)
    model = Model(pipeline, n_jobs=1)
    # locate the pickled weights bundled with the medacy_model_clinical_notes package
    model_directory = resource_filename('medacy_model_clinical_notes', 'model')
    model.load(
        os.path.join(model_directory,
                     'n2c2_2018_no_metamap_2018_12_22_16.49.17.pkl'))
    return model
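A minimal usage sketch for the loader above, assuming the same medaCy API as in the later examples (model.predict() returns an annotation whose annotations['entities'] dict holds entity tuples); the sample sentence is only an illustration:

# hedged usage sketch: the exact Annotations layout may differ between medaCy versions
model = load()
annotation = model.predict("The patient was given 50 mg of tramadol for pain.")

# iterate over the predicted entities (Example #2 reads the entity text at index 3)
for key, entity in annotation.annotations['entities'].items():
    print(key, entity)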
Example #2
def drug_extraction(img):
    # text_from_image() (an OCR helper) and drugs_dict (a dict with a "drugs" list)
    # are assumed to be defined elsewhere in this module.
    model = Model.load_external('medacy_model_clinical_notes')
    all_text_as_list = text_from_image(img)
    all_text = ""
    for line in all_text_as_list:
        all_text += line + " "
        # debug output: predictions over the text accumulated so far
        annotation = model.predict(all_text)
        print(annotation)
    print(all_text)

    # final prediction over the full text
    annotation = model.predict(all_text)

    # keep only entities whose surface text (index 3 of the entity tuple)
    # appears in the known-drugs list
    keys_del = []
    for key in annotation.annotations['entities']:
        print(key)
        if annotation.annotations['entities'][key][3] not in drugs_dict["drugs"]:
            keys_del.append(key)
    for key in keys_del:
        del annotation.annotations['entities'][key]
    return annotation
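The filter above reads index 3 of each entity entry, which suggests entries keyed like brat T-ids mapping to (label, start, end, text) tuples; that layout is an assumption here, not confirmed by the snippet. A hedged illustration:

# hypothetical shape of annotation.annotations['entities'] assumed by the filter above:
# each value is a (label, start_offset, end_offset, entity_text) tuple
example_entities = {
    'T1': ('Drug', 23, 31, 'tramadol'),
    'T2': ('Dosage', 17, 22, '50 mg'),
}

# only entries whose entity text is a known drug would survive the filter
drugs_dict = {"drugs": ["tramadol"]}
kept = {k: v for k, v in example_entities.items() if v[3] in drugs_dict["drugs"]}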
Example #3
import json
import sqlite3

# Model import path follows the medaCy layout used in Example #6
from medacy.ner.model import Model


def main():
    # create_parser() is assumed to be defined elsewhere in this module
    # (an optparse-style parser: parse_args() returns (opts, args))
    parser = create_parser()
    opts, args = parser.parse_args()
    if len(args) != 1:
        parser.error("invalid number of arguments")

    model = Model.load_external('medacy_model_clinical_notes')

    sqlite_file = args[0]

    conn = sqlite3.connect(sqlite_file)
    cursor = conn.cursor()

    cursor.execute('SELECT ED_ENC_NUM, NOTE_TEXT FROM Documents')
    for i, (num, raw) in enumerate(cursor):
        annotation = model.predict(raw)
        #print (json.dumps(annotation.annotations))
        #entities = annotation.entities;
        conn.execute('UPDATE Documents SET Result=? WHERE ED_ENC_NUM = ?',
                     (json.dumps(annotation.annotations), num))
        if i % 20 == 0:
            conn.commit()
    conn.commit()
    conn.close()
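The queries above imply a Documents table with at least ED_ENC_NUM, NOTE_TEXT and Result columns; the sketch below reconstructs a matching schema so the snippet can be reproduced, with column types and the database filename as assumptions:

import sqlite3

# hypothetical schema matching the SELECT/UPDATE statements above (types are guesses)
conn = sqlite3.connect('notes.sqlite')
conn.execute("""
    CREATE TABLE IF NOT EXISTS Documents (
        ED_ENC_NUM INTEGER PRIMARY KEY,
        NOTE_TEXT  TEXT,
        Result     TEXT
    )
""")
conn.commit()
conn.close()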
Example #4
    def test_prediction_with_testing_pipeline(self):
        """
        Constructs a model that memorizes an entity, predicts it on same file, writes to ann
        :return:
        """

        pipeline = TestingPipeline(entities=['tradename'])

        #train on Abelcet.ann
        model = Model(pipeline, n_jobs=1)
        model.fit(self.train_dataset)

        #predict on both
        model.predict(self.test_dataset,
                      prediction_directory=self.prediction_directory)

        second_ann_file = "%s.ann" % self.test_dataset.get_data_files()[1].file_name
        annotations = Annotations(
            os.path.join(self.prediction_directory, second_ann_file),
            annotation_type='ann')
        self.assertIsInstance(annotations, Annotations)
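The test method references fixtures (self.train_dataset, self.test_dataset, self.prediction_directory) that the excerpt does not show; a hedged sketch of a setUpClass that could provide them, with placeholder paths and an assumed Dataset import path:

import shutil
import tempfile
import unittest

from medacy.data import Dataset  # import path may vary across medaCy versions


class TestModelPrediction(unittest.TestCase):
    """Hypothetical fixture setup for the test method shown above."""

    @classmethod
    def setUpClass(cls):
        # placeholder paths; the real test presumably points at a small sample dataset
        cls.train_dataset = Dataset('/path/to/train_data')
        cls.test_dataset = Dataset('/path/to/test_data')
        cls.prediction_directory = tempfile.mkdtemp()

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree(cls.prediction_directory)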
Example #5
# imports follow the medaCy layout used in Example #6; the Dataset import path
# may differ across medaCy versions
from medacy.data import Dataset
from medacy.ner.model import Model
from medacy.ner.pipelines import SystematicReviewPipeline
from medacy.pipeline_components import MetaMap

# entity types
entities = ['Reason', 'ADE', 'Drug']
# entities = ['Symptom', 'Form', 'Route', 'Frequency', 'Duration', 'Dosage', 'Strength', 'Drug']
# dirPred = '/home/mahendrand/VE/Predictions/CV/N2C2'
training_dataset = Dataset('/home/mahendrand/VE/Data/N2C2/data')

#set metamap path
metamap = MetaMap(
    metamap_path="/home/share/programs/metamap/2016/public_mm/bin/metamap",
    convert_ascii=True)
training_dataset.metamap(metamap)

# pipeline = ClinicalPipeline(metamap=metamap, entities=entities)
pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)
# n_jobs controls how many worker processes documents are distributed across
# during training and prediction
model = Model(pipeline, n_jobs=1)

model.fit(training_dataset)

#cross validation
# model.cross_validate(num_folds = 5, training_dataset = training_dataset, prediction_directory=dirPred, groundtruth_directory=True)

#location to store the clinical model
# model.dump('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')

#run on a separate testing dataset
testing_dataset_END = Dataset('/home/mahendrand/VE/Data/END/drug')

# location to store the predictions
model.predict(
    testing_dataset_END,
    prediction_directory=...)  # prediction directory path truncated in the original snippet
Example #6
import logging
import sys

from medacy.data import Dataset  # Dataset import path may differ across medaCy versions
from medacy.ner.model import Model
from medacy.ner.pipelines import SystematicReviewPipeline
from medacy.pipeline_components import MetaMap



# print logs
# logging.basicConfig(stream=sys.stdout,level=logging.DEBUG) #set level=logging.DEBUG for more information

# entity types
entities = [
    'CellLine', 'Dose', 'DoseDuration', 'DoseDurationUnits', 'DoseFrequency',
    'DoseRoute', 'DoseUnits', 'Endpoint', 'EndpointUnitOfMeasure', 'GroupName',
    'GroupSize', 'SampleSize', 'Sex', 'Species', 'Strain', 'TestArticle',
    'TestArticlePurity', 'TestArticleVerification', 'TimeAtDose',
    'TimeAtFirstDose', 'TimeAtLastDose', 'TimeEndpointAssessed', 'TimeUnits',
    'Vehicle'
]

# training_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')
training_dataset = Dataset('/home/mahendrand/VE/TAC/data_TAC')
#set metamap path
metamap = MetaMap(metamap_path="/home/share/programs/metamap/2016/public_mm/bin/metamap", convert_ascii=True)
training_dataset.metamap(metamap)

# pipeline = SystematicReviewPipeline(metamap=None, entities=meta_data['entities'])
pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)
# n_jobs controls how many worker processes documents are distributed across
# during training and prediction
model = Model(pipeline, n_jobs=1)

model.fit(training_dataset)
model.cross_validate(num_folds=5, dataset=training_dataset, write_predictions=True)

#location to store the clinical model
model.dump('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')

#location to store the predictions
#model.predict(training_dataset, prediction_directory='/home/mahendrand/VE/SMM4H/data_smmh4h/task2/training/dataset/metamap_predictions')
Example #7
# # training_dataset_1.metamap(metamap)
#
# model_1 = Model(pipeline, n_jobs=1)
# model_1.fit(training_dataset_1)

# #run on a separate testing dataset
# testing_dataset_1= Dataset('/home/mahendrand/VE/Data/CADEC_END/1/test')
# # location to store the predictions
# model.predict(testing_dataset_1, prediction_directory='/home/mahendrand/VE/Data/preds/5 fold/CADEC_END')
#
#
# #fold 2
training_dataset_2 = Dataset('/home/mahendrand/VE/Data/CADEC_END/2/train')
# training_dataset_2.metamap(metamap)
#
model_2 = Model(pipeline, n_jobs=1)
model_2.fit(training_dataset_2)

#run on a separate testing dataset
testing_dataset_2 = Dataset('/home/mahendrand/VE/Data/CADEC_END/2/test')
# location to store the predictions
model_2.predict(
    testing_dataset_2,
    prediction_directory='/home/mahendrand/VE/Data/preds/5 fold/CADEC_END')
#
#
# #fold 3
# training_dataset_3 = Dataset('/home/mahendrand/VE/Data/CADEC_END/3/train')
# training_dataset_3.metamap(metamap)
#
# model_3 = Model(pipeline, n_jobs=1)
Example #8
#logging.basicConfig(filename=model_directory+'/build_%cd .log' % current_time,level=logging.DEBUG) #set level=logging.DEBUG for more information
logging.basicConfig(
    stream=sys.stdout,
    level=logging.DEBUG)  #set level=logging.DEBUG for more information

# entities = ['Form','Route','Frequency', 'Reason', 'Duration', 'Dosage', 'ADE', 'Strength', 'Drug' ]
entities = ['Symptom', 'Drug']

# training_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')
training_dataset = Dataset('/home/mahendrand/VE/Data/N2C2/symptom')

#training_dataset.set_data_limit(10)
# pipeline = SystematicReviewPipeline(metamap=None, entities=meta_data['entities'])
pipeline = ClinicalPipeline(metamap=None, entities=entities)
# n_jobs controls how many worker processes documents are distributed across
# during training and prediction
model = Model(pipeline, n_jobs=1)
#
model.fit(training_dataset)

model.cross_validate(num_folds=5,
                     training_dataset=training_dataset,
                     prediction_directory=True,
                     groundtruth_directory=True)

# model.dump('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')
# model.predict(training_dataset, prediction_directory='/home/mahendrand/VE/data_smmh4h/task2/training/metamap_predictions')

# model.predict(training_dataset)

# train_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')
Example #9
    # __init__ of a detector-style class that wraps the packaged clinical-notes model
    def __init__(self, model='medacy_model_clinical_notes'):
        super().__init__()
        self.add_detector(self.annotate)
        self.model = Model.load_external(model)

    # --- separate fragment: body of a cross-validation fold loop; dirTrain, dirTest,
    # dirPrediction, dataset1/dataset2, the ann/txt file lists, num_files, pipeline
    # and metamap are defined earlier in the original script ---
    create_directory(dirTrain)

    print("Fold:", i)

    for item in ann_files_1:
        shutil.copy(dataset1 + '/' + item, dirTrain)
    for item in ann_files_2:
        shutil.copy(dataset2 + '/' + item, dirTrain)
    for item in txt_files_1:
        shutil.copy(dataset1 + '/' + item, dirTrain)
    for item in txt_files_2:
        shutil.copy(dataset2 + '/' + item, dirTrain)

    for item in ann_files_1[i * num_files:(i + 1) * num_files]:
        shutil.copy(dataset1 + '/' + item, dirTest)
        os.remove(dirTrain + '/' + item)
    for item in txt_files_1[i * num_files:(i + 1) * num_files]:
        shutil.copy(dataset1 + '/' + item, dirTest)
        os.remove(dirTrain + '/' + item)

    training_dataset = Dataset(dirTrain)
    training_dataset.metamap(metamap)

    model = Model(pipeline, n_jobs=1)
    model.fit(training_dataset)

    # run on a separate testing dataset
    testing_dataset = Dataset(dirTest)

    # location to store the predictions
    model.predict(testing_dataset, prediction_directory=dirPrediction)
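The fold-loop body above relies on setup the excerpt omits (pipeline, metamap and create_directory would be configured as in Examples #5 and #6). A hedged sketch of the remaining bookkeeping, with every path and value below an assumption reconstructed from how the loop uses the names:

import math
import os

# hypothetical setup for the fold loop shown above; all paths are placeholders
dataset1 = '/path/to/dataset1'
dataset2 = '/path/to/dataset2'
dirTrain = '/path/to/folds/train'
dirTest = '/path/to/folds/test'
dirPrediction = '/path/to/folds/predictions'

ann_files_1 = sorted(f for f in os.listdir(dataset1) if f.endswith('.ann'))
txt_files_1 = sorted(f for f in os.listdir(dataset1) if f.endswith('.txt'))
ann_files_2 = sorted(f for f in os.listdir(dataset2) if f.endswith('.ann'))
txt_files_2 = sorted(f for f in os.listdir(dataset2) if f.endswith('.txt'))

num_folds = 5
# number of dataset1 documents held out as the test split of each fold
num_files = math.ceil(len(ann_files_1) / num_folds)

for i in range(num_folds):
    ...  # the loop body shown in the example above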