Example #1
from medacy.data import Dataset
from medacy.ner.pipelines import SystematicReviewPipeline
from medacy.ner.model import Model
from medacy.pipeline_components import MetaMap

import logging
import sys



# Uncomment to print logs to stdout (level=logging.DEBUG gives the most detail)
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

# Entity types to extract
entities = [
    'CellLine', 'Dose', 'DoseDuration', 'DoseDurationUnits', 'DoseFrequency',
    'DoseRoute', 'DoseUnits', 'Endpoint', 'EndpointUnitOfMeasure', 'GroupName',
    'GroupSize', 'SampleSize', 'Sex', 'Species', 'Strain', 'TestArticle',
    'TestArticlePurity', 'TestArticleVerification', 'TimeAtDose', 'TimeAtFirstDose',
    'TimeAtLastDose', 'TimeEndpointAssessed', 'TimeUnits', 'Vehicle',
]

# training_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')
training_dataset = Dataset('/home/mahendrand/VE/TAC/data_TAC')
# Point medaCy at a local MetaMap installation and metamap the dataset
metamap = MetaMap(metamap_path="/home/share/programs/metamap/2016/public_mm/bin/metamap", convert_ascii=True)
training_dataset.metamap(metamap)
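# Note: metamapping is slow; medaCy is expected to cache the MetaMap output
# alongside the dataset so later runs can reuse it (details vary by version).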

# pipeline = SystematicReviewPipeline(metamap=None, entities=meta_data['entities'])
pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)
model = Model(pipeline, n_jobs=1)  # n_jobs sets how many processes documents are distributed across during training and prediction

model.fit(training_dataset)
model.cross_validate(num_folds=5, dataset=training_dataset, write_predictions=True)

# Location to store the trained clinical model
model.dump('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')

# Location to store the predictions
#model.predict(training_dataset, prediction_directory='/home/mahendrand/VE/SMM4H/data_smmh4h/task2/training/dataset/metamap_predictions')
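
To reuse the dumped model later without retraining, a Model built with the same pipeline can reload the weights and annotate new documents. A minimal sketch, assuming Model.load mirrors the model.dump call above; the unlabelled-data and output paths are placeholders:

from medacy.data import Dataset
from medacy.ner.model import Model
from medacy.ner.pipelines import SystematicReviewPipeline

# Rebuild the same pipeline configuration used at training time
pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)
model = Model(pipeline, n_jobs=1)

# Assumes Model.load restores the weights written by model.dump above
model.load('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')

# Placeholder directory of raw .txt files to annotate
unlabelled_dataset = Dataset('/path/to/unlabelled_data')
model.predict(unlabelled_dataset, prediction_directory='/path/to/predictions')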
Example #2
from medacy.data import Dataset
from medacy.ner.model import Model
from medacy.ner.pipelines import ClinicalPipeline

import logging
import sys

# Print logs to stdout (level=logging.DEBUG gives the most detail)
logging.basicConfig(stream=sys.stdout,
                    level=logging.DEBUG)

# entities = ['Form','Route','Frequency', 'Reason', 'Duration', 'Dosage', 'ADE', 'Strength', 'Drug' ]
entities = ['Symptom', 'Drug']

# training_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')
training_dataset = Dataset('/home/mahendrand/VE/Data/N2C2/symptom')

# training_dataset.set_data_limit(10)
# pipeline = SystematicReviewPipeline(metamap=None, entities=meta_data['entities'])
pipeline = ClinicalPipeline(metamap=None, entities=entities)
model = Model(pipeline, n_jobs=1)  # n_jobs sets how many processes documents are distributed across during training and prediction
model.fit(training_dataset)

model.cross_validate(num_folds=5,
                     training_dataset=training_dataset,
                     prediction_directory=True,
                     groundtruth_directory=True)
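
# With truthy prediction_directory/groundtruth_directory flags, medaCy is
# expected to write each fold's predictions and the aligned groundtruth as
# .ann files next to the dataset (exact location varies by medaCy version).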

# model.dump('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')
# model.predict(training_dataset, prediction_directory='/home/mahendrand/VE/data_smmh4h/task2/training/metamap_predictions')

# model.predict(training_dataset)

# train_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')
# print(train_dataset)
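
Both examples reference Dataset.load_external only in comments; the sketch below shows that path end to end. It assumes the medacy_dataset_smm4h_2019 package is pip-installed separately (medaCy loads external datasets from installed packages) and that meta_data carries the entity list, as the commented-out lines above suggest:

from medacy.data import Dataset
from medacy.ner.model import Model
from medacy.ner.pipelines import ClinicalPipeline

# Assumes the dataset package is installed; load_external returns the
# training split, evaluation split, and dataset metadata
training_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')

pipeline = ClinicalPipeline(metamap=None, entities=meta_data['entities'])
model = Model(pipeline, n_jobs=1)
model.fit(training_dataset)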