def metamap(sent):
    """
    Notes
    -----
    - This is too slow because we instantiate mm and get_cui_indices for every datapoint
    - find a way around this
    """
    metamap_dir = "/Users/nicolasahar/Projects/repos/public_mm/bin/metamap18"
    # import metamap (should point to the public_mm_lite folder which contains metamaplite.sh)
    mm = MetaMap.get_instance(metamap_dir)
    cui_indices = get_cui_indices("./data/raw/cui/all_cuis_conso.csv")

    # can only pass in one sentence at a time to snorkel!
    concepts, error = mm.extract_concepts([sent.sent], [1])
    if concepts:
        if float(concepts[0].score) < 1:  # abstain if score < 1
            return -1
        cui = concepts[0].cui
        if cui in cui_indices["CUI"]:
            cui_index = list(cui_indices["CUI"].keys())[
                list(cui_indices["CUI"].values()).index(cui)]
            return cui_index + 1  # to avoid assigning index 0 a label of 0
        return -1
    else:
        return 0
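# The docstring above flags re-instantiating mm and get_cui_indices per datapoint as
# the bottleneck. A minimal sketch of one workaround, assuming the labeling function
# only needs a shared read-only instance: build both once and close over them. The
# factory name (make_metamap_lf) is illustrative, not from the source.
def make_metamap_lf(metamap_dir, cui_csv):
    mm = MetaMap.get_instance(metamap_dir)  # created once, reused on every call
    cui_indices = get_cui_indices(cui_csv)  # loaded once, reused on every call

    def metamap_lf(sent):
        concepts, error = mm.extract_concepts([sent.sent], [1])
        if not concepts:
            return 0
        if float(concepts[0].score) < 1:  # abstain if score < 1
            return -1
        cui = concepts[0].cui
        if cui in cui_indices["CUI"]:
            return list(cui_indices["CUI"].keys())[
                list(cui_indices["CUI"].values()).index(cui)] + 1
        return -1

    return metamap_lf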
def GetMetaMapSemanticTypes(metamap_path, annotations):
    """
    Uses MetaMap to return a list of semantic types for each annotation
    @param metamap_path: Path to MetaMap installation
    @param annotations: List of concepts parsed from annotation file
    @return: List of lists of semantic types for each annotation
    """
    # Extract concepts from the list of annotations using MetaMap
    metamap = MetaMap.get_instance(metamap_path)
    indexes = range(len(annotations))
    concepts, error = metamap.extract_concepts(annotations, indexes)

    # List to hold a list of semantic types for each annotation
    anSemTypeList = [[] for x in range(len(annotations))]

    # Iterate over the list of concepts extracted from the list of annotations
    for concept in concepts:
        index = int(concept.index)
        if isinstance(concept, ConceptMMI):
            for semtype in concept.semtypes.strip('[]').split(','):
                if semtype not in anSemTypeList[index]:
                    # Create a list of unique semantic types per annotation
                    anSemTypeList[index].append(semtype)
    return anSemTypeList
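# A minimal usage sketch for GetMetaMapSemanticTypes, assuming the surrounding
# module already imports MetaMap and ConceptMMI; the path and the annotation
# strings below are placeholders, not values from the source.
if __name__ == '__main__':
    annotations = ['heart attack', 'aspirin']
    sem_types = GetMetaMapSemanticTypes('/opt/public_mm/bin/metamap18', annotations)
    for annotation, types in zip(annotations, sem_types):
        # e.g. 'heart attack' -> ['dsyn'], 'aspirin' -> ['phsu', 'orch']
        print(annotation, '->', types)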
def __init__(self):
    # self.mm = MetaMap.get_instance('/home/khyathi/installations/public_mm/bin/metamap')
    # self.start_command = "/home/khyathi/installations/public_mm/bin/skrmedpostctl start"
    # self.stop_command = "/home/khyathi/installations/public_mm/bin/skrmedpostctl stop"
    self.mm = MetaMap.get_instance(config.get('MetaMap', 'instance'))
    self.start_command = config.get('MetaMap', 'start')
    self.stop_command = config.get('MetaMap', 'stop')
def run_metamap(self):
    mm = MetaMap.get_instance(self.metamap_bin)
    sentence = self.term
    concepts, error = mm.extract_concepts(
        sentences=sentence,
        compute_all_mappings=False,
        prefer_multiple_concepts=False,
        mm_data_version="USAbase",
        term_processing=True,
        word_sense_disambiguation=True,
        silent=True)
    concept_term = pd.DataFrame({"name": None, "cui": None, "score": None,
                                 "semtypes": None}, index=range(0))
    for index, concept in enumerate(concepts):
        # `is` on a string literal tests identity, not equality; use == instead
        if type(concept).__name__ == "ConceptMMI":
            concept_series = pd.Series({"name": concept.preferred_name,
                                        "cui": concept.cui,
                                        "score": concept.score,
                                        "semtypes": concept.semtypes})
            concept_term = concept_term.append(concept_series, ignore_index=True)
    concept_term.score = [float(item) for item in concept_term.score]  # make score float
    concept_term.sort_values(by="score", ascending=False, inplace=True)  # sort by score
    return concept_term
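# A minimal usage sketch for run_metamap, assuming it lives on a class that carries
# `metamap_bin` (path to the MetaMap binary) and `term` (a list holding the term to
# map); the TermMapper class and paths here are hypothetical, not from the source.
class TermMapper(object):
    def __init__(self, metamap_bin, term):
        self.metamap_bin = metamap_bin
        self.term = term

    run_metamap = run_metamap  # reuse the function defined above as a method

mapper = TermMapper('/opt/public_mm/bin/metamap18', ['heart attack'])
print(mapper.run_metamap().head())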
def ConceptExtract(self, sent_text):
    '''
    sent_text: a list of sent text
    '''
    mm = MetaMap.get_instance('./public_mm/bin/metamap16', version=2016)
    self.concepts, _ = mm.extract_concepts(sent_text,
                                           word_sense_disambiguation=True,
                                           ignore_stop_phrases=True)
    self.scores, _ = mm.extract_concepts(sent_text,
                                         mmi_output=False,
                                         word_sense_disambiguation=True,
                                         ignore_stop_phrases=True)
def ConceptExtract(self, sent_text):
    '''
    sent_text: a list of sent text
    mm: Path should be changed to MetaMap location.
    '''
    mm = MetaMap.get_instance('/Users/sileshu/Downloads/public_mm/bin/metamap16',
                              version=2016)
    self.concepts, _ = mm.extract_concepts(sent_text,
                                           word_sense_disambiguation=True,
                                           ignore_stop_phrases=True)
    self.scores, _ = mm.extract_concepts(sent_text,
                                         mmi_output=False,
                                         word_sense_disambiguation=True,
                                         ignore_stop_phrases=True)
def __init__(self):
    config = configparser.ConfigParser()
    config.read('config.ini')
    # Read path to the MetaMap binary release from configuration
    meta_map_path = config.get('general', 'meta_map_path')
    self.meta_map = MetaMap.get_instance(meta_map_path)
    # Get relevant field names, i.e. score, cui, preferred_name...
    self.relevant_field_names = config.get(
        'general', 'relevant_field_names').split(',')
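# A minimal sketch of the config.ini this constructor expects; the section and key
# names come from the code above, but the concrete path and field list below are
# placeholders, not values from the source.
#
#   [general]
#   meta_map_path = /opt/public_mm/bin/metamap18
#   relevant_field_names = score,cui,preferred_name,semtypes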
def ConceptExtract(self, sent_text):
    '''
    sent_text: a list of sent text
    mm: Path should be changed to MetaMap location.
    '''
    ### mm = MetaMap.get_instance('/public_mm/bin/metamap16', version=2016)
    mm = MetaMap.get_instance('/home/tay/Documents/DSA-NIST/NIST_Utils-master/public_mm/bin/metamap16',
                              version=2016)
    self.concepts, _ = mm.extract_concepts(sent_text,
                                           word_sense_disambiguation=True,
                                           ignore_stop_phrases=True)
    # self.scores, _ = mm.extract_concepts(sent_text, mmi_output=False,
    #                                      word_sense_disambiguation=True,
    #                                      ignore_stop_phrases=True)  #TP#
    self.scores, _ = mm.extract_concepts(sent_text,
                                         word_sense_disambiguation=True,
                                         ignore_stop_phrases=True)  #TP#
def __init__(self):
    # config = configparser.ConfigParser()
    # config.read('/home/galiasn/DATA/Jonathan/new/metamap-project-master/config.ini')
    # Read path to the MetaMap binary release from configuration
    # meta_map_path = config.get('general', 'meta_map_path')
    meta_map_path = '/home/galiasn/DATA/MetaMap/public_mm/bin/metamap16'
    self.meta_map = MetaMap.get_instance(meta_map_path)
    # Get relevant field names, i.e. score, cui, preferred_name...
    # self.relevant_field_names = config.get('general', 'relevant_field_names').split(',')
    self.relevant_field_names = ['score', 'preferred_name', 'cui', 'semtypes']
def findEntity(self):
    # the server installed on your machine
    mm = MetaMap.get_instance('/work/tkakar/public_mm/bin/metamap14')
    # sample_Text = '/work/tkakar/FDAfirstNarrative.txt'
    rawText = self.Text
    concepts, error = mm.extract_concepts([rawText], word_sense_disambiguation=True)
    offset_list = []
    drugs_list = []
    drug_offset_pair = ()
    for concept in concepts:
        c = concept.semtypes
        c = c.replace("[", "")
        c = c.replace("]", "")
        semTypes = c.strip().split(",")
        for semType in semTypes:
            if semType in ['phsu', 'orch']:
                token = concept.trigger.strip().split("-")[0]
                token = token.replace("[", "")
                offset = self.preprocess.offsetParse(concept.pos_info, ';')
                for item in offset:
                    # convert (offset, span) into (start, end)
                    item[1] = item[0] + item[1]
                    if item not in offset_list:
                        offset_list.append(item)
                        drugs_list.append(token)
    drugs_list = [drug.replace('"', "") for drug in drugs_list]
    elementList = []
    for drug, offset in zip(drugs_list, offset_list):
        elementList.append(
            DrugnameElement(drug, [offset], "DrugnameMetamapExtractor", "DRUGNAME"))
    return elementList
def metamap_wrapper(text):
    """
    Function-wrapper for the MetaMap binary. Extracts concepts found in text.
    !!!! REMEMBER TO START THE METAMAP TAGGER AND WordSense DISAMBIGUATION SERVER !!!!
    Input:
        - text: str, a piece of text or sentence
    Output:
        - a dictionary with key sents and values a list of the concepts found
    """
    # Tokenize into sentences
    sents = sent_tokenize(text)
    # Load MetaMap instance
    mm = MetaMap.get_instance(settings['load']['path']['metamap'])
    concepts, errors = mm.extract_concepts(sents, range(len(sents)))
    # Keep the sentence ids
    ids = np.array([int(concept[0]) for concept in concepts])
    sentences = []
    for i in range(len(sents)):
        tmp = {'sent_id': i + 1, 'entities': [], 'relations': []}
        # Wanted concepts according to sentence
        wanted = np.where(ids == i)[0].tolist()
        for w_ind in wanted:
            w_conc = concepts[w_ind]
            if hasattr(w_conc, 'cui'):
                tmp_conc = {
                    'label': w_conc.preferred_name,
                    'cui': w_conc.cui,
                    'sem_types': w_conc.semtypes,
                    'score': w_conc.score
                }
                tmp['entities'].append(tmp_conc)
        sentences.append(tmp)
    if errors:
        time_log('Errors with extracting concepts!')
        time_log(errors)
    return {'sents': sentences, 'sent_text': text}
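# A minimal usage sketch for the wrapper above, assuming the MetaMap tagger and WSD
# servers are already running and `settings` points at a valid installation; the
# input text is a placeholder.
result = metamap_wrapper('The patient denies chest pain. Aspirin was administered.')
for sent in result['sents']:
    for entity in sent['entities']:
        print(sent['sent_id'], entity['cui'], entity['label'])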
def searchMetaMap(search):
    """
    setup MetaMap and search UMLS for user desired topic
    :return: The preferred name for the UMLS concept identified in the text,
             and the number of different preferred names output
    """
    # mm = MetaMap.get_instance('/Users/nwams/Documents/MetaMap/public_mm/bin/metamap12')
    mm = MetaMap.get_instance("/Users/yiqingluo/IF5400/public_mm/bin/metamap12")
    # searchTopic = raw_input('What topic are you searching for? ')
    searchTopic = [search]
    # [1] only allow user to search for one topic at a time
    concepts, error = mm.extract_concepts(searchTopic, [1])
    conceptArray = []
    for concept in concepts:
        conceptArray.append(concept.preferred_name)
    # print "UMLS terms = ", conceptArray
    return conceptArray, len(conceptArray)
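# A minimal usage sketch for searchMetaMap; the search term is a placeholder.
terms, count = searchMetaMap('myocardial infarction')
print(count, 'UMLS preferred names found:', terms)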
class MetaMapWrapper(object):
    mm = MetaMap.get_instance(
        '/Users/hemant/GithubWorkspace/MetaMap/public_mm/bin/metamap18')

    def __init__(self):
        pass

    def annotate(self, text):
        mm_request = [text]
        concepts, error = self.mm.extract_concepts(mm_request, [1, 2])
        extracted_data = {}
        symptoms = []
        diseases = []
        diagnostics = []
        for concept in concepts:
            if hasattr(concept, 'semtypes'):
                if concept.semtypes == '[sosy]':  # Sign or Symptom
                    # sometimes it returns 'Symptoms' itself as a symptom
                    if concept.preferred_name != 'Symptoms' and concept.preferred_name != 'symptoms':
                        symptoms.append(concept.preferred_name)
                elif concept.semtypes == '[dsyn]':  # Disease or Syndrome
                    diseases.append(concept.preferred_name)
                elif concept.semtypes == '[diap]':  # Diagnostic Procedure
                    diagnostics.append(concept.preferred_name)
        if len(symptoms):
            extracted_data['symptoms'] = symptoms
        if len(diseases):
            extracted_data['diseases'] = diseases
        if len(diagnostics):
            extracted_data['diagnostics'] = diagnostics
        return extracted_data
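# A minimal usage sketch for the wrapper class above; the input sentence is a
# placeholder, not from the source.
wrapper = MetaMapWrapper()
print(wrapper.annotate('patient reports dizziness and high fever'))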
def process_sentences(sentences):
    mm = MetaMap.get_instance(METAMAP_BINARY_PATH)
    sentences_ids = list(range(len(sentences)))
    concepts, error = mm.extract_concepts(sentences, sentences_ids)
    sentences_concepts = [[] for _ in range(len(sentences))]
    for concept in concepts:
        if not isinstance(concept, ConceptMMI):
            continue
        sentence_id = int(concept.index)
        concept_data = {
            'preferred_name': concept.preferred_name,
            'cui': concept.cui,
            'pos_info': extract_positional_information(concept.pos_info),
            'semtypes': extract_semantic_types(concept.semtypes),
            'score': float(concept.score),
        }
        sentences_concepts[sentence_id].append(concept_data)
    return sentences_concepts
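# A minimal usage sketch for process_sentences, assuming METAMAP_BINARY_PATH and the
# helpers (extract_positional_information, extract_semantic_types) are defined
# elsewhere in the module; the sentences below are placeholders.
concepts_by_sentence = process_sentences([
    'The patient was prescribed metformin.',
    'No history of diabetes mellitus.',
])
for i, sentence_concepts in enumerate(concepts_by_sentence):
    for c in sentence_concepts:
        print(i, c['cui'], c['preferred_name'], c['score'])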
def ConceptExtract(self, sent_text):
    '''
    sent_text: a list of sent text
    '''
    mm = MetaMap.get_instance('./public_mm/bin/metamap16', version=2016)
    self.concepts, _ = mm.extract_concepts(sent_text,
                                           word_sense_disambiguation=True,
                                           ignore_stop_phrases=True)
    self.scores, _ = mm.extract_concepts(sent_text,
                                         mmi_output=False,
                                         word_sense_disambiguation=True,
                                         ignore_stop_phrases=True)
    with open("chunk.csv", "a") as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow(sent_text)
    check1, ob1 = get_bp(self.scores, sent_text)
    if check1:
        self.concepts.append(ob1)
        self.scores["C1271104"] = '1000'
    check2, ob2 = get_sp(self.scores, sent_text)
    if check2:
        self.concepts.append(ob2)
        self.scores['C0428179'] = '1000'
    deletes, pulses = check_pulse(self.concepts, self.scores, sent_text)
    if deletes != -1:
        for i in deletes:
            self.concepts.pop(i)
            self.scores.pop(CUI_pulse)
            for j in pulses:
                self.concepts.append(j)
                self.scores[CUI_pulse] = '1000'
            break
def readData(NoExpansionFlag, UmlsExpansionFlag):
    # instantiating metamap
    mm = MetaMap.get_instance('/home/khyathi/installations/public_mm/bin/metamap')
    start_command = "/home/khyathi/installations/public_mm/bin/skrmedpostctl start"
    os.system(start_command)
    randomNumber = randint(0, 100)
    csumm = 0
    infile = open(sys.argv[1], 'r')
    data = json.load(infile)
    for (i, question) in enumerate(data['questions']):
        if question['type'] == 'summary':
            csumm += 1
            if csumm != randomNumber:
                continue
            quest = unicode(question['body']).encode("ascii", "ignore")
            questionBow = quest.split()
            expandedQuestion = [questionBow] + [[]]
            if NoExpansionFlag == True:
                expandedQuestion = [questionBow] + [[]]
            elif UmlsExpansionFlag == True:
                expandedQuestion = [questionBow] + [expandConcepts(quest)]
            ideal_summaries = question["ideal_answer"]
            ideal_answer_sents = []
            if isinstance(ideal_summaries, types.StringTypes):
                ideal_answer_sents = sent_tokenize(ideal_summaries)
            else:
                ideal_answer_sents = sent_tokenize(ideal_summaries[0])
            # out = open("./ideal_summaries1/bioasq." + str(csumm) + ".txt", "w")
            # for sentence in ideal_answer_sents:
            #     out.write(unicode(sentence).encode("ascii", "ignore") + "\n")
            # out.close()
            snippets = question['snippets']
            sentences = []
            sentenceScoreDict = {}
            snippetsText = []
            for snippet in question['snippets']:
                text = unicode(snippet["text"]).encode("ascii", "ignore")
                snippetsText.append(text)
                if text == "":
                    continue
                try:
                    sentences += sent_tokenize(text)
                except:
                    sentences += text.split(". ")
            # for document in question['documents']:
            #     abstractText = unicode(retrieve(document)).encode("ascii", "ignore")
            #     if abstractText == "":
            #         continue
            #     try:
            #         sentences += sent_tokenize(abstractText)
            #     except:
            #         sentences += abstractText.split(". ")
            for sentence in sentences:
                sentenceBow = sentence.split()
                expandedSentence = [sentenceBow] + [[]]
                if NoExpansionFlag == True:
                    expandedSentence = [sentenceBow] + [[]]
                elif UmlsExpansionFlag == True:
                    expandedSentence = [sentenceBow] + [expandConcepts(sentence)]
                similarityScore = similarity(expandedQuestion, expandedSentence)
                sentenceScoreDict[sentence] = similarityScore
            summaryFinal = cluster(sentenceScoreDict, csumm)
    pickle.dump(cache, open("cached_umls_json.pkl", "wb"))
    pickle.dump(cuilist, open("cui.pkl", "wb"))
    stop_command = "/home/khyathi/installations/public_mm/bin/skrmedpostctl stop"
    os.system(stop_command)
    return (quest, snippetsText, summaryFinal)
def __init__(self):
    self.mm = MetaMap.get_instance(
        '/home/khyathi/installations/public_mm/bin/metamap')
    self.start_command = "/Users/khyathi/installations/public_mm/bin/skrmedpostctl start"
    self.stop_command = "/Users/khyathi/installations/public_mm/bin/skrmedpostctl stop"
'''
Reads a txt file, then calls MetaMap to do the mapping.
Returns semantic type, CUI, position, and negation.
Two file formats can be used: sldi and sldiID.
'''
from typing import Tuple
from pymetamap import MetaMap
from mytool import mmi_parser as mmip
import csv

mm = MetaMap.get_instance('/home/feng/public_mm/bin/metamap20')

# txt_file = "clinical_txt/1.txt"

# read a file line by line into a list
def read_line(txt_file):
    sentences = list()
    with open(txt_file, 'r') as f:
        # i = index number, l = content of the line read
        for i, l in enumerate(f):
            sentences.append(l)
        lines = i + 1
    return sentences, lines

def extract_sldi(txt_file, output=True):
    '''
    Some parts are unmodified, so use with care.
    Parses (maps) the terms in the txt file to CUIs.
    The input file is in sldi format.
    '''
from pymetamap import MetaMap
import sys
from bs4 import BeautifulSoup  # process xml with topics
import gzip
import codecs
import re

mm = MetaMap.get_instance("/data/palotti/public_mm/bin/metamap13")

filename = sys.argv[1]
outputfile = sys.argv[2]
soup = BeautifulSoup(codecs.open(filename, "r"))

text_to_use = 1
append = True
onlyOne = True
expand_metamap = True
pweight = 1
tweight = 1
defaultweight = 1
diaweight = 3

def filterConcepts(concepts):
    query = set()
    for c in concepts:
        types = c.semtypes.strip("[]").split(",")
        #print c
        #print "Preferred:", c.preferred_name, "<> Trigger:", c.trigger.strip("[]").split("-tx-1-")[0].strip('"')
        for t in types:
            if t == "clna":
                #print "Remedy: ", c.preferred_name, "<> Trigger:", c.trigger.strip("[]").split("-tx-1-")[0].strip('"')
import pandas as pd
import ast
from pymetamap import MetaMap

# https://metamap.nlm.nih.gov/Installation.shtml
PATH_TO_METAMAP = '/UMLS/MetaMap/public_mm/bin/metamap18'
mm = MetaMap.get_instance(PATH_TO_METAMAP)

# https://metamap.nlm.nih.gov/SemanticTypesAndGroups.shtml
semantic_types = pd.read_csv("SemanticTypes_2018AB.csv", header=None, sep='|')
semantic_types.columns = ['Abbreviation', 'UniqueIdentifier', 'SemanticType']

def get_concept(sentence):
    '''
    extracts UMLS concepts for the given input text
    param sentence: user provided text
    type sentence: str
    returns: UMLS concept and UMLS semantic type abbreviation
    rtype: str, list
    '''
    max = 0
    index = 0
    # extract UMLS concept = [index, mm, score, preferred_name, cui, semtypes,
    #                         trigger, location, pos_info, tree_codes]
    concepts, error = mm.extract_concepts([sentence])
    if concepts:
""" Week 8, in-class task CS 584: Applied BioNLP @author Abeed Sarker email: [email protected] Metamap example based on pymetamap Created: 10/1/2020 ***DO NOT REDISTRIBUTE*** """ from pymetamap import MetaMap import pandas as pd mm = MetaMap.get_instance( '/Users/thiago/Documents/METAMAP/public_mm/binmetamap18') sents = ['john had a heart attack and he has high blood pressure'] concepts, errors = mm.extract_concepts(sents) for c in concepts: print(c.index, c.score, c.preferred_name, c.cui, c.semtypes) print('***---***') f_path = './piboso-train.csv' df = pd.read_csv(f_path) texts = df['Text'] ids = df['Document'] sents = df['Sentence'] from collections import defaultdict concepts_per_sent = defaultdict(list) for t, s, i in zip(texts, sents, ids):
PICO = PICO()
PICO_processor = PICO.get_processor()
PICO_estimator = PICO.get_estimator(PICO_processor)

MED = MED()
MED_processor = MED.get_processor()
MED_estimator = MED.get_estimator(MED_processor)

# from src import Evidence_Proposition_clustering

# load attribute tagger
from general_utils import negex
rfile = open(config.negation_rules)
irules = negex.sortRules(rfile.readlines())

mm = None
if config.use_UMLS:
    from pymetamap import MetaMap
    mm = MetaMap.get_instance(config.metamap_dir)
else:
    mm = None

from src.postprocessing import attribute_processor
attribute_processor = attribute_processor(mm, negex.negTagger, irules)

# bert tokenizer
tokenizer = tokenization.FullTokenizer(vocab_file=config.vocab_file, do_lower_case=False)

def txt2ntokens(text):
    tokens = []
    textlist = text.split()  # split input into words (the original excerpt omitted this line)
    for i, word in enumerate(textlist):
        token = tokenizer.tokenize(word)
        tokens.extend(token)
import sys
from multiprocessing.pool import Pool

from nltk import word_tokenize
from nltk.corpus import words
from pymetamap import MetaMap
import numpy as np

BAR = 0.03
MIN_LENGTH = 6
MAX_LENGTH = 40

# pylint: disable=invalid-name, len-as-condition, too-many-locals
vocab = set(words.words())
mm_home = '/home/nikhil.pattisapu/tools/metamap2016/public_mm/bin/metamap16'
mm = MetaMap.get_instance(mm_home)
sem_types = ['antb', 'bhvr', 'bmod', 'blor', 'bdsu', 'bdsy', 'chem', 'clna',
             'cnce', 'clnd', 'dsyn', 'enty', 'evnt', 'fndg', 'food', 'ftcn',
             'hlca', 'hlco', 'idcn', 'inch', 'ocdi', 'ocac', 'bpoc', 'orch',
             'podg', 'phsu', 'phpr', 'lbpr', 'resa', 'resd', 'sbst', 'sosy',
             'tmco']
mm_threshold = 2

def remove_over_punctuated(sents):
    """Unused: Remove sentences which contain greater than BAR % of
    punctuation characters"""
    res = []
    for sent in sents:
        if len(sent) == 0:
from flask import Flask, request
from flask_restful import Resource, Api
from flask import jsonify
from pymetamap import MetaMap

app = Flask(__name__)
api = Api(app)

mm = MetaMap.get_instance('/Users/subigyanepal/Downloads/public_mm/bin/metamap16')

class MetaMapConcepts(Resource):
    def get(self, text):
        text = [text]
        response = []
        concepts, error = mm.extract_concepts(text)
        for concept in concepts:
            response.append({'score': concept.score,
                             'preferred_name': concept.preferred_name,
                             'semtypes': concept.semtypes,
                             'trigger': concept.trigger,
                             'pos_info': concept.pos_info,
                             'negation': concept.trigger[-2]})
        return jsonify(concepts=response)

api.add_resource(MetaMapConcepts, '/text/<string:text>')

if __name__ == '__main__':
    app.run(threaded=True)
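# A minimal client sketch for the service above, assuming it is running on Flask's
# default port (5000); the query text is a placeholder.
import requests

resp = requests.get('http://localhost:5000/text/high blood pressure')
for concept in resp.json()['concepts']:
    print(concept['preferred_name'], concept['semtypes'], concept['score'])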
def load_metaMap():
    global global_metamap
    global_metamap = MetaMap.get_instance('../../../../../opt/public_mm/bin/metamap12')
schema_name = 'mimiciii'

parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('-N', type=int, help='number of data points to use')
args = parser.parse_args()

datadir = '/Users/irenechen/Documents/mimic-data'

# Connect to postgres with a copy of the MIMIC-III database
con = psycopg2.connect(dbname=dbname, user=sqluser)

# the below statement is prepended to queries to ensure they select from the right schema
query_schema = 'set search_path to ' + schema_name + ';'

mm = MetaMap.get_instance(
    '/afs/csail.mit.edu/group/clinicalml/shared/mimic_folder/timeline/public_mm/bin/metamap18')

df = pd.read_csv('../pset2/discharge_summaries.csv')

N = args.N
csvfile = open('dc_%d.csv' % args.N, 'wb')
fieldnames = ['index', 'pos_info', 'mm', 'trigger', 'semtypes', 'patientid',
              'preferred_name', 'score', 'location', 'tree_codes', 'cui']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
def metamap_worker(metamap_path, tweets_path, output_dir, batch_size, batch_start, batch_end=None):
    """
    Runs MetaMap on a subset of tweets in the given dataset.

    metamap_path: Path to the metamap executable. For example,
        '/path/to/MetaMap/public_mm/bin/metamap18'
    tweets_path: Path to a CSV file containing tweets in the standard format.
    output_dir: Path to a directory in which output batch files should be written.
    batch_size: Number of tweets worth of concepts to write out in each batch file.
    batch_start: Index of the tweet in the file to start with. Batches will be
        numbered automatically.
    batch_end: Index of the tweet in the file to end with (exclusive).
    """
    # Setup MetaMap instance
    mm = MetaMap.get_instance(metamap_path)

    # Read and preprocess tweets
    tweets = utils.read_tweet_csv(tweets_path)
    data = tweets.full_text.values.tolist()
    data = [utils.preprocess_for_metamap(tweet) for tweet in data]
    tweet_ids = tweets["id"].values.tolist()

    # Remove spurious and duplicated tweets
    data, tweet_ids = filter_tweets(data, tweet_ids)
    assert len(data) == len(tweet_ids)

    all_concepts = []
    batch_idx = batch_start // batch_size
    print("Starting batch index:", batch_idx)
    failed_batches = []

    for batch_start in range(batch_start, batch_end if batch_end else len(data), METAMAP_BATCH_SIZE):
        batch_tweets = data[batch_start:batch_start + METAMAP_BATCH_SIZE]
        batch_ids = list(range(batch_start, batch_start + len(batch_tweets)))
        try:
            try:
                # Extract concepts as a batch
                concepts, error = mm.extract_concepts(batch_tweets, batch_ids)
            except (TypeError, IndexError):
                # Try extracting concepts individually
                for i, tweet in enumerate(batch_tweets):
                    concepts, error = mm.extract_concepts([tweet], [i + batch_start])
            if error is not None:
                print(error)
            else:
                all_concepts.extend([concept_to_dict(concept, tweet_ids) for concept in concepts])
        except Exception as e:
            if isinstance(e, KeyboardInterrupt):
                raise KeyboardInterrupt
            print("Failed batch", batch_start)
            failed_batches.append(batch_start)

        if batch_start % 1000 == 0:
            print("Batch {}, {} concepts extracted".format(batch_start, len(all_concepts)))
        if (batch_start + METAMAP_BATCH_SIZE) % batch_size == 0:
            df = pd.DataFrame(all_concepts)
            df.to_csv(os.path.join(output_dir, "concepts_{}.csv".format(batch_idx)))
            batch_idx += 1
            all_concepts = []

    # Write out the last (partial) batch
    if len(all_concepts) > 0:
        df = pd.DataFrame(all_concepts)
        df.to_csv(os.path.join(output_dir, "concepts_{}.csv".format(batch_idx)))
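# A minimal invocation sketch for the worker above; the paths and batch sizes are
# placeholders, and METAMAP_BATCH_SIZE is assumed to be a module-level constant.
metamap_worker(
    metamap_path='/path/to/MetaMap/public_mm/bin/metamap18',
    tweets_path='tweets.csv',
    output_dir='concept_batches',
    batch_size=1000,
    batch_start=0,
    batch_end=5000)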
def metamap(doc_list):
    from pymetamap import MetaMap
    mm = MetaMap.get_instance(args.metamap_path)

    def process_data(pid, doc_list):
        data = []
        for i, doc in enumerate(doc_list):
            try:
                text = clean_text(doc['text'])
                concepts, error = mm.extract_concepts([text], [doc['_id']])
                assert len(text) == len(doc['text']), 'Text length does not match after pre-processing'

                res_list = ddict(list)
                for k, concept in enumerate(concepts):
                    if concept[1] != 'MMI':
                        continue
                    pos_info = [list(map(int, x.split('/')))
                                for x in concept.pos_info.replace(',', ';').replace('[', '').replace(']', '').split(';')]
                    men_cnt = [len(x.split(',')) for x in concept.pos_info.split(';')]
                    men_sing = replace(concept.trigger, '"').split('"')[1::2][1::2]
                    mentions = mergeList([[men] * men_cnt[j] for j, men in enumerate(men_sing)])

                    for j, (start, offset) in enumerate(pos_info):
                        end = start + offset
                        res_list[(start, end)].append((concept.cui, concept.score))

                doc['result'] = dict(res_list)
                data.append(doc)

                if i % 10 == 0:
                    print('Completed [{}] {}, {}'.format(
                        pid, i, time.strftime("%d_%m_%Y") + '_' + time.strftime("%H:%M:%S")))
            except Exception as e:
                print('\nException Cause: {}'.format(e.args[0]))
                continue
        print('All work done {}!!'.format(pid))
        return data

    num_procs = args.workers
    chunks = partition(doc_list, num_procs)
    data_list = mergeList(Parallel(n_jobs=num_procs)(
        delayed(process_data)(i, chunk) for i, chunk in enumerate(chunks)))

    base_dir = './results/{}'.format(args.data)
    make_dir(base_dir)
    dump_pickle(data_list, '{}/{}_{}.pkl'.format(base_dir, args.model, args.split))
from negbio.pipeline2.dner_mm import MetaMapExtractor
from negbio.pipeline2.pipeline import NegBioPipeline
from pymetamap import MetaMap

def read_cuis(pathname):
    cuis = set()
    with open(pathname) as fp:
        for line in fp:
            line = line.strip()
            if line:
                cuis.add(line)
    return cuis

if __name__ == '__main__':
    argv = parse_args(__doc__)
    mm = MetaMap.get_instance(argv['--metamap'])
    if argv['--cuis'] is None:
        cuis = None
    else:
        cuis = read_cuis(argv['--cuis'])
    extractor = MetaMapExtractor(mm, cuis)
    pipeline = NegBioPipeline(pipeline=[('MetaMapExtractor', extractor)])
    pipeline.scan(source=argv['<file>'], directory=argv['--output'],
                  suffix=argv['--suffix'], overwrite=argv['--overwrite'])
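# A minimal invocation sketch for the script above, assuming a docopt-style usage
# string in __doc__; the flag names come from the code, but the script name and
# paths below are placeholders.
#
#   python metamap_pipeline.py --metamap /opt/public_mm/bin/metamap18 \
#       --cuis cuis.txt --output out_dir --suffix .neg.xml input/*.xml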
from pymetamap import MetaMap
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
import os
from pyDatalog import pyDatalog

# initialize
mm = MetaMap.get_instance('/Users/sileshu/Downloads/public_mm/bin/metamap16')
pyDatalog.create_terms('RH, PH, C, BR, P, BL, op, Y')

# clauses
(op[RH, PH, C, BR, P, BL] == 'Eliminate_Hazard') <= (True == RH)
(op[RH, PH, C, BR, P, BL] == 'Eliminate_Hazard') <= (False == RH) & (True == PH)
(op[RH, PH, C, BR, P, BL] == 'Secondary_Survey') <= (False == RH) & (False == PH) & \
    (True == C) & (False == BL)
(op[RH, PH, C, BR, P, BL] == 'Control_Bleeding') <= (False == RH) & (False == PH) & \
    (True == C) & (True == BL)
(op[RH, PH, C, BR, P, BL] == 'Open_the_Airway, Secondary_Survey') <= (False == RH) & \
    (False == PH) & (False == C) & (True == BR) & (True == P) & (False == BL)
(op[RH, PH, C, BR, P, BL] == 'Open_the_Airway, Control_Bleeding') <= (False == RH) & \
    (False == PH) & (False == C) & (True == BR) & (True == P) & (True == BL)
(op[RH, PH, C, BR, P, BL] == 'Open_the_Airway, Start_Artificial_Ventilation, Secondary_Survey') <= \
    (False == RH) & (False == PH) & (False == C) & (False == BR) & (True == P) & (False == BL)
(op[RH, PH, C, BR, P, BL] == 'Open_the_Airway, Start_Artificial_Ventilation, Control_Bleeding') <= \
    (False == RH) & (False == PH) & (False == C) & (False == BR) & (True == P) & (True == BL)
(op[RH, PH, C, BR, P, BL] == 'Open_the_Airway, Start_External_Cardiac_Compressions, Secondary_Survey') \
def getMetaMapConcepts(self, altText=None):
    """
    Returns the MetaMap concepts found using the 'pymetamap' python wrapper.

    Args:
        altText (str) The text to be tagged, if it is not the same as the whole
            narrative the preprocessor was created with. This text won't be stored.

    Returns:
        the MetaMap concepts, as described in the pymetamap documentation (list)
    """
    if Preprocessor.textList.get("getMetaMapConcepts") is None:
        self.parseXML()
        mm = MetaMap.get_instance('/work/tkakar/public_mm/bin/metamap14')
        rawText = self.rawText()
        concepts, error = mm.extract_concepts([rawText])
        pattern = re.compile('(\[(?:(orch|phsu|sosy|dsyn),?(orch|phsu|sosy|dsyn)?)\])')
        globalIDByConcept = {}
        returnedList = []
        for concept in concepts:
            if not hasattr(concept, 'aa'):
                # TODO: see if there is any information that we are missing due to
                # some combination not described by the regex
                match = pattern.search(concept.semtypes)
                if match:
                    returnedList.append(concept)
                    posInfo = concept.pos_info
                    triggerInfo = concept.trigger.split('-')
                    conceptName = triggerInfo[3]
                    # need to replace the quotes in the conceptName
                    conceptName = conceptName.replace('"', '')
                    # note: the original `if ';' or '^' in posInfo` was always true;
                    # check each delimiter explicitly
                    if ';' in posInfo or '^' in posInfo:
                        posInfoList = self.offsetParse(posInfo, ';')
                    else:
                        posInfoList = self.offsetParse(posInfo)
                    # change the format of the posInfos from (offset, span) to
                    # (offsetStartIndex, offsetEndIndex)
                    posInfoList = [(offset, span + offset) for (offset, span) in posInfoList]
                    for listIndex, (startIndex, endIndex) in enumerate(posInfoList):
                        lfNum = rawText.count('\n', 0, startIndex)
                        lastIdx = rawText.rfind(conceptName, 0, startIndex + len(conceptName))
                        # the number of line feeds between the last instance of the
                        # concept name and where MetaMap thinks the word is
                        lfNumSpecific = rawText.count('\n', lastIdx, startIndex)
                        # For some reason, we need to subtract one at the end
                        # TODO: Figure out why
                        posInfoList[listIndex] = (startIndex - (lfNum + 1) + lfNumSpecific - 1,
                                                  endIndex - (lfNum + 1) + lfNumSpecific - 1)
                    globalIDList = []
                    # we have the fixed offsets for each mention of the semantic type;
                    # we now need to find their location in the xml file
                    for newStartIdx, newEndIdx in posInfoList:
                        globalIds = self.placeOffsetInXML(conceptName, word_tokenize(conceptName),
                                                          newStartIdx, newEndIdx - newStartIdx)
                        globalIDList.append(globalIds)
                    globalIDByConcept[concept] = globalIDList
        for key, value in globalIDByConcept.iteritems():
            for gIDList in value:
                for gID in gIDList:
                    conceptXMLTag = self.root.find(".//*[@globalID='" + str(gID) + "']")
                    tempMetaMapElem = ET.Element("METAMAP")
                    tempMetaMapElem.text = key.semtypes.replace("'", '')
                    conceptXMLTag.append(tempMetaMapElem)
        Preprocessor.textList['getMetaMapConcepts'] = returnedList
        self.writeToXML()
    return Preprocessor.textList.get('getMetaMapConcepts')
def __init__(self, args):
    from pymetamap import MetaMap
    self.model = MetaMap.get_instance(args.metamap_path)
def message(payload):
    """Display the onboarding welcome message after receiving a message
    that contains "start".
    """
    event = payload.get("event", {})
    channel_id = event.get("channel")
    user_id = event.get("user")
    text = event.get("text")
    if text.find("displayed.") != -1:
        return
    print(text)

    global chatstep
    global concepts
    global cuis
    global morecui
    print(chatstep)

    # --- Onboarding
    if chatstep == 0 and text.find("This content can't be displayed.") == -1:
        if text and text.find("This content can't be displayed.") == -1:
            chatstep = chatstep + 1
            return start_onboarding(user_id, channel_id)
    elif chatstep == 1:
        # getting the symptoms
        if text and text.lower() != "done" and text.find("This content can't be displayed.") == -1:
            symptoms.append(text)
        elif text and text.lower() == "done":
            chatstep += 1
            print(chatstep)
            print(text)
            # IMPORTANT: MetaMap services should be started before running the
            # following lines: "wsdserverctl start" and "skrmedpostctl start"
            mm = MetaMap.get_instance(METAMAP)
            concepts, error = mm.extract_concepts(symptoms)
            text = None
            # keep only the semantic type "sign or symptom"
            sos = []
            for con in concepts:
                if con.semtypes == "[sosy]":
                    sos.append(con)
                    cuis.append(con.cui)
            send_message(user_id, channel_id, "Thanks! Let's check if I understand you correctly!")
            send_message(user_id, channel_id, "Do you have the following symptoms?\n")
            # confirm symptoms
            txt = ""
            for con in sos:
                txt += "*{}*\n".format(con.preferred_name)
            send_message(user_id, channel_id, txt)
            send_divider(user_id, channel_id)
            send_message(user_id, channel_id, "Please enter yes/no")
            symptoms.clear()
        return
    elif chatstep == 2:
        # confirmation of symptoms
        if text and text.lower() == "yes" and text.find("This content can't be displayed.") == -1:
            chatstep = chatstep + 1
            morecui = list(findMoreSymptoms(cuis))
            if len(morecui) != 0:
                msg = ("I found some related symptoms. If you have any of the following "
                       "symptoms please enter the numbers separated by comma. "
                       "Otherwise please enter 'no'")
                send_message(user_id, channel_id, msg)
                send_divider(user_id, channel_id)
                txt = ""
                for i in range(len(morecui)):
                    con = morecui[i]
                    try:
                        txt += "{}- {} : {} \n".format(i + 1, con2word[con][0], nci[con])
                    except:
                        try:
                            txt += "{}- {} : {} \n".format(i + 1, con2word[con][0], csp[con])
                        except:
                            txt += "{}- {} \n".format(i + 1, con2word[con][0])
                send_message(user_id, channel_id, txt)
                send_divider(user_id, channel_id)
                return
            else:  # no more symptoms found
                spec, prob = predictSpecialist(cuis)
                if spec:
                    send_message(user_id, channel_id, "Based on my analysis you should visit the following specialist:")
                    txt = ""
                    # the original format string dropped the probability; include it
                    for i in range(len(spec)):
                        txt += "{} (probability = {})\n-------------------------------\n".format(spec[i], prob[i])
                    send_message(user_id, channel_id, txt)
                else:
                    send_message(user_id, channel_id, "I was not able to find any specialist based on your symptoms so please visit a general practitioner for more guidance.")
                chatstep = 4
                return
        elif text and text.lower() == "no":
            send_message(user_id, channel_id, "*Please describe your signs and symptoms. When finished please type 'done'.*")
            chatstep = chatstep - 1
            text = None
            return
    elif chatstep == 3:
        if text.find("This content can't be displayed.") != -1:
            return
        chatstep = chatstep + 1
        print(text)
        if text and text.lower() != "no" and text.find("displayed") == -1:
            try:
                morenum = text.split(",")
                morenum = [int(x) for x in morenum]
                print(morenum)
                print(morecui)
                for i in morenum:
                    cuis.append(morecui[i - 1])
            except:
                chatstep = chatstep - 1
                send_message(user_id, channel_id,
                             "Sorry! I did not understand! If you have any of the above "
                             "symptoms please enter the numbers separated by comma. "
                             "Otherwise, please enter No")
                return
        spec, prob = predictSpecialist(cuis)
        if len(spec) > 0:
            send_message(user_id, channel_id, "Based on my analysis you should visit the following specialist:")
            for i in range(len(spec)):
                txt = "*{} (probability = {:.3f} )*".format(con2word[spec[i]][0], prob[i])
                # txt = "*{} (probability = {:.2f} )*".format(spec[i], prob[i])
                send_message(user_id, channel_id, txt)
                send_divider(user_id, channel_id)
        else:
            send_message(user_id, channel_id, "Sorry! I was not able to find any specialist based on your symptoms so please visit a general practitioner for more guidance.")
            return
        send_message(user_id, channel_id, "Thanks for using Docmatch :wave:")
        return
    # elif chatstep == 4 and text.find("displayed") == -1:
    #     chatstep += 1
    #     if text and text.lower() == "yes" and text.find("This content can't be displayed.") == -1:
    #         chatstep = 1
    #         return start_onboarding(user_id, channel_id)
    #     elif text and text.lower().find("no") != -1 and text.find("This content can't be displayed.") == -1:
    #         return send_message(user_id, channel_id, " Have a nice time :wave:")
    return
from pymetamap import MetaMap

# the server installed on your machine
mm = MetaMap.get_instance('/work/tkakar/public_mm/bin/metamap14')

### if using manual input
#sents = ['she fell down and felt dizzy after taking neproxin.', 'heart attack']
#concepts, error = mm.extract_concepts(sents, [1, 2], word_sense_disambiguation=True)

## if using file as input
sample_text = 'textSample.txt'
concepts, error = mm.extract_concepts(filename=sample_text, word_sense_disambiguation=True)

## specify output filename
f = open('Vimig/Results.txt', 'w')
for concept in concepts:
    ## to output specific semtypes only, uncomment the condition below
    #if concept.semtypes in ['[sosy]', '[phsu]', '[dsyn]']:
    print >> f, concept
    ## to print specific information as output
    #print >> f, "Trigger= " + concept.trigger + "\t", "SemType= " + concept.semtypes + "\t", "Pos= " + concept.pos_info + "\t"
f.close()