def negations_pycontextnlp_individual_transcript(clinical_text):
    """Mark up every sentence of a transcript and collect the resulting edges.

    The modifier and target knowledge bases are loaded from the ``data``
    directory below the current working directory.  The transcript is split
    into sentences with ``transcript_to_sentences_of_tokens`` and each
    lower-cased sentence is passed to ``pycontextnlp_markup_sentence``.

    Args:
        clinical_text: The transcript text to analyse.

    Returns:
        A 2-tuple ``(edges, positions)``.  ``edges`` holds every edge
        returned for every sentence, in order.  ``positions`` holds, for each
        edge, a pair of ``(start, end)`` spans (one per edge endpoint)
        shifted by the combined length of the preceding sentences, so the
        spans are relative to the running sentence offset rather than to the
        individual sentence.
    """
    modifiers_location = r'/' + os.getcwd() + '/data/pycontextnlp_modifiers.yml'
    targets_location = r'/' + os.getcwd() + '/data/pycontextnlp_targets.yml'
    modifiers = itemData.get_items(modifiers_location)
    targets = itemData.get_items(targets_location)

    def shifted_span(node, offset):
        # Move a sentence-relative span by the running offset.
        span = node.getSpan()
        return (offset + span[0], offset + span[1])

    all_edges = []
    all_positions = []
    offset = 0
    for sentence in transcript_to_sentences_of_tokens(clinical_text, False):
        edges = pycontextnlp_markup_sentence(
            sentence.lower(), modifiers, targets)
        all_positions.extend([
            (shifted_span(edge[0], offset), shifted_span(edge[1], offset))
            for edge in edges
        ])
        # The extra 1 accounts for the space stripped between sentences.
        offset += len(sentence) + 1
        all_edges.extend(edges)

    return (all_edges, all_positions)
def test_1(self):
    """Check that 'r/o' shows up among the marked nodes of a report sentence.

    The pneumonia modifier and target knowledge bases are loaded relative to
    the current working directory, the lower-cased sentence is marked with
    both, and the test passes when at least one node's string form contains
    'r/o'.
    """
    report = 'IMPRESSION: 1. R/O STUDY DEMONSTRATING NO GROSS EVIDENCE OF SIGNIFICANT PULMONARY EMBOLISM.'
    cwd = os.getcwd()
    print(cwd)

    kb_modifiers = itemData.get_items(
        os.path.join(cwd, "../../KB/pneumonia_modifiers.yml"))
    kb_targets = itemData.get_items(
        os.path.join(cwd, "../../KB/pneumonia_targets.yml"))

    markup = pyConText.ConTextMarkup()
    markup.setRawText(report.lower())
    markup.markItems(kb_modifiers, mode="modifier")
    markup.markItems(kb_targets, mode="target")

    # Collect every node whose textual representation mentions 'r/o'.
    rule_out_nodes = [
        node for node in markup.nodes(data=True) if 'r/o' in str(node)
    ]
    assert rule_out_nodes
def apply_context(input_context, modifier_path, target_path):
    """
    Run the context markup over every row of the patient-record table.

    For each index label of `input_context` the "text" and "record" cells
    are read. Rows whose text is a string are passed to `markup_record`
    together with the running context object; rows whose text is not a
    string are reported on stdout and skipped.

    Parameters:
        input_context: table-like object exposing `.index` and
            `.at[label, column]` with "text" and "record" columns
            (presumably a pandas DataFrame -- confirm against the caller).
        modifier_path: location handed to `itemData.get_items` for the
            modifiers.
        target_path: location handed to `itemData.get_items` for the
            targets.

    Returns:
        The context object: initially a `pyConText.ConTextDocument`,
        replaced by whatever `markup_record` returns for each processed
        record.
    """
    n_records = len(input_context.index)
    print("\nNumber of patient records that will be processed:", n_records)

    # Knowledge-base items shared by all records.
    modifiers = itemData.get_items(modifier_path)
    targets = itemData.get_items(target_path)

    document = pyConText.ConTextDocument()

    for row_label in input_context.index:
        text = input_context.at[row_label, "text"]
        record_nr = input_context.at[row_label, "record"]

        # Only string records can be marked up; report and skip the rest.
        if not isinstance(text, str):
            print(
                f"\nRecord number {record_nr} is no string. This record is skipped."
            )
            continue

        document = markup_record(document, text, record_nr, modifiers,
                                 targets)

    return document
bowel obstruction or mass identified within the abdomen or pelvis. Non-specific interstitial opacities and bronchiectasis seen at the right base, suggestive of post-inflammatory changes.""", """IMPRESSION: Evidence of early pulmonary vascular congestion and interstitial edema. Probable scarring at the medial aspect of the right lung base, with no definite consolidation.""", """IMPRESSION: 1. 2.0 cm cyst of the right renal lower pole. Otherwise, normal appearance of the right kidney with patent vasculature and no sonographic evidence of renal artery stenosis. 2. Surgically absent left kidney.""", """IMPRESSION: No pneumothorax.""", """IMPRESSION: No definite pneumothorax""", """IMPRESSION: New opacity at the left lower lobe consistent with pneumonia.""" ] modifiers = itemData.get_items( "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_05042016.yml" ) targets = itemData.get_items( "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/utah_crit.yml" ) def markup_sentence(s, modifiers, targets, prune_inactive=True): """ """ markup = pyConText.ConTextMarkup() markup.setRawText(s) markup.cleanText() markup.markItems(modifiers, mode="modifier") markup.markItems(targets, mode="target") markup.pruneMarks()
# Columns read from the sentence TSV: 1 = IRI, 3 = truth marker (a row counts
# as true when the field contains "n"), 5 = sentence text.
iris = []
truths = []
sentences = []
with open('./komenti/mimic_will_fix.tsv', newline='') as sentfile:
    for row in csv.reader(sentfile, delimiter='\t'):
        print(row)
        truths.append("n" in row[3])
        iris.append(row[1])
        sentences.append(row[5])

modifiers = itemData.get_items(
    "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/20c752d6bd5191833f21ab81fc7f41877dca1db6/KB/pneumonia_modifiers.yml"
)

# Read the label file once and index its labels (column 0) by IRI (column 1),
# instead of re-opening and re-scanning the whole file for every sentence.
# File order is kept within each IRI so targets come out in the same order.
# NOTE: the file is now read even when there are no sentences to process.
labels_by_iri = {}
with open('./hpo_labels.txt', newline='') as labelfile:
    for row in csv.reader(labelfile, delimiter='\t'):
        labels_by_iri.setdefault(row[1], []).append(row[0])

for i, s in enumerate(sentences):
    # Fresh contextItem objects per sentence, one per label of this IRI.
    targets = [
        contextItem((label, iris[i], '', ''))
        for label in labels_by_iri.get(iris[i], [])
    ]
    markup = pyConText.ConTextMarkup()
entity_types = ['PRODUCT', 'ONTOLOGY'] print('ENTITY_TYPES = ', entity_types) def is_url(url): regex = re.compile(r'^(?:http|ftp)s?://', re.IGNORECASE) return re.match(regex, url) if not is_url(args.modifiers): args.modifiers = pathlib.Path(os.path.abspath(args.modifiers)).as_uri() if not is_url(args.targets): args.targets = pathlib.Path(os.path.abspath(args.targets)).as_uri() logging.info('loading targets=${0}'.format(args.targets)) targets = itemData.get_items(args.targets) logging.info('loading modifiers=${0}'.format(args.modifiers)) modifiers = itemData.get_items(args.modifiers) warnings.filterwarnings("ignore") def process(dto): if 'meta' not in dto or 'DC.date' not in dto['meta']: context_concepts = process_default(dto) return context_concepts else: return process_jsonnlp(dto) def process_default(dto):
# Unpack the command-line options used below.
args = parser.parse_args()
data_p, note_c, save_p = args.data_path, args.note_text_column, args.save_path
accuracy_f, truth_c = int(args.accuracy_flag), args.truth_column
majority = int(args.majority)

# Load the notes spreadsheet.
df = pd.read_excel(data_p)

# Knowledge bases for pyConText. An alternative modifier set that was used
# previously lives in the same repository directory as amia_2017.yml:
# https://raw.githubusercontent.com/wcmc-research-informatics/SI_Ideation/master/pycontext/amia_2017.yml
modifiers_url = 'https://raw.githubusercontent.com/wcmc-research-informatics/SI_Ideation/master/pycontext/MEDINFO2013_b.yaml'
targets_url = 'https://raw.githubusercontent.com/wcmc-research-informatics/SI_Ideation/master/pycontext/targets.yml'
modifiers = itemData.get_items(modifiers_url)
targets = itemData.get_items(targets_url)

df = generate_predictions(df, modifiers, targets, note_c, majority)

# When the accuracy flag is 1, score the 'pycontext_label' column against
# the truth column (the label column is presumably added by
# generate_predictions -- confirm).
if accuracy_f == 1:
    y_true = df[truth_c]
    y_pred = df['pycontext_label']
    print(accuracy_score(y_true, y_pred))
    print(classification_report(y_true, y_pred))

# Write the predictions out as an Excel file.
df.to_excel(save_p, index=False)