def get_recent_lab_values(empi, date):
    """Return the most recent result of each lab group recorded before `date`.

    Args:
        empi: patient identifier handed to `loader.get_patient_by_EMPI`.
        date: cutoff; only labs whose parsed 'Seq_Date_Time' is strictly
            earlier than this value are considered.

    Returns:
        dict mapping a lab 'Group_Id' to a `(lab_date, lab['Result'])` tuple
        for the latest qualifying lab of that group. Empty when the patient
        record has no 'Lab' entry.
    """
    patient = loader.get_patient_by_EMPI(empi)
    lab_latest = {}
    if 'Lab' not in patient:
        return lab_latest

    for lab in patient['Lab']:
        if not lab['Seq_Date_Time']:
            continue
        # Parse once: the value serves both the cutoff test and the result.
        lab_date = extract_data.parse_date(lab['Seq_Date_Time'])
        # parse_date is treated as possibly returning None for text it cannot
        # read; such labs are skipped instead of comparing None with a date.
        if lab_date is None or lab_date >= date:
            continue
        group = lab['Group_Id']
        previous = lab_latest.get(group)
        # Strict '>' means the first-seen record wins when timestamps tie.
        if previous is None or lab_date > previous[0]:
            lab_latest[group] = (lab_date, lab['Result'])
    return lab_latest
def main():
    """Scan one hard-coded patient's 'Car' and 'Lno' notes for symptom mentions.

    For every dated note, each symptom's three regexes (normal, negated
    prefix, negated suffix) are searched in the note's 'free_text'. A hit on
    either negation regex records the note as a negative mention; otherwise a
    hit on the normal regex records it as a positive mention. Progress is
    printed as it goes.

    Returns:
        (person_pos_history, person_neg_history): two dicts mapping a symptom
        name to the list of offsets, in fractional days, between the note
        date and the operation date.
    """
    empi = "FAKE_EMPI_385"  # testing a single patient
    symptoms_regexes = getSymptomsRegexes()
    person = loader.get_patient_by_EMPI(empi)
    operation_date = build_graphs.get_operation_date(person)
    seconds_per_day = 24 * 60 * 60
    positives = {}
    negatives = {}

    for note_type in ('Car', 'Lno'):
        # A single parenthesised argument prints the same text as the
        # Python 2 print statement.
        print('Examining ' + note_type + ' Notes for Patient ' + empi)
        date_key = extract_data.get_date_key(note_type)
        if note_type not in person or date_key is None:
            continue

        for index, doc in enumerate(person[note_type]):
            print('\tNote' + str(index))
            note_date = extract_data.parse_date(doc[date_key])
            if note_date is None:
                continue
            delta_days = (note_date - operation_date).total_seconds() / seconds_per_day

            for sym in symptoms_regexes:
                normal, neg_pre, neg_suff = [
                    bool(regex.search(doc['free_text']))
                    for regex in symptoms_regexes[sym]
                ]
                # Negation takes precedence over a plain mention.
                if neg_pre or neg_suff:
                    negatives.setdefault(sym, []).append(delta_days)
                    print('\t\tNegative,' + sym + ',' + str(delta_days))
                elif normal:
                    positives.setdefault(sym, []).append(delta_days)
                    print('\t\tPositive,' + sym + ',' + str(delta_days))

    return positives, negatives
def get_sent_vector(self, empi):
    """Build one flat vector from the patient's latest pre-operation notes.

    Notes of type `self.note_type` dated strictly before the operation are
    ordered from closest to the operation to furthest, truncated to
    `self.max_notes`, and padded with empty strings up to that length. Each
    note text is passed to `self.get_sent_vector_from_doc` and the results
    are flattened into a single numpy array.

    Args:
        empi: patient identifier handed to `loader.get_patient_by_EMPI`.

    Returns:
        1-D numpy array: the per-note vectors concatenated in note order.
    """
    patient = loader.get_patient_by_EMPI(empi)
    operation_date = extract_data.get_operation_date(patient)
    date_key = extract_data.get_date_key(self.note_type)

    notes = []
    if self.note_type in patient and date_key is not None:
        docs = patient[self.note_type]
        # (time before operation, index) pairs; sorting puts the note
        # closest to the operation first, ties broken by original index.
        age_idx_pairs = []
        for idx, doc in enumerate(docs):
            note_date = extract_data.parse_date(doc[date_key])
            if note_date is not None and note_date < operation_date:
                age_idx_pairs.append((operation_date - note_date, idx))
        age_idx_pairs.sort()
        notes = [docs[idx]['free_text']
                 for _, idx in age_idx_pairs[:self.max_notes]]

    # Ensure that the notes list has exactly max_notes entries.
    if len(notes) < self.max_notes:
        notes.extend([''] * (self.max_notes - len(notes)))

    # Materialise a real list: map() is lazy on Python 3 and np.array would
    # then wrap the iterator object instead of the vectors.
    vectors = [self.get_sent_vector_from_doc(note) for note in notes]
    return np.array(vectors).flatten()
def get_sent_vector(self, empi):
    """Return the flattened document vectors of the latest pre-operation notes.

    Selects the patient's `self.note_type` notes dated strictly before the
    operation date, keeps the `self.max_notes` closest to the operation
    (nearest first), pads with empty strings to exactly `self.max_notes`
    entries, converts every text with `self.get_sent_vector_from_doc` and
    flattens the result.

    Args:
        empi: patient identifier handed to `loader.get_patient_by_EMPI`.

    Returns:
        1-D numpy array holding the concatenated per-note vectors.
    """
    patient = loader.get_patient_by_EMPI(empi)
    operation_date = extract_data.get_operation_date(patient)
    date_key = extract_data.get_date_key(self.note_type)

    # Candidate notes as (time before operation, position) pairs.
    candidates = []
    if self.note_type in patient and date_key is not None:
        for position, doc in enumerate(patient[self.note_type]):
            note_date = extract_data.parse_date(doc[date_key])
            if note_date is None or not note_date < operation_date:
                continue
            candidates.append((operation_date - note_date, position))
    # Smallest gap first, i.e. the note nearest to the operation leads.
    candidates.sort()

    notes = []
    for _, position in candidates[:self.max_notes]:
        notes.append(patient[self.note_type][position]['free_text'])

    # Pad so the number of notes always equals max_notes.
    missing = self.max_notes - len(notes) if len(notes) < self.max_notes else 0
    notes += [''] * missing

    # Build a concrete list rather than relying on map(), whose result is a
    # lazy iterator on Python 3 and would not convert to a numeric array.
    vectors = [self.get_sent_vector_from_doc(text) for text in notes]
    return np.array(vectors).flatten()
def get_diagnoses(empi):
    """Given an empi, return the diagnosis timeline T for that patient.

    T is a list of tuples of the form
    (diagnosis date, Code_Type, code, diagnosis name), sorted in natural
    tuple order, so primarily by date. A given date may, and often does,
    carry several diagnoses, and the same diagnosis can be reported again on
    every visit. The list is empty when the patient has no 'Dia' records.
    """
    patient = loader.get_patient_by_EMPI(empi)
    if 'Dia' not in patient:
        return []

    timeline = [
        (
            extract_data.parse_date(entry['Date']),
            entry['Code_Type'],
            entry['Code'],
            entry['Diagnosis_Name'],
        )
        for entry in patient['Dia']
    ]
    timeline.sort()
    return timeline
def get_concatenated_notes(self, empi):
    """Join the patient's pre-operation notes of type `self.type` into one string.

    A note is kept when its date parses and is strictly before the operation
    date. When `self.look_back_months` is truthy, notes older than that many
    months (a month being 365/12 days) before the operation are dropped.

    Args:
        empi: patient identifier handed to `loader.get_patient_by_EMPI`.

    Returns:
        The kept 'free_text' values, in record order, joined by blank lines;
        an empty string when nothing qualifies.
    """
    person = loader.get_patient_by_EMPI(empi)
    operation_date = build_graphs.get_operation_date(person)
    date_key = extract_data.get_date_key(self.type)
    if self.type not in person or date_key is None:
        return ''

    seconds_per_month = 24 * 60 * 60 * (365.0 / 12)
    selected = []
    for doc in person[self.type]:
        note_date = extract_data.parse_date(doc[date_key])
        if note_date is None or not note_date < operation_date:
            continue
        if self.look_back_months:
            age_seconds = (operation_date - note_date).total_seconds()
            # Outside the look-back window: too old to include.
            if age_seconds > self.look_back_months * seconds_per_month:
                continue
        selected.append(doc['free_text'])
    return '\n\n'.join(selected)
def get_lab_history_before_date(empi, date, time_thresholds_months):
    """Summarize a patient's lab flags in time periods leading up to `date`.

    Every lab dated strictly before `date` is assigned to the first period
    whose threshold (in months before `date`) is at least the lab's age; labs
    older than the last threshold are ignored. Within each period the flags
    'H' (high), 'L' (low) and None (anything else) are counted per lab group
    and the most frequent one is reported.

    For example, 'BUN' => ['H', None, 'L'] indicates a transition from low
    (L) to high (H) leading up to the indicated date.

    Args:
        empi: patient identifier handed to `loader.get_patient_by_EMPI`.
        date: cutoff date.
        time_thresholds_months: period upper bounds in months; presumably
            ascending, since the first threshold that fits is used.

    Returns:
        dict mapping a lab 'Group_Id' to a list with one entry per threshold:
        'H', 'L' or None. A period without any observation yields None. Only
        groups with at least one lab inside the thresholds appear.
    """
    patient = loader.get_patient_by_EMPI(empi)
    values = ['H', 'L', None]
    n_periods = len(time_thresholds_months)
    seconds_in_month = 365 * 24 * 60 * 60 / 12

    # lab_history_counts[group] is a 2-D array:
    #   first dimension  = time period
    #   second dimension = counts of 'H', 'L' and None
    # e.g. [[15, 1, 2], ...] means 'H' dominated the first period (15 times).
    lab_history_counts = {}
    if 'Lab' in patient:
        for lab in patient['Lab']:
            if not lab['Seq_Date_Time']:
                continue
            lab_date = extract_data.parse_date(lab['Seq_Date_Time'])
            # Skip timestamps that cannot be parsed and labs not before date.
            if lab_date is None or lab_date >= date:
                continue

            flag = lab['Abnormal_Flag']
            value_index = values.index(flag if flag in ('H', 'L') else None)

            age_seconds = (date - lab_date).total_seconds()
            time_index = 0
            while (time_index < n_periods and
                   age_seconds > time_thresholds_months[time_index] * seconds_in_month):
                time_index += 1
            if time_index >= n_periods:
                continue  # older than the widest threshold

            group = lab['Group_Id']
            if group not in lab_history_counts:
                lab_history_counts[group] = np.zeros([n_periods, len(values)])
            lab_history_counts[group][time_index][value_index] += 1

    lab_history = {}
    for group, counts in lab_history_counts.items():
        # argmax of an all-zero row is 0, which would report 'H' for a period
        # with no labs at all; report None for such periods instead. On ties
        # argmax keeps the first maximum, so 'H' wins over 'L' over None.
        lab_history[group] = [
            values[row.argmax()] if row.any() else None
            for row in counts
        ]
    return lab_history
def get_labs_before_date(empi, date):
    """Tally a patient's labs recorded strictly before `date`.

    Returns four dictionaries, each keyed by lab group id:
      * total number of labs in the group,
      * number flagged 'L' (below the norm),
      * number flagged 'H' (above the norm),
      * (date, Abnormal_Flag) of the most recent lab in the group.
    Groups never flagged low (or high) are absent from the low (or high)
    dictionary. All four are empty when the patient has no 'Lab' records.
    """
    patient = loader.get_patient_by_EMPI(empi)
    lab_counts, lab_lows, lab_highs, lab_latest = {}, {}, {}, {}
    if 'Lab' not in patient:
        return lab_counts, lab_lows, lab_highs, lab_latest

    for lab in patient['Lab']:
        if not lab['Seq_Date_Time']:
            continue
        lab_date = extract_data.parse_date(lab['Seq_Date_Time'])
        if not lab_date < date:
            continue

        group = lab['Group_Id']
        lab_counts[group] = lab_counts.get(group, 0) + 1

        flag = lab['Abnormal_Flag']
        # Keep the most recent test; the earlier record wins on equal dates.
        if group not in lab_latest or lab_date > lab_latest[group][0]:
            lab_latest[group] = (lab_date, flag)

        if flag == 'L':
            lab_lows[group] = lab_lows.get(group, 0) + 1
        elif flag == 'H':
            lab_highs[group] = lab_highs.get(group, 0) + 1

    return lab_counts, lab_lows, lab_highs, lab_latest
def get_encounters(empi):
    """Given an empi, return that patient's encounters sorted by Admit Date.

    Admit Date is the sort key because Discharge Date is not always
    recorded. Encounters without an 'Admit_Date' are left out. Each entry is
    a tuple of:
        (parsed admit date,
         'Inpatient_Outpatient' as a str,
         parsed discharge date,
         'LOS_Days' as an int, 0 when empty,
         number of non-empty 'Diagnosis_1' .. 'Diagnosis_9' fields).
    """
    patient = loader.get_patient_by_EMPI(empi)
    if 'Enc' not in patient:
        return []

    encounters = []
    for enc in patient['Enc']:
        extra_diagnoses = sum(
            1 for n in range(1, 10) if enc['Diagnosis_' + str(n)]
        )
        if not enc['Admit_Date']:
            continue
        admitted = extract_data.parse_date(enc['Admit_Date'])
        setting = str(enc['Inpatient_Outpatient'])
        discharged = extract_data.parse_date(enc['Discharge_Date'])
        length_of_stay = int(enc['LOS_Days']) if enc['LOS_Days'] else 0
        encounters.append(
            (admitted, setting, discharged, length_of_stay, extra_diagnoses)
        )

    # Sort on Admit_Date only; the sort is stable for equal dates.
    encounters.sort(key=lambda record: record[0])
    return encounters
def select_doc(self, doc, operation_date, doc_type):
    """
    description: decides whether a specific doc should be used
    inputs: dict of the doc, datetime of the procedure, string of doc type
    output: boolean; False when the doc date cannot be parsed or its day
        offset from the procedure is positive; otherwise True if no time
        horizon is set, else whether the doc lies within
        abs(self.time_horizon) days before the procedure
    """
    date_key = extract_data.get_date_key(doc_type)
    doc_date = extract_data.parse_date(doc[date_key])
    if doc_date is None:
        return False

    # Whole-day component of the offset between the doc and the procedure.
    time_diff = (doc_date - operation_date).days
    if time_diff > 0:
        return False
    if self.time_horizon is None:
        return True
    return abs(time_diff) <= abs(self.time_horizon)
def parse_value(self, doc, operation_date, doc_type):
    """
    description: function that returns the desired value from a document
    inputs: dict of the doc, datetime of the procedure, string of doc type
    output: depends on self.method, with delta_days being the whole-day
        offset of the doc date from the procedure:
          'other' -> (delta_days, list of all regex matches)
          'found' -> (delta_days, 1)
          'count' -> [(delta_days, number of matches)]  (list-wrapped)
          anything else -> (delta_days, mean of the numeric matches), or
              None when there is no numeric match
        None is also returned when the doc date cannot be parsed.
    """
    note = doc['free_text'].lower()
    doc_date_text = doc[extract_data.get_date_key(doc_type)]
    doc_date = extract_data.parse_date(doc_date_text)
    if doc_date is None:
        # Undated docs carry no usable offset (select_doc rejects them too).
        return None
    delta_days = (doc_date - operation_date).days

    values = []
    for pattern in self.patterns:
        values += [x for x in re.findall(pattern, note)
                   if len(x) > 0 and x not in (".", " ")]

    if self.method == 'other':
        # returns entire value list
        return (delta_days, values)
    if self.method == 'found':
        # NOTE(review): reports 1 even when no pattern matched - confirm.
        return (delta_days, 1)
    if self.method == 'count':
        return [(delta_days, len(values))]

    # Default: average the numeric matches. unicode() is used because
    # Python 2 byte strings have no isnumeric().
    # NOTE(review): isnumeric() is False for text such as "3.5" or "-2", so
    # decimal and signed matches are dropped - confirm this is intended.
    numbers = [float(x) for x in values if unicode(x).isnumeric()]
    if not numbers:
        return None
    return (delta_days, sum(numbers) / len(numbers))
def get_latest_concatenated_notes(self, empi):
    """Return the patient's latest pre-operation notes as a numpy array of text.

    Notes of type `self.type` dated strictly before the operation are ordered
    from closest to the operation to furthest and cut to `self.max_notes`;
    the list is padded with empty strings so it always holds `self.max_notes`
    entries.

    Args:
        empi: patient identifier handed to `loader.get_patient_by_EMPI`.

    Returns:
        numpy array of the selected 'free_text' strings plus padding.
    """
    person = loader.get_patient_by_EMPI(empi)
    operation_date = build_graphs.get_operation_date(person)
    date_key = extract_data.get_date_key(self.type)

    notes = []
    if self.type in person and date_key is not None:
        docs = person[self.type]
        # (time before operation, index): sorting puts the nearest note first.
        ranked = []
        for position, doc in enumerate(docs):
            note_date = extract_data.parse_date(doc[date_key])
            if note_date is None or not note_date < operation_date:
                continue
            ranked.append((operation_date - note_date, position))
        ranked.sort()
        notes = [docs[position]['free_text']
                 for _, position in ranked[:self.max_notes]]

    # Ensure that the notes vector length is equal to max_notes.
    if len(notes) < self.max_notes:
        notes.extend([''] * (self.max_notes - len(notes)))
    return np.array(notes)
def parse_value(self, doc, operation_date, doc_type):
    """
    description: function that returns the desired value from a document
    inputs: dict of the doc, datetime of the procedure, string of doc type
    output: None when the doc date cannot be parsed; otherwise, with
        delta_days the whole-day offset of the doc from the procedure:
          method 'other' -> (delta_days, every regex match)
          method 'found' -> (delta_days, 1)
          method 'count' -> [(delta_days, match count)] (a one-item list)
          other methods  -> (delta_days, average of the numeric matches),
                            or None if no match is numeric
    """
    note = doc['free_text'].lower()
    date_key = extract_data.get_date_key(doc_type)
    doc_date = extract_data.parse_date(doc[date_key])
    if doc_date is None:
        # No date means no offset to report; select_doc also rejects these.
        return None
    delta_days = (doc_date - operation_date).days

    # Collect every non-trivial match of every pattern, in pattern order.
    values = []
    for pattern in self.patterns:
        for match in re.findall(pattern, note):
            if len(match) > 0 and match not in (".", " "):
                values.append(match)

    method = self.method
    if method == 'other':
        # returns entire value list
        return (delta_days, values)
    elif method == 'found':
        # NOTE(review): returned regardless of whether anything matched.
        return (delta_days, 1)
    elif method == 'count':
        return [(delta_days, len(values))]

    # Numeric summary. unicode() is needed because Python 2 byte strings
    # lack isnumeric().
    # NOTE(review): isnumeric() rejects "3.5" and "-2", so only plain digit
    # strings contribute to the mean - confirm that is the intent.
    numbers = [float(match) for match in values if unicode(match).isnumeric()]
    if len(numbers) == 0:
        return None
    return (delta_days, sum(numbers) / len(numbers))