Example #1
0
def map_episode_unit_type(unit_code, start_time=None):
  """Map a raw unit code to an episode unit type label.

  Args:
    unit_code: Raw unit identifier; converted with str() and stripped.
    start_time: Optional episode start time. Only consulted for codes
      containing '8NC', where it is compared against
      '2020-04-08 00:00:00' through get_hours_between_datetimes.

  Returns:
    - 'intensive_care_unit' or 'coronary_care_unit' for codes containing
      '8NC', depending on whether the computed hour difference is positive.
    - 'emergency_room' for codes containing 'ER'.
    - The UNIT_TYPE_MAP entry for any other known code.
    - '' (after printing a message) when the code is unknown, or when an
      '8NC' code is given without a start_time.
  """
  unit_code_str = str(unit_code).strip()

  if '8NC' in unit_code_str:
    if start_time is None:
      # Without a start time the cutoff comparison is impossible. This path
      # used to crash on an unassigned `time_delta`; report it like any
      # other unmappable code instead.
      print('Missing start time for unit type: ' + unit_code_str)
      return ''
    # The delta is only needed for this unit, so compute it here rather
    # than for every call.
    time_delta = get_hours_between_datetimes(
      '2020-04-08 00:00:00', start_time)
    if time_delta > 0:
      return 'intensive_care_unit'
    return 'coronary_care_unit'

  if 'ER' in unit_code_str:
    return 'emergency_room'

  if unit_code_str not in UNIT_TYPE_MAP:
    print('Unrecognized unit type: ' + unit_code_str)
    return ''
  return UNIT_TYPE_MAP[unit_code_str]
Example #2
0
def is_between_datetimes(dt, d1, d2):
    """Report whether the hour gaps d1 -> dt and dt -> d2 are both positive.

    Args:
        dt: Datetime being tested.
        d1: Lower bound; passed as the first argument of
            get_hours_between_datetimes with dt as the second.
        d2: Upper bound; evaluated with default_now=True (presumably a
            missing d2 falls back to the current time -- confirm in
            get_hours_between_datetimes).

    Returns:
        A truthy value only when both computed hour differences are
        strictly greater than zero.
    """
    hours_after_lower = get_hours_between_datetimes(d1, dt)
    hours_before_upper = get_hours_between_datetimes(dt, d2, default_now=True)
    return 0 < hours_after_lower and 0 < hours_before_upper
Example #3
0
   # Fragment of a loop body (the loop header is not visible here): files one
   # lab result into a per-patient, per-lab list of time bins.

   # Labs that are never binned.
   if lab_name == 'eosinophil_count': continue
   if lab_name in ['red_blood_cell_count', 'mean_corpuscular_volume', 
     'mean_corpuscular_hemoglobin', 'mean_corpuscular_hemoglobin_concentration']: continue
   
   # Lazily create the bookkeeping entries for this patient / lab.
   if patient_id not in lab_bins:
     lab_bins[patient_id] = {}
   if lab_name not in lab_names:
     lab_names.append(lab_name)
   if lab_name not in patients_with_data:
     patients_with_data[lab_name] = set()
   if lab_name not in lab_bins[patient_id]:
     # One slot per time point; None marks a bin with no value yet.
     lab_bins[patient_id][lab_name] = [None for x in range(0,n_time_points)]
 
   # The first element of the patient's eligible_episodes entry is used as
   # the episode start time.
   episode_start_time = eligible_episodes[patient_id][0]
   
   # Presumably hours from episode start to lab sample; the sign convention
   # lives in get_hours_between_datetimes -- confirm.
   hours_since_admission = get_hours_between_datetimes(episode_start_time, lab_sample_time)
   # Only samples strictly inside (left_limit, right_limit) are binned.
   if hours_since_admission > left_limit and hours_since_admission < right_limit:
     # Bin index = whole periods since admission, shifted by the whole
     # periods contained in left_offset.
     bin_num = int(hours_since_admission / hours_per_period) + int(left_offset / hours_per_period)
     if bin_num >= n_time_points: continue
     # NOTE(review): the `len(...)-1` bound excludes the final bin from the
     # "keep the highest" branch, so the last bin is always overwritten and
     # re-counted by the else branch -- looks like an off-by-one; confirm.
     # The `lab_name in lab_bins[patient_id]` test is always true here because
     # the entry is created above.
     if lab_name in lab_bins[patient_id] and \
       bin_num < len(lab_bins[patient_id][lab_name])-1 and \
       lab_bins[patient_id][lab_name][bin_num] is not None:
       # Pick the most abnormal (simplified here to highest)
       if lab_value > lab_bins[patient_id][lab_name][bin_num]: 
         lab_bins[patient_id][lab_name][bin_num] = lab_value
     else: 
       # Store the value and count this patient / lab.
       lab_bins[patient_id][lab_name][bin_num] = lab_value
       patients_with_data[lab_name].add(patient_id)
       total_lab_num += 1
     # Counts every in-window sample that landed in a valid bin.
     total_entries_num += 1
   # Appended for every row that was not skipped above, in or out of window.
   patient_ids.append(patient_id)
Example #4
0
    # Fragment of a loop body (the loop header is not visible here): filters
    # and maps one lab result row.
    lab_result_units = row.resultunit

    # Skip rows whose lab name or result text is listed in LAB_SKIP_VALUES
    # (compared in lower case).
    if lab_name.lower() in LAB_SKIP_VALUES or \
       lab_result_string.lower() in LAB_SKIP_VALUES:
        continue

    # Derive the result status from the result text; cancelled and pending
    # results carry no value, so the text is blanked.
    if lab_result_string in LAB_CANCELLED_FLAGS:
        lab_result_status = 'cancelled'
        lab_result_string = ''
    elif 'attente' in lab_result_string:
        lab_result_status = 'pending'
        lab_result_string = ''
    else:
        lab_result_status = 'resulted'

    # Hours between the patient's PCR sample time and this lab sample
    # (sign convention defined by get_hours_between_datetimes -- confirm).
    delta_hours = get_hours_between_datetimes(
        pcr_sample_times[str(patient_mrn)], str(lab_sample_time))

    # Keep only samples strictly between -48 hours and 7 days.
    if delta_hours > -48 and delta_hours < 7 * 24:

        mapped_lab_name = map_lab_name(lab_name)
        mapped_lab_sample_site = map_lab_sample_site(lab_name, lab_sample_site)

        # NOTE(review): the bare `except:` also swallows KeyboardInterrupt and
        # SystemExit; consider narrowing it to the error map_lab_result_value
        # actually raises.
        try:
            mapped_lab_value = map_lab_result_value(lab_result_string)
        except:
            print('Invalid lab value: ' + str(lab_result_string))
            continue

        # NOTE(review): prints every venous_o2_sat value; looks like leftover
        # debugging output -- confirm before keeping.
        if mapped_lab_name == 'venous_o2_sat':
            print(mapped_lab_value)
Example #5
0
# Query dw_v01.dw_rad_examen for the listed patients (each MRN is prefixed
# with 'S' in the IN clause) with date_heure_exam after 2020-01-01.
df = sql_query(
  "SELECT * FROM dw_v01.dw_rad_examen "
  "WHERE dossier IN ('S" + "', 'S".join(patient_mrns) + "') "
  "AND date_heure_exam > '2020-01-01'"
)

# Accumulators filled while scanning the query result.
imaging_data_rows, patients_with_imaging, imaging_accession_numbers = [], [], []

# Scan the imaging exams and keep those whose description contains
# 'rx' + 'poumon' or 'scan' + 'thorax'. Exams more than 48 hours before the
# patient's PCR sample are skipped (sign convention defined by
# get_hours_between_datetimes -- confirm).
# NOTE: 'scan' + 'abdo' descriptions are not matched; that criterion was
# commented out in the original code.
for index, row in df.iterrows():
  lower_desc = row.description.lower()
  # Drop the first character of the dossier value; the query builds its IN
  # clause with an 'S' prefix on every MRN.
  row_patient_mrn = str(row.dossier)[1:]

  if ('rx' in lower_desc and 'poumon' in lower_desc) or \
     ('scan' in lower_desc and 'thorax' in lower_desc):
    hours = get_hours_between_datetimes(
      pcr_sample_times[row_patient_mrn], row.date_heure_exam)

    if hours < -48: continue

    patients_with_imaging.append(row_patient_mrn)
    imaging_accession_numbers.append(row.accession_number)

    imaging_accession_uid = generate_accession_uid(row.accession_number)
    imaging_acquired_time = row.date_heure_debut_examen

    # The enclosing condition guarantees 'rx' or 'scan' is present, so any
    # description without 'rx' is a scan.
    modality = 'xr' if 'rx' in lower_desc else 'ct'

    imaging_data_rows.append([
      row_patient_mrn,
      imaging_accession_uid,
Example #6
0
# Track, per patient, the episode with the latest start time (used to
# attribute death), and index every episode by its id.
for row in episode_data_rows:
  patient_mrn, episode_id, episode_start_time = \
    str(row[0]), str(row[1]), str(row[3])

  episode = {
    'episode_id': episode_id,
    'episode_start_time': episode_start_time
  }

  # A patient seen for the first time always records this episode (delta 1);
  # otherwise compare against the start time of the episode recorded so far.
  delta = get_hours_between_datetimes(
    last_episode_by_dossier[patient_mrn]['episode_start_time'],
    episode_start_time
  ) if patient_mrn in last_episode_by_dossier else 1

  if delta > 0:
    last_episode_by_dossier[patient_mrn] = episode

  episode_ids.append(episode_id)
  episodes_by_id[episode_id] = episode

# Fetch the distinct rows of dw_test.orcl_cichum_bendeces_live for the listed
# patient MRNs with dhredeces after 2020-01-01.
df = sql_query("SELECT DISTINCT * FROM dw_test.orcl_cichum_bendeces_live WHERE " + \
  "dossier in ('" + "', '".join(patient_mrns) + "') " + \
  "AND dhredeces > '2020-01-01'")

# Only the row's dossier (as a string) is read in the visible part of this
# loop; the rest of the loop body appears to be truncated here.
for index, row in df.iterrows():
  patient_mrn = str(row.dossier)
  
# Register each patient's MRN (first column) and store the third column of
# the patient row in pcr_sample_times under that MRN.
for row in patient_data_rows:
    patient_mrn = str(row[0])
    patient_mrns.append(patient_mrn)
    pcr_sample_times[patient_mrn] = row[2]

# Fetch rows of dw_test.orcl_hev_bipap_live that started after 2020-01-01
# for the registered patients.
df = sql_query(
    "SELECT * from dw_test.orcl_hev_bipap_live WHERE "
    "start_dtm > '2020-01-01' AND dossier in ("
    + ", ".join(patient_mrns)
    + ")"
)

intervention_data_rows = []

for index, row in df.iterrows():
    patient_mrn = str(int(row.dossier))
    intervention_start_time, intervention_end_time = \
        str(row.start_dtm), str(row.end_dtm)

    delta_hours = get_hours_between_datetimes(
        pcr_sample_times[patient_mrn], intervention_start_time)

    # Keep the row unless it started more than 48 hours before the PCR
    # sample (sign convention per get_hours_between_datetimes).
    if not delta_hours < -48:
        intervention_data_rows.append([
            patient_mrn,
            'mechanical_ventilation',
            intervention_start_time,
            intervention_end_time,
        ])

print('Total rows: %d' % len(intervention_data_rows))

# Write the collected rows to intervention_data.csv in CSV_DIRECTORY.
write_csv(
    TABLE_COLUMNS['intervention_data'],
    intervention_data_rows,
    os.path.join(CSV_DIRECTORY, 'intervention_data.csv'),
)
Example #8
0
# Fetch the stays recorded in dw_test.orcl_cichum_sejurg_live for the listed
# patients with dhreadm after 2020-01-01.
df = sql_query(
  "SELECT dossier, noadm, dhreadm, dhredep, diagdesc FROM "
  "dw_test.orcl_cichum_sejurg_live WHERE "
  "dossier in ('" + "', '".join(patient_mrns) + "') "
  "AND dhreadm > '2020-01-01'"
)

# One row per stay, all typed via the 'ER' unit code. A stay is kept only
# when it has an admission time, the patient's first pcr_sample_times
# element is non-empty, and the hour difference from admission to that
# element is positive.
episode_data_rows = [
  [
    map_string_lower(row.dossier),
    map_string_lower(int(row.noadm)),
    map_episode_unit_type('ER', None),
    map_time(row.dhreadm),
    map_time(row.dhredep),
    map_string_lower(row.diagdesc),
    get_hours_between_datetimes(
      row.dhreadm, row.dhredep, default_now=True),
  ]
  for _, row in df.iterrows()
  if (
    row.dhreadm != ''
    and pcr_sample_times[str(row.dossier)][0] != ''
    and get_hours_between_datetimes(
      row.dhreadm, pcr_sample_times[str(row.dossier)][0]
    ) > 0
  )
]

# One single-element list (the lower-cased dossier) per query row.
episode_ids = [[map_string_lower(row.dossier)] for _, row in df.iterrows()]

# Get inpatient location data from ADT
# 
Example #9
0
def correlate_labs(conn):
    """Scatter-plot D-dimer values against the venous-minus-arterial pCO2 gap.

    Runs three queries through sql_fetch_all on ``conn`` (arterial pCO2,
    venous pCO2 and D-dimer, each limited to resulted labs of COVID-positive
    patients). Arterial and venous pCO2 samples of the same patient are
    paired when they lie within 2 hours of each other and the
    venous-minus-arterial difference is non-negative. Each D-dimer result is
    then matched with every pair of the same patient where either pCO2
    sample lies within 24 hours of it.

    Prints the number of matched points and the number of distinct patients,
    then fits a degree-1 polynomial and shows the scatter plot with the
    fitted line. When nothing matches, only the counts are printed.

    Args:
        conn: Connection handle passed to sql_fetch_all.

    Returns:
        None.
    """

    def _lab_query(lab_name, lab_sample_site=None):
        # Build the shared "resulted labs of positive patients" query for one
        # lab name, optionally restricted to one sample site.
        site_clause = ""
        if lab_sample_site is not None:
            site_clause = (
                " lab_data.lab_sample_site = '" + lab_sample_site + "' AND "
            )
        return (
            "SELECT patient_data.patient_site_uid, "
            " lab_sample_time, lab_result_value from lab_data "
            " INNER JOIN patient_data ON "
            " lab_data.patient_site_uid = patient_data.patient_site_uid WHERE "
            " lab_data.lab_name = '" + lab_name + "' AND "
            + site_clause +
            " lab_data.lab_result_status = 'resulted' AND "
            " patient_data.patient_covid_status = 'positive'"
        )

    # Each fetched row is (patient_site_uid, lab_sample_time, lab_result_value).
    arterial_pco2s = sql_fetch_all(conn, _lab_query('pco2', 'arterial_blood'))
    venous_pco2s = sql_fetch_all(conn, _lab_query('pco2', 'venous_blood'))
    d_dimers = sql_fetch_all(conn, _lab_query('d_dimer'))

    # Pair arterial and venous samples of the same patient. The difference is
    # stored with the pair so the matching loop below uses the right value.
    pairs = []
    for arterial_pco2 in arterial_pco2s:
        id_art, time_art, value_art = arterial_pco2
        for venous_pco2 in venous_pco2s:
            id_ven, time_ven, value_ven = venous_pco2
            if id_art != id_ven:
                continue
            av_pco2 = value_ven - value_art
            if av_pco2 < 0:
                continue
            # Absolute gap: a signed "< 2" check would also accept venous
            # samples taken arbitrarily long before the arterial one.
            hours_apart = get_hours_between_datetimes(time_art, time_ven)
            if np.abs(hours_apart) < 2:
                pairs.append((arterial_pco2, venous_pco2, av_pco2))

    x = []
    y = []
    ids = []

    for id_dd, time_dd, value_dd in d_dimers:
        for arterial_pco2, venous_pco2, av_pco2 in pairs:
            id_art, time_art, _ = arterial_pco2
            id_ven, time_ven, _ = venous_pco2
            if not (id_dd == id_art == id_ven):
                continue
            hours_to_art = get_hours_between_datetimes(time_dd, time_art)
            hours_to_ven = get_hours_between_datetimes(time_dd, time_ven)
            if np.abs(hours_to_art) < 24 or np.abs(hours_to_ven) < 24:
                x.append(value_dd)
                # Previously this appended whatever difference the pairing
                # loop computed last, not the one belonging to this pair.
                y.append(av_pco2)
                ids.append(id_dd)

    print(len(x))
    print(len(np.unique(ids)))

    if not x:
        # np.polyfit rejects empty input; there is nothing to fit or plot.
        return

    x = np.asarray(x)
    y = np.asarray(y)

    z = np.polyfit(x, y, 1)
    p = np.poly1d(z)

    plt.scatter(x, y)
    plt.plot(x, p(x), 'r--')
    plt.show()
Example #10
0
    # Fragment of a loop body (the loop header is not visible here): maps one
    # observation row and appends it to observation_data_rows.
    patient_mrn = str(row.dossier)
    observation_name = row.servacro
    observation_value = row.rsltvalue
    observation_time = row.startdtm
    observation_unit = row.unitcd

    # Rows whose name/unit cannot be mapped are skipped silently.
    # NOTE(review): the bare `except:` also swallows KeyboardInterrupt and
    # SystemExit; consider narrowing it to the error map_observation_name
    # actually raises.
    try:
        mapped_observation_name = map_observation_name(observation_name,
                                                       observation_unit)
    except:
        continue

    mapped_observation_time = map_time(observation_time)
    mapped_observation_value = map_float_value(observation_value)

    # Hours between the patient's PCR sample time and the observation
    # (sign convention defined by get_hours_between_datetimes -- confirm).
    delta_hours = get_hours_between_datetimes(
        pcr_sample_times[str(patient_mrn)], str(observation_time))
    # NOTE(review): the bare `9` below is an expression statement with no
    # effect; it looks like a stray keystroke and can likely be deleted.
    9
    # Skip observations more than 48 hours before the PCR sample.
    if delta_hours < -48: continue

    observation_data_rows.append([
        patient_mrn, mapped_observation_name, mapped_observation_time,
        mapped_observation_value
    ])

print('Done fetching observations from Oacis')

# Fetch the FIO2, arterial O2 saturation and temperature rows of
# dw_v01.oacis_lb that have a result time, were collected after 2020-01-01
# and belong to the listed patients.
df = sql_query(
    "SELECT * FROM dw_v01.oacis_lb WHERE "
    "resultdtm IS NOT NULL AND longdesc in ('FIO2', 'Sat O2 Art', 'Température') AND "
    "specimencollectiondtm > '2020-01-01' AND dossier in ("
    + ", ".join(patient_mrns)
    + ")"
)