def compare_labs_by(conn, col, val_pos, val_neg, lab_name, min_value=0,
                    max_value=999999, query_tail=''):
    """Plot the distribution of one lab test for two patient groups.

    Two queries are run against ``lab_data`` joined to ``patient_data``:
    one for patients whose ``patient_data.<col>`` equals ``val_pos`` and one
    for those where it equals ``val_neg``. Only rows with
    ``lab_result_status = 'resulted'`` are kept. Each result value is
    converted to ``float`` and both lists are handed to ``plot_compare_kde``.

    Args:
        conn: database connection forwarded to ``sql_fetch_all``.
        col: name of the ``patient_data`` column that defines the groups.
        val_pos: column value selecting the first group.
        val_neg: column value selecting the second group.
        lab_name: value matched against ``lab_data.lab_name``.
        min_value: forwarded unchanged to ``plot_compare_kde``.
        max_value: forwarded unchanged to ``plot_compare_kde``.
        query_tail: extra SQL text appended verbatim to both queries.

    Note:
        Every argument that ends up in the query is concatenated into the
        SQL text without escaping, so only trusted values may be passed.
    """
    # Shared prefix; the group value and optional tail are appended per query.
    base_query = (
        "SELECT lab_result_value from lab_data "
        " INNER JOIN patient_data ON "
        " lab_data.patient_site_uid = patient_data.patient_site_uid WHERE "
        " lab_data.lab_name = '" + lab_name + "' AND "
        " lab_data.lab_result_status = 'resulted' AND "
        " patient_data." + col + " = "
    )

    def fetch_group(group_value):
        # One query per group; the first selected column is the lab value.
        rows = sql_fetch_all(
            conn, base_query + " '" + group_value + "' " + query_tail)
        return [float(row[0]) for row in rows]

    values_pos = fetch_group(val_pos)
    values_neg = fetch_group(val_neg)

    plot_compare_kde(lab_name, col, val_pos, val_neg,
                     values_pos, values_neg, min_value, max_value)
def compare_obs_by(conn, col, val_pos, val_neg, obs_name, min_value=0,
                   max_value=999999, query_tail=''):
    """Plot the distribution of one observation for two patient groups.

    Reads ``observation_value`` from ``observation_data`` joined to
    ``patient_data``, restricted to rows named ``obs_name`` whose value is
    not NULL, once for ``patient_data.<col> = val_pos`` and once for
    ``val_neg``. Values are converted to ``float`` and passed, together with
    the labels and range arguments, to ``plot_compare_kde``.

    Args:
        conn: database connection forwarded to ``sql_fetch_all``.
        col: name of the ``patient_data`` column that defines the groups.
        val_pos: column value selecting the first group.
        val_neg: column value selecting the second group.
        obs_name: value matched against ``observation_data.observation_name``.
        min_value: forwarded unchanged to ``plot_compare_kde``.
        max_value: forwarded unchanged to ``plot_compare_kde``.
        query_tail: extra SQL text appended verbatim to both queries.

    Note:
        Arguments are concatenated into the SQL text without escaping, so
        only trusted values may be passed.
    """
    query_prefix = (
        "SELECT observation_value from observation_data "
        " INNER JOIN patient_data ON "
        " observation_data.patient_site_uid = patient_data.patient_site_uid WHERE "
        " observation_data.observation_name = '" + obs_name + "' AND "
        " observation_data.observation_value NOT NULL AND "
        " patient_data." + col + " = "
    )

    # Fetch the first group, then the second, in the same order as the
    # arguments are given.
    group_values = []
    for selector in (val_pos, val_neg):
        full_query = query_prefix + " '" + selector + "' " + query_tail
        fetched = sql_fetch_all(conn, full_query)
        group_values.append([float(record[0]) for record in fetched])
    values_pos, values_neg = group_values

    plot_compare_kde(obs_name, col, val_pos, val_neg,
                     values_pos, values_neg, min_value, max_value)
def _odds_ratio_with_ci(a, b, c, d):
    """Return ``(odds_ratio, lower_ci, upper_ci)`` for a 2x2 table.

    ``a``/``b`` are the exposed dead/alive counts and ``c``/``d`` the
    non-exposed dead/alive counts. 0.5 is added to every cell so that empty
    cells do not cause a division by zero. The returned odds ratio is rounded
    to two decimals; the 95% interval bounds are computed from the unrounded
    ratio and returned unrounded.
    """
    a, b, c, d = (0.5 + float(count) for count in (a, b, c, d))
    odds_ratio = (a / c) / (b / d)
    # Take the log of the *unrounded* ratio: a small ratio rounds to 0.0,
    # whose logarithm is undefined.
    log_or = log(odds_ratio)
    se_log_or = sqrt(1 / a + 1 / b + 1 / c + 1 / d)
    lower_ci = exp(log_or - 1.96 * se_log_or)
    upper_ci = exp(log_or + 1.96 * se_log_or)
    return round(odds_ratio, 2), lower_ci, upper_ci


def compare_drugs_by_death(conn, drug_names):
    """Compute the odds ratio of death for exposure to any of ``drug_names``.

    Only patients with ``patient_covid_status = 'positive'`` are considered.
    A patient is "exposed" when ``drug_data`` holds at least one row for them
    whose ``drug_name`` is in ``drug_names``; a patient is "dead" when
    ``patient_vital_status = 'dead'``.

    Args:
        conn: database connection forwarded to ``sql_fetch_all``.
        drug_names: iterable of drug name strings.

    Returns:
        ``[odds_ratio, lower_ci, upper_ci, n_exposed]`` where ``odds_ratio``
        is rounded to two decimals and the bounds are a 95% interval, or
        ``[-1, None, None, None]`` when no patient was exposed.
    """
    # Single quotes are doubled so a name containing "'" cannot break out of
    # its SQL string literal.
    quoted_names = "', '".join(
        name.replace("'", "''") for name in drug_names)

    query = "SELECT patient_site_uid from patient_data WHERE " + \
        "patient_covid_status = 'positive'"
    all_patients = {rec[0] for rec in sql_fetch_all(conn, query)}

    query = "SELECT patient_site_uid from patient_data WHERE " + \
        "patient_covid_status = 'positive' AND patient_vital_status = 'dead'"
    dead_patients = {rec[0] for rec in sql_fetch_all(conn, query)}

    query = "SELECT drug_data.patient_site_uid from drug_data INNER JOIN " + \
        " patient_data ON patient_data.patient_site_uid = " + \
        " drug_data.patient_site_uid WHERE " + \
        " patient_data.patient_covid_status = 'positive' AND " + \
        " drug_name IN ('" + quoted_names + "')"
    exposed_patients = {rec[0] for rec in sql_fetch_all(conn, query)}

    # Sentinel result kept for callers that test ``result[0] == -1``.
    if not exposed_patients:
        return [-1, None, None, None]

    alive_patients = all_patients - dead_patients
    nonexposed_patients = all_patients - exposed_patients

    # Cells of the 2x2 exposure/outcome table.
    a = len(exposed_patients & dead_patients)
    b = len(exposed_patients & alive_patients)
    c = len(nonexposed_patients & dead_patients)
    d = len(nonexposed_patients & alive_patients)

    odds_ratio, lower_ci, upper_ci = _odds_ratio_with_ci(a, b, c, d)
    return [odds_ratio, lower_ci, upper_ci, len(exposed_patients)]
def compute_drug_odds_ratios(conn):
    """Print the odds ratio of death for every drug found in ``drug_data``.

    Each distinct ``drug_name`` is passed on its own to
    ``compare_drugs_by_death``. A drug is left out when its name contains a
    single quote, when the result carries the ``-1`` sentinel in its first
    slot, or when its last slot (the number of exposed patients) is below 5.
    The remaining results are printed in ascending order of odds ratio.

    Args:
        conn: database connection forwarded to ``sql_fetch_all`` and
            ``compare_drugs_by_death``.
    """
    drug_rows = sql_fetch_all(conn, "SELECT drug_name from drug_data")
    distinct_drugs = set(row[0] for row in drug_rows)

    results = []
    for drug_name in distinct_drugs:
        # Names containing a quote are not sent to the per-drug query.
        if "'" in drug_name:
            continue
        stats = compare_drugs_by_death(conn, [drug_name])
        # Short-circuit keeps the sentinel row away from the numeric check.
        if stats[0] == -1 or stats[-1] < 5:
            continue
        results.append((drug_name, stats))

    for drug_name, stats in sorted(results, key=lambda item: item[1][0]):
        line = '%s - OR = %.2f (%.2f-%.2f), N exposed = %d' % (
            drug_name, stats[0], stats[1], stats[2], stats[3])
        print(line)
def summarize_variable(table, variable):
    """Print summary statistics of one column for COVID-positive patients.

    Selects ``variable`` from ``table``, restricted to patients whose
    ``patient_covid_status`` is ``'positive'``. For any table other than
    ``patient_data`` the restriction is applied through a join on
    ``patient_site_uid``. NULL values are dropped, then the mean, the
    standard deviation and the proportion of values above 70 are printed.

    The database connection is read from the module-level name ``conn``.

    Args:
        table: name of the table to read from.
        variable: column (or SQL expression) to summarize.

    Note:
        ``table`` and ``variable`` are interpolated into the SQL text
        unescaped, so only trusted values may be passed.
    """
    query = "SELECT %s FROM %s " % (variable, table)
    if table == 'patient_data':
        query += "WHERE patient_covid_status = 'positive'"
    else:
        # Join on the table actually being queried rather than a fixed
        # table name, so any table keyed by patient_site_uid works.
        query += (" INNER JOIN patient_data ON " +
                  " " + table +
                  ".patient_site_uid = patient_data.patient_site_uid WHERE "
                  " patient_data.patient_covid_status = 'positive'")

    values = np.asarray(
        [x[0] for x in sql_fetch_all(conn, query) if x[0] is not None])

    print("Summary for variable %s" % variable)
    if values.size == 0:
        # Nothing to summarize; the proportion below would divide by zero.
        print("No non-null values found")
        return

    print("Mean: %.2f" % np.mean(values))
    print("Std: %.2f" % np.std(values))
    pct_above70 = np.count_nonzero(values > 70) / len(values)
    print("Proportion above 70: %.2f" % pct_above70)
db_file_name = os.path.join(SQLITE_DIRECTORY, 'covidb_version-1.0.0.db') conn = sqlite3.connect(db_file_name) query_icu = "SELECT episode_data.patient_site_uid from episode_data INNER JOIN " + \ " patient_data ON episode_data.patient_site_uid = patient_data.patient_site_uid WHERE " + \ " (episode_data.episode_unit_type = 'intensive_care_unit' OR " + \ " episode_data.episode_unit_type = 'high_dependency_unit') " + \ inclusion_flag query_deaths = "SELECT diagnosis_data.patient_site_uid from diagnosis_data INNER JOIN " + \ " patient_data ON diagnosis_data.patient_site_uid = patient_data.patient_site_uid WHERE " + \ " diagnosis_data.diagnosis_type = 'death' " + \ inclusion_flag icu_pt_ids = set([str(x[0]) for x in sql_fetch_all(conn, query_icu)]) death_pt_ids = set([str(x[0]) for x in sql_fetch_all(conn, query_deaths)]) query = "SELECT episode_data.patient_site_uid, episode_start_time from episode_data INNER JOIN " + \ " patient_data ON episode_data.patient_site_uid = patient_data.patient_site_uid WHERE " + \ " (episode_data.episode_unit_type = 'inpatient_ward' OR " + \ " episode_data.episode_unit_type = 'emergency_room' OR " + \ " episode_data.episode_unit_type = 'intensive_care_unit') " + \ inclusion_flag res = sql_fetch_all(conn, query) eligible_patients = set() eligible_episodes = {} for patient_id, episode_start_time in res:
# Open the SQLite database used by the queries below.
db_file_name = os.path.join(SQLITE_DIRECTORY, 'covidb_version-1.0.0.db')
conn = sqlite3.connect(db_file_name)


def map_age(age):
    """Return ``age`` as a float, or 60 when it is ``None``."""
    # NOTE(review): 60 looks like a default substituted for missing ages;
    # confirm the intended value.
    return 60 if age is None else float(age)


def map_sex(sex):
    """Return 1 when ``sex`` is ``'male'`` and 0 for anything else."""
    return 1 if sex == 'male' else 0


# Age and sex of every COVID-positive patient, keyed by patient id string.
query_info = (
    "SELECT patient_site_uid, patient_age, patient_sex from patient_data WHERE "
    " patient_data.patient_covid_status = 'positive'"
)

pt_infos = {
    str(row[0]): [map_age(row[1]), map_sex(row[2])]
    for row in sql_fetch_all(conn, query_info)
}

# COVID-positive patients with an intensive-care or high-dependency episode.
query_icu = (
    "SELECT episode_data.patient_site_uid from episode_data INNER JOIN "
    " patient_data ON episode_data.patient_site_uid = patient_data.patient_site_uid WHERE "
    " (episode_data.episode_unit_type = 'intensive_care_unit' OR "
    " episode_data.episode_unit_type = 'high_dependency_unit') AND "
    " patient_data.patient_covid_status = 'positive'"
)

# COVID-positive patients with a diagnosis row of type 'death'.
query_deaths = (
    "SELECT diagnosis_data.patient_site_uid from diagnosis_data INNER JOIN "
    " patient_data ON diagnosis_data.patient_site_uid = patient_data.patient_site_uid WHERE "
    " diagnosis_data.diagnosis_type = 'death' AND "
    " patient_data.patient_covid_status = 'positive'"
)

icu_pt_ids = {str(row[0]) for row in sql_fetch_all(conn, query_icu)}
death_pt_ids = {str(row[0]) for row in sql_fetch_all(conn, query_deaths)}
res = sql_fetch_all(conn, query + " '" + val_neg + "' " + query_tail) values_neg = [float(value[0]) for value in res] plot_compare_kde(obs_name, col, val_pos, val_neg, values_pos, \ values_neg, min_value, max_value) fig, axs = plt.subplots(5, 5) fig.set_figheight(15) fig.set_figwidth(15) db_file_name = os.path.join(SQLITE_DIRECTORY, 'covidb_version-1.0.0.db') conn = sqlite3.connect(db_file_name) res = sql_fetch_all(conn, "SELECT * FROM drug_data WHERE drug_name LIKE '%parin%' AND drug_roa='sc'") plt.sca(axs[0, 0]) compare_labs_by_covid(conn, 'lymphocyte_count', min_value=0.1, max_value=10) plt.sca(axs[0, 1]) compare_labs_by_covid(conn, 'c_reactive_protein', min_value=50, max_value=350) plt.sca(axs[0, 2]) compare_labs_by_covid(conn, 'd_dimer', min_value=100, max_value=6000) plt.sca(axs[0, 3]) compare_labs_by_covid(conn, 'mean_platelet_volume', min_value=8, max_value=12) plt.sca(axs[0, 4]) compare_labs_by_covid(conn, 'phosphate', min_value=0.5, max_value=2.5)
def correlate_labs(conn):
    """Scatter-plot D-dimer against the venous-minus-arterial pCO2 difference.

    For COVID-positive patients, three sets of resulted lab rows are loaded:
    arterial pCO2, venous pCO2 and D-dimer. Arterial and venous samples of
    the same patient are paired when the venous value is not lower than the
    arterial one and ``get_hours_between_datetimes(arterial, venous)`` is
    below 2. Each D-dimer result is then matched to every pair of the same
    patient where either sample of the pair lies within 24 hours of the
    D-dimer sample.

    The number of matched points and the number of distinct patients are
    printed, then a scatter plot with a first-degree least-squares fit is
    shown. Nothing is plotted when there are no matched points.

    Args:
        conn: database connection forwarded to ``sql_fetch_all``.
    """
    query_art_pco2 = (
        "SELECT patient_data.patient_site_uid, "
        " lab_sample_time, lab_result_value from lab_data "
        " INNER JOIN patient_data ON "
        " lab_data.patient_site_uid = patient_data.patient_site_uid WHERE "
        " lab_data.lab_name = 'pco2' AND "
        " lab_data.lab_sample_site = 'arterial_blood' AND "
        " lab_data.lab_result_status = 'resulted' AND "
        " patient_data.patient_covid_status = 'positive'"
    )
    query_ven_pco2 = (
        "SELECT patient_data.patient_site_uid, "
        " lab_sample_time, lab_result_value from lab_data "
        " INNER JOIN patient_data ON "
        " lab_data.patient_site_uid = patient_data.patient_site_uid WHERE "
        " lab_data.lab_name = 'pco2' AND "
        " lab_data.lab_sample_site = 'venous_blood' AND "
        " lab_data.lab_result_status = 'resulted' AND "
        " patient_data.patient_covid_status = 'positive'"
    )
    query_d_dimer = (
        "SELECT patient_data.patient_site_uid, "
        " lab_sample_time, lab_result_value from lab_data "
        " INNER JOIN patient_data ON "
        " lab_data.patient_site_uid = patient_data.patient_site_uid WHERE "
        " lab_data.lab_name = 'd_dimer' AND "
        " lab_data.lab_result_status = 'resulted' AND "
        " patient_data.patient_covid_status = 'positive'"
    )

    arterial_pco2s = sql_fetch_all(conn, query_art_pco2)
    venous_pco2s = sql_fetch_all(conn, query_ven_pco2)
    d_dimers = sql_fetch_all(conn, query_d_dimer)

    # Group venous samples by patient so each arterial sample is compared
    # only against samples of the same patient.
    venous_by_patient = {}
    for venous_pco2 in venous_pco2s:
        venous_by_patient.setdefault(venous_pco2[0], []).append(venous_pco2)

    # Each pair keeps its own pCO2 difference so the value plotted later
    # belongs to that pair.
    pairs_by_patient = {}
    for patient_id, arterial_time, arterial_value in arterial_pco2s:
        for _, venous_time, venous_value in venous_by_patient.get(
                patient_id, ()):
            av_pco2 = venous_value - arterial_value
            if av_pco2 < 0:
                continue
            # NOTE(review): this gap is compared without abs(), unlike the
            # 24-hour checks below; confirm whether the helper returns a
            # signed value.
            dt12 = get_hours_between_datetimes(arterial_time, venous_time)
            if dt12 < 2:
                pairs_by_patient.setdefault(patient_id, []).append(
                    (arterial_time, venous_time, av_pco2))

    x = []
    y = []
    ids = []
    for patient_id, d_dimer_time, d_dimer_value in d_dimers:
        for arterial_time, venous_time, av_pco2 in pairs_by_patient.get(
                patient_id, ()):
            td12 = get_hours_between_datetimes(d_dimer_time, arterial_time)
            td13 = get_hours_between_datetimes(d_dimer_time, venous_time)
            if np.abs(td12) < 24 or np.abs(td13) < 24:
                x.append(d_dimer_value)
                y.append(av_pco2)
                ids.append(patient_id)

    print(len(x))
    print(len(np.unique(ids)))

    # A line cannot be fitted to an empty sample.
    if not x:
        return

    x = np.asarray(x)
    y = np.asarray(y)
    z = np.polyfit(x, y, 1)
    p = np.poly1d(z)

    plt.scatter(x, y)
    plt.plot(x, p(x), 'r--')
    plt.show()