def protocol_local(suffix: str, pid: int):
    """Build the locally evaluated part of the medication/diagnosis query.

    Declares three input relations for party ``pid``:

    * ``<suffix>_medication`` with INTEGER columns "0".."7",
    * ``<suffix>_diagnosis`` with INTEGER columns "8".."20",
    * ``a_<suffix>_shared_pids`` with the single INTEGER column "0",

    and chains these operators on them:

    1. project each table onto its (PID, code, date) columns;
    2. ``filter_by`` each projection against the shared-PID relation with
       ``use_not_in=True``;
    3. join the two results on their PID columns;
    4. keep rows where column "18" < column "7", then column "4" == 1,
       then column "16" == 1;
    5. distinct-count column "0" into ``actual_<suffix>``.

    NOTE(review): ``use_not_in=True`` presumably keeps only the PIDs that are
    absent from the shared-PID relation -- confirm against ``cc.filter_by``.

    :param suffix: string used to build the names of the input relations and
        of the final ``actual_<suffix>`` relation
    :param pid: party id; used as the party set ``{pid}`` of every input
        relation and as the third argument of every ``defCol`` call
    :return: set holding the medication and diagnosis input relations (the
        shared-PID input relation is not part of the returned set)
    """
    # Columns are addressed by their stringified position; the diagnosis
    # columns continue numbering where the medication columns stop.
    med_width, diag_width = 8, 13
    meds_pid, meds_code, meds_date = "0", "4", "7"
    diags_pid, diags_code, diags_date = "8", "16", "18"

    med_schema = [defCol(str(pos), "INTEGER", pid) for pos in range(med_width)]
    medication = cc.create(suffix + "_medication", med_schema, {pid})

    diag_schema = [
        defCol(str(pos), "INTEGER", pid)
        for pos in range(med_width, med_width + diag_width)
    ]
    diagnosis = cc.create(suffix + "_diagnosis", diag_schema, {pid})

    shared = cc.create(
        "a_{}_shared_pids".format(suffix),
        [defCol(meds_pid, "INTEGER", pid)],
        {pid},
    )

    # only keep relevant columns
    meds_slim = cc.project(
        medication, "medication_proj", [meds_pid, meds_code, meds_date]
    )
    meds_local = cc.filter_by(
        meds_slim, "medication_mine", meds_pid, shared, use_not_in=True
    )
    diags_slim = cc.project(
        diagnosis, "diagnosis_proj", [diags_pid, diags_code, diags_date]
    )
    diags_local = cc.filter_by(
        diags_slim, "diagnosis_mine", diags_pid, shared, use_not_in=True
    )

    matched = cc.join(meds_local, diags_local, "joined", [meds_pid], [diags_pid])

    # Three successive row filters; each result feeds the next one.
    dated = cc.cc_filter(
        matched, "cases", diags_date, "<", other_col_name=meds_date
    )
    on_med = cc.cc_filter(dated, "aspirin", meds_code, "==", scalar=1)
    with_diag = cc.cc_filter(
        on_med, "heart_patients", diags_code, "==", scalar=1
    )

    cc.distinct_count(with_diag, "actual_" + suffix, meds_pid)

    return {medication, diagnosis}
def protocol():
    """Build a small concat / filter / sort / distinct-count workflow.

    Two input relations, ``left`` and ``right``, are declared with the same
    schema (INTEGER columns ``column_a`` and ``column_b``) and the party set
    ``{1}``. They are concatenated into ``rel``, rows with ``column_b == 1``
    are kept (``filtered``), the result is sorted by ``column_a``
    (``in_order``), and ``column_a`` is distinct-counted into ``expected``.

    :return: set holding the two input relations
    """

    def _schema():
        # A fresh column list (and a fresh ``[1]`` per column) on every call,
        # so the two inputs never share column objects.
        return [
            defCol(col_name, "INTEGER", [1])
            for col_name in ("column_a", "column_b")
        ]

    left = cc.create("left", _schema(), {1})
    right = cc.create("right", _schema(), {1})

    combined = cc.concat([left, right], "rel")
    matching = cc.cc_filter(combined, "filtered", "column_b", "==", scalar=1)
    ordered = cc.sort_by(matching, "in_order", "column_a")
    cc.distinct_count(ordered, "expected", "column_a")

    return {left, right}
def protocol(all_pids: list):
    """Build the two-party medication/diagnosis query over concatenated inputs.

    Declares a medication relation (INTEGER columns "0".."7") and a diagnosis
    relation (INTEGER columns "8".."20") for party 1 (``left_*``) and for
    party 2 (``right_*``). In every relation the first column -- the PID
    column -- is re-declared with ``all_pids`` as its third ``defCol``
    argument instead of the owning party.

    The workflow then:

    1. concatenates the two medication relations and the two diagnosis
       relations;
    2. projects each onto its (PID, code, date) columns;
    3. joins them on the PID columns;
    4. keeps rows where column "18" < column "7", then column "4" == 1,
       then column "16" == 1;
    5. distinct-counts column "0" into ``actual`` and passes that to
       ``cc.collect`` with ``1`` as second argument.

    NOTE(review): the ``1`` given to ``cc.collect`` is presumably the
    receiving party -- confirm against ``cc.collect``.

    :param all_pids: value used as the third ``defCol`` argument of the PID
        columns (presumably the ids of all parties -- confirm with callers)
    :return: set holding the four input relations
    """
    # Columns are addressed by their stringified position; the diagnosis
    # columns continue numbering where the medication columns stop.
    med_width, diag_width = 8, 13
    meds_pid, meds_code, meds_date = "0", "4", "7"
    diags_pid, diags_code, diags_date = "8", "16", "18"

    def _declare(rel_name, owner, first_pos, width, pid_col):
        # One INTEGER column per position, all declared for the owning party.
        schema = [
            defCol(str(pos), "INTEGER", owner)
            for pos in range(first_pos, first_pos + width)
        ]
        # public PID column
        schema[0] = defCol(pid_col, "INTEGER", all_pids)
        return cc.create(rel_name, schema, {owner})

    left_medication = _declare("left_medication", 1, 0, med_width, meds_pid)
    left_diagnosis = _declare(
        "left_diagnosis", 1, med_width, diag_width, diags_pid
    )
    right_medication = _declare("right_medication", 2, 0, med_width, meds_pid)
    right_diagnosis = _declare(
        "right_diagnosis", 2, med_width, diag_width, diags_pid
    )

    all_meds = cc.concat([left_medication, right_medication], "medication")
    all_diags = cc.concat([left_diagnosis, right_diagnosis], "diagnosis")

    # only keep relevant columns
    meds_slim = cc.project(
        all_meds, "medication_proj", [meds_pid, meds_code, meds_date]
    )
    diags_slim = cc.project(
        all_diags, "diagnosis_proj", [diags_pid, diags_code, diags_date]
    )

    matched = cc.join(meds_slim, diags_slim, "joined", [meds_pid], [diags_pid])

    # Three successive row filters; each result feeds the next one.
    dated = cc.cc_filter(
        matched, "cases", diags_date, "<", other_col_name=meds_date
    )
    on_med = cc.cc_filter(dated, "aspirin", meds_code, "==", scalar=1)
    with_diag = cc.cc_filter(
        on_med, "heart_patients", diags_code, "==", scalar=1
    )

    counted = cc.distinct_count(with_diag, "actual", meds_pid)
    cc.collect(counted, 1)

    return {left_medication, left_diagnosis, right_medication, right_diagnosis}
def protocol_mpc(all_pids: list):
    """Build the medication/diagnosis query restricted to shared PIDs.

    Declares the same four inputs for party 1 (``left_*``) and party 2
    (``right_*``): a medication relation with INTEGER columns "0".."7" and a
    diagnosis relation with INTEGER columns "8".."20", each with its first
    (PID) column re-declared using ``all_pids`` as third ``defCol`` argument.

    Manual slicing, done once per party:

    1. ``cc.union`` of the party's medication and diagnosis relations over
       their PID columns (``left_pids`` / ``right_pids``);
    2. ``cc._pub_intersect`` on that result (the right-hand call passes
       ``is_server=False``), persisted via ``cc._persist`` as
       ``a_left_shared_pids`` / ``a_right_shared_pids``;
    3. projection of each input onto its (PID, code, date) columns followed
       by ``cc.filter_by`` against the party's shared-PID relation.

    The sliced relations of both parties are then concatenated, joined on the
    PID columns, filtered (column "18" < column "7", column "4" == 1,
    column "16" == 1), and column "0" is distinct-counted into
    ``actual_mpc``, which is passed to ``cc.collect`` with ``1`` as second
    argument.

    NOTE(review): ``cc.filter_by`` is called without ``use_not_in`` here, so
    it presumably keeps the PIDs that *are* in the shared relation -- confirm
    against ``cc.filter_by``.

    :param all_pids: value used as the third ``defCol`` argument of the PID
        columns (presumably the ids of all parties -- confirm with callers)
    :return: set holding the four input relations
    """
    # Columns are addressed by their stringified position; the diagnosis
    # columns continue numbering where the medication columns stop.
    med_width, diag_width = 8, 13
    meds_pid, meds_code, meds_date = "0", "4", "7"
    diags_pid, diags_code, diags_date = "8", "16", "18"
    meds_keep = (meds_pid, meds_code, meds_date)
    diags_keep = (diags_pid, diags_code, diags_date)

    def _declare(rel_name, owner, first_pos, width, pid_col):
        # One INTEGER column per position, all declared for the owning party.
        schema = [
            defCol(str(pos), "INTEGER", owner)
            for pos in range(first_pos, first_pos + width)
        ]
        # public PID column
        schema[0] = defCol(pid_col, "INTEGER", all_pids)
        return cc.create(rel_name, schema, {owner})

    def _slice(rel, proj_name, shared_name, keep, key_col, shared_pids):
        # only keep relevant columns, then filter by the shared PIDs;
        # a fresh column list is handed to every project call.
        slim = cc.project(rel, proj_name, list(keep))
        return cc.filter_by(slim, shared_name, key_col, shared_pids)

    left_medication = _declare("left_medication", 1, 0, med_width, meds_pid)
    left_diagnosis = _declare(
        "left_diagnosis", 1, med_width, diag_width, diags_pid
    )
    right_medication = _declare("right_medication", 2, 0, med_width, meds_pid)
    right_diagnosis = _declare(
        "right_diagnosis", 2, med_width, diag_width, diags_pid
    )

    # Manual slicing
    left_keys = cc.union(
        left_medication, left_diagnosis, "left_pids", meds_pid, diags_pid
    )
    right_keys = cc.union(
        right_medication, right_diagnosis, "right_pids", meds_pid, diags_pid
    )

    left_shared_pids = cc._pub_intersect(
        left_keys, "a_left_shared_pids", meds_pid
    )
    cc._persist(left_shared_pids, "a_left_shared_pids")
    right_shared_pids = cc._pub_intersect(
        right_keys, "a_right_shared_pids", meds_pid, is_server=False
    )
    cc._persist(right_shared_pids, "a_right_shared_pids")

    left_medication_shared = _slice(
        left_medication,
        "left_medication_proj",
        "left_medication_shared",
        meds_keep,
        meds_pid,
        left_shared_pids,
    )
    left_diagnosis_shared = _slice(
        left_diagnosis,
        "left_diagnosis_proj",
        "left_diagnosis_shared",
        diags_keep,
        diags_pid,
        left_shared_pids,
    )
    right_medication_shared = _slice(
        right_medication,
        "right_medication_proj",
        "right_medication_shared",
        meds_keep,
        meds_pid,
        right_shared_pids,
    )
    right_diagnosis_shared = _slice(
        right_diagnosis,
        "right_diagnosis_proj",
        "right_diagnosis_shared",
        diags_keep,
        diags_pid,
        right_shared_pids,
    )
    # Slicing done

    medication_shared = cc.concat(
        [left_medication_shared, right_medication_shared], "medication_shared"
    )
    diagnosis_shared = cc.concat(
        [left_diagnosis_shared, right_diagnosis_shared], "diagnosis_shared"
    )

    matched = cc.join(
        medication_shared, diagnosis_shared, "joined", [meds_pid], [diags_pid]
    )

    # Three successive row filters; each result feeds the next one.
    dated = cc.cc_filter(
        matched, "cases", diags_date, "<", other_col_name=meds_date
    )
    on_med = cc.cc_filter(dated, "aspirin", meds_code, "==", scalar=1)
    with_diag = cc.cc_filter(
        on_med, "heart_patients", diags_code, "==", scalar=1
    )

    counted = cc.distinct_count(with_diag, "actual_mpc", meds_pid)
    cc.collect(counted, 1)

    return {left_medication, left_diagnosis, right_medication, right_diagnosis}