def protocol():
    """Build a join workflow over two input relations and return its roots.

    Each input is created, passed through a projection that keeps both of
    its columns, and the two projections are joined on ``a == c``. The join
    output is handed to ``cc.collect`` with target ``1``.

    Returns:
        A set holding the two relations produced by ``cc.create``.
    """
    # Left-hand input: columns "a" and "b".
    # NOTE(review): the third argument to cc.create is presumably the set of
    # party ids that hold the relation -- confirm against the cc API.
    lhs_schema = [
        defCol("a", "INTEGER", [1]),
        defCol("b", "INTEGER", [1]),
    ]
    lhs_root = cc.create("left", lhs_schema, {1})
    lhs_view = cc.project(lhs_root, "zzz_left_dummy", ["a", "b"])

    # Right-hand input: columns "c" and "d"; "c" carries two defCol
    # trailing arguments ([1] and [2]).
    rhs_schema = [
        defCol("c", "INTEGER", [1], [2]),
        defCol("d", "INTEGER", [2]),
    ]
    rhs_root = cc.create("right", rhs_schema, {2})
    rhs_view = cc.project(rhs_root, "right_dummy", ["c", "d"])

    # Join the two projections and collect the result.
    joined_rel = cc.join(lhs_view, rhs_view, "actual", ["a"], ["c"])
    cc.collect(joined_rel, 1)

    # The created inputs act as the DAG roots.
    return {lhs_root, rhs_root}
def protocol():
    """Build a three-input close/concat/join/aggregate workflow.

    Three inputs ("govreg", "company0", "company1") are created and each is
    closed to the set ``{1, 2, 3}``. The two company relations are
    concatenated, joined against a projection of the closed "govreg"
    relation on ``a == c``, aggregated over "d" grouped by "b" with the
    ``"+"`` aggregator into "total", and opened with target ``1``.

    Returns:
        A set holding the three relations produced by ``sal.create``.
    """
    # Input relations.
    gov_schema = [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])]
    gov_in = sal.create("govreg", gov_schema, {1})

    firm0_schema = [defCol("c", "INTEGER", [2]), defCol("d", "INTEGER", [2])]
    firm0_in = sal.create("company0", firm0_schema, {2})

    firm1_schema = [defCol("c", "INTEGER", [3]), defCol("d", "INTEGER", [3])]
    firm1_in = sal.create("company1", firm1_schema, {3})

    # Close the left side and project it.
    gov_closed = sal._close(gov_in, "cl1", {1, 2, 3})
    left_proj = sal.project(gov_closed, "projA", ["a", "b"])

    # Close both company inputs and stack them.
    firm0_closed = sal._close(firm0_in, "cl2", {1, 2, 3})
    firm1_closed = sal._close(firm1_in, "cl3", {1, 2, 3})
    firms = sal.concat([firm0_closed, firm1_closed], "right_rel")

    # NOTE(review): this projection is created but nothing below consumes
    # it -- the join reads "right_rel" directly. Kept because creating the
    # node may still register it in the DAG; confirm whether the join was
    # meant to use it.
    sal.project(firms, "projB", ["c", "d"])

    matched = sal.join(left_proj, firms, "joined", ["a"], ["c"])
    totals = sal.aggregate(matched, "agg", ["b"], "d", "+", "total")
    sal._open(totals, "opened", 1)

    return {gov_in, firm0_in, firm1_in}
def protocol():
    """Build a concat/join/aggregate workflow over three input relations.

    "company0" and "company1" are concatenated into "companies", joined
    with "govreg" on ``a == c``, and aggregated over "d" grouped by "b"
    with the ``"sum"`` aggregator into "total". The aggregate is handed to
    ``cc.collect`` with target ``1``.

    Returns:
        A set holding the three relations produced by ``cc.create``.
    """
    regulator_schema = [
        defCol("a", "INTEGER", [1]),
        defCol("b", "INTEGER", [1]),
    ]
    regulator = cc.create("govreg", regulator_schema, {1})

    firm0_schema = [
        defCol("c", "INTEGER", [1], [2]),
        defCol("d", "INTEGER", [2]),
    ]
    firm0 = cc.create("company0", firm0_schema, {2})

    firm1_schema = [
        defCol("c", "INTEGER", [1], [3]),
        defCol("d", "INTEGER", [3]),
    ]
    firm1 = cc.create("company1", firm1_schema, {3})

    all_firms = cc.concat([firm0, firm1], "companies")
    matched = cc.join(regulator, all_firms, "joined", ["a"], ["c"])
    totals = cc.aggregate(matched, "actual", ["b"], "d", "sum", "total")
    cc.collect(totals, 1)

    return {regulator, firm0, firm1}
def protocol():
    """Build a join workflow over two projected inputs and return its roots.

    "in1" (columns "a", "b") and "in2" (columns "c", "d") are each
    projected onto both of their columns; the projections are joined on
    ``a == c`` and the result is collected for party 1.

    Returns:
        A set holding the two relations produced by ``sal.create``.
    """
    # First input and its projection.
    first_schema = [
        defCol("a", "INTEGER", [1]),
        defCol("b", "INTEGER", [1]),
    ]
    first_in = sal.create("in1", first_schema, {1})
    first_proj = sal.project(first_in, "proj1", ["a", "b"])

    # Second input and its projection.
    second_schema = [
        defCol("c", "INTEGER", [1], [2]),
        defCol("d", "INTEGER", [2]),
    ]
    second_in = sal.create("in2", second_schema, {2})
    second_proj = sal.project(second_in, "proj2", ["c", "d"])

    joined_rel = sal.join(first_proj, second_proj, "res", ["a"], ["c"])

    # Open the result to party 1.
    sal.collect(joined_rel, 1)

    # The created inputs are the roots of the DAG.
    return {first_in, second_in}
def protocol(all_pids: list):
    """Build the medication/diagnosis distinct-count workflow.

    Four inputs are created (medication and diagnosis, for owners 1 and 2).
    The per-owner relations are concatenated, projected onto the
    (pid, code, date) columns, joined on the pid columns, filtered three
    times, and the distinct count over the medication pid column is handed
    to ``cc.collect`` with target ``1``.

    Args:
        all_pids: Passed to ``defCol`` as the trailing argument of the
            public PID column of every input relation.

    Returns:
        A set holding the four relations produced by ``cc.create``.
    """
    # Column names are stringified positions; diagnosis columns are
    # numbered after the medication columns.
    num_med_cols = 8
    num_diag_cols = 13
    pid_col_meds, med_col_meds, date_col_meds = "0", "4", "7"
    pid_col_diags, diag_col_diags, date_col_diags = "8", "16", "18"

    def _make_input(rel_name, owner, first_idx, width, pid_col):
        """Create one input relation whose first column is the public PID."""
        schema = [
            defCol(str(first_idx + k), "INTEGER", owner) for k in range(width)
        ]
        # public PID column
        schema[0] = defCol(pid_col, "INTEGER", all_pids)
        return cc.create(rel_name, schema, {owner})

    meds_left = _make_input(
        "left_medication", 1, 0, num_med_cols, pid_col_meds)
    diags_left = _make_input(
        "left_diagnosis", 1, num_med_cols, num_diag_cols, pid_col_diags)
    meds_right = _make_input(
        "right_medication", 2, 0, num_med_cols, pid_col_meds)
    diags_right = _make_input(
        "right_diagnosis", 2, num_med_cols, num_diag_cols, pid_col_diags)

    meds_all = cc.concat([meds_left, meds_right], "medication")
    diags_all = cc.concat([diags_left, diags_right], "diagnosis")

    # Only keep relevant columns.
    meds_slim = cc.project(
        meds_all, "medication_proj",
        [pid_col_meds, med_col_meds, date_col_meds])
    diags_slim = cc.project(
        diags_all, "diagnosis_proj",
        [pid_col_diags, diag_col_diags, date_col_diags])

    matched = cc.join(
        meds_slim, diags_slim, "joined", [pid_col_meds], [pid_col_diags])

    # Diagnosis date column "<" medication date column, then both code
    # columns "==" 1.
    dated = cc.cc_filter(
        matched, "cases", date_col_diags, "<", other_col_name=date_col_meds)
    on_aspirin = cc.cc_filter(dated, "aspirin", med_col_meds, "==", scalar=1)
    with_heart = cc.cc_filter(
        on_aspirin, "heart_patients", diag_col_diags, "==", scalar=1)

    counted = cc.distinct_count(with_heart, "actual", pid_col_meds)
    cc.collect(counted, 1)

    return {meds_left, diags_left, meds_right, diags_right}
def protocol_mpc(all_pids: list):
    """Build the manually sliced variant of the medication/diagnosis workflow.

    Four inputs are created (medication and diagnosis, for owners 1 and 2).
    Per owner, the PID columns of both relations are unioned and passed
    through ``cc._pub_intersect``; the outcome is persisted and used with
    ``cc.filter_by`` to restrict the owner's projected relations. The
    restricted relations are concatenated, joined on the pid columns,
    filtered three times, and the distinct count over the medication pid
    column is handed to ``cc.collect`` with target ``1``.

    Args:
        all_pids: Passed to ``defCol`` as the trailing argument of the
            public PID column of every input relation.

    Returns:
        A set holding the four relations produced by ``cc.create``.
    """
    # Column names are stringified positions; diagnosis columns are
    # numbered after the medication columns.
    num_med_cols = 8
    num_diag_cols = 13
    pid_col_meds, med_col_meds, date_col_meds = "0", "4", "7"
    pid_col_diags, diag_col_diags, date_col_diags = "8", "16", "18"

    def _make_input(rel_name, owner, first_idx, width, pid_col):
        """Create one input relation whose first column is the public PID."""
        schema = [
            defCol(str(first_idx + k), "INTEGER", owner) for k in range(width)
        ]
        # public PID column
        schema[0] = defCol(pid_col, "INTEGER", all_pids)
        return cc.create(rel_name, schema, {owner})

    meds_left = _make_input(
        "left_medication", 1, 0, num_med_cols, pid_col_meds)
    diags_left = _make_input(
        "left_diagnosis", 1, num_med_cols, num_diag_cols, pid_col_diags)
    meds_right = _make_input(
        "right_medication", 2, 0, num_med_cols, pid_col_meds)
    diags_right = _make_input(
        "right_diagnosis", 2, num_med_cols, num_diag_cols, pid_col_diags)

    # --- Manual slicing ---------------------------------------------------
    pids_left = cc.union(
        meds_left, diags_left, "left_pids", pid_col_meds, pid_col_diags)
    pids_right = cc.union(
        meds_right, diags_right, "right_pids", pid_col_meds, pid_col_diags)

    shared_left = cc._pub_intersect(
        pids_left, "a_left_shared_pids", pid_col_meds)
    cc._persist(shared_left, "a_left_shared_pids")
    # The right-hand side runs the intersection with is_server=False.
    shared_right = cc._pub_intersect(
        pids_right, "a_right_shared_pids", pid_col_meds, is_server=False)
    cc._persist(shared_right, "a_right_shared_pids")

    meds_left_slim = cc.project(
        meds_left, "left_medication_proj",
        [pid_col_meds, med_col_meds, date_col_meds])
    meds_left_shared = cc.filter_by(
        meds_left_slim, "left_medication_shared", pid_col_meds, shared_left)

    diags_left_slim = cc.project(
        diags_left, "left_diagnosis_proj",
        [pid_col_diags, diag_col_diags, date_col_diags])
    diags_left_shared = cc.filter_by(
        diags_left_slim, "left_diagnosis_shared", pid_col_diags, shared_left)

    meds_right_slim = cc.project(
        meds_right, "right_medication_proj",
        [pid_col_meds, med_col_meds, date_col_meds])
    meds_right_shared = cc.filter_by(
        meds_right_slim, "right_medication_shared", pid_col_meds,
        shared_right)

    diags_right_slim = cc.project(
        diags_right, "right_diagnosis_proj",
        [pid_col_diags, diag_col_diags, date_col_diags])
    diags_right_shared = cc.filter_by(
        diags_right_slim, "right_diagnosis_shared", pid_col_diags,
        shared_right)
    # --- Slicing done -----------------------------------------------------

    meds_shared = cc.concat(
        [meds_left_shared, meds_right_shared], "medication_shared")
    diags_shared = cc.concat(
        [diags_left_shared, diags_right_shared], "diagnosis_shared")

    matched = cc.join(
        meds_shared, diags_shared, "joined", [pid_col_meds], [pid_col_diags])

    # Diagnosis date column "<" medication date column, then both code
    # columns "==" 1.
    dated = cc.cc_filter(
        matched, "cases", date_col_diags, "<", other_col_name=date_col_meds)
    on_aspirin = cc.cc_filter(dated, "aspirin", med_col_meds, "==", scalar=1)
    with_heart = cc.cc_filter(
        on_aspirin, "heart_patients", diag_col_diags, "==", scalar=1)

    counted = cc.distinct_count(with_heart, "actual_mpc", pid_col_meds)
    cc.collect(counted, 1)

    return {meds_left, diags_left, meds_right, diags_right}
def protocol():
    """Build a hand-annotated index-join workflow over two inputs.

    Every node is tagged by hand with ``isMPC`` and, where the original
    workflow sets it, ``out_rel.storedWith``. Both inputs are projected,
    closed to ``{1, 2, 3}`` and persisted. Their key columns are projected,
    opened with target ``1``, indexed and joined; the resulting index pairs
    are closed again and drive ``sal._index_join`` over the persisted
    relations. The join output is opened with target ``1``.

    Returns:
        A set holding the two relations produced by ``sal.create``.
    """
    # --- Inputs and their projections --------------------------------------
    schema_a = [
        defCol("a", "INTEGER", [1]),
        defCol("b", "INTEGER", [1]),
    ]
    root_a = sal.create("in1", schema_a, {1})
    root_a.isMPC = False

    view_a = sal.project(root_a, "proja", ["a", "b"])
    view_a.isMPC = False
    view_a.out_rel.storedWith = {1}

    schema_b = [
        defCol("c", "INTEGER", [1], [2]),
        defCol("d", "INTEGER", [2]),
    ]
    root_b = sal.create("in2", schema_b, {2})
    root_b.isMPC = False

    view_b = sal.project(root_b, "projb", ["c", "d"])
    view_b.isMPC = False
    view_b.out_rel.storedWith = {2}

    # --- Close both sides and persist them ---------------------------------
    closed_a = sal._close(view_a, "clA", {1, 2, 3})
    closed_a.isMPC = True

    closed_b = sal._close(view_b, "clB", {1, 2, 3})
    closed_b.isMPC = True

    # The persisted nodes are not tagged with isMPC in this workflow.
    stored_a = sal._persist(closed_a, "persistedA")
    stored_b = sal._persist(closed_b, "persistedB")

    # --- Project out the join keys and open them ---------------------------
    keys_a_closed = sal.project(closed_a, "keysaclosed", ["a"])
    keys_a_closed.out_rel.storedWith = {1, 2, 3}
    keys_a_closed.isMPC = True

    keys_b_closed = sal.project(closed_b, "keysbclosed", ["c"])
    keys_b_closed.isMPC = True
    keys_b_closed.out_rel.storedWith = {1, 2, 3}

    keys_a_open = sal._open(keys_a_closed, "keysa", 1)
    keys_a_open.isMPC = True

    keys_b_open = sal._open(keys_b_closed, "keysb", 1)
    keys_b_open.isMPC = True

    # --- Index the opened keys and join the indexes ------------------------
    idx_a = sal.index(keys_a_open, "indexedA", "indexA")
    idx_a.isMPC = False
    idx_a.out_rel.storedWith = {1}

    idx_b = sal.index(keys_b_open, "indexedB", "indexB")
    idx_b.isMPC = False
    idx_b.out_rel.storedWith = {1}

    idx_joined = sal.join(idx_a, idx_b, "joinedindeces", ["a"], ["c"])
    idx_joined.isMPC = False
    idx_joined.out_rel.storedWith = {1}

    idx_pairs = sal.project(
        idx_joined, "indecesonly", ["indexA", "indexB"])
    idx_pairs.isMPC = False
    idx_pairs.out_rel.storedWith = {1}

    # --- Close the index pairs and run the index join ----------------------
    idx_pairs_closed = sal._close(idx_pairs, "indecesclosed", {1, 2, 3})
    idx_pairs_closed.isMPC = True

    result = sal._index_join(
        stored_a, stored_b, "joined", ["a"], ["c"], idx_pairs_closed)
    result.isMPC = True

    sal._open(result, "opened", 1)

    # The created inputs are the DAG roots.
    return {root_a, root_b}
def hybrid_join():
    """Build a hand-annotated shuffled index-join workflow over three inputs.

    Every node is tagged by hand with ``isMPC`` and, where the original
    workflow sets it, ``out_rel.storedWith``. "govreg" forms the left side;
    "company0" and "company1" are closed and concatenated into the right
    side. Each side is shuffled and persisted, its key column is projected,
    opened with target ``1`` and indexed; the indexes are joined, reduced
    to index pairs, closed, and passed to ``sal._index_join`` together with
    the persisted relations.

    Returns:
        A ``(joined, roots)`` tuple: the node returned by
        ``sal._index_join`` and the set of three relations produced by
        ``sal.create``.
    """
    # --- Inputs and their projections --------------------------------------
    schema_a = [
        defCol("a", "INTEGER", [1]),
        defCol("b", "INTEGER", [1]),
    ]
    root_a = sal.create("govreg", schema_a, {1})
    root_a.isMPC = False
    view_a = sal.project(root_a, "proja", ["a", "b"])
    view_a.isMPC = False
    view_a.out_rel.storedWith = {1}

    schema_b = [
        defCol("c", "INTEGER", [1], [2]),
        defCol("d", "INTEGER", [2]),
    ]
    root_b = sal.create("company0", schema_b, {2})
    root_b.isMPC = False
    view_b = sal.project(root_b, "projb", ["c", "d"])
    view_b.isMPC = False
    view_b.out_rel.storedWith = {2}

    schema_c = [
        defCol("c", "INTEGER", [1], [3]),
        defCol("d", "INTEGER", [3]),
    ]
    root_c = sal.create("company1", schema_c, {3})
    root_c.isMPC = False
    view_c = sal.project(root_c, "projc", ["c", "d"])
    view_c.isMPC = False
    view_c.out_rel.storedWith = {3}

    # --- Close all three; stack the two company relations ------------------
    closed_a = sal._close(view_a, "clA", {1, 2, 3})
    closed_a.isMPC = True
    closed_b = sal._close(view_b, "clB", {1, 2, 3})
    closed_b.isMPC = True
    closed_c = sal._close(view_c, "clC", {1, 2, 3})
    closed_c.isMPC = True

    closed_rhs = sal.concat([closed_b, closed_c], "clD")
    closed_rhs.isMPC = True
    closed_rhs.out_rel.storedWith = {1, 2, 3}

    # --- Shuffle and persist each side -------------------------------------
    mixed_a = sal.shuffle(closed_a, "shuffledA")
    mixed_a.isMPC = True
    stored_a = sal._persist(mixed_a, "persistedA")
    stored_a.isMPC = True

    mixed_b = sal.shuffle(closed_rhs, "shuffledB")
    mixed_b.isMPC = True
    stored_b = sal._persist(mixed_b, "persistedB")
    stored_b.isMPC = True

    # --- Project out the join keys and open them ---------------------------
    keys_a_closed = sal.project(mixed_a, "keysaclosed", ["a"])
    keys_a_closed.out_rel.storedWith = {1, 2, 3}
    keys_a_closed.isMPC = True

    keys_b_closed = sal.project(mixed_b, "keysbclosed", ["c"])
    keys_b_closed.isMPC = True
    keys_b_closed.out_rel.storedWith = {1, 2, 3}

    keys_a_open = sal._open(keys_a_closed, "keysa", 1)
    keys_a_open.isMPC = True
    keys_b_open = sal._open(keys_b_closed, "keysb", 1)
    keys_b_open.isMPC = True

    # --- Index the opened keys and join the indexes ------------------------
    idx_a = sal.index(keys_a_open, "indexedA", "indexA")
    idx_a.isMPC = False
    idx_a.out_rel.storedWith = {1}

    idx_b = sal.index(keys_b_open, "indexedB", "indexB")
    idx_b.isMPC = False
    idx_b.out_rel.storedWith = {1}

    idx_joined = sal.join(idx_a, idx_b, "joinedindeces", ["a"], ["c"])
    idx_joined.isMPC = False
    idx_joined.out_rel.storedWith = {1}

    idx_pairs = sal.project(idx_joined, "indecesonly", ["indexA", "indexB"])
    idx_pairs.isMPC = False
    idx_pairs.out_rel.storedWith = {1}

    # --- Close the index pairs and run the index join ----------------------
    idx_pairs_closed = sal._close(idx_pairs, "indecesclosed", {1, 2, 3})
    idx_pairs_closed.isMPC = True

    result = sal._index_join(
        stored_a, stored_b, "joined", ["a"], ["c"], idx_pairs_closed)
    result.isMPC = True

    return result, {root_a, root_b, root_c}
def protocol():
    """Build the two-section brand price/quantity workflow.

    Two inputs are created and joined on "upc". Section 1 derives
    per-(store, brand, week) quantities and weighted prices; section 2
    re-aggregates them per (zip3, retailer, brand, week). The final
    projection is handed to ``sal.collect`` with target ``1``.

    Returns:
        A set holding the two relations produced by ``sal.create``.
    """
    # Grouping / join keys reused below. Each call receives its own list
    # copy so no list object is shared between operators.
    store_keys = ('store_code_uc', 'brand_code_bu', 'week_end')
    geo_keys = ('store_zip3', 'retailer_code', 'brand_code_bu', 'week_end')

    sales_schema = [
        defCol("store_code_uc", "STRING", [1]),
        defCol('upc', 'STRING', [1]),
        defCol('week_end', 'STRING', [1]),
        defCol('q', 'INTEGER', [1]),
        defCol('avg_unit_p', 'FLOAT', [1]),
        defCol('retailer_code', 'STRING', [1]),
        defCol('store_zip3', 'STRING', [1]),
    ]
    brand_schema = [
        defCol('brand_code_bu', 'STRING', [2]),
        defCol('brand_descr_bu', 'STRING', [2]),
        defCol('upc', 'STRING', [2]),
        defCol('size1_amount', 'FLOAT', [2]),
    ]

    # Concatenated DFs from local_workflow.py.
    sales_in = sal.create('concatenated_DFs', sales_schema, {1})

    # The output of preprocess_products.py.
    # NOTE(review): the columns above are declared with [2] but this
    # relation is created with {1} -- confirm that is intended.
    brand_in = sal.create('temp_UPC_brandBU_crspnd', brand_schema, {1})

    # SECTION 1
    # Compute the quantity weighted average price per unit & total
    # quantity sold at the store-brand level.
    with_upc = sal.join(sales_in, brand_in, 'w_upc', ['upc'], ['upc'])
    with_oz_price = sal.divide(
        with_upc, 'w_avg_OZ_p', 'avg_OZ_p', ['avg_unit_p', 'size1_amount'])
    with_oz_qty = sal.multiply(
        with_oz_price, 'w_q_upd', 'q', ['q', 'size1_amount'])

    brand_qty = sal.aggregate(
        with_oz_qty, 'brand_OZq_sum', list(store_keys),
        'q', '+', 'brand_OZq')
    with_brand_qty = sal.join(
        with_oz_qty, brand_qty, 'total_brnd_OZq',
        list(store_keys), list(store_keys))

    weighted = sal.multiply(
        with_brand_qty, 'w_wghtd_OZ_brnd_p', 'wghtd_OZ_brnd_p',
        ['q', 'avg_OZ_p'])
    weighted_norm = sal.divide(
        weighted, 'w_wghtd_OZ_brnd_p_final', 'wghtd_OZ_brnd_p',
        ['wghtd_OZ_brnd_p', 'brand_OZq'])

    brand_price = sal.aggregate(
        weighted_norm, 'brnd_p_sum', list(store_keys),
        'wghtd_OZ_brnd_p', '+', 'avg_OZ_brnd_p')
    store_level = sal.join(
        brand_price, weighted_norm, 'result',
        list(store_keys), list(store_keys))

    section_one = sal.project(store_level, 'section_one_result', [
        "avg_OZ_brnd_p", "week_end", "store_code_uc", "brand_code_bu",
        "brand_descr_bu", "brand_OZq", 'retailer_code', 'store_zip3', 'q',
    ])

    # SECTION 2
    # Compute the average price per OZ & total OZs sold for each brand at
    # the retailer-$geo_unit level, by compiling the store level data that
    # comprises each retailer-$geo_unit. Compute the total quantity sold by
    # each retailer-$geo_unit.
    geo_qty = sal.aggregate(
        section_one, 'temp_sum', list(geo_keys),
        'brand_OZq', '+', 'brand_OZq')
    with_geo_qty = sal.join(
        section_one, geo_qty, 'result_brnd_sum',
        list(geo_keys), list(geo_keys))

    geo_weighted = sal.multiply(
        with_geo_qty, 'wghtd_p_mult', 'wghtd_p',
        ['brand_OZq', 'avg_OZ_brnd_p'])
    geo_weighted_norm = sal.divide(
        geo_weighted, 'wghtd_p_final', 'wghtd_p', ['wghtd_p', 'q'])

    geo_price = sal.aggregate(
        geo_weighted_norm, 'wghtd_p_sum', list(geo_keys),
        'wghtd_p', '+', 'p')
    geo_level = sal.join(
        geo_weighted_norm, geo_price, 'sec_4_result',
        list(geo_keys), list(geo_keys))

    # TODO: filter out sec_4_result rows where 'store_zip3' cell is empty
    final_rel = sal.project(geo_level, 'final', [
        'store_zip3', 'retailer_code', 'week_end', 'brand_code_bu',
        'brand_descr_bu', 'q', 'p',
    ])
    sal.collect(final_rel, 1)

    return {sales_in, brand_in}
def hybrid_join():
    """Build a hand-annotated shuffled index-join workflow over three inputs.

    Every node is tagged by hand with ``is_mpc`` and, where the original
    workflow sets it, ``out_rel.stored_with``. "govreg" forms the left
    side; "company0" and "company1" are closed and concatenated into the
    right side. Each side is shuffled and persisted, its key column is
    projected, opened with target ``1`` and indexed; the indexes are
    joined, reduced to index pairs, closed, and passed to
    ``sal._index_join`` together with the persisted relations.

    Returns:
        A ``(joined, roots)`` tuple: the node returned by
        ``sal._index_join`` and the set of three relations produced by
        ``sal.create``.
    """
    # --- Inputs and their projections --------------------------------------
    gov_schema = [
        defCol("a", "INTEGER", [1]),
        defCol("b", "INTEGER", [1]),
    ]
    gov_root = sal.create("govreg", gov_schema, {1})
    gov_root.is_mpc = False
    gov_view = sal.project(gov_root, "proj_a", ["a", "b"])
    gov_view.is_mpc = False
    gov_view.out_rel.stored_with = {1}

    firm0_schema = [
        defCol("c", "INTEGER", [1], [2]),
        defCol("d", "INTEGER", [2]),
    ]
    firm0_root = sal.create("company0", firm0_schema, {2})
    firm0_root.is_mpc = False
    firm0_view = sal.project(firm0_root, "proj_b", ["c", "d"])
    firm0_view.is_mpc = False
    firm0_view.out_rel.stored_with = {2}

    firm1_schema = [
        defCol("c", "INTEGER", [1], [3]),
        defCol("d", "INTEGER", [3]),
    ]
    firm1_root = sal.create("company1", firm1_schema, {3})
    firm1_root.is_mpc = False
    firm1_view = sal.project(firm1_root, "proj_c", ["c", "d"])
    firm1_view.is_mpc = False
    firm1_view.out_rel.stored_with = {3}

    # --- Close all three; stack the two company relations ------------------
    gov_closed = sal._close(gov_view, "cl_a", {1, 2, 3})
    gov_closed.is_mpc = True
    firm0_closed = sal._close(firm0_view, "cl_b", {1, 2, 3})
    firm0_closed.is_mpc = True
    firm1_closed = sal._close(firm1_view, "cl_c", {1, 2, 3})
    firm1_closed.is_mpc = True

    firms_closed = sal.concat([firm0_closed, firm1_closed], "clD")
    firms_closed.is_mpc = True
    firms_closed.out_rel.stored_with = {1, 2, 3}

    # --- Shuffle and persist each side -------------------------------------
    gov_mixed = sal.shuffle(gov_closed, "shuffled_a")
    gov_mixed.is_mpc = True
    gov_stored = sal._persist(gov_mixed, "persisted_a")
    gov_stored.is_mpc = True

    firms_mixed = sal.shuffle(firms_closed, "shuffled_b")
    firms_mixed.is_mpc = True
    firms_stored = sal._persist(firms_mixed, "persisted_b")
    firms_stored.is_mpc = True

    # --- Project out the join keys and open them ---------------------------
    gov_keys_closed = sal.project(gov_mixed, "keys_a_closed", ["a"])
    gov_keys_closed.out_rel.stored_with = {1, 2, 3}
    gov_keys_closed.is_mpc = True

    firm_keys_closed = sal.project(firms_mixed, "keys_b_closed", ["c"])
    firm_keys_closed.is_mpc = True
    firm_keys_closed.out_rel.stored_with = {1, 2, 3}

    gov_keys = sal._open(gov_keys_closed, "keys_a", 1)
    gov_keys.is_mpc = True
    firm_keys = sal._open(firm_keys_closed, "keys_b", 1)
    firm_keys.is_mpc = True

    # --- Index the opened keys and join the indexes ------------------------
    gov_idx = sal.index(gov_keys, "indexed_a", "index_a")
    gov_idx.is_mpc = False
    gov_idx.out_rel.stored_with = {1}

    firm_idx = sal.index(firm_keys, "indexed_b", "index_b")
    firm_idx.is_mpc = False
    firm_idx.out_rel.stored_with = {1}

    idx_joined = sal.join(gov_idx, firm_idx, "joined_indeces", ["a"], ["c"])
    idx_joined.is_mpc = False
    idx_joined.out_rel.stored_with = {1}

    idx_pairs = sal.project(
        idx_joined, "indeces_only", ["index_a", "index_b"])
    idx_pairs.is_mpc = False
    idx_pairs.out_rel.stored_with = {1}

    # --- Close the index pairs and run the index join ----------------------
    idx_pairs_closed = sal._close(idx_pairs, "indeces_closed", {1, 2, 3})
    idx_pairs_closed.is_mpc = True

    result = sal._index_join(
        gov_stored, firms_stored, "joined", ["a"], ["c"], idx_pairs_closed)
    result.is_mpc = True

    return result, {gov_root, firm0_root, firm1_root}
def protocol_local(suffix: str, pid: int):
    """Build the single-owner part of the medication/diagnosis workflow.

    Creates ``<suffix>_medication``, ``<suffix>_diagnosis`` and
    ``a_<suffix>_shared_pids`` for ``pid``. Both data relations are
    projected onto (pid, code, date) and passed through ``cc.filter_by``
    against the shared-PID relation with ``use_not_in=True``. The results
    are joined on the pid columns, filtered three times, and a distinct
    count named ``actual_<suffix>`` is built over the medication pid column.

    Args:
        suffix: Text used to build the relation names.
        pid: Passed to ``defCol`` for every column and, as ``{pid}``, to
            every ``cc.create`` call.

    Returns:
        A set holding the medication and diagnosis input relations.
    """
    # Column names are stringified positions; diagnosis columns are
    # numbered after the medication columns.
    num_med_cols = 8
    num_diag_cols = 13
    pid_col_meds, med_col_meds, date_col_meds = "0", "4", "7"
    pid_col_diags, diag_col_diags, date_col_diags = "8", "16", "18"

    med_schema = [
        defCol(str(idx), "INTEGER", pid) for idx in range(num_med_cols)
    ]
    medication = cc.create(suffix + "_medication", med_schema, {pid})

    diag_schema = [
        defCol(str(idx), "INTEGER", pid)
        for idx in range(num_med_cols, num_med_cols + num_diag_cols)
    ]
    diagnosis = cc.create(suffix + "_diagnosis", diag_schema, {pid})

    # NOTE(review): this relation is created like an input but is not part
    # of the returned set -- confirm that is intended.
    shared = cc.create(
        "a_{}_shared_pids".format(suffix),
        [defCol(pid_col_meds, "INTEGER", pid)],
        {pid},
    )

    # Only keep relevant columns, then filter against the shared-PID
    # relation with use_not_in=True.
    meds_slim = cc.project(
        medication, "medication_proj",
        [pid_col_meds, med_col_meds, date_col_meds])
    meds_mine = cc.filter_by(
        meds_slim, "medication_mine", pid_col_meds, shared, use_not_in=True)

    diags_slim = cc.project(
        diagnosis, "diagnosis_proj",
        [pid_col_diags, diag_col_diags, date_col_diags])
    diags_mine = cc.filter_by(
        diags_slim, "diagnosis_mine", pid_col_diags, shared, use_not_in=True)

    matched = cc.join(
        meds_mine, diags_mine, "joined", [pid_col_meds], [pid_col_diags])

    # Diagnosis date column "<" medication date column, then both code
    # columns "==" 1.
    dated = cc.cc_filter(
        matched, "cases", date_col_diags, "<", other_col_name=date_col_meds)
    on_aspirin = cc.cc_filter(dated, "aspirin", med_col_meds, "==", scalar=1)
    with_heart = cc.cc_filter(
        on_aspirin, "heart_patients", diag_col_diags, "==", scalar=1)

    cc.distinct_count(with_heart, "actual_" + suffix, pid_col_meds)

    return {medication, diagnosis}