def protocol():
    """Describe a concat-then-join workflow over four input relations.

    Creates "left_one"/"left_two" (columns a, b) and "right_one"/"right_two"
    (columns c, d), concatenates each pair into "left" and "right", joins
    them into "actual" using key columns ["a"] and ["c"], and hands the
    join to ``cc.collect`` with target 1.

    Returns:
        set: the four created input nodes.
    """
    # Inputs registered with the set {1}.
    first_left = cc.create(
        "left_one",
        [defCol("a", "INTEGER", 1, 2, 3), defCol("b", "INTEGER", 1)],
        {1},
    )
    first_right = cc.create(
        "right_one",
        [defCol("c", "INTEGER", 1, 2, 3), defCol("d", "INTEGER", 1)],
        {1},
    )

    # Inputs registered with the set {2}.
    second_left = cc.create(
        "left_two",
        [defCol("a", "INTEGER", 1, 2, 3), defCol("b", "INTEGER", 2)],
        {2},
    )
    second_right = cc.create(
        "right_two",
        [defCol("c", "INTEGER", 1, 2, 3), defCol("d", "INTEGER", 2)],
        {2},
    )

    left_side = cc.concat([first_left, second_left], "left")
    right_side = cc.concat([first_right, second_right], "right")
    result = cc.join(left_side, right_side, "actual", ["a"], ["c"])
    cc.collect(result, 1)

    return {first_left, second_left, first_right, second_right}
def protocol():
    """Describe a concat / project / column-vs-column filter workflow.

    Two inputs with columns a, b, c are concatenated into "rel", projected
    onto ["c", "b"], filtered with ``"c" == other_col_name "b"``, and the
    filtered relation is handed to ``cc.collect`` with target 1.

    Returns:
        set: the two created input nodes.
    """
    column_names = ("a", "b", "c")

    # One input per registration set ({1} and {2}), same column names.
    first_input = cc.create(
        "in_1",
        [defCol(name, "INTEGER", [1]) for name in column_names],
        {1},
    )
    second_input = cc.create(
        "in_2",
        [defCol(name, "INTEGER", [2]) for name in column_names],
        {2},
    )

    combined = cc.concat([first_input, second_input], "rel")
    narrowed = cc.project(combined, "projected", ["c", "b"])

    # Filter compares column "c" against column "b" rather than a scalar.
    kept = cc.cc_filter(narrowed, "filtered", "c", "==", other_col_name="b")
    cc.collect(kept, 1)

    return {first_input, second_input}
def setup():
    """Create three four-column inputs, close each, and concatenate them.

    Each input has INTEGER columns a, b, c, d. Every input is passed through
    ``sal._close`` with the set {1, 2, 3} and the closed relations are
    concatenated into "rel".

    Returns:
        tuple: ``(inputs, rel)`` where ``inputs`` is the set of the three
        created input nodes and ``rel`` is the concatenation node.
    """
    column_names = ("a", "b", "c", "d")

    # Build all three schemas first, then create the relations.
    schema_one = [defCol(name, "INTEGER", [1]) for name in column_names]
    schema_two = [defCol(name, "INTEGER", [2]) for name in column_names]
    schema_three = [defCol(name, "INTEGER", [3]) for name in column_names]

    in1 = sal.create("in1", schema_one, {1})
    in2 = sal.create("in2", schema_two, {2})
    in3 = sal.create("in3", schema_three, {3})

    closed_one = sal._close(in1, "cl1", {1, 2, 3})
    closed_two = sal._close(in2, "cl2", {1, 2, 3})
    closed_three = sal._close(in3, "cl3", {1, 2, 3})

    rel = sal.concat([closed_one, closed_two, closed_three], "rel")

    return {in1, in2, in3}, rel
def protocol():
    """Describe a join of "govreg" against two concatenated company inputs.

    "company0" and "company1" (columns c, d) are concatenated into
    "companies", joined with "govreg" (columns a, b) using key columns
    ["a"] and ["c"], then aggregated into "actual": group by ["b"], "sum"
    over "d", output column "total". The aggregate is handed to
    ``cc.collect`` with target 1.

    Returns:
        set: the three created input nodes.
    """
    regulator = cc.create(
        "govreg",
        [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
        {1},
    )

    # Column "c" of each company input carries two lists ([1] plus its own).
    first_company = cc.create(
        "company0",
        [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    second_company = cc.create(
        "company1",
        [defCol("c", "INTEGER", [1], [3]), defCol("d", "INTEGER", [3])],
        {3},
    )

    all_companies = cc.concat([first_company, second_company], "companies")
    matched = cc.join(regulator, all_companies, "joined", ["a"], ["c"])
    totals = cc.aggregate(matched, "actual", ["b"], "d", "sum", "total")
    cc.collect(totals, 1)

    return {regulator, first_company, second_company}
def protocol():
    """Describe a workflow that only concatenates three inputs.

    Three inputs with INTEGER columns a, b are concatenated into "rel",
    which is handed to ``cc.collect`` with target 1.

    Returns:
        set: the three created input nodes.
    """
    column_names = ("a", "b")

    first_input = cc.create(
        "in_1", [defCol(name, "INTEGER", [1]) for name in column_names], {1}
    )
    second_input = cc.create(
        "in_2", [defCol(name, "INTEGER", [2]) for name in column_names], {2}
    )
    third_input = cc.create(
        "in_3", [defCol(name, "INTEGER", [3]) for name in column_names], {3}
    )

    # Merge all inputs into a single relation and collect it.
    merged = cc.concat([first_input, second_input, third_input], "rel")
    cc.collect(merged, 1)

    return {first_input, second_input, third_input}
def protocol():
    """Describe a workflow that only concatenates three inputs (``sal`` API).

    Three inputs with INTEGER columns a, b are concatenated into "rel",
    which is handed to ``sal.collect`` with target 1.

    Returns:
        set: the three created input nodes.
    """
    column_names = ("a", "b")

    first_input = sal.create(
        "in_1", [defCol(name, "INTEGER", [1]) for name in column_names], {1}
    )
    second_input = sal.create(
        "in_2", [defCol(name, "INTEGER", [2]) for name in column_names], {2}
    )
    third_input = sal.create(
        "in_3", [defCol(name, "INTEGER", [3]) for name in column_names], {3}
    )

    # Merge all inputs into a single relation and collect it.
    merged = sal.concat([first_input, second_input, third_input], "rel")
    sal.collect(merged, 1)

    return {first_input, second_input, third_input}
def protocol():
    """Describe a concat / project / aggregate workflow over three inputs.

    Three inputs with INTEGER columns a, b are concatenated into "rel",
    projected onto ["a", "b"] as "proj", and aggregated into "agg": group
    by ["a"], "sum" over "b", output column "total_b". The aggregate is
    handed to ``cc.collect`` with target 1.

    Returns:
        set: the three created input nodes.
    """
    column_names = ("a", "b")

    first_input = cc.create(
        "in_1", [defCol(name, "INTEGER", [1]) for name in column_names], {1}
    )
    second_input = cc.create(
        "in_2", [defCol(name, "INTEGER", [2]) for name in column_names], {2}
    )
    third_input = cc.create(
        "in_3", [defCol(name, "INTEGER", [3]) for name in column_names], {3}
    )

    merged = cc.concat([first_input, second_input, third_input], "rel")
    selected = cc.project(merged, "proj", ["a", "b"])
    summed = cc.aggregate(selected, "agg", ["a"], "b", "sum", "total_b")
    cc.collect(summed, 1)

    return {first_input, second_input, third_input}
def protocol():
    """Describe an aggregate / project / join / concat workflow.

    Column specs in this block are plain ``(type, set)`` tuples instead of
    ``defCol(...)`` results, and columns are addressed by generated-looking
    names such as "inA_0" (presumably ``<relation>_<index>`` — confirm
    against the ``sal`` version this targets).

    "inA" and "inB" are each aggregated with "+" and projected, the two
    projections are joined, the join is concatenated with "inC", and the
    result is handed to ``sal.collect`` with target 3.

    Returns:
        set: the three created input nodes.
    """
    in_a = sal.create("inA", [("INTEGER", {1}), ("INTEGER", {1})], {1})
    in_b = sal.create("inB", [("INTEGER", {2}), ("INTEGER", {2})], {2})
    in_c = sal.create(
        "inC",
        [("INTEGER", {3}), ("INTEGER", {3}), ("INTEGER", {3})],
        {3},
    )

    # Per-input aggregation followed by a projection of the aggregate.
    agg_a = sal.aggregate(in_a, "aggA", "inA_0", "inA_1", "+")
    proj_a = sal.project(agg_a, "projA", ["aggA_0", "aggA_1"])
    agg_b = sal.aggregate(in_b, "aggB", "inB_0", "inB_1", "+")
    proj_b = sal.project(agg_b, "projB", ["aggB_0", "aggB_1"])

    matched = sal.join(proj_a, proj_b, "joined", "projA_0", "projB_0")
    combined = sal.concat([in_c, matched], "comb")
    sal.collect(combined, 3)

    return {in_a, in_b, in_c}
def protocol():
    """Describe a concat / project / aggregate workflow (``sal`` API).

    Three inputs with INTEGER columns a, b are concatenated into "rel",
    projected onto ["a", "b"] as "proj", and aggregated into "agg": group
    by ["a"], "+" over "b", output column "total_b". The aggregate is
    handed to ``sal.collect`` with target 1.

    Returns:
        set: the three created input nodes.
    """
    column_names = ("a", "b")

    first_input = sal.create(
        "in_1", [defCol(name, "INTEGER", [1]) for name in column_names], {1}
    )
    second_input = sal.create(
        "in_2", [defCol(name, "INTEGER", [2]) for name in column_names], {2}
    )
    third_input = sal.create(
        "in_3", [defCol(name, "INTEGER", [3]) for name in column_names], {3}
    )

    merged = sal.concat([first_input, second_input, third_input], "rel")
    selected = sal.project(merged, "proj", ["a", "b"])
    summed = sal.aggregate(selected, "agg", ["a"], "b", "+", "total_b")
    sal.collect(summed, 1)

    return {first_input, second_input, third_input}
def protocol():
    """Describe a workflow with projections before and after an aggregate.

    Two inputs with INTEGER columns a, b are concatenated into "rel" and
    projected twice onto ["a", "b"] ("proj_a", then "proj_b"). The result
    is aggregated into "agg" (group by ["a"], "+" over "b", output column
    "total_b"), projected onto ["a", "total_b"] as "proj_c", and handed to
    ``sal.collect`` with target 1.

    Returns:
        set: the two created input nodes.
    """
    column_names = ("a", "b")

    first_input = sal.create(
        "in_1", [defCol(name, "INTEGER", [1]) for name in column_names], {1}
    )
    second_input = sal.create(
        "in_2", [defCol(name, "INTEGER", [2]) for name in column_names], {2}
    )

    merged = sal.concat([first_input, second_input], "rel")

    # Two back-to-back projections onto the same columns.
    first_projection = sal.project(merged, "proj_a", ["a", "b"])
    second_projection = sal.project(first_projection, "proj_b", ["a", "b"])

    summed = sal.aggregate(second_projection, "agg", ["a"], "b", "+", "total_b")
    final_projection = sal.project(summed, "proj_c", ["a", "total_b"])
    sal.collect(final_projection, 1)

    return {first_input, second_input}
def setup():
    """Create three four-column inputs, close each, and concatenate them.

    Each input has INTEGER columns a, b, c, d. Every input is passed through
    ``sal._close`` with the set {1, 2, 3} and the closed relations are
    concatenated into "rel".

    Returns:
        tuple: ``(inputs, rel)`` where ``inputs`` is the set of the three
        created input nodes and ``rel`` is the concatenation node.
    """
    column_names = ("a", "b", "c", "d")

    # Build all three schemas first, then create the relations.
    schema_one = [defCol(name, "INTEGER", [1]) for name in column_names]
    schema_two = [defCol(name, "INTEGER", [2]) for name in column_names]
    schema_three = [defCol(name, "INTEGER", [3]) for name in column_names]

    in1 = sal.create("in1", schema_one, {1})
    in2 = sal.create("in2", schema_two, {2})
    in3 = sal.create("in3", schema_three, {3})

    everyone = {1, 2, 3}
    closed_one = sal._close(in1, "cl1", set(everyone))
    closed_two = sal._close(in2, "cl2", set(everyone))
    closed_three = sal._close(in3, "cl3", set(everyone))

    rel = sal.concat([closed_one, closed_two, closed_three], "rel")

    return {in1, in2, in3}, rel
def protocol():
    """Concatenate the first two inputs from ``setup()`` and collect them.

    The concatenation node is named "cc" and is handed to ``sal.collect``
    with target 1.

    NOTE(review): this indexes the result of ``setup()`` at [0] and [1] and
    puts both items in a set, so it expects ``setup()`` to return an
    indexable sequence of hashable input nodes — confirm it is paired with
    a matching ``setup()``.

    Returns:
        set: the two input nodes taken from ``setup()``.
    """
    inputs = setup()
    in_1, in_2 = inputs[0], inputs[1]

    # Named ``combined`` rather than ``cc`` so the local cannot be mistaken
    # for (or shadow) a module alias called ``cc``.
    combined = sal.concat([in_1, in_2], "cc")

    # The collect call is made for its effect on the workflow; its return
    # value was never used, so it is no longer bound.
    sal.collect(combined, 1)

    return {in_1, in_2}
def protocol():
    """Concatenate the first two inputs from ``setup()`` and collect them.

    The concatenation node is named "cc" and is handed to ``cc.collect``
    with target 1.

    NOTE(review): this indexes the result of ``setup()`` at [0] and [1] and
    puts both items in a set, so it expects ``setup()`` to return an
    indexable sequence of hashable input nodes — confirm it is paired with
    a matching ``setup()``.

    Returns:
        set: the two input nodes taken from ``setup()``.
    """
    inputs = setup()
    in_1, in_2 = inputs[0], inputs[1]

    rel = cc.concat([in_1, in_2], "cc")

    # The collect call is made for its effect on the workflow; its return
    # value was never used, so it is no longer bound.
    cc.collect(rel, 1)

    return {in_1, in_2}
def protocol():
    """Describe a concat-then-join workflow where every input uses {1}.

    Creates "left_one"/"left_two" (columns a, b) and "right_one"/"right_two"
    (columns c, d), all registered with the set {1}. Each pair is
    concatenated into "left" and "right", and the two are joined into
    "expected" using key columns ["a"] and ["c"]. No collect step is added.

    Returns:
        set: the four created input nodes.
    """
    first_left = cc.create(
        "left_one",
        [defCol("a", "INTEGER", 1), defCol("b", "INTEGER", 1)],
        {1},
    )
    first_right = cc.create(
        "right_one",
        [defCol("c", "INTEGER", 1), defCol("d", "INTEGER", 1)],
        {1},
    )
    second_left = cc.create(
        "left_two",
        [defCol("a", "INTEGER", 1), defCol("b", "INTEGER", 1)],
        {1},
    )
    second_right = cc.create(
        "right_two",
        [defCol("c", "INTEGER", 1), defCol("d", "INTEGER", 1)],
        {1},
    )

    left_side = cc.concat([first_left, second_left], "left")
    right_side = cc.concat([first_right, second_right], "right")

    # The join node is created for its effect on the DAG; nothing reads it.
    cc.join(left_side, right_side, "expected", ["a"], ["c"])

    return {first_left, second_left, first_right, second_right}
def protocol():
    """Concatenate two ``setup()`` inputs, take distinct rows, and collect.

    The concatenation node is named "cc"; ``sal.distinct`` is applied over
    columns ["a", "b", "c"] to produce "dist", which is handed to
    ``sal.collect`` with target 1.

    NOTE(review): this indexes the result of ``setup()`` at [0] and [1] and
    puts both items in a set, so it expects ``setup()`` to return an
    indexable sequence of hashable input nodes — confirm it is paired with
    a matching ``setup()``.

    Returns:
        set: the two input nodes taken from ``setup()``.
    """
    inputs = setup()
    in_1, in_2 = inputs[0], inputs[1]

    # Named ``combined`` rather than ``cc`` so the local cannot be mistaken
    # for (or shadow) a module alias called ``cc``.
    combined = sal.concat([in_1, in_2], "cc")
    dist = sal.distinct(combined, "dist", ["a", "b", "c"])

    # The collect call's return value was never used, so it is not bound.
    sal.collect(dist, 1)

    return {in_1, in_2}
def protocol():
    """Describe the "hhi" workflow over three "green" inputs (``sal`` API).

    Three inputs with INTEGER columns companyID and price are concatenated
    and run through a chain of project / aggregate / divide / multiply /
    join steps that ends in an aggregate named "hhi" (presumably a
    Herfindahl–Hirschman-style market concentration index — confirm).
    That aggregate is handed to ``sal.collect`` with target 1.

    Returns:
        set: the three created input nodes.
    """
    column_names = ("companyID", "price")

    green_one = sal.create(
        "green1", [defCol(name, "INTEGER", [1]) for name in column_names], {1}
    )
    green_two = sal.create(
        "green2", [defCol(name, "INTEGER", [2]) for name in column_names], {2}
    )
    green_three = sal.create(
        "green3", [defCol(name, "INTEGER", [3]) for name in column_names], {3}
    )

    rides = sal.concat([green_one, green_two, green_three], "cab_data")
    chosen = sal.project(rides, "selected_input", ["companyID", "price"])

    # Per-company sum of "price", stored in column "local_rev".
    per_company = sal.aggregate(
        chosen, "local_rev", ["companyID"], "price", "+", "local_rev"
    )
    reduced = sal.divide(
        per_company, "scaled_down", "local_rev", ["local_rev", 1000]
    )

    # companyID is multiplied by 0 here, before the second aggregation
    # groups on it.
    blanked = sal.multiply(
        reduced, "first_val_blank", "companyID", ["companyID", 0]
    )
    scaled_local = sal.multiply(
        blanked, "local_rev_scaled", "local_rev", ["local_rev", 100]
    )

    # Note: the total is aggregated from ``blanked``, not ``scaled_local``.
    overall = sal.aggregate(
        blanked, "total_rev", ["companyID"], "local_rev", "+", "global_rev"
    )
    local_and_total = sal.join(
        scaled_local, overall, "local_total_rev", ["companyID"], ["companyID"]
    )
    share = sal.divide(
        local_and_total, "market_share", "local_rev", ["local_rev", "global_rev"]
    )
    share_squared = sal.multiply(
        share, "market_share_squared", "local_rev", ["local_rev", "local_rev", 1]
    )
    index = sal.aggregate(
        share_squared, "hhi", ["companyID"], "local_rev", "+", "hhi"
    )
    sal.collect(index, 1)

    return {green_one, green_two, green_three}
def protocol():
    """Concatenate two ``setup()`` inputs, take distinct rows, and collect.

    The concatenation node is named "cc"; ``cc.distinct`` is applied over
    columns ["a", "b", "c"] to produce "dist", which is handed to
    ``cc.collect`` with target 1.

    NOTE(review): this indexes the result of ``setup()`` at [0] and [1] and
    puts both items in a set, so it expects ``setup()`` to return an
    indexable sequence of hashable input nodes — confirm it is paired with
    a matching ``setup()``.

    Returns:
        set: the two input nodes taken from ``setup()``.
    """
    inputs = setup()
    in_1, in_2 = inputs[0], inputs[1]

    rel = cc.concat([in_1, in_2], "cc")
    dist = cc.distinct(rel, "dist", ["a", "b", "c"])

    # The collect call's return value was never used, so it is not bound.
    cc.collect(dist, 1)

    return {in_1, in_2}
def protocol():
    """Describe the "hhi" workflow over three "yellow" inputs (``cc`` API).

    Three inputs with INTEGER columns companyID and price are concatenated
    and run through a chain of project / aggregate / divide / multiply /
    join steps that ends in an aggregate named "hhi" (presumably a
    Herfindahl–Hirschman-style market concentration index — confirm).
    A final projection "hhi_only" is added and handed to ``cc.collect``
    with target 1.

    Returns:
        set: the three created input nodes.
    """
    column_names = ("companyID", "price")

    yellow_one = cc.create(
        "yellow1", [defCol(name, "INTEGER", [1]) for name in column_names], {1}
    )
    yellow_two = cc.create(
        "yellow2", [defCol(name, "INTEGER", [2]) for name in column_names], {2}
    )
    yellow_three = cc.create(
        "yellow3", [defCol(name, "INTEGER", [3]) for name in column_names], {3}
    )

    rides = cc.concat([yellow_one, yellow_two, yellow_three], "cab_data")
    chosen = cc.project(rides, "selected_input", ["companyID", "price"])

    # Per-company sum of "price", stored in column "local_rev".
    per_company = cc.aggregate(
        chosen, "local_rev", ["companyID"], "price", "sum", "local_rev"
    )
    reduced = cc.divide(
        per_company, "scaled_down", "local_rev", ["local_rev", 1000]
    )

    # companyID is multiplied by 0 here, before the second aggregation
    # groups on it.
    blanked = cc.multiply(
        reduced, "first_val_blank", "companyID", ["companyID", 0]
    )
    scaled_local = cc.multiply(
        blanked, "local_rev_scaled", "local_rev", ["local_rev", 100]
    )

    # Note: the total is aggregated from ``blanked``, not ``scaled_local``.
    overall = cc.aggregate(
        blanked, "total_rev", ["companyID"], "local_rev", "sum", "global_rev"
    )
    local_and_total = cc.join(
        scaled_local, overall, "local_total_rev", ["companyID"], ["companyID"]
    )
    share = cc.divide(
        local_and_total, "market_share", "local_rev", ["local_rev", "global_rev"]
    )
    share_squared = cc.multiply(
        share, "market_share_squared", "local_rev", ["local_rev", "local_rev", 1]
    )
    index = cc.aggregate(
        share_squared, "hhi", ["companyID"], "local_rev", "sum", "hhi"
    )

    # Dummy projection to force a non-MPC sub-DAG after the aggregate.
    index_only = cc.project(index, "hhi_only", ["companyID", "hhi"])
    cc.collect(index_only, 1)

    return {yellow_one, yellow_two, yellow_three}
def protocol():
    """Describe the "hhi" workflow with explicit close/open steps.

    Three inputs with INTEGER columns companyID and price are each passed
    through ``sal._close`` with the set {1, 2, 3}, concatenated, and run
    through a chain of project / aggregate / divide / multiply / join
    steps that ends in an aggregate named "hhi" (presumably a
    Herfindahl–Hirschman-style market concentration index — confirm).
    That aggregate is passed to ``sal._open`` as "hhi_opened" with target 1.

    Returns:
        set: the three created input nodes.
    """
    cols_in_1 = [
        defCol("companyID", "INTEGER", [1]),
        defCol("price", "INTEGER", [1]),
    ]
    in1 = sal.create("in1", cols_in_1, {1})
    cols_in_2 = [
        defCol("companyID", "INTEGER", [2]),
        defCol("price", "INTEGER", [2]),
    ]
    in2 = sal.create("in2", cols_in_2, {2})
    cols_in_3 = [
        defCol("companyID", "INTEGER", [3]),
        defCol("price", "INTEGER", [3]),
    ]
    in3 = sal.create("in3", cols_in_3, {3})

    cl1 = sal._close(in1, "cl1", {1, 2, 3})
    cl2 = sal._close(in2, "cl2", {1, 2, 3})
    cl3 = sal._close(in3, "cl3", {1, 2, 3})

    cab_data = sal.concat([cl1, cl2, cl3], "cab_data")
    selected_input = sal.project(
        cab_data, "selected_input", ["companyID", "price"]
    )

    # Per-company sum of "price", stored in column "local_rev".
    local_rev = sal.aggregate(
        selected_input, "local_rev", ["companyID"], "price", "+", "local_rev"
    )
    scaled_down = sal.divide(
        local_rev, "scaled_down", "local_rev", ["local_rev", 1000]
    )

    # companyID is multiplied by 0 here, before the second aggregation
    # groups on it.
    first_val_blank = sal.multiply(
        scaled_down, "first_val_blank", "companyID", ["companyID", 0]
    )
    local_rev_scaled = sal.multiply(
        first_val_blank, "local_rev_scaled", "local_rev", ["local_rev", 100]
    )

    # Note: the total is aggregated from ``first_val_blank``, not from
    # ``local_rev_scaled``.
    total_rev = sal.aggregate(
        first_val_blank, "total_rev", ["companyID"], "local_rev", "+",
        "global_rev"
    )
    local_total_rev = sal.join(
        local_rev_scaled, total_rev, "local_total_rev",
        ["companyID"], ["companyID"]
    )
    market_share = sal.divide(
        local_total_rev, "market_share", "local_rev",
        ["local_rev", "global_rev"]
    )
    market_share_squared = sal.multiply(
        market_share, "market_share_squared", "local_rev",
        ["local_rev", "local_rev", 1]
    )
    hhi = sal.aggregate(
        market_share_squared, "hhi", ["companyID"], "local_rev", "+", "hhi"
    )

    # The open node is created for its effect on the DAG; its handle was
    # never read, so it is no longer bound to a local.
    sal._open(hhi, "hhi_opened", 1)

    return {in1, in2, in3}
def protocol():
    """Describe a concat-then-aggregate workflow where every input uses {1}.

    "left" (column_a, column_b) and "right" (column_a, column_c) are
    concatenated into "rel" and aggregated into "expected": group by
    ["column_a"], "sum" over "column_b", output column "total_b". The
    aggregate is handed to ``cc.collect`` with target 1.

    Returns:
        set: the two created input nodes.
    """
    left = cc.create(
        "left",
        [defCol("column_a", "INTEGER", [1]), defCol("column_b", "INTEGER", [1])],
        {1},
    )
    right = cc.create(
        "right",
        [defCol("column_a", "INTEGER", [1]), defCol("column_c", "INTEGER", [1])],
        {1},
    )

    # Same steps as a single nested call, spelled out one node at a time.
    combined = cc.concat([left, right], "rel")
    totals = cc.aggregate(
        combined, "expected", ["column_a"], "column_b", "sum", "total_b"
    )
    cc.collect(totals, 1)

    return {left, right}
def protocol():
    """Describe a two-input concat-then-aggregate workflow.

    "in_1" and "in_2" (columns a, b) are concatenated into "rel" and
    aggregated into "agg": group by ["a"], "sum" over "b", output column
    "total_b". The aggregate is handed to ``cc.collect`` with target 1.

    Returns:
        set: the two created input nodes.
    """
    first_input = cc.create(
        "in_1",
        [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
        {1},
    )

    # Column "a" of the second input carries two lists: [1] and [2].
    second_input = cc.create(
        "in_2",
        [defCol("a", "INTEGER", [1], [2]), defCol("b", "INTEGER", [2])],
        {2},
    )

    combined = cc.concat([first_input, second_input], "rel")
    totals = cc.aggregate(combined, "agg", ["a"], "b", "sum", "total_b")
    cc.collect(totals, 1)

    return {first_input, second_input}
def protocol():
    """Hand-build a DAG with explicit ``is_mpc`` / ``stored_with`` flags.

    Two inputs are created and projected with ``is_mpc = False``; a third
    input ("beforeOthers") is created with ``is_mpc = True``. All three
    branches are closed with the set {1, 2, 3} and concatenated into "a".
    Separately, the first closed branch is shuffled and passed to
    ``sal._open`` as "ssn_opened" with target 1.

    Returns:
        ``saldag.OpDag`` built from the set of the three input nodes.
    """
    # Branch 1: flagged non-MPC, projection output stored with {1}.
    first_in = sal.create(
        "in_1",
        [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
        {1},
    )
    first_in.is_mpc = False
    first_proj = sal.project(first_in, "proj_a", ["a", "b"])
    first_proj.is_mpc = False
    first_proj.out_rel.stored_with = {1}

    # Branch 2: flagged non-MPC, projection output stored with {2}.
    second_in = sal.create(
        "in_2",
        [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    second_in.is_mpc = False
    second_proj = sal.project(second_in, "proj_b", ["c", "d"])
    second_proj.is_mpc = False
    second_proj.out_rel.stored_with = {2}

    # Branch 3: created directly with {1, 2, 3} and flagged MPC.
    third_in = sal.create(
        "beforeOthers",
        [defCol("c", "INTEGER", [1], [3]), defCol("d", "INTEGER", [3])],
        {1, 2, 3},
    )
    third_in.is_mpc = True

    closed_first = sal._close(first_proj, "cl_a", {1, 2, 3})
    closed_first.is_mpc = True
    closed_second = sal._close(second_proj, "cl_b", {1, 2, 3})
    closed_second.is_mpc = True
    closed_third = sal._close(third_in, "cl_c", {1, 2, 3})
    closed_third.is_mpc = True

    merged = sal.concat([closed_first, closed_second, closed_third], "a")
    merged.is_mpc = True
    merged.out_rel.stored_with = {1, 2, 3}

    # The shuffle hangs off the first closed branch, not off the concat.
    shuffled = sal.shuffle(closed_first, "shuffled_a")
    shuffled.is_mpc = True
    sal._open(shuffled, "ssn_opened", 1)

    return saldag.OpDag({first_in, second_in, third_in})
def protocol():
    """Describe a concat / filter / sort / distinct-count workflow.

    "left" and "right" (both column_a, column_b, registered with {1}) are
    concatenated into "rel", filtered with ``"column_b" == scalar 1``,
    sorted by "column_a", and passed to ``cc.distinct_count`` on
    "column_a" to produce "expected". No collect step is added.

    Returns:
        set: the two created input nodes.
    """
    column_names = ("column_a", "column_b")

    left = cc.create(
        "left", [defCol(name, "INTEGER", [1]) for name in column_names], {1}
    )
    right = cc.create(
        "right", [defCol(name, "INTEGER", [1]) for name in column_names], {1}
    )

    combined = cc.concat([left, right], "rel")
    matching = cc.cc_filter(combined, "filtered", "column_b", "==", scalar=1)
    ordered = cc.sort_by(matching, "in_order", "column_a")

    # The count node is created for its effect on the DAG; nothing reads it.
    cc.distinct_count(ordered, "expected", "column_a")

    return {left, right}
def protocol():
    """Hand-build a DAG with explicit ``is_mpc`` / ``stored_with`` flags.

    Two inputs are created and projected with ``is_mpc = False``; a third
    input ("beforeOthers") is created with ``is_mpc = True``. All three
    branches are closed with the set {1, 2, 3} and concatenated into "a".
    Separately, the first closed branch is shuffled and passed to
    ``cc._open`` as "ssn_opened" with target 1.

    Returns:
        ``ccdag.OpDag`` built from the set of the three input nodes.
    """
    # Branch 1: flagged non-MPC, projection output stored with {1}.
    first_in = cc.create(
        "in_1",
        [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
        {1},
    )
    first_in.is_mpc = False
    first_proj = cc.project(first_in, "proj_a", ["a", "b"])
    first_proj.is_mpc = False
    first_proj.out_rel.stored_with = {1}

    # Branch 2: flagged non-MPC, projection output stored with {2}.
    second_in = cc.create(
        "in_2",
        [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    second_in.is_mpc = False
    second_proj = cc.project(second_in, "proj_b", ["c", "d"])
    second_proj.is_mpc = False
    second_proj.out_rel.stored_with = {2}

    # Branch 3: created directly with {1, 2, 3} and flagged MPC.
    third_in = cc.create(
        "beforeOthers",
        [defCol("c", "INTEGER", [1], [3]), defCol("d", "INTEGER", [3])],
        {1, 2, 3},
    )
    third_in.is_mpc = True

    closed_first = cc._close(first_proj, "cl_a", {1, 2, 3})
    closed_first.is_mpc = True
    closed_second = cc._close(second_proj, "cl_b", {1, 2, 3})
    closed_second.is_mpc = True
    closed_third = cc._close(third_in, "cl_c", {1, 2, 3})
    closed_third.is_mpc = True

    merged = cc.concat([closed_first, closed_second, closed_third], "a")
    merged.is_mpc = True
    merged.out_rel.stored_with = {1, 2, 3}

    # The shuffle hangs off the first closed branch, not off the concat.
    shuffled = cc.shuffle(closed_first, "shuffled_a")
    shuffled.is_mpc = True
    cc._open(shuffled, "ssn_opened", 1)

    return ccdag.OpDag({first_in, second_in, third_in})
def protocol():
    """Describe a two-input concat / aggregate / project workflow.

    "left" (column_a, column_b; registered with {1}) and "right" (column_a,
    column_c; registered with {2}) are concatenated into "rel" and
    aggregated into "actual": group by ["column_a"], "sum" over "column_b",
    output column "total_b". The aggregate is projected onto
    ["column_a", "total_b"] as "actual_open" and handed to ``cc.collect``
    with target 1.

    Returns:
        set: the two created input nodes.
    """
    left = cc.create(
        "left",
        [defCol("column_a", "INTEGER", [1]), defCol("column_b", "INTEGER", [1])],
        {1},
    )

    # column_a of the right input carries two lists: [1] and [2].
    right = cc.create(
        "right",
        [
            defCol("column_a", "INTEGER", [1], [2]),
            defCol("column_c", "INTEGER", [1]),
        ],
        {2},
    )

    combined = cc.concat([left, right], "rel")
    totals = cc.aggregate(
        combined, "actual", ["column_a"], "column_b", "sum", "total_b"
    )
    opened_view = cc.project(totals, "actual_open", ["column_a", "total_b"])
    cc.collect(opened_view, 1)

    return {left, right}
def protocol():
    """Describe the "hhi" workflow where every input is registered with {1}.

    Three "yellow" inputs with INTEGER columns companyID and price are
    concatenated and run through a chain of project / aggregate / divide /
    multiply / join steps that ends in an aggregate named "hhi"
    (presumably a Herfindahl–Hirschman-style market concentration index —
    confirm). That aggregate is handed to ``sal.collect`` with target 1.

    Returns:
        set: the three created input nodes.
    """
    column_names = ("companyID", "price")

    # All three schemas are built before any relation is created.
    schema_one = [defCol(name, "INTEGER", [1]) for name in column_names]
    schema_two = [defCol(name, "INTEGER", [1]) for name in column_names]
    schema_three = [defCol(name, "INTEGER", [1]) for name in column_names]

    yellow1 = sal.create("yellow1", schema_one, {1})
    yellow2 = sal.create("yellow2", schema_two, {1})
    yellow3 = sal.create("yellow3", schema_three, {1})

    rides = sal.concat([yellow1, yellow2, yellow3], "cab_data")
    chosen = sal.project(rides, "selected_input", ["companyID", "price"])

    # Per-company sum of "price", stored in column "local_rev".
    per_company = sal.aggregate(
        chosen, "local_rev", ["companyID"], "price", "+", "local_rev"
    )
    reduced = sal.divide(
        per_company, "scaled_down", "local_rev", ["local_rev", 1000]
    )

    # companyID is multiplied by 0 here, before the second aggregation
    # groups on it.
    blanked = sal.multiply(
        reduced, "first_val_blank", "companyID", ["companyID", 0]
    )
    scaled_local = sal.multiply(
        blanked, "local_rev_scaled", "local_rev", ["local_rev", 100]
    )

    # Note: the total is aggregated from ``blanked``, not ``scaled_local``.
    overall = sal.aggregate(
        blanked, "total_rev", ["companyID"], "local_rev", "+", "global_rev"
    )
    local_and_total = sal.join(
        scaled_local, overall, "local_total_rev", ["companyID"], ["companyID"]
    )
    share = sal.divide(
        local_and_total, "market_share", "local_rev", ["local_rev", "global_rev"]
    )
    share_squared = sal.multiply(
        share, "market_share_squared", "local_rev", ["local_rev", "local_rev", 1]
    )
    index = sal.aggregate(
        share_squared, "hhi", ["companyID"], "local_rev", "+", "hhi"
    )
    sal.collect(index, 1)

    return {yellow1, yellow2, yellow3}
def protocol():
    """Describe a close / concat / aggregate / open workflow.

    Three two-column inputs (columns a, b / c, d / e, f) are each passed
    through ``sal._close`` with the set {1, 2, 3}, concatenated into "rel",
    and aggregated into "agg": group by ["a"], "+" over "b", output column
    "total". The aggregate is passed to ``sal._open`` as "opened" with
    target 1.

    Returns:
        set: the three created input nodes.
    """
    cols_in_1 = [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])]
    in1 = sal.create("in1", cols_in_1, {1})
    cols_in_2 = [defCol("c", "INTEGER", [2]), defCol("d", "INTEGER", [2])]
    in2 = sal.create("in2", cols_in_2, {2})
    cols_in_3 = [defCol("e", "INTEGER", [3]), defCol("f", "INTEGER", [3])]
    in3 = sal.create("in3", cols_in_3, {3})

    cl1 = sal._close(in1, "cl1", {1, 2, 3})
    cl2 = sal._close(in2, "cl2", {1, 2, 3})
    cl3 = sal._close(in3, "cl3", {1, 2, 3})

    rel = sal.concat([cl1, cl2, cl3], "rel")
    agg = sal.aggregate(rel, "agg", ["a"], "b", "+", "total")

    # The open node is created for its effect on the DAG; its handle was
    # never read, so it is no longer bound to a local.
    sal._open(agg, "opened", 1)

    return {in1, in2, in3}
def protocol():
    """Describe a count-per-diagnosis workflow over two inputs.

    Each input has 13 INTEGER columns named "0" .. "12". The inputs are
    concatenated into "cohort", ``cc.aggregate_count`` groups on column
    "12" into "counts" with output column "total", the counts are sorted
    by "total" into "actual", and that relation is handed to
    ``cc.collect`` with target 1.

    Returns:
        set: the two created input nodes.
    """
    num_diagnosis_cols = 13
    diagnosis_col = "12"

    # Column names are the stringified indices 0 .. num_diagnosis_cols - 1.
    left_schema = [
        defCol(str(index), "INTEGER", 1) for index in range(num_diagnosis_cols)
    ]
    left_diagnosis = cc.create("left_diagnosis", left_schema, {1})

    right_schema = [
        defCol(str(index), "INTEGER", 2) for index in range(num_diagnosis_cols)
    ]
    right_diagnosis = cc.create("right_diagnosis", right_schema, {2})

    everyone = cc.concat([left_diagnosis, right_diagnosis], "cohort")
    tallies = cc.aggregate_count(everyone, "counts", [diagnosis_col], "total")
    ranked = cc.sort_by(tallies, "actual", "total")
    cc.collect(ranked, 1)

    return {left_diagnosis, right_diagnosis}
def protocol():
    """Describe a cohort-filtered count-per-diagnosis workflow.

    Each diagnosis input has 13 INTEGER columns named "8" .. "20". Each is
    passed to ``cc.filter_by`` on column "8" together with a single-column
    ("pid") cohort relation from the same side. The two filtered relations
    are concatenated into "cohort", ``cc.aggregate_count`` groups on column
    "16" into "counts" with output column "total", the counts are sorted
    by "total" into "actual", and that relation is handed to
    ``cc.collect`` with target 1.

    Returns:
        set: the four created input nodes (two diagnosis, two cohort).
    """
    pid_col = "8"
    diagnosis_col = "16"
    cols_to_skip = 8
    num_diagnosis_cols = 13

    # Column names run from cols_to_skip up to cols_to_skip + 12 inclusive.
    first_name = cols_to_skip
    end_name = cols_to_skip + num_diagnosis_cols

    left_schema = [
        defCol(str(index), "INTEGER", 1) for index in range(first_name, end_name)
    ]
    left_diagnosis = cc.create("left_diagnosis", left_schema, {1})
    left_cohort = cc.create("left_cohort", [defCol("pid", "INTEGER", 1)], {1})
    left_selected = cc.filter_by(
        left_diagnosis, "left_selected", pid_col, left_cohort
    )

    right_schema = [
        defCol(str(index), "INTEGER", 2) for index in range(first_name, end_name)
    ]
    right_diagnosis = cc.create("right_diagnosis", right_schema, {2})
    right_cohort = cc.create("right_cohort", [defCol("pid", "INTEGER", 2)], {2})
    right_selected = cc.filter_by(
        right_diagnosis, "right_selected", pid_col, right_cohort
    )

    everyone = cc.concat([left_selected, right_selected], "cohort")
    tallies = cc.aggregate_count(everyone, "counts", [diagnosis_col], "total")
    ranked = cc.sort_by(tallies, "actual", "total")
    cc.collect(ranked, 1)

    return {left_diagnosis, left_cohort, right_diagnosis, right_cohort}
def protocol():
    """Describe a close / join / aggregate / open workflow over three inputs.

    "govreg" (columns a, b) is closed and projected as "projA". "company0"
    and "company1" (columns c, d) are closed and concatenated into
    "right_rel". "projA" is joined with "right_rel" using key columns
    ["a"] and ["c"], the join is aggregated into "agg" (group by ["b"],
    "+" over "d", output column "total"), and the aggregate is passed to
    ``sal._open`` as "opened" with target 1.

    NOTE(review): a "projB" projection of "right_rel" is created, but the
    join consumes ``right_rel`` directly, so "projB" has no consumer in
    this function. Confirm whether the join was meant to read "projB".

    Returns:
        set: the three created input nodes.
    """
    cols_in_1 = [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])]
    in1 = sal.create("govreg", cols_in_1, {1})
    cols_in_2 = [defCol("c", "INTEGER", [2]), defCol("d", "INTEGER", [2])]
    in2 = sal.create("company0", cols_in_2, {2})
    cols_in_3 = [defCol("c", "INTEGER", [3]), defCol("d", "INTEGER", [3])]
    in3 = sal.create("company1", cols_in_3, {3})

    cl1 = sal._close(in1, "cl1", {1, 2, 3})
    proj_a = sal.project(cl1, "projA", ["a", "b"])

    cl2 = sal._close(in2, "cl2", {1, 2, 3})
    cl3 = sal._close(in3, "cl3", {1, 2, 3})
    right_rel = sal.concat([cl2, cl3], "right_rel")

    # Kept as a call so the DAG is unchanged; the handle was never read,
    # so it is no longer bound to a local.
    sal.project(right_rel, "projB", ["c", "d"])

    joined = sal.join(proj_a, right_rel, "joined", ["a"], ["c"])
    agg = sal.aggregate(joined, "agg", ["b"], "d", "+", "total")

    # Same for the open node: created for its effect on the DAG only.
    sal._open(agg, "opened", 1)

    return {in1, in2, in3}