def test_ssn(self):
    def protocol():
        govreg_cols = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1])
        ]
        govreg = cc.create("a_govreg", govreg_cols, {1})
        govreg_dummy = cc.project(govreg, "govreg_dummy", ["a", "b"])

        company0_cols = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2])
        ]
        company0 = cc.create("company0", company0_cols, {2})
        company0_dummy = cc.project(company0, "company0_dummy", ["c", "d"])

        company1_cols = [
            defCol("c", "INTEGER", [1], [3]),
            defCol("d", "INTEGER", [3])
        ]
        company1 = cc.create("company1", company1_cols, {3})
        company1_dummy = cc.project(company1, "company1_dummy", ["c", "d"])

        companies = cc.concat([company0_dummy, company1_dummy], "companies")

        joined = cc.join(govreg_dummy, companies, "joined", ["a"], ["c"])
        res = cc.aggregate(joined, "actual", ["b"], "d", "sum", "total")
        cc.collect(res, 1)

        return {govreg, company0, company1}

    dag = rewrite_dag(ccdag.OpDag(protocol()), use_leaky_ops=True)
    actual = ScotchCodeGen(CodeGenConfig(), dag)._generate(0, 0)
    self.check_workflow(actual, "ssn_leaky")
def test_partition_ssn(self):
    def protocol():
        govreg_cols = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1])
        ]
        govreg = cc.create("govreg", govreg_cols, {1})

        company0_cols = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2])
        ]
        company0 = cc.create("company0", company0_cols, {2})

        company1_cols = [
            defCol("c", "INTEGER", [1], [3]),
            defCol("d", "INTEGER", [3])
        ]
        company1 = cc.create("company1", company1_cols, {3})

        companies = cc.concat([company0, company1], "companies")

        joined = cc.join(govreg, companies, "joined", ["a"], ["c"])
        actual = cc.aggregate(joined, "actual", ["b"], "d", "sum", "total")
        cc.collect(actual, 1)

        return {govreg, company0, company1}

    dag = rewrite_dag(ccdag.OpDag(protocol()), use_leaky_ops=True)
    self.check_workflow(dag, "ssn")
def generate_code(protocol: callable, conclave_config: CodeGenConfig, mpc_frameworks: list,
                  local_frameworks: list, apply_optimizations: bool = True):
    """
    Applies optimization rewrite passes to the protocol, partitions the resulting condag,
    and generates backend-specific code for each sub-condag.

    :param protocol: protocol to compile
    :param conclave_config: conclave configuration
    :param mpc_frameworks: available MPC backend frameworks
    :param local_frameworks: available local-processing backend frameworks
    :param apply_optimizations: whether optimization rewrite passes should be applied to the condag
    :return: queue of job objects to be executed by the dispatcher
    """
    # currently only allow one local and one mpc framework
    assert len(mpc_frameworks) == 1 and len(local_frameworks) == 1

    # set up code gen config object
    if isinstance(conclave_config, CodeGenConfig):
        cfg = conclave_config
    else:
        cfg = CodeGenConfig.from_dict(conclave_config)

    # build the condag from the protocol
    dag = condag.OpDag(protocol())
    # only apply optimizations if required
    if apply_optimizations:
        dag = comp.rewrite_dag(dag)

    # partition into sub-condags that will run in specific frameworks
    mapping = part.heupart(dag, mpc_frameworks, local_frameworks)

    # for each sub-condag, run code gen and add the resulting job to the job queue
    job_queue = []
    for job_num, (framework, sub_dag, stored_with) in enumerate(mapping):
        print(job_num, framework)
        if framework == "sharemind":
            name = "{}-sharemind-job-{}".format(cfg.name, job_num)
            job = SharemindCodeGen(cfg, sub_dag, cfg.pid).generate(name, cfg.output_path)
            job_queue.append(job)
        elif framework == "spark":
            name = "{}-spark-job-{}".format(cfg.name, job_num)
            job = SparkCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
            job_queue.append(job)
        elif framework == "python":
            name = "{}-python-job-{}".format(cfg.name, job_num)
            job = PythonCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
            job_queue.append(job)
        else:
            raise Exception("Unknown framework: " + framework)

        # TODO: this probably doesn't belong here
        # use the normalized cfg here; conclave_config may be a plain dict
        if cfg.pid not in stored_with:
            job.skip = True

    return job_queue
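# A minimal usage sketch for generate_code above. The protocol body follows the
# cc API used elsewhere in this file; the framework choices and the use of a
# default CodeGenConfig are illustrative assumptions, not taken from this code.
def example_protocol():
    cols = [
        defCol("a", "INTEGER", [1]),
        defCol("b", "INTEGER", [1])
    ]
    in_rel = cc.create("in_rel", cols, {1})
    cc.collect(cc.project(in_rel, "proj", ["a", "b"]), 1)
    return {in_rel}


example_jobs = generate_code(example_protocol, CodeGenConfig(), ["sharemind"], ["python"])
# jobs whose results are not stored with this party come back with job.skip set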
def protocol():
    # define inputs
    cols_in_a = [
        defCol("a", "INTEGER", [1]),
        defCol("b", "INTEGER", [1]),
    ]
    in_1 = sal.create("in_1", cols_in_a, set([1]))
    in_1.is_mpc = False

    proj_a = sal.project(in_1, "proj_a", ["a", "b"])
    proj_a.is_mpc = False
    proj_a.out_rel.stored_with = set([1])

    cols_in_b = [
        defCol("c", "INTEGER", [1], [2]),
        defCol("d", "INTEGER", [2])
    ]
    in_2 = sal.create("in_2", cols_in_b, set([2]))
    in_2.is_mpc = False

    proj_b = sal.project(in_2, "proj_b", ["c", "d"])
    proj_b.is_mpc = False
    proj_b.out_rel.stored_with = set([2])

    cols_in_c = [
        defCol("c", "INTEGER", [1], [3]),
        defCol("d", "INTEGER", [3])
    ]
    in_3 = sal.create("beforeOthers", cols_in_c, set([1, 2, 3]))
    in_3.is_mpc = True

    cl_a = sal._close(proj_a, "cl_a", set([1, 2, 3]))
    cl_a.is_mpc = True
    cl_b = sal._close(proj_b, "cl_b", set([1, 2, 3]))
    cl_b.is_mpc = True
    cl_c = sal._close(in_3, "cl_c", set([1, 2, 3]))
    cl_c.is_mpc = True

    right_closed = sal.concat([cl_a, cl_b, cl_c], "a")
    right_closed.is_mpc = True
    right_closed.out_rel.stored_with = set([1, 2, 3])

    shuffled_a = sal.shuffle(cl_a, "shuffled_a")
    shuffled_a.is_mpc = True
    sal._open(shuffled_a, "ssn_opened", 1)

    return saldag.OpDag(set([in_1, in_2, in_3]))
def protocol():
    # define inputs
    cols_in_a = [
        defCol("a", "INTEGER", [1]),
        defCol("b", "INTEGER", [1]),
    ]
    in_1 = cc.create("in_1", cols_in_a, {1})
    in_1.is_mpc = False

    proj_a = cc.project(in_1, "proj_a", ["a", "b"])
    proj_a.is_mpc = False
    proj_a.out_rel.stored_with = {1}

    cols_in_b = [
        defCol("c", "INTEGER", [1], [2]),
        defCol("d", "INTEGER", [2])
    ]
    in_2 = cc.create("in_2", cols_in_b, {2})
    in_2.is_mpc = False

    proj_b = cc.project(in_2, "proj_b", ["c", "d"])
    proj_b.is_mpc = False
    proj_b.out_rel.stored_with = {2}

    cols_in_c = [
        defCol("c", "INTEGER", [1], [3]),
        defCol("d", "INTEGER", [3])
    ]
    in_3 = cc.create("beforeOthers", cols_in_c, {1, 2, 3})
    in_3.is_mpc = True

    cl_a = cc._close(proj_a, "cl_a", {1, 2, 3})
    cl_a.is_mpc = True
    cl_b = cc._close(proj_b, "cl_b", {1, 2, 3})
    cl_b.is_mpc = True
    cl_c = cc._close(in_3, "cl_c", {1, 2, 3})
    cl_c.is_mpc = True

    right_closed = cc.concat([cl_a, cl_b, cl_c], "a")
    right_closed.is_mpc = True
    right_closed.out_rel.stored_with = {1, 2, 3}

    shuffled_a = cc.shuffle(cl_a, "shuffled_a")
    shuffled_a.is_mpc = True
    cc._open(shuffled_a, "ssn_opened", 1)

    return ccdag.OpDag({in_1, in_2, in_3})
def test_hybrid_agg_opt(self):
    def protocol():
        cols_in_1 = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1])
        ]
        in_1 = cc.create("in_1", cols_in_1, {1})

        cols_in_2 = [
            defCol("a", "INTEGER", [1], [2]),
            defCol("b", "INTEGER", [2])
        ]
        in_2 = cc.create("in_2", cols_in_2, {2})

        cc.collect(
            cc.aggregate(cc.concat([in_1, in_2], "rel"), "agg", ["a"], "b", "sum", "total_b"),
            1)

        return {in_1, in_2}

    dag = rewrite_dag(ccdag.OpDag(protocol()), use_leaky_ops=True)
    actual = ScotchCodeGen(CodeGenConfig(), dag)._generate(0, 0)
    self.check_workflow(actual, "hybrid_agg_leaky")
def test_partition_hybrid_join(self):
    def protocol():
        cols_in_a = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in_a = cc.create("in_a", cols_in_a, {1})
        proj_a = cc.project(in_a, "proj_a", ["a", "b"])

        cols_in_b = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2])
        ]
        in_b = cc.create("in_b", cols_in_b, {2})
        proj_b = cc.project(in_b, "proj_b", ["c", "d"])

        joined = cc.join(proj_a, proj_b, "joined", ["a"], ["c"])
        cc.collect(joined, 1)

        return {in_a, in_b}

    dag = rewrite_dag(ccdag.OpDag(protocol()), use_leaky_ops=True)
    self.check_workflow(dag, 'hybrid_join')
def test_public_join(self):
    def protocol():
        left_one_cols = [
            defCol("a", "INTEGER", 1, 2, 3),
            defCol("b", "INTEGER", 1)
        ]
        left_one = cc.create("left_one", left_one_cols, {1})

        right_one_cols = [
            defCol("c", "INTEGER", 1, 2, 3),
            defCol("d", "INTEGER", 1)
        ]
        right_one = cc.create("right_one", right_one_cols, {1})

        left_two_cols = [
            defCol("a", "INTEGER", 1, 2, 3),
            defCol("b", "INTEGER", 2)
        ]
        left_two = cc.create("left_two", left_two_cols, {2})

        right_two_cols = [
            defCol("c", "INTEGER", 1, 2, 3),
            defCol("d", "INTEGER", 2)
        ]
        right_two = cc.create("right_two", right_two_cols, {2})

        left = cc.concat([left_one, left_two], "left")
        right = cc.concat([right_one, right_two], "right")

        joined = cc.join(left, right, "joined", ["a"], ["c"])
        cc.collect(joined, 1)

        return {left_one, left_two, right_one, right_two}

    dag = rewrite_dag(ccdag.OpDag(protocol()))
    actual = ScotchCodeGen(CodeGenConfig(), dag)._generate(0, 0)
    self.check_workflow(actual, 'public_join')
def test_hybrid_join_party_two_opt(self):
    def protocol():
        # define inputs
        cols_in_1 = [
            defCol("a", "INTEGER", [1], [2]),
            defCol("b", "INTEGER", [1]),
        ]
        in_1 = cc.create("in_1", cols_in_1, {1})

        cols_in_2 = [
            defCol("c", "INTEGER", [2]),
            defCol("d", "INTEGER", [2])
        ]
        in_2 = cc.create("in_2", cols_in_2, {2})

        result = cc.join(in_1, in_2, "result", ["a"], ["c"])
        cc.collect(result, 1)

        # return roots used to create the dag
        return {in_1, in_2}

    dag = rewrite_dag(ccdag.OpDag(protocol()), use_leaky_ops=True)
    actual = ScotchCodeGen(CodeGenConfig(), dag)._generate(0, 0)
    self.check_workflow(actual, 'hybrid_join_leaky_party_two')
def wrap():
    return saldag.OpDag(f())
def wrapper(*args, **kwargs):
    dag = rewrite_dag(saldag.OpDag(f()))
    return dag
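# The two inner functions above read like the bodies of decorators: `wrap`
# builds a raw OpDag from a protocol function, while `wrapper` additionally
# runs the rewrite passes. A minimal sketch of the enclosing decorators they
# imply follows; the names `dag_only` and `mpc` are assumptions made for
# illustration and are not confirmed by the surrounding code.
def dag_only(f):
    def wrap():
        return saldag.OpDag(f())
    return wrap


def mpc(f):
    def wrapper(*args, **kwargs):
        dag = rewrite_dag(saldag.OpDag(f()))
        return dag
    return wrapper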
def protocol():
    joined_res, inputs = hybrid_join()
    hybrid_agg(joined_res)
    return saldag.OpDag(inputs)
def generate_code(protocol: callable, cfg: CodeGenConfig, mpc_frameworks: list,
                  local_frameworks: list, apply_optimizations: bool = True):
    """
    Applies optimization rewrite passes to the protocol, partitions the resulting dag,
    and generates backend-specific code for each sub-dag.

    :param protocol: protocol to compile
    :param cfg: conclave configuration
    :param mpc_frameworks: available MPC backend frameworks
    :param local_frameworks: available local-processing backend frameworks
    :param apply_optimizations: whether optimization rewrite passes should be applied to the dag
    :return: queue of job objects to be executed by the dispatcher
    """
    dag = condag.OpDag(protocol())
    job_queue = []

    if "single-party-spark" not in set(mpc_frameworks) and "single-party-python" not in set(mpc_frameworks):
        # currently only allow one local and one mpc framework
        assert len(mpc_frameworks) == 1 and len(local_frameworks) == 1

        # only apply optimizations if required
        if apply_optimizations:
            dag = comp.rewrite_dag(dag, all_parties=cfg.all_pids, use_leaky_ops=cfg.use_leaky_ops)

        # partition into sub-dags that will run in specific frameworks
        mapping = part.heupart(dag, mpc_frameworks, local_frameworks)

        # for each sub-dag, run code gen and add the resulting job to the job queue
        for job_num, (framework, sub_dag, stored_with) in enumerate(mapping):
            print(job_num, framework)
            if framework == "sharemind":
                name = "{}-sharemind-job-{}".format(cfg.name, job_num)
                job = SharemindCodeGen(cfg, sub_dag, cfg.pid).generate(name, cfg.output_path)
                job_queue.append(job)
            elif framework == "spark":
                name = "{}-spark-job-{}".format(cfg.name, job_num)
                job = SparkCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
                job_queue.append(job)
            elif framework == "python":
                name = "{}-python-job-{}".format(cfg.name, job_num)
                job = PythonCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
                job_queue.append(job)
            elif framework == "obliv-c":
                name = "{}-oblivc-job-{}".format(cfg.name, job_num)
                job = OblivcCodeGen(cfg, sub_dag, cfg.pid).generate(name, cfg.output_path)
                job_queue.append(job)
            elif framework == "jiff":
                name = "{}-jiff-job-{}".format(cfg.name, job_num)
                job = JiffCodeGen(cfg, sub_dag, cfg.pid).generate(name, cfg.output_path)
                job_queue.append(job)
            else:
                raise Exception("Unknown framework: " + framework)

            # TODO: this probably doesn't belong here
            if cfg.pid not in stored_with:
                job.skip = True
    else:
        # single-party backends compile the whole dag into one job
        assert len(mpc_frameworks) == 1
        if mpc_frameworks[0] == "single-party-spark":
            name = "{}-spark-job-0".format(cfg.name)
            job = SinglePartyCodegen(cfg, dag, "spark").generate(name, cfg.output_path)
            job_queue.append(job)
        elif mpc_frameworks[0] == "single-party-python":
            name = "{}-python-job-0".format(cfg.name)
            job = SinglePartyCodegen(cfg, dag, "python").generate(name, cfg.output_path)
            job_queue.append(job)
        else:
            raise Exception("Unknown framework: {}".format(mpc_frameworks[0]))

    return job_queue
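# A hedged sketch of the single-party path in the newer generate_code variant
# above: passing "single-party-python" as the sole mpc framework routes the
# whole dag through SinglePartyCodegen instead of partitioning it. The protocol
# body, the default CodeGenConfig, and the empty local-framework list are
# illustrative assumptions.
def single_party_protocol():
    cols = [
        defCol("a", "INTEGER", [1]),
        defCol("b", "INTEGER", [1])
    ]
    rel = cc.create("rel", cols, {1})
    cc.collect(cc.aggregate(rel, "agg", ["a"], "b", "sum", "total_b"), 1)
    return {rel}


single_party_jobs = generate_code(single_party_protocol, CodeGenConfig(), ["single-party-python"], [])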