def generate_code(protocol: callable, conclave_config: CodeGenConfig, mpc_frameworks: list,
                  local_frameworks: list, apply_optimizations: bool = True):
    """
    Applies optimization rewrite passes to protocol, partitions resulting condag, and generates
    backend specific code for each sub-condag.

    :param protocol: protocol to compile
    :param conclave_config: conclave configuration
    :param mpc_frameworks: available mpc backend frameworks
    :param local_frameworks: available local-processing backend frameworks
    :param apply_optimizations: flag indicating if optimization rewrite passes should be applied to condag
    :return: queue of job objects to be executed by dispatcher
    """
    # currently only allow one local and one mpc framework
    assert len(mpc_frameworks) == 1 and len(local_frameworks) == 1

    # set up code gen config object
    if isinstance(conclave_config, CodeGenConfig):
        cfg = conclave_config
    else:
        cfg = CodeGenConfig.from_dict(conclave_config)

    # build dag from protocol
    dag = condag.OpDag(protocol())
    # only apply optimizations if required
    if apply_optimizations:
        dag = comp.rewrite_dag(dag)

    # partition into sub-dags that will run in specific frameworks
    mapping = part.heupart(dag, mpc_frameworks, local_frameworks)

    # for each sub-dag run code gen and add resulting job to job queue
    job_queue = []
    for job_num, (framework, sub_dag, stored_with) in enumerate(mapping):
        print(job_num, framework)
        if framework == "sharemind":
            name = "{}-sharemind-job-{}".format(cfg.name, job_num)
            job = SharemindCodeGen(cfg, sub_dag, cfg.pid).generate(name, cfg.output_path)
            job_queue.append(job)
        elif framework == "spark":
            name = "{}-spark-job-{}".format(cfg.name, job_num)
            job = SparkCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
            job_queue.append(job)
        elif framework == "python":
            name = "{}-python-job-{}".format(cfg.name, job_num)
            job = PythonCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
            job_queue.append(job)
        else:
            raise Exception("Unknown framework: " + framework)
        # TODO: this probably doesn't belong here
        # use cfg here, since conclave_config may have been passed in as a dict
        if cfg.pid not in stored_with:
            job.skip = True
    return job_queue
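# --- Usage sketch (hypothetical, not part of the library) ----------------------
# A minimal driver for generate_code, assuming the aliases and helpers used in
# the example workflows below (sal, defCol, CodeGenConfig, SharemindCodeGenConfig,
# setup_peer, dispatch_all). The protocol returns its input roots; generate_code
# wraps them in an OpDag, partitions the dag, and returns a job queue for the
# dispatcher. Paths, host names, and the pid attribute assignment are assumptions.
def example_driver():

    def protocol():
        # two-party inputs with identical schemas
        colsInA = [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])]
        colsInB = [defCol("a", "INTEGER", [2]), defCol("b", "INTEGER", [2])]
        in1 = sal.create("in1", colsInA, set([1]))
        in2 = sal.create("in2", colsInB, set([2]))
        # combine both inputs and project the columns of interest
        comb = sal.concat([in1, in2], "comb")
        sal.project(comb, "result", ["a", "b"])
        # return the dag roots; generate_code builds the OpDag itself
        return set([in1, in2])

    pid = int(sys.argv[1])
    workflow_name = "example-" + str(pid)

    sm_cg_config = SharemindCodeGenConfig(workflow_name, "/mnt/shared", use_hdfs=False, use_docker=True)
    conclave_config = CodeGenConfig(workflow_name).with_sharemind_config(sm_cg_config)
    conclave_config.pid = pid  # assumes CodeGenConfig exposes pid, as read by generate_code above
    conclave_config.code_path = "/mnt/shared/" + workflow_name
    conclave_config.input_path = "/mnt/shared"
    conclave_config.output_path = "/mnt/shared"

    # exactly one mpc and one local framework, as asserted by generate_code
    job_queue = generate_code(protocol, conclave_config, ["sharemind"], ["python"])

    sharemind_config = {
        "pid": pid,
        "parties": {
            1: {"host": "ca-spark-node-0", "port": 9001},
            2: {"host": "cb-spark-node-0", "port": 9002},
            3: {"host": "cc-spark-node-0", "port": 9003}
        }
    }
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)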
def testHybridAggWorkflow():

    @dag_only
    def protocol():
        # define inputs
        colsInA = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", colsInA, set([1]))
        in1.isMPC = False

        proja = sal.project(in1, "proja", ["a", "b"])
        proja.isMPC = False
        proja.out_rel.storedWith = set([1])

        # define inputs
        colsInB = [
            defCol("a", "INTEGER", [2]),
            defCol("b", "INTEGER", [2]),
        ]
        in2 = sal.create("in2", colsInB, set([2]))
        in2.isMPC = False

        projb = sal.project(in2, "projb", ["a", "b"])
        projb.isMPC = False
        projb.out_rel.storedWith = set([2])

        # define inputs
        colsInC = [
            defCol("a", "INTEGER", [3]),
            defCol("b", "INTEGER", [3]),
        ]
        in3 = sal.create("in3", colsInC, set([3]))
        in3.isMPC = False

        projc = sal.project(in3, "projc", ["a", "b"])
        projc.isMPC = False
        projc.out_rel.storedWith = set([3])

        clA = sal._close(proja, "clA", set([1, 2, 3]))
        clA.isMPC = True
        clB = sal._close(projb, "clB", set([1, 2, 3]))
        clB.isMPC = True
        clC = sal._close(projc, "clC", set([1, 2, 3]))
        clC.isMPC = True

        comb = sal.concat([clA, clB, clC], "comb")
        comb.out_rel.storedWith = set([1, 2, 3])
        comb.isMPC = True

        shuffled = sal.shuffle(comb, "shuffled")
        shuffled.out_rel.storedWith = set([1, 2, 3])
        shuffled.isMPC = True

        persisted = sal._persist(shuffled, "persisted")
        persisted.out_rel.storedWith = set([1, 2, 3])
        persisted.isMPC = True

        keysclosed = sal.project(shuffled, "keysclosed", ["a"])
        keysclosed.out_rel.storedWith = set([1, 2, 3])
        keysclosed.isMPC = True

        keys = sal._open(keysclosed, "keys", 1)
        keys.isMPC = True

        indexed = sal.index(keys, "indexed", "rowIndex")
        indexed.isMPC = False
        indexed.out_rel.storedWith = set([1])

        sortedByKey = sal.sort_by(indexed, "sortedByKey", "a")
        sortedByKey.isMPC = False
        sortedByKey.out_rel.storedWith = set([1])

        eqFlags = sal._comp_neighs(sortedByKey, "eqFlags", "a")
        eqFlags.isMPC = False
        eqFlags.out_rel.storedWith = set([1])

        # TODO: hack to get keys stored
        # need to fix later!
        sortedByKey = sal.project(sortedByKey, "sortedByKey", ["rowIndex", "a"])
        sortedByKey.isMPC = False
        sortedByKey.out_rel.storedWith = set([1])

        closedEqFlags = sal._close(eqFlags, "closedEqFlags", set([1, 2, 3]))
        closedEqFlags.isMPC = True
        closedSortedByKey = sal._close(sortedByKey, "closedSortedByKey", set([1, 2, 3]))
        closedSortedByKey.isMPC = True

        agg = sal.index_aggregate(persisted, "agg", ["a"], "b", "+", "b", closedEqFlags, closedSortedByKey)
        agg.out_rel.storedWith = set([1, 2, 3])
        agg.isMPC = True

        sal._open(agg, "opened", 1)

        # create condag
        return set([in1, in2, in3])

    pid = int(sys.argv[1])
    size = sys.argv[2]
    workflow_name = "hybrid-agg-" + str(pid)

    sm_cg_config = SharemindCodeGenConfig(workflow_name, "/mnt/shared", use_hdfs=False, use_docker=True)
    codegen_config = CodeGenConfig(workflow_name).with_sharemind_config(sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared/" + size
    codegen_config.output_path = "/mnt/shared/" + size

    dag = protocol()
    mapping = part.heupart(dag, ["sharemind"], ["python"])

    job_queue = []
    for idx, (fmwk, subdag, storedWith) in enumerate(mapping):
        if fmwk == "sharemind":
            job = SharemindCodeGen(codegen_config, subdag, pid).generate("sharemind-" + str(idx), None)
        else:
            job = PythonCodeGen(codegen_config, subdag).generate("python-" + str(idx), None)
        # TODO: this probably doesn't belong here
        if pid not in storedWith:
            job.skip = True
        job_queue.append(job)

    sharemind_config = {
        "pid": pid,
        "parties": {
            1: {"host": "ca-spark-node-0", "port": 9001},
            2: {"host": "cb-spark-node-0", "port": 9002},
            3: {"host": "cc-spark-node-0", "port": 9003}
        }
    }
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)
def testPublicJoinWorkflow():

    @dag_only
    def protocol():
        # define inputs
        colsInA = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", colsInA, set([1]))
        in1.isMPC = False

        proja = sal.project(in1, "proja", ["a", "b"])
        proja.isMPC = False
        proja.out_rel.storedWith = set([1])

        colsInB = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2])
        ]
        in2 = sal.create("in2", colsInB, set([2]))
        in2.isMPC = False

        projb = sal.project(in2, "projb", ["c", "d"])
        projb.isMPC = False
        projb.out_rel.storedWith = set([2])

        clA = sal._close(proja, "clA", set([1, 2, 3]))
        clA.isMPC = True
        clB = sal._close(projb, "clB", set([1, 2, 3]))
        clB.isMPC = True

        persistedA = sal._persist(clA, "persistedA")
        persistedB = sal._persist(clB, "persistedB")

        keysaclosed = sal.project(clA, "keysaclosed", ["a"])
        keysaclosed.out_rel.storedWith = set([1, 2, 3])
        keysaclosed.isMPC = True
        keysbclosed = sal.project(clB, "keysbclosed", ["c"])
        keysbclosed.isMPC = True
        keysbclosed.out_rel.storedWith = set([1, 2, 3])

        keysa = sal._open(keysaclosed, "keysa", 1)
        keysa.isMPC = True
        keysb = sal._open(keysbclosed, "keysb", 1)
        keysb.isMPC = True

        indexedA = sal.index(keysa, "indexedA", "indexA")
        indexedA.isMPC = False
        indexedA.out_rel.storedWith = set([1])
        indexedB = sal.index(keysb, "indexedB", "indexB")
        indexedB.isMPC = False
        indexedB.out_rel.storedWith = set([1])

        joinedindeces = sal.join(indexedA, indexedB, "joinedindeces", ["a"], ["c"])
        joinedindeces.isMPC = False
        joinedindeces.out_rel.storedWith = set([1])

        indecesonly = sal.project(joinedindeces, "indecesonly", ["indexA", "indexB"])
        indecesonly.isMPC = False
        indecesonly.out_rel.storedWith = set([1])

        indecesclosed = sal._close(indecesonly, "indecesclosed", set([1, 2, 3]))
        indecesclosed.isMPC = True

        joined = sal._index_join(persistedA, persistedB, "joined", ["a"], ["c"], indecesclosed)
        joined.isMPC = True

        sal._open(joined, "opened", 1)

        # create condag
        return set([in1, in2])

    pid = int(sys.argv[1])
    workflow_name = "hybrid-join-" + str(pid)

    sm_cg_config = SharemindCodeGenConfig(workflow_name, "/mnt/shared", use_hdfs=False)
    codegen_config = CodeGenConfig(workflow_name).with_sharemind_config(sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared"
    codegen_config.output_path = "/mnt/shared"

    exampleutils.generate_data(pid, codegen_config.output_path)

    dag = protocol()
    mapping = part.heupart(dag, ["sharemind"], ["python"])

    job_queue = []
    for idx, (fmwk, subdag, storedWith) in enumerate(mapping):
        if fmwk == "sharemind":
            job = SharemindCodeGen(codegen_config, subdag, pid).generate("sharemind-" + str(idx), None)
        else:
            job = PythonCodeGen(codegen_config, subdag).generate("python-" + str(idx), None)
        # TODO: this probably doesn't belong here
        if pid not in storedWith:
            job.skip = True
        job_queue.append(job)

    sharemind_config = exampleutils.get_sharemind_config(pid, True)
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)

    if pid == 1:
        expected = ['', '2,200,2001', '3,300,3001', '4,400,4001', '42,42,1001',
                    '5,500,5001', '6,600,6001', '7,700,7001', '7,800,7001',
                    '7,900,7001', '8,1000,8001', '9,1100,9001']
        exampleutils.check_res(expected, "/mnt/shared/opened.csv")
        print("Success")
def testPublicJoinWorkflow():

    @dag_only
    def protocol():
        # define inputs
        colsInA = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", colsInA, set([1]))
        in1.isMPC = False

        proja = sal.project(in1, "proja", ["a", "b"])
        proja.isMPC = False
        proja.out_rel.storedWith = set([1])

        colsInB = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2])
        ]
        in2 = sal.create("in2", colsInB, set([2]))
        in2.isMPC = False

        projb = sal.project(in2, "projb", ["c", "d"])
        projb.isMPC = False
        projb.out_rel.storedWith = set([2])

        clA = sal._close(proja, "clA", set([1, 2, 3]))
        clA.isMPC = True
        clB = sal._close(projb, "clB", set([1, 2, 3]))
        clB.isMPC = True

        persistedA = sal._persist(clA, "persistedA")
        persistedA.isMPC = True
        persistedB = sal._persist(clB, "persistedB")
        persistedB.isMPC = True

        keysaclosed = sal.project(clA, "keysaclosed", ["a"])
        keysaclosed.out_rel.storedWith = set([1, 2, 3])
        keysaclosed.isMPC = True
        keysbclosed = sal.project(clB, "keysbclosed", ["c"])
        keysbclosed.isMPC = True
        keysbclosed.out_rel.storedWith = set([1, 2, 3])

        keysa = sal._open(keysaclosed, "keysa", 1)
        keysa.isMPC = True
        keysb = sal._open(keysbclosed, "keysb", 1)
        keysb.isMPC = True

        indexedA = sal.index(keysa, "indexedA", "indexA")
        indexedA.isMPC = False
        indexedA.out_rel.storedWith = set([1])
        indexedB = sal.index(keysb, "indexedB", "indexB")
        indexedB.isMPC = False
        indexedB.out_rel.storedWith = set([1])

        joinedindeces = sal.join(indexedA, indexedB, "joinedindeces", ["a"], ["c"])
        joinedindeces.isMPC = False
        joinedindeces.out_rel.storedWith = set([1])

        indecesonly = sal.project(joinedindeces, "indecesonly", ["indexA", "indexB"])
        indecesonly.isMPC = False
        indecesonly.out_rel.storedWith = set([1])

        indecesclosed = sal._close(indecesonly, "indecesclosed", set([1, 2, 3]))
        indecesclosed.isMPC = True

        joined = sal._index_join(persistedA, persistedB, "joined", ["a"], ["c"], indecesclosed)
        joined.out_rel.storedWith = set([1, 2, 3])
        joined.isMPC = True

        sal._open(joined, "opened", 1)

        # create condag
        return set([in1, in2])

    pid = int(sys.argv[1])
    size = sys.argv[2]
    workflow_name = "public-join-" + str(pid)

    sm_cg_config = SharemindCodeGenConfig(workflow_name, "/mnt/shared", use_hdfs=False, use_docker=True)
    codegen_config = CodeGenConfig(workflow_name).with_sharemind_config(sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared/hybridjoin/" + size
    codegen_config.output_path = "/mnt/shared/hybridjoin/" + size

    dag = protocol()
    mapping = part.heupart(dag, ["sharemind"], ["python"])

    job_queue = []
    for idx, (fmwk, subdag, storedWith) in enumerate(mapping):
        if fmwk == "sharemind":
            job = SharemindCodeGen(codegen_config, subdag, pid).generate("sharemind-" + str(idx), None)
        else:
            job = PythonCodeGen(codegen_config, subdag).generate("python-" + str(idx), None)
        # TODO: this probably doesn't belong here
        if pid not in storedWith:
            job.skip = True
        job_queue.append(job)

    sharemind_config = {
        "pid": pid,
        "parties": {
            1: {"host": "ca-spark-node-0", "port": 9001},
            2: {"host": "cb-spark-node-0", "port": 9002},
            3: {"host": "cc-spark-node-0", "port": 9003}
        }
    }
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)
def generate_code(protocol: callable, cfg: CodeGenConfig, mpc_frameworks: list,
                  local_frameworks: list, apply_optimizations: bool = True):
    """
    Applies optimization rewrite passes to protocol, partitions resulting dag, and generates
    backend specific code for each sub-dag.

    :param protocol: protocol to compile
    :param cfg: conclave configuration
    :param mpc_frameworks: available mpc backend frameworks
    :param local_frameworks: available local-processing backend frameworks
    :param apply_optimizations: flag indicating if optimization rewrite passes should be applied to condag
    :return: queue of job objects to be executed by dispatcher
    """
    dag = condag.OpDag(protocol())
    job_queue = []

    if "single-party-spark" not in set(mpc_frameworks) and "single-party-python" not in set(mpc_frameworks):
        # currently only allow one local and one mpc framework
        assert len(mpc_frameworks) == 1 and len(local_frameworks) == 1

        # only apply optimizations if required
        if apply_optimizations:
            dag = comp.rewrite_dag(dag, all_parties=cfg.all_pids, use_leaky_ops=cfg.use_leaky_ops)

        # partition into sub-dags that will run in specific frameworks
        mapping = part.heupart(dag, mpc_frameworks, local_frameworks)

        # for each sub-dag run code gen and add resulting job to job queue
        for job_num, (framework, sub_dag, stored_with) in enumerate(mapping):
            print(job_num, framework)
            if framework == "sharemind":
                name = "{}-sharemind-job-{}".format(cfg.name, job_num)
                job = SharemindCodeGen(cfg, sub_dag, cfg.pid).generate(name, cfg.output_path)
                job_queue.append(job)
            elif framework == "spark":
                name = "{}-spark-job-{}".format(cfg.name, job_num)
                job = SparkCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
                job_queue.append(job)
            elif framework == "python":
                name = "{}-python-job-{}".format(cfg.name, job_num)
                job = PythonCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
                job_queue.append(job)
            elif framework == "obliv-c":
                name = "{}-oblivc-job-{}".format(cfg.name, job_num)
                job = OblivcCodeGen(cfg, sub_dag, cfg.pid).generate(name, cfg.output_path)
                job_queue.append(job)
            elif framework == "jiff":
                name = "{}-jiff-job-{}".format(cfg.name, job_num)
                job = JiffCodeGen(cfg, sub_dag, cfg.pid).generate(name, cfg.output_path)
                job_queue.append(job)
            else:
                raise Exception("Unknown framework: " + framework)
            # TODO: this probably doesn't belong here
            if cfg.pid not in stored_with:
                job.skip = True
    else:
        assert len(mpc_frameworks) == 1
        if mpc_frameworks[0] == "single-party-spark":
            name = "{}-spark-job-0".format(cfg.name)
            job = SinglePartyCodegen(cfg, dag, "spark").generate(name, cfg.output_path)
            job_queue.append(job)
        elif mpc_frameworks[0] == "single-party-python":
            name = "{}-python-job-0".format(cfg.name)
            job = SinglePartyCodegen(cfg, dag, "python").generate(name, cfg.output_path)
            job_queue.append(job)
        else:
            raise Exception("Unknown framework: {}".format(mpc_frameworks[0]))

    return job_queue
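# --- Single-party usage sketch (hypothetical) ----------------------------------
# When mpc_frameworks names a single-party backend, the variant of generate_code
# above skips dag partitioning and compiles the whole protocol with
# SinglePartyCodegen. A minimal call, reusing a protocol function defined as in
# the examples; the workflow name and paths are assumptions:
def example_single_party_driver(protocol):
    cfg = CodeGenConfig("local-only-workflow")
    cfg.input_path = "/mnt/shared"
    cfg.output_path = "/mnt/shared"
    # local_frameworks is not consulted on this code path; only the single
    # mpc framework entry is checked
    return generate_code(protocol, cfg, ["single-party-python"], ["python"])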
def testHybridJoinWorkflow():

    def hybrid_join():
        # define inputs
        colsInA = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", colsInA, set([1]))
        in1.isMPC = False

        proja = sal.project(in1, "proja", ["a", "b"])
        proja.isMPC = False
        proja.out_rel.storedWith = set([1])

        colsInB = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2])
        ]
        in2 = sal.create("in2", colsInB, set([2]))
        in2.isMPC = False

        projb = sal.project(in2, "projb", ["c", "d"])
        projb.isMPC = False
        projb.out_rel.storedWith = set([2])

        clA = sal._close(proja, "clA", set([1, 2, 3]))
        clA.isMPC = True
        clB = sal._close(projb, "clB", set([1, 2, 3]))
        clB.isMPC = True

        shuffledA = sal.shuffle(clA, "shuffledA")
        shuffledA.isMPC = True
        persistedA = sal._persist(shuffledA, "persistedA")
        persistedA.isMPC = True
        shuffledB = sal.shuffle(clB, "shuffledB")
        shuffledB.isMPC = True
        persistedB = sal._persist(shuffledB, "persistedB")
        persistedB.isMPC = True

        keysaclosed = sal.project(shuffledA, "keysaclosed", ["a"])
        keysaclosed.out_rel.storedWith = set([1, 2, 3])
        keysaclosed.isMPC = True
        keysbclosed = sal.project(shuffledB, "keysbclosed", ["c"])
        keysbclosed.isMPC = True
        keysbclosed.out_rel.storedWith = set([1, 2, 3])

        keysa = sal._open(keysaclosed, "keysa", 1)
        keysa.isMPC = True
        keysb = sal._open(keysbclosed, "keysb", 1)
        keysb.isMPC = True

        indexedA = sal.index(keysa, "indexedA", "indexA")
        indexedA.isMPC = False
        indexedA.out_rel.storedWith = set([1])
        indexedB = sal.index(keysb, "indexedB", "indexB")
        indexedB.isMPC = False
        indexedB.out_rel.storedWith = set([1])

        joinedindeces = sal.join(indexedA, indexedB, "joinedindeces", ["a"], ["c"])
        joinedindeces.isMPC = False
        joinedindeces.out_rel.storedWith = set([1])

        indecesonly = sal.project(joinedindeces, "indecesonly", ["indexA", "indexB"])
        indecesonly.isMPC = False
        indecesonly.out_rel.storedWith = set([1])

        indecesclosed = sal._close(indecesonly, "indecesclosed", set([1, 2, 3]))
        indecesclosed.isMPC = True

        joined = sal._index_join(persistedA, persistedB, "joined", ["a"], ["c"], indecesclosed)
        joined.isMPC = True

        return joined, set([in1, in2])

    def hybrid_agg(in1):
        shuffled = sal.shuffle(in1, "shuffled")
        shuffled.out_rel.storedWith = set([1, 2, 3])
        shuffled.isMPC = True

        persisted = sal._persist(shuffled, "persisted")
        persisted.out_rel.storedWith = set([1, 2, 3])
        persisted.isMPC = True

        keysclosed = sal.project(shuffled, "keysclosed", ["b"])
        keysclosed.out_rel.storedWith = set([1, 2, 3])
        keysclosed.isMPC = True

        keys = sal._open(keysclosed, "keys", 1)
        keys.isMPC = True

        indexed = sal.index(keys, "indexed", "rowIndex")
        indexed.isMPC = False
        indexed.out_rel.storedWith = set([1])

        distinctKeys = sal.distinct(keys, "distinctKeys", ["b"])
        distinctKeys.isMPC = False
        distinctKeys.out_rel.storedWith = set([1])

        # TODO: hack to get keys stored
        # need to fix later!
        fakeDistinctKeys = sal.distinct(keys, "distinctKeys", ["b"])
        fakeDistinctKeys.isMPC = False
        fakeDistinctKeys.out_rel.storedWith = set([1])

        indexedDistinct = sal.index(distinctKeys, "indexedDistinct", "keyIndex")
        indexedDistinct.isMPC = False
        indexedDistinct.out_rel.storedWith = set([1])

        joinedindeces = sal.join(indexed, indexedDistinct, "joinedindeces", ["b"], ["b"])
        joinedindeces.isMPC = False
        joinedindeces.out_rel.storedWith = set([1])

        # TODO: could project row indeces away too
        indecesonly = sal.project(joinedindeces, "indecesonly", ["rowIndex", "keyIndex"])
        indecesonly.isMPC = False
        indecesonly.out_rel.storedWith = set([1])

        closedDistinct = sal._close(distinctKeys, "closedDistinct", set([1, 2, 3]))
        closedDistinct.isMPC = True
        closedLookup = sal._close(indecesonly, "closedLookup", set([1, 2, 3]))
        closedLookup.isMPC = True

        agg = sal.index_aggregate(persisted, "agg", ["b"], "d", "+", "d", closedLookup, closedDistinct)
        agg.isMPC = True

        sal._open(agg, "aggopened", 1)

    def protocol():
        joinedres, inputs = hybrid_join()
        hybrid_agg(joinedres)
        return saldag.OpDag(inputs)

    pid = int(sys.argv[1])
    workflow_name = "ssn-" + str(pid)

    sm_cg_config = SharemindCodeGenConfig(workflow_name, "/mnt/shared", use_hdfs=False, use_docker=False)
    codegen_config = CodeGenConfig(workflow_name).with_sharemind_config(sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared"
    codegen_config.output_path = "/mnt/shared"

    exampleutils.generate_ssn_data(pid, codegen_config.output_path)

    dag = protocol()
    mapping = part.heupart(dag, ["sharemind"], ["python"])

    job_queue = []
    for idx, (fmwk, subdag, storedWith) in enumerate(mapping):
        if fmwk == "sharemind":
            job = SharemindCodeGen(codegen_config, subdag, pid).generate("sharemind-" + str(idx), None)
        else:
            job = PythonCodeGen(codegen_config, subdag).generate("python-" + str(idx), None)
        # TODO: this probably doesn't belong here
        if pid not in storedWith:
            job.skip = True
        job_queue.append(job)

    sharemind_config = exampleutils.get_sharemind_config(pid, True)
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)

    if pid == 1:
        expected = ['', '1,30', '2,50', '3,30']
        exampleutils.check_res(expected, "/mnt/shared/aggopened.csv")
        print("Success")