예제 #1
0
파일: join.py 프로젝트: rob-baron/conclave
def join(pid, config, sharemind_peer, f_size):
    """
    Build a two-input join protocol, generate Sharemind code for it and
    dispatch the resulting job.

    :param pid: party id handed to SharemindCodeGen
    :param config: code generation config handed to SharemindCodeGen
    :param sharemind_peer: peer object passed on to the dispatcher
    :param f_size: value interpolated into the job name ("join_<f_size>")
    """

    @dag_only
    def protocol():
        # first input relation (columns a, b), created with party set {1}
        left_cols = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        left_in = sal.create("in1", left_cols, {1})

        # second input relation (columns c, d), created with party set {2}
        right_cols = [
            defCol("c", "INTEGER", [2]),
            defCol("d", "INTEGER", [2]),
        ]
        right_in = sal.create("in2", right_cols, {2})

        # close both inputs with party set {1, 2, 3}
        left_closed = sal._close(left_in, "cl1", {1, 2, 3})
        right_closed = sal._close(right_in, "cl2", {1, 2, 3})

        # join on a == c; the opened node is only needed for its effect on
        # the dag, so its return value is not kept
        joined = sal.join(left_closed, right_closed, "res", ["a"], ["c"])
        sal._open(joined, "opened", 1)

        # the root (input) nodes identify the dag
        return {left_in, right_in}

    code_gen = SharemindCodeGen(config, protocol(), pid)
    jobs = [code_gen.generate("join_{}".format(f_size), "")]

    # no spark master is needed for a pure Sharemind job queue
    conclave.dispatch.dispatch_all(None, sharemind_peer, jobs)
예제 #2
0
def generate_code(protocol: callable,
                  conclave_config: CodeGenConfig,
                  mpc_frameworks: list,
                  local_frameworks: list,
                  apply_optimizations: bool = True):
    """
    Applies optimization rewrite passes to protocol, partitions resulting condag, and generates backend specific code for
    each sub-condag.
    :param protocol: protocol to compile
    :param conclave_config: conclave configuration; either a CodeGenConfig or a value accepted by
        CodeGenConfig.from_dict
    :param mpc_frameworks: available mpc backend frameworks (exactly one is supported)
    :param local_frameworks: available local-processing backend frameworks (exactly one is supported)
    :param apply_optimizations: flag indicating if optimization rewrite passes should be applied to condag
    :return: queue of job objects to be executed by dispatcher
    :raises Exception: if the partitioner assigns a sub-condag to an unknown framework
    """

    # currently only allow one local and one mpc framework
    assert len(mpc_frameworks) == 1 and len(local_frameworks) == 1

    # set up code gen config object; from here on only cfg is used, since
    # conclave_config may not be a CodeGenConfig instance
    if isinstance(conclave_config, CodeGenConfig):
        cfg = conclave_config
    else:
        cfg = CodeGenConfig.from_dict(conclave_config)

    dag = condag.OpDag(protocol())
    # only apply optimizations if required
    if apply_optimizations:
        dag = comp.rewrite_dag(dag)
    # partition into subdags that will run in specific frameworks
    mapping = part.heupart(dag, mpc_frameworks, local_frameworks)

    # for each sub condag run code gen and add resulting job to job queue
    job_queue = []
    for job_num, (framework, sub_dag, stored_with) in enumerate(mapping):
        print(job_num, framework)
        if framework == "sharemind":
            name = "{}-sharemind-job-{}".format(cfg.name, job_num)
            job = SharemindCodeGen(cfg, sub_dag,
                                   cfg.pid).generate(name, cfg.output_path)
        elif framework == "spark":
            name = "{}-spark-job-{}".format(cfg.name, job_num)
            job = SparkCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
        elif framework == "python":
            name = "{}-python-job-{}".format(cfg.name, job_num)
            job = PythonCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
        else:
            # format() instead of "+" so a non-string framework value still
            # produces this error rather than a TypeError
            raise Exception("Unknown framework: {}".format(framework))
        job_queue.append(job)

        # TODO: this probably doesn't belong here
        # Read the pid from the normalized cfg: conclave_config is not
        # guaranteed to expose a .pid attribute (see the from_dict branch).
        if cfg.pid not in stored_with:
            job.skip = True
    return job_queue
예제 #3
0
    def check_workflow(self, dag, name):
        """
        Generate Sharemind code for dag and assert that the 'miner' entry of
        the generated output equals the content of the expected file.

        :param dag: dag handed to SharemindCodeGen
        :param name: file name inside the sharemind_expected directory that
            sits next to this test file
        """
        this_dir = os.path.dirname(os.path.realpath(__file__))
        expected_rootdir = "{}/sharemind_expected".format(this_dir)

        sharemind_cfg = SharemindCodeGenConfig()
        codegen_cfg = CodeGenConfig('cfg').with_sharemind_config(sharemind_cfg)
        code_gen = SharemindCodeGen(codegen_cfg, dag, 1)

        # _generate returns an indexable result; element 1 maps 'miner' to
        # the generated code that is compared below
        generated = code_gen._generate('code', '/tmp')
        actual = generated[1]['miner']

        expected_path = expected_rootdir + '/{}'.format(name)
        with open(expected_path, 'r') as expected_file:
            expected = expected_file.read()

        self.assertEqual(expected, actual)
예제 #4
0
def party_proc(pid):
    """
    Set up a peer for party pid in a three-party localhost deployment,
    generate the Sharemind job for the join workflow and dispatch it.

    :param pid: id of the party this process acts as
    """
    sharemind_home = "/home/sharemind/Sharemind-SDK/sharemind/client"
    spark_master = "local"

    # parties 1, 2 and 3 all run on localhost, on ports 9001, 9002 and 9003
    parties = {
        party: {"host": "localhost", "port": 9000 + party}
        for party in (1, 2, 3)
    }
    peer = conclave.net.setup_peer({"pid": pid, "parties": parties})

    code_gen = SharemindCodeGen(CodeGenConfig(), join(), pid)
    jobs = [code_gen.generate("job-" + str(pid), sharemind_home)]
    conclave.dispatch.dispatch_all(spark_master, peer, jobs)
예제 #5
0
    def check_workflow(self, dag, name, use_leaky_ops=True):
        """
        Generate Sharemind code for dag and assert that the 'miner' entry of
        the generated output equals the shared "base" file followed by the
        workflow-specific expected file.

        :param dag: dag handed to SharemindCodeGen
        :param name: workflow-specific file name inside the
            sharemind_expected directory next to this test file
        :param use_leaky_ops: value assigned to the config's use_leaky_ops
        """
        # show full diffs on assertion failure
        self.maxDiff = None
        this_dir = os.path.dirname(os.path.realpath(__file__))
        expected_rootdir = "{}/sharemind_expected".format(this_dir)

        sharemind_cfg = SharemindCodeGenConfig()
        codegen_cfg = CodeGenConfig('cfg').with_sharemind_config(sharemind_cfg)
        codegen_cfg.use_leaky_ops = use_leaky_ops
        code_gen = SharemindCodeGen(codegen_cfg, dag, 1)

        generated = code_gen._generate('code', '/tmp')
        actual = generated[1]['miner']

        specific_path = expected_rootdir + '/{}'.format(name)
        base_path = expected_rootdir + '/{}'.format("base")
        with open(specific_path, 'r') as specific_file, \
                open(base_path, 'r') as base_file:
            # expected output is the base content followed by the specific one
            expected = base_file.read() + specific_file.read()

        self.assertEqual(expected, actual)
예제 #6
0
if __name__ == "__main__":

    pid = int(sys.argv[1])

    workflow_name = "sharemind-ssn-" + str(pid)
    sm_cg_config = SharemindCodeGenConfig(workflow_name,
                                          "/mnt/shared",
                                          use_hdfs=False,
                                          use_docker=True)
    codegen_config = CodeGenConfig(workflow_name).with_sharemind_config(
        sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared/ssn-data"
    codegen_config.output_path = "/mnt/shared/ssn-data"

    job = SharemindCodeGen(codegen_config, protocol(),
                           pid).generate("sharemind-0", "")
    job_queue = [job]

    sharemind_config = {
        "pid": pid,
        "parties": {
            1: {
                "host": "ca-spark-node-0",
                "port": 9001
            },
            2: {
                "host": "cb-spark-node-0",
                "port": 9002
            },
            3: {
                "host": "cc-spark-node-0",
예제 #7
0
def testHybridAggWorkflow():
    """
    Build the hybrid aggregation dag, generate one job per partition returned
    by the partitioner and dispatch the job queue.

    The party id is read from sys.argv[1]; sys.argv[2] names the data
    sub-directory under /mnt/shared used for both input and output.
    """
    @dag_only
    def protocol():

        # first input relation, created with party set {1}
        cols_a = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", cols_a, {1})
        in1.isMPC = False

        proj_a = sal.project(in1, "proja", ["a", "b"])
        proj_a.isMPC = False
        proj_a.out_rel.storedWith = {1}

        # second input relation, created with party set {2}
        cols_b = [
            defCol("a", "INTEGER", [2]),
            defCol("b", "INTEGER", [2]),
        ]
        in2 = sal.create("in2", cols_b, {2})
        in2.isMPC = False

        proj_b = sal.project(in2, "projb", ["a", "b"])
        proj_b.isMPC = False
        proj_b.out_rel.storedWith = {2}

        # third input relation, created with party set {3}
        cols_c = [
            defCol("a", "INTEGER", [3]),
            defCol("b", "INTEGER", [3]),
        ]
        in3 = sal.create("in3", cols_c, {3})
        in3.isMPC = False

        proj_c = sal.project(in3, "projc", ["a", "b"])
        proj_c.isMPC = False
        proj_c.out_rel.storedWith = {3}

        # close every projection with party set {1, 2, 3}
        closed_a = sal._close(proj_a, "clA", {1, 2, 3})
        closed_a.isMPC = True

        closed_b = sal._close(proj_b, "clB", {1, 2, 3})
        closed_b.isMPC = True

        closed_c = sal._close(proj_c, "clC", {1, 2, 3})
        closed_c.isMPC = True

        combined = sal.concat([closed_a, closed_b, closed_c], "comb")
        combined.out_rel.storedWith = {1, 2, 3}
        combined.isMPC = True

        shuffled = sal.shuffle(combined, "shuffled")
        shuffled.out_rel.storedWith = {1, 2, 3}
        shuffled.isMPC = True

        persisted = sal._persist(shuffled, "persisted")
        persisted.out_rel.storedWith = {1, 2, 3}
        persisted.isMPC = True

        # project out the key column "a" and open it
        keys_closed = sal.project(shuffled, "keysclosed", ["a"])
        keys_closed.out_rel.storedWith = {1, 2, 3}
        keys_closed.isMPC = True

        keys = sal._open(keys_closed, "keys", 1)
        keys.isMPC = True

        # the following steps are marked non-MPC and stored with {1}
        indexed = sal.index(keys, "indexed", "rowIndex")
        indexed.isMPC = False
        indexed.out_rel.storedWith = {1}

        sorted_by_key = sal.sort_by(indexed, "sortedByKey", "a")
        sorted_by_key.isMPC = False
        sorted_by_key.out_rel.storedWith = {1}

        eq_flags = sal._comp_neighs(sorted_by_key, "eqFlags", "a")
        eq_flags.isMPC = False
        eq_flags.out_rel.storedWith = {1}

        # TODO: hack to get keys stored
        # need to fix later!
        sorted_by_key = sal.project(sorted_by_key, "sortedByKey",
                                    ["rowIndex", "a"])
        sorted_by_key.isMPC = False
        sorted_by_key.out_rel.storedWith = {1}

        # close the locally computed helper relations again
        closed_eq_flags = sal._close(eq_flags, "closedEqFlags", {1, 2, 3})
        closed_eq_flags.isMPC = True
        closed_sorted_by_key = sal._close(sorted_by_key, "closedSortedByKey",
                                          {1, 2, 3})
        closed_sorted_by_key.isMPC = True

        agg = sal.index_aggregate(persisted, "agg", ["a"], "b", "+", "b",
                                  closed_eq_flags, closed_sorted_by_key)
        agg.out_rel.storedWith = {1, 2, 3}
        agg.isMPC = True

        sal._open(agg, "opened", 1)

        # the input nodes identify the dag
        return {in1, in2, in3}

    pid = int(sys.argv[1])
    size = sys.argv[2]

    workflow_name = "hybrid-agg-" + str(pid)
    sm_cg_config = SharemindCodeGenConfig(
        workflow_name, "/mnt/shared", use_hdfs=False, use_docker=True)
    codegen_config = CodeGenConfig(workflow_name).with_sharemind_config(
        sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    # input and output share the same size-specific directory
    data_path = "/mnt/shared/" + size
    codegen_config.input_path = data_path
    codegen_config.output_path = data_path

    dag = protocol()

    mapping = part.heupart(dag, ["sharemind"], ["python"])
    job_queue = []
    for idx, (fmwk, subdag, storedWith) in enumerate(mapping):
        # pick the code generator first, then generate the job
        if fmwk == "sharemind":
            code_gen = SharemindCodeGen(codegen_config, subdag, pid)
            job_name = "sharemind-" + str(idx)
        else:
            code_gen = PythonCodeGen(codegen_config, subdag)
            job_name = "python-" + str(idx)
        job = code_gen.generate(job_name, None)
        # TODO: this probably doesn't belong here
        # jobs whose partition is not stored with this party are skipped
        if pid not in storedWith:
            job.skip = True
        job_queue.append(job)

    sharemind_config = {
        "pid": pid,
        "parties": {
            1: {"host": "ca-spark-node-0", "port": 9001},
            2: {"host": "cb-spark-node-0", "port": 9002},
            3: {"host": "cc-spark-node-0", "port": 9003},
        },
    }
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)
예제 #8
0
def testPublicJoinWorkflow():
    """
    Build the public (hybrid) join dag, generate test data, generate one job
    per partition, dispatch them and, for party 1, check the opened result.

    The party id is read from sys.argv[1]. All data lives under /mnt/shared.
    """

    @dag_only
    def protocol():

        # first input relation (a, b), created with party set {1}
        cols_a = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", cols_a, {1})
        in1.isMPC = False

        proj_a = sal.project(in1, "proja", ["a", "b"])
        proj_a.isMPC = False
        proj_a.out_rel.storedWith = {1}

        # second input relation (c, d), created with party set {2}; column c
        # is declared with two party lists
        cols_b = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2]),
        ]
        in2 = sal.create("in2", cols_b, {2})
        in2.isMPC = False

        proj_b = sal.project(in2, "projb", ["c", "d"])
        proj_b.isMPC = False
        proj_b.out_rel.storedWith = {2}

        closed_a = sal._close(proj_a, "clA", {1, 2, 3})
        closed_a.isMPC = True
        closed_b = sal._close(proj_b, "clB", {1, 2, 3})
        closed_b.isMPC = True

        # NOTE(review): isMPC is not set on the persisted nodes here - confirm
        # whether that is intentional
        persisted_a = sal._persist(closed_a, "persistedA")
        persisted_b = sal._persist(closed_b, "persistedB")

        # project out the join keys of both sides
        keys_a_closed = sal.project(closed_a, "keysaclosed", ["a"])
        keys_a_closed.out_rel.storedWith = {1, 2, 3}
        keys_a_closed.isMPC = True
        keys_b_closed = sal.project(closed_b, "keysbclosed", ["c"])
        keys_b_closed.isMPC = True
        keys_b_closed.out_rel.storedWith = {1, 2, 3}

        keys_a = sal._open(keys_a_closed, "keysa", 1)
        keys_a.isMPC = True
        keys_b = sal._open(keys_b_closed, "keysb", 1)
        keys_b.isMPC = True

        # the following steps are marked non-MPC and stored with {1}
        indexed_a = sal.index(keys_a, "indexedA", "indexA")
        indexed_a.isMPC = False
        indexed_a.out_rel.storedWith = {1}
        indexed_b = sal.index(keys_b, "indexedB", "indexB")
        indexed_b.isMPC = False
        indexed_b.out_rel.storedWith = {1}

        joined_indices = sal.join(
            indexed_a, indexed_b, "joinedindeces", ["a"], ["c"])
        joined_indices.isMPC = False
        joined_indices.out_rel.storedWith = {1}

        indices_only = sal.project(
            joined_indices, "indecesonly", ["indexA", "indexB"])
        indices_only.isMPC = False
        indices_only.out_rel.storedWith = {1}

        indices_closed = sal._close(
            indices_only, "indecesclosed", {1, 2, 3})
        indices_closed.isMPC = True

        joined = sal._index_join(persisted_a, persisted_b, "joined",
                                 ["a"], ["c"], indices_closed)
        joined.isMPC = True

        sal._open(joined, "opened", 1)

        # the input nodes identify the dag
        return {in1, in2}

    pid = int(sys.argv[1])
    workflow_name = "hybrid-join-" + str(pid)
    sm_cg_config = SharemindCodeGenConfig(
        workflow_name, "/mnt/shared", use_hdfs=False)
    codegen_config = CodeGenConfig(
        workflow_name).with_sharemind_config(sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared"
    codegen_config.output_path = "/mnt/shared"

    exampleutils.generate_data(pid, codegen_config.output_path)

    dag = protocol()
    mapping = part.heupart(dag, ["sharemind"], ["python"])
    job_queue = []
    for idx, (fmwk, subdag, storedWith) in enumerate(mapping):
        # pick the code generator first, then generate the job
        if fmwk == "sharemind":
            code_gen = SharemindCodeGen(codegen_config, subdag, pid)
            job_name = "sharemind-" + str(idx)
        else:
            code_gen = PythonCodeGen(codegen_config, subdag)
            job_name = "python-" + str(idx)
        job = code_gen.generate(job_name, None)
        # TODO: this probably doesn't belong here
        # jobs whose partition is not stored with this party are skipped
        if pid not in storedWith:
            job.skip = True
        job_queue.append(job)

    sharemind_config = exampleutils.get_sharemind_config(pid, True)
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)

    # only party 1 verifies the opened output
    if pid != 1:
        return
    expected = ['', '2,200,2001', '3,300,3001', '4,400,4001', '42,42,1001', '5,500,5001',
                '6,600,6001', '7,700,7001', '7,800,7001', '7,900,7001', '8,1000,8001', '9,1100,9001']
    exampleutils.check_res(expected, "/mnt/shared/opened.csv")
    print("Success")
예제 #9
0
def testPublicJoinWorkflow():
    """
    Build the public join dag, generate one job per partition returned by the
    partitioner and dispatch the job queue.

    The party id is read from sys.argv[1]; sys.argv[2] names the data
    sub-directory under /mnt/shared/hybridjoin used for input and output.
    """

    @dag_only
    def protocol():

        # first input relation (a, b), created with party set {1}
        cols_a = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", cols_a, {1})
        in1.isMPC = False

        proj_a = sal.project(in1, "proja", ["a", "b"])
        proj_a.isMPC = False
        proj_a.out_rel.storedWith = {1}

        # second input relation (c, d), created with party set {2}; column c
        # is declared with two party lists
        cols_b = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2]),
        ]
        in2 = sal.create("in2", cols_b, {2})
        in2.isMPC = False

        proj_b = sal.project(in2, "projb", ["c", "d"])
        proj_b.isMPC = False
        proj_b.out_rel.storedWith = {2}

        closed_a = sal._close(proj_a, "clA", {1, 2, 3})
        closed_a.isMPC = True
        closed_b = sal._close(proj_b, "clB", {1, 2, 3})
        closed_b.isMPC = True

        persisted_a = sal._persist(closed_a, "persistedA")
        persisted_a.isMPC = True
        persisted_b = sal._persist(closed_b, "persistedB")
        persisted_b.isMPC = True

        # project out the join keys of both sides
        keys_a_closed = sal.project(closed_a, "keysaclosed", ["a"])
        keys_a_closed.out_rel.storedWith = {1, 2, 3}
        keys_a_closed.isMPC = True
        keys_b_closed = sal.project(closed_b, "keysbclosed", ["c"])
        keys_b_closed.isMPC = True
        keys_b_closed.out_rel.storedWith = {1, 2, 3}

        keys_a = sal._open(keys_a_closed, "keysa", 1)
        keys_a.isMPC = True
        keys_b = sal._open(keys_b_closed, "keysb", 1)
        keys_b.isMPC = True

        # the following steps are marked non-MPC and stored with {1}
        indexed_a = sal.index(keys_a, "indexedA", "indexA")
        indexed_a.isMPC = False
        indexed_a.out_rel.storedWith = {1}
        indexed_b = sal.index(keys_b, "indexedB", "indexB")
        indexed_b.isMPC = False
        indexed_b.out_rel.storedWith = {1}

        joined_indices = sal.join(
            indexed_a, indexed_b, "joinedindeces", ["a"], ["c"])
        joined_indices.isMPC = False
        joined_indices.out_rel.storedWith = {1}

        indices_only = sal.project(
            joined_indices, "indecesonly", ["indexA", "indexB"])
        indices_only.isMPC = False
        indices_only.out_rel.storedWith = {1}

        indices_closed = sal._close(
            indices_only, "indecesclosed", {1, 2, 3})
        indices_closed.isMPC = True

        joined = sal._index_join(persisted_a, persisted_b, "joined",
                                 ["a"], ["c"], indices_closed)
        joined.out_rel.storedWith = {1, 2, 3}
        joined.isMPC = True

        sal._open(joined, "opened", 1)

        # the input nodes identify the dag
        return {in1, in2}

    pid = int(sys.argv[1])
    size = sys.argv[2]

    workflow_name = "public-join-" + str(pid)
    sm_cg_config = SharemindCodeGenConfig(
        workflow_name, "/mnt/shared", use_hdfs=False, use_docker=True)
    codegen_config = CodeGenConfig(
        workflow_name).with_sharemind_config(sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    # input and output share the same size-specific directory
    data_path = "/mnt/shared/hybridjoin/" + size
    codegen_config.input_path = data_path
    codegen_config.output_path = data_path

    dag = protocol()
    mapping = part.heupart(dag, ["sharemind"], ["python"])
    job_queue = []
    for idx, (fmwk, subdag, storedWith) in enumerate(mapping):
        # pick the code generator first, then generate the job
        if fmwk == "sharemind":
            code_gen = SharemindCodeGen(codegen_config, subdag, pid)
            job_name = "sharemind-" + str(idx)
        else:
            code_gen = PythonCodeGen(codegen_config, subdag)
            job_name = "python-" + str(idx)
        job = code_gen.generate(job_name, None)
        # TODO: this probably doesn't belong here
        # jobs whose partition is not stored with this party are skipped
        if pid not in storedWith:
            job.skip = True
        job_queue.append(job)

    sharemind_config = {
        "pid": pid,
        "parties": {
            1: {"host": "ca-spark-node-0", "port": 9001},
            2: {"host": "cb-spark-node-0", "port": 9002},
            3: {"host": "cc-spark-node-0", "port": 9003},
        },
    }
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)
예제 #10
0
def generate_code(protocol: callable, cfg: CodeGenConfig, mpc_frameworks: list,
                  local_frameworks: list, apply_optimizations: bool = True):
    """
    Compiles protocol into a queue of backend specific jobs.

    For a single-party backend ("single-party-spark" / "single-party-python") the whole dag is handed to one
    single-party code generator. Otherwise the dag is optionally rewritten, partitioned, and one job is generated
    per sub-dag.
    :param protocol: protocol to compile
    :param cfg: conclave configuration
    :param mpc_frameworks: available mpc backend frameworks (exactly one is supported)
    :param local_frameworks: available local-processing backend frameworks (exactly one is supported unless a
        single-party backend is requested)
    :param apply_optimizations: flag indicating if optimization rewrite passes should be applied to the dag
        (not consulted on the single-party path)
    :return: queue of job objects to be executed by dispatcher
    :raises Exception: if a framework name is not recognised
    """

    dag = condag.OpDag(protocol())
    job_queue = []

    requested = set(mpc_frameworks)
    is_single_party = "single-party-spark" in requested or "single-party-python" in requested

    if is_single_party:
        # single-party path: exactly one backend, one job, no partitioning
        assert len(mpc_frameworks) == 1

        backend = mpc_frameworks[0]
        if backend == "single-party-spark":
            name = "{}-spark-job-0".format(cfg.name)
            code_gen = SinglePartyCodegen(cfg, dag, "spark")
        elif backend == "single-party-python":
            name = "{}-python-job-0".format(cfg.name)
            code_gen = SinglePartyCodegen(cfg, dag, "python")
        else:
            raise Exception("Unknown framework: {}".format(backend))

        job_queue.append(code_gen.generate(name, cfg.output_path))
        return job_queue

    # currently only allow one local and one mpc framework
    assert len(mpc_frameworks) == 1 and len(local_frameworks) == 1

    # only apply optimizations if required
    if apply_optimizations:
        dag = comp.rewrite_dag(dag, all_parties=cfg.all_pids, use_leaky_ops=cfg.use_leaky_ops)

    # partition into sub-dags that will run in specific frameworks
    mapping = part.heupart(dag, mpc_frameworks, local_frameworks)

    # for each sub-dag pick the code generator, then generate and queue the job
    for job_num, (framework, sub_dag, stored_with) in enumerate(mapping):
        print(job_num, framework)
        if framework == "sharemind":
            name = "{}-sharemind-job-{}".format(cfg.name, job_num)
            code_gen = SharemindCodeGen(cfg, sub_dag, cfg.pid)
        elif framework == "spark":
            name = "{}-spark-job-{}".format(cfg.name, job_num)
            code_gen = SparkCodeGen(cfg, sub_dag)
        elif framework == "python":
            name = "{}-python-job-{}".format(cfg.name, job_num)
            code_gen = PythonCodeGen(cfg, sub_dag)
        elif framework == "obliv-c":
            name = "{}-oblivc-job-{}".format(cfg.name, job_num)
            code_gen = OblivcCodeGen(cfg, sub_dag, cfg.pid)
        elif framework == "jiff":
            name = "{}-jiff-job-{}".format(cfg.name, job_num)
            code_gen = JiffCodeGen(cfg, sub_dag, cfg.pid)
        else:
            raise Exception("Unknown framework: " + framework)

        job = code_gen.generate(name, cfg.output_path)
        job_queue.append(job)

        # TODO: this probably doesn't belong here
        # jobs whose sub-dag is not stored with this party are skipped
        if cfg.pid not in stored_with:
            job.skip = True

    return job_queue
예제 #11
0
def testHybridJoinWorkflow():
    """
    Build a dag that chains the hybrid join and the hybrid aggregation,
    generate test data, generate one job per partition, dispatch them and,
    for party 1, check the opened aggregation result.

    The party id is read from sys.argv[1]. All data lives under /mnt/shared.
    """

    def hybrid_join():
        """Add the hybrid join to the dag; return (joined node, input nodes)."""

        # first input relation (a, b), created with party set {1}
        cols_a = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", cols_a, {1})
        in1.isMPC = False

        proj_a = sal.project(in1, "proja", ["a", "b"])
        proj_a.isMPC = False
        proj_a.out_rel.storedWith = {1}

        # second input relation (c, d), created with party set {2}; column c
        # is declared with two party lists
        cols_b = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2]),
        ]
        in2 = sal.create("in2", cols_b, {2})
        in2.isMPC = False

        proj_b = sal.project(in2, "projb", ["c", "d"])
        proj_b.isMPC = False
        proj_b.out_rel.storedWith = {2}

        closed_a = sal._close(proj_a, "clA", {1, 2, 3})
        closed_a.isMPC = True
        closed_b = sal._close(proj_b, "clB", {1, 2, 3})
        closed_b.isMPC = True

        # shuffle and persist each side
        shuffled_a = sal.shuffle(closed_a, "shuffledA")
        shuffled_a.isMPC = True
        persisted_a = sal._persist(shuffled_a, "persistedA")
        persisted_a.isMPC = True
        shuffled_b = sal.shuffle(closed_b, "shuffledB")
        shuffled_b.isMPC = True
        persisted_b = sal._persist(shuffled_b, "persistedB")
        persisted_b.isMPC = True

        # project out the join keys of both shuffled sides
        keys_a_closed = sal.project(shuffled_a, "keysaclosed", ["a"])
        keys_a_closed.out_rel.storedWith = {1, 2, 3}
        keys_a_closed.isMPC = True
        keys_b_closed = sal.project(shuffled_b, "keysbclosed", ["c"])
        keys_b_closed.isMPC = True
        keys_b_closed.out_rel.storedWith = {1, 2, 3}

        keys_a = sal._open(keys_a_closed, "keysa", 1)
        keys_a.isMPC = True
        keys_b = sal._open(keys_b_closed, "keysb", 1)
        keys_b.isMPC = True

        # the following steps are marked non-MPC and stored with {1}
        indexed_a = sal.index(keys_a, "indexedA", "indexA")
        indexed_a.isMPC = False
        indexed_a.out_rel.storedWith = {1}
        indexed_b = sal.index(keys_b, "indexedB", "indexB")
        indexed_b.isMPC = False
        indexed_b.out_rel.storedWith = {1}

        joined_indices = sal.join(
            indexed_a, indexed_b, "joinedindeces", ["a"], ["c"])
        joined_indices.isMPC = False
        joined_indices.out_rel.storedWith = {1}

        indices_only = sal.project(
            joined_indices, "indecesonly", ["indexA", "indexB"])
        indices_only.isMPC = False
        indices_only.out_rel.storedWith = {1}

        indices_closed = sal._close(
            indices_only, "indecesclosed", {1, 2, 3})
        indices_closed.isMPC = True

        joined = sal._index_join(persisted_a, persisted_b, "joined",
                                 ["a"], ["c"], indices_closed)
        joined.isMPC = True

        return joined, {in1, in2}

    def hybrid_agg(in1):
        """Add the hybrid aggregation (sum of d grouped by b) on top of in1."""

        shuffled = sal.shuffle(in1, "shuffled")
        shuffled.out_rel.storedWith = {1, 2, 3}
        shuffled.isMPC = True

        persisted = sal._persist(shuffled, "persisted")
        persisted.out_rel.storedWith = {1, 2, 3}
        persisted.isMPC = True

        # project out the group-by key column "b" and open it
        keys_closed = sal.project(shuffled, "keysclosed", ["b"])
        keys_closed.out_rel.storedWith = {1, 2, 3}
        keys_closed.isMPC = True

        keys = sal._open(keys_closed, "keys", 1)
        keys.isMPC = True

        # the following steps are marked non-MPC and stored with {1}
        indexed = sal.index(keys, "indexed", "rowIndex")
        indexed.isMPC = False
        indexed.out_rel.storedWith = {1}

        distinct_keys = sal.distinct(keys, "distinctKeys", ["b"])
        distinct_keys.isMPC = False
        distinct_keys.out_rel.storedWith = {1}

        # TODO: hack to get keys stored
        # need to fix later!
        fake_distinct_keys = sal.distinct(keys, "distinctKeys", ["b"])
        fake_distinct_keys.isMPC = False
        fake_distinct_keys.out_rel.storedWith = {1}

        indexed_distinct = sal.index(
            distinct_keys, "indexedDistinct", "keyIndex")
        indexed_distinct.isMPC = False
        indexed_distinct.out_rel.storedWith = {1}

        joined_indices = sal.join(
            indexed, indexed_distinct, "joinedindeces", ["b"], ["b"])
        joined_indices.isMPC = False
        joined_indices.out_rel.storedWith = {1}

        # TODO: could project row indeces away too
        indices_only = sal.project(
            joined_indices, "indecesonly", ["rowIndex", "keyIndex"])
        indices_only.isMPC = False
        indices_only.out_rel.storedWith = {1}

        # close the locally computed helper relations again
        closed_distinct = sal._close(
            distinct_keys, "closedDistinct", {1, 2, 3})
        closed_distinct.isMPC = True
        closed_lookup = sal._close(indices_only, "closedLookup", {1, 2, 3})
        closed_lookup.isMPC = True

        agg = sal.index_aggregate(
            persisted, "agg", ["b"], "d", "+", "d",
            closed_lookup, closed_distinct)
        agg.isMPC = True
        sal._open(agg, "aggopened", 1)

    def protocol():
        """Chain join and aggregation; return the dag rooted at the inputs."""
        joined_res, inputs = hybrid_join()
        hybrid_agg(joined_res)
        return saldag.OpDag(inputs)

    pid = int(sys.argv[1])
    workflow_name = "ssn-" + str(pid)
    sm_cg_config = SharemindCodeGenConfig(
        workflow_name, "/mnt/shared", use_hdfs=False, use_docker=False)
    codegen_config = CodeGenConfig(
        workflow_name).with_sharemind_config(sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared"
    codegen_config.output_path = "/mnt/shared"

    exampleutils.generate_ssn_data(pid, codegen_config.output_path)

    dag = protocol()
    mapping = part.heupart(dag, ["sharemind"], ["python"])
    job_queue = []
    for idx, (fmwk, subdag, storedWith) in enumerate(mapping):
        # pick the code generator first, then generate the job
        if fmwk == "sharemind":
            code_gen = SharemindCodeGen(codegen_config, subdag, pid)
            job_name = "sharemind-" + str(idx)
        else:
            code_gen = PythonCodeGen(codegen_config, subdag)
            job_name = "python-" + str(idx)
        job = code_gen.generate(job_name, None)
        # TODO: this probably doesn't belong here
        # jobs whose partition is not stored with this party are skipped
        if pid not in storedWith:
            job.skip = True
        job_queue.append(job)

    sharemind_config = exampleutils.get_sharemind_config(pid, True)
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)

    # only party 1 verifies the opened aggregation output
    if pid != 1:
        return
    expected = ['', '1,30', '2,50', '3,30']
    exampleutils.check_res(expected, "/mnt/shared/aggopened.csv")
    print("Success")