Example 1
def local_main():
    current_dir = os.path.dirname(os.path.realpath(__file__))
    data_path = os.path.join(current_dir, "data")
    for pid in ["1", "2"]:
        # define name for the workflow
        workflow_name = "aspirin-local-test-" + pid
        # configure conclave
        conclave_config = CodeGenConfig(workflow_name, int(pid))
        conclave_config.all_pids = [int(pid)]
        sharemind_conf = SharemindCodeGenConfig("/mnt/shared", use_docker=False, use_hdfs=False)
        conclave_config.with_sharemind_config(sharemind_conf)
        # point conclave to the directory where the generated code should be stored/ read from
        conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
        # point conclave to directory where data is to be read from...
        conclave_config.input_path = data_path
        # and written to
        conclave_config.output_path = data_path
        suffix = "left" if pid == "1" else "right"
        # compile the protocol for this party and collect the generated jobs into a queue
        job_queue = generate_code(lambda: protocol_local(suffix, int(pid)), conclave_config, ["sharemind"], ["python"],
                                  apply_optimizations=False)
        dispatch_jobs(job_queue, conclave_config)

    res_mpc = read_rel(data_path + "/" + "actual_mpc_open.csv")
    res_left = read_rel(data_path + "/" + "actual_left.csv")
    res_right = read_rel(data_path + "/" + "actual_right.csv")
    assert len(res_mpc) == 1
    assert len(res_left) == 1
    assert len(res_right) == 1
    res = [[res_mpc[0][0] + res_left[0][0] + res_right[0][0]]]
    write_rel(data_path, "actual_open.csv", res, "1")
Example 2
def main_mpc(pid: str, mpc_backend: str):
    # define name for the workflow
    workflow_name = "real-aspirin-partitioned-" + pid
    # configure conclave
    conclave_config = CodeGenConfig(workflow_name, int(pid)) \
        .with_default_mpc_config(mpc_backend)
    current_dir = os.path.dirname(os.path.realpath(__file__))
    # point conclave to the directory where the generated code should be stored/ read from
    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    # point conclave to directory where data is to be read from...
    conclave_config.input_path = os.path.join(current_dir, "data")
    # and written to
    conclave_config.output_path = os.path.join(current_dir, "data")
    job_queue = generate_code(lambda: protocol_mpc(conclave_config.all_pids), conclave_config, [mpc_backend],
                              ["python"], apply_optimizations=True)
    dispatch_jobs(job_queue, conclave_config)
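For completeness, a minimal entry point for main_mpc could look like the sketch below; the positional argument order (party ID first, then MPC backend) is an assumption based on the other examples in this collection, not part of the original source.

import sys

if __name__ == "__main__":
    # illustrative driver; argv positions are assumed
    main_mpc(sys.argv[1], sys.argv[2])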
Example 3
def generate_code(protocol: callable,
                  conclave_config: CodeGenConfig,
                  mpc_frameworks: list,
                  local_frameworks: list,
                  apply_optimizations: bool = True):
    """
    Applies optimization rewrite passes to the protocol, partitions the resulting condag, and generates
    backend-specific code for each sub-condag.
    :param protocol: protocol to compile
    :param conclave_config: conclave configuration
    :param mpc_frameworks: available mpc backend frameworks
    :param local_frameworks: available local-processing backend frameworks
    :param apply_optimizations: flag indicating if optimization rewrite passes should be applied to condag
    :return: queue of job objects to be executed by dispatcher
    """

    # currently only allow one local and one mpc framework
    assert len(mpc_frameworks) == 1 and len(local_frameworks) == 1

    # set up code gen config object
    if isinstance(conclave_config, CodeGenConfig):
        cfg = conclave_config
    else:
        cfg = CodeGenConfig.from_dict(conclave_config)

    # apply optimizations
    dag = condag.OpDag(protocol())
    # only apply optimizations if required
    if apply_optimizations:
        dag = comp.rewrite_dag(dag)
    # partition into subdags that will run in specific frameworks
    mapping = part.heupart(dag, mpc_frameworks, local_frameworks)
    # for each sub condag run code gen and add resulting job to job queue
    job_queue = []
    for job_num, (framework, sub_dag, stored_with) in enumerate(mapping):
        print(job_num, framework)
        if framework == "sharemind":
            name = "{}-sharemind-job-{}".format(cfg.name, job_num)
            job = SharemindCodeGen(cfg, sub_dag,
                                   cfg.pid).generate(name, cfg.output_path)
            job_queue.append(job)
        elif framework == "spark":
            name = "{}-spark-job-{}".format(cfg.name, job_num)
            job = SparkCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
            job_queue.append(job)
        elif framework == "python":
            name = "{}-python-job-{}".format(cfg.name, job_num)
            job = PythonCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
            job_queue.append(job)
        else:
            raise Exception("Unknown framework: " + framework)

        # TODO: this probably doesn't belong here
        if conclave_config.pid not in stored_with:
            job.skip = True
    return job_queue
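A minimal usage sketch of generate_code together with dispatch_jobs follows. It assumes the same imports used by the surrounding examples (cc for the conclave language module, defCol, CodeGenConfig, SharemindCodeGenConfig); the protocol, relation names, and paths are placeholders, not taken from the original source.

def example_protocol():
    # toy single-party workflow; relation and column names are illustrative
    cols = [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])]
    rel = cc.create("in1", cols, {1})
    agg = cc.aggregate(rel, "agg", ["a"], "b", "sum", "total")
    cc.collect(agg, 1)
    return {rel}


example_cfg = CodeGenConfig("example-workflow", 1)
example_cfg.all_pids = [1]
example_cfg.with_sharemind_config(
    SharemindCodeGenConfig("/mnt/shared", use_docker=False, use_hdfs=False))
example_cfg.code_path = "/mnt/shared/example-workflow"  # placeholder paths
example_cfg.input_path = "/mnt/shared/data"
example_cfg.output_path = "/mnt/shared/data"

job_queue = generate_code(example_protocol, example_cfg, ["sharemind"], ["python"],
                          apply_optimizations=True)
dispatch_jobs(job_queue, example_cfg)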
Example 4
    def check_workflow(self, dag, name, use_leaky_ops=True):
        self.maxDiff = None
        expected_rootdir = \
            "{}/sharemind_expected".format(os.path.dirname(os.path.realpath(__file__)))

        sm_cfg = SharemindCodeGenConfig()
        cfg = CodeGenConfig('cfg').with_sharemind_config(sm_cfg)
        cfg.use_leaky_ops = use_leaky_ops
        cg = SharemindCodeGen(cfg, dag, 1)

        actual = cg._generate('code', '/tmp')[1]['miner']

        with open(expected_rootdir + '/{}'.format(name),
                  'r') as f_specific, open(
                      expected_rootdir + '/{}'.format("base"), 'r') as f_base:
            expected_base = f_base.read()
            expected_specific = f_specific.read()
            expected = expected_base + expected_specific

        self.assertEqual(expected, actual)
Example 5
    def check_workflow(self, dag, name):
        expected_rootdir = \
            "{}/sharemind_expected".format(os.path.dirname(os.path.realpath(__file__)))

        sm_cfg = SharemindCodeGenConfig()
        cfg = CodeGenConfig('cfg').with_sharemind_config(sm_cfg)
        cg = SharemindCodeGen(cfg, dag, 1)

        actual = cg._generate('code', '/tmp')[1]['miner']

        with open(expected_rootdir + '/{}'.format(name), 'r') as f:
            expected = f.read()

        self.assertEqual(expected, actual)
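As an illustration of how these helpers are typically driven, the sketch below builds a small DAG with the conclave language API used in the other examples (cc, defCol, comp.rewrite_dag, condag.OpDag) and hands it to check_workflow; the test name, relation names, and the expected-output file name "agg" are assumptions rather than part of the actual test suite.

    def test_agg(self):
        # illustrative test; relation names and the expected-output file are placeholders
        def protocol():
            cols = [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])]
            in1 = cc.create("in1", cols, {1})
            agg = cc.aggregate(in1, "agg", ["a"], "b", "sum", "total")
            cc.collect(agg, 1)
            return {in1}

        dag = comp.rewrite_dag(condag.OpDag(protocol()))
        self.check_workflow(dag, "agg")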
Example 6
def run_local(pid: str, data_root: str):
    workflow_name = "aspirin-local-join-" + pid + "-" + data_root
    conclave_config = CodeGenConfig(workflow_name, int(pid))
    conclave_config.all_pids = [int(pid)]
    sharemind_conf = SharemindCodeGenConfig("/mnt/shared",
                                            use_docker=False,
                                            use_hdfs=False)
    conclave_config.with_sharemind_config(sharemind_conf)
    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    conclave_config.input_path = os.path.join("/mnt/shared", data_root)
    conclave_config.output_path = os.path.join("/mnt/shared", data_root)
    suffix = "left" if pid == "1" else "right"

    job_queue = generate_code(lambda: protocol_local(suffix, int(pid)),
                              conclave_config, ["sharemind"], ["python"],
                              apply_optimizations=False)
    dispatch_jobs(job_queue, conclave_config)
Example 7
def run_mpc(pid: str, data_root: str, mpc_backend: str):
    workflow_name = "aspirin-mpc-join-" + pid + "-" + data_root

    conclave_config = CodeGenConfig(workflow_name, int(pid))
    conclave_config.use_leaky_ops = False

    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    conclave_config.input_path = os.path.join("/mnt/shared", data_root)
    conclave_config.output_path = os.path.join("/mnt/shared", data_root)

    job_queue = generate_code(lambda: protocol_mpc(conclave_config.all_pids),
                              conclave_config, [mpc_backend], ["python"],
                              apply_optimizations=True)
    dispatch_jobs(job_queue, conclave_config)
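Examples 6 and 7 are presumably invoked together from a small driver script; a sketch of such an entry point follows. The argv positions and the local-then-MPC ordering are assumptions, not taken from the original source.

import sys

if __name__ == "__main__":
    # illustrative driver; argument order and call sequence are assumptions
    pid = sys.argv[1]
    data_root = sys.argv[2]
    mpc_backend = sys.argv[3]
    run_local(pid, data_root)
    run_mpc(pid, data_root, mpc_backend)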
Example 8
    right_cols = [defCol("c", "INTEGER", [1]), defCol("d", "INTEGER", [1])]
    right = cc.create("right", right_cols, {1})

    joined = cc.join(left, right, "joined", ["a"], ["c"])
    cc.aggregate(joined, "expected", ["b"], "d", "sum", "total")

    return {left, right}


if __name__ == "__main__":
    pid = sys.argv[1]
    # define name for the workflow
    workflow_name = "simple-oblivious-test-" + pid
    # configure conclave
    conclave_config = CodeGenConfig(workflow_name, int(pid))
    conclave_config.all_pids = [1]
    sharemind_conf = SharemindCodeGenConfig("/mnt/shared",
                                            use_docker=False,
                                            use_hdfs=False)
    conclave_config.with_sharemind_config(sharemind_conf)
    current_dir = os.path.dirname(os.path.realpath(__file__))
    # point conclave to the directory where the generated code should be stored/ read from
    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    # point conclave to directory where data is to be read from...
    conclave_config.input_path = os.path.join(current_dir, "data")
    # and written to
    conclave_config.output_path = os.path.join(current_dir, "data")
    # compile the protocol and collect the generated jobs into a queue
    job_queue = generate_code(protocol,
                              conclave_config, ["sharemind"], ["python"],
                              apply_optimizations=True)
    dispatch_jobs(job_queue, conclave_config)
Example 9
    heart_patients = cc.cc_filter(aspirin,
                                  "heart_patients",
                                  diag_col_diags,
                                  "==",
                                  scalar=1)

    cc.collect(cc.distinct_count(heart_patients, "actual", pid_col_meds), 1)

    return {left_medication, left_diagnosis, right_medication, right_diagnosis}


if __name__ == "__main__":
    pid = sys.argv[1]
    # define name for the workflow
    workflow_name = "real-aspirin-test-" + pid
    # configure conclave
    mpc_backend = sys.argv[2]
    conclave_config = CodeGenConfig(workflow_name, int(pid)) \
        .with_default_mpc_config(mpc_backend)
    current_dir = os.path.dirname(os.path.realpath(__file__))
    # point conclave to the directory where the generated code should be stored/ read from
    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    # point conclave to directory where data is to be read from...
    conclave_config.input_path = os.path.join(current_dir, "data")
    # and written to
    conclave_config.output_path = os.path.join(current_dir, "data")
    job_queue = generate_code(lambda: protocol(conclave_config.all_pids),
                              conclave_config, [mpc_backend], ["python"],
                              apply_optimizations=True)
    dispatch_jobs(job_queue, conclave_config)
Example 10
def heupart(dag: Dag, mpc_frameworks: list, local_frameworks: list):
    """ Non-exhaustive partition. Returns best partition with respect to certain heuristics. """

    def get_stored_with(node: OpNode):
        """ Returns the stored_with set of a node's out_rel or in_rel, depending on its type. """
        if isinstance(node, Open):
            return node.get_in_rel().stored_with
        elif isinstance(node, Create):
            return get_stored_with(next(iter(node.children)))
        else:
            return node.out_rel.stored_with

    def is_correct_mode(node: OpNode, available: set, stored_with: set):
        """ Verifies that the node is stored with the same set of parties that was passed to this function. """

        if get_stored_with(node) != stored_with:
            return False

        # otherwise check parents
        return node.parents.issubset(available) or not (node.parents or available)

    def can_partition(dag: Dag, stored_with: set, top_available: set):
        """ Returns whether the Dag passed to it can be partitioned. """

        # copy so we don't overwrite global available nodes in this pass
        available = deepcopy(top_available)
        ordered = dag.top_sort()
        unavailable = set()

        for node in ordered:
            if node in unavailable and get_stored_with(node) == stored_with:
                for parent in node.parents:
                    if parent in available and not isinstance(parent, Persist):
                        return False
            if is_correct_mode(node, available, stored_with):
                available.add(node)
            else:
                # mark all descendants as unavailable
                descendants = Dag(set([node])).get_all_nodes()
                unavailable = unavailable.union(descendants)
        return True

    def disconnect_at_roots(current_dag: Dag, available: set, new_roots: list):

        previous_parents = set()
        create_op_lookup = dict()
        for root in new_roots:
            for parent in copy(root.parents):
                if parent in available:
                    create_op = None
                    if parent not in previous_parents:
                        create_op = Create(deepcopy(parent.out_rel))
                        # create op is in same mode as root
                        create_op.is_mpc = root.is_mpc
                        previous_parents.add(parent)
                        create_op_lookup[parent.out_rel.name] = create_op
                    else:
                        create_op = create_op_lookup[parent.out_rel.name]
                    # unlink root from parent
                    parent.children.remove(root)
                    # insert create op between parent and root
                    root.replace_parent(parent, create_op)
                    # connect create op with root
                    create_op.children.add(root)
                    # keep track of parents we have already visited
                    previous_parents.add(parent)
                    create_op_lookup[create_op.out_rel.name] = create_op
            if root in current_dag.roots:
                current_dag.roots.remove(root)

        parent_roots = set().union(*[root.parents for root in new_roots])
        for root in new_roots:
            if isinstance(root, Create):
                parent_roots.add(root)

        return OpDag(set(parent_roots)), available

    def find_new_roots(current_dag: Dag, available: set, stored_with: set):

        # need topological ordering
        ordered = current_dag.top_sort()
        
        # roots of the next subdag, i.e., where the current subdag will end
        new_roots = []

        # traverse current condag until all boundary nodes are hit
        for node in ordered:
            if is_correct_mode(node, available, stored_with):
                available.add(node)
            elif (not node.parents) or (node.parents & available):
                if node not in new_roots:
                    new_roots.append(node)
        
        # roots of the next subdag
        return new_roots

    def next_partition(nextdag, available, holding_parties):

        # roots of the next subdag
        new_roots = find_new_roots(nextdag, available, holding_parties)
        # disconnect current dags at new root nodes and return the disconnected
        # bottom condag
        return disconnect_at_roots(nextdag, available, new_roots)

    def _merge_dags(left_dag, right_dag):

        # TODO: should go inside dagutils, once dagutils exists
        # to merge, we only need to combine roots
        roots = left_dag.roots.union(right_dag.roots)
        return OpDag(roots)

    def next_holding_ps(nextdag, available):

        roots = nextdag.roots
        for root in sorted(roots, key=lambda node: node.out_rel.name):
            holding_ps = get_stored_with(root)
            if can_partition(nextdag, holding_ps, available):
                return holding_ps, root.is_mpc
        raise Exception("Found no roots to partition on")

    def merge_neighbor_dags(mapping):

        updated_mapping = []
        prev_fmwk, prev_subdag, prev_stored_with = None, None, None

        for fmwk, subdag, stored_with in mapping:
            # we can merge neighboring subdags if they're mapped to the same
            # framework and are stored by same parties
            if fmwk == prev_fmwk and stored_with == prev_stored_with:
                # merge dags together
                merged_dag = _merge_dags(prev_subdag, subdag)
                # pop previous subdag
                updated_mapping = updated_mapping[:-1]
                updated_mapping.append((fmwk, merged_dag, stored_with))
            else:
                # can't merge, so just add subdag to result
                updated_mapping.append((fmwk, subdag, stored_with))
            # keep track of previous values
            prev_fmwk = fmwk
            prev_subdag = subdag
            prev_stored_with = stored_with
        return updated_mapping

    assert len(mpc_frameworks) == 1 and len(local_frameworks) == 1
    nextdag = dag
    mapping = []
    available = set()

    iterations = 0
    iteration_limit = 100

    local_fmwk = local_frameworks[0]
    mpc_fmwk = mpc_frameworks[0]

    while nextdag.roots:
        if iterations > iteration_limit:
            raise Exception("Reached iteration limit while partitioning")
        # find holding set and mpc mode of next valid partition
        holding_ps, mpcmode = next_holding_ps(nextdag, available)
        # select framework
        fmwk = mpc_fmwk if mpcmode else local_fmwk
        # store mapping
        mapping.append((fmwk, nextdag, holding_ps))
        # partition next subdag
        nextdag, available = next_partition(nextdag, available, holding_ps)
        # increment iteration count
        iterations += 1
    
    for fmwk, subdag, stored_with in mapping:
        print(ScotchCodeGen(CodeGenConfig(), subdag)._generate(0, 0))

    merged = merge_neighbor_dags(mapping)
    return merged
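For orientation, the sketch below shows the shape of the mapping heupart returns and how it is consumed (generate_code in Example 3 does the same in its code-generation loop); the dag variable and the framework lists are placeholders.

# dag is assumed to be an OpDag built from a protocol, as in Example 3
mapping = heupart(dag, ["sharemind"], ["python"])
for fmwk, sub_dag, stored_with in mapping:
    # each entry pairs a sub-dag with the framework that will execute it
    # and the set of party IDs that hold its inputs
    print(fmwk, sorted(stored_with))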
Example 11
def main():
    pid = sys.argv[1]
    data_root = sys.argv[2]
    mpc_backend = sys.argv[3]

    # define name for the workflow
    workflow_name = "aspirin-large-join-" + pid + "-" + data_root
    # configure conclave
    conclave_config = CodeGenConfig(workflow_name, int(pid))
    if mpc_backend == "sharemind":
        sharemind_conf = SharemindCodeGenConfig("/mnt/shared",
                                                use_docker=True,
                                                use_hdfs=False)
        conclave_config.with_sharemind_config(sharemind_conf)
    elif mpc_backend == "obliv-c":
        conclave_config.all_pids = [1, 2]
        net_conf = [{
            "host": "ca-spark-node-0",
            "port": 8001
        }, {
            "host": "cb-spark-node-0",
            "port": 8002
        }]
        net = NetworkConfig(net_conf, int(pid))
        conclave_config.with_network_config(net)

        oc_conf = OblivcConfig("/obliv-c/bin/oblivcc", "ca-spark-node-0:9000")
        conclave_config.with_oc_config(oc_conf)
    else:
        raise Exception("Unknown MPC backend {}".format(mpc_backend))

    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    conclave_config.input_path = os.path.join("/mnt/shared", data_root)
    conclave_config.output_path = os.path.join("/mnt/shared", data_root)

    job_queue = generate_code(protocol,
                              conclave_config, [mpc_backend], ["python"],
                              apply_optimizations=True)
    dispatch_jobs(job_queue, conclave_config)
Example 12
    company1_cols = [
        defCol("c", "INTEGER", 1, 3),
        defCol("d", "INTEGER", 3)
    ]
    company1 = cc.create("company1", company1_cols, {3})

    companies = cc.concat([company0, company1], "companies")

    joined = cc.join(govreg, companies, "joined", ["a"], ["c"])
    actual = cc.aggregate(joined, "actual", ["b"], "d", "sum", "total")
    cc.collect(actual, 1)

    return {govreg, company0, company1}


if __name__ == "__main__":
    pid = sys.argv[1]
    data_root = sys.argv[2]
    workflow_name = "ssn-benchmark" + pid + "-" + data_root
    conclave_config = CodeGenConfig(workflow_name, int(pid))
    sharemind_conf = SharemindCodeGenConfig("/mnt/shared", use_docker=True, use_hdfs=False)
    conclave_config.with_sharemind_config(sharemind_conf)
    conclave_config.use_leaky_ops = True
    current_dir = os.path.dirname(os.path.realpath(__file__))
    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    conclave_config.input_path = os.path.join("/mnt/shared", data_root)
    conclave_config.output_path = os.path.join("/mnt/shared", data_root)
    job_queue = generate_code(protocol, conclave_config, ["sharemind"], ["python"], apply_optimizations=True)
    dispatch_jobs(job_queue, conclave_config)
Example 13
    squared = lang.multiply(input_relation, "squared", "column_b",
                            ["column_b", "column_b"])
    # group by column_a, sum column_b, and name the aggregate output column "summed"
    lang.aggregate(squared, "aggregated", ["column_a"], "column_b", "+",
                   "summed")
    # leaf nodes are automatically written to file so aggregated will be written to ./data/aggregated.csv

    # return all input relations
    return {input_relation}


if __name__ == "__main__":
    # define name for the workflow
    workflow_name = "python-demo"
    # configure conclave
    conclave_config = CodeGenConfig(workflow_name)
    # need the absolute path to current directory
    current_dir = os.path.dirname(os.path.realpath(__file__))
    # point conclave to the directory where the generated code should be stored/ read from
    conclave_config.code_path = os.path.join(current_dir, workflow_name)
    # point conclave to directory where data is to be read from...
    conclave_config.input_path = os.path.join(current_dir, "data")
    # and written to
    conclave_config.output_path = os.path.join(current_dir, "data")
    # define this party's unique ID (in this demo there is only one party)
    conclave_config.pid = 1
    # define all parties involved in this workflow
    conclave_config.all_pids = [1]
    # compile and execute protocol, specifying available mpc and local processing backends
    generate_and_dispatch(protocol, conclave_config, ["sharemind"], ["python"])
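The snippet above begins mid-protocol and refers to an input_relation defined earlier; a minimal, illustrative definition consistent with the defCol/create API used in the other examples might look like this (the column names match the snippet, everything else is assumed).

def protocol():
    # illustrative: define the single input relation referenced above;
    # in the original demo this precedes the multiply and aggregate steps
    input_cols = [defCol("column_a", "INTEGER", [1]),
                  defCol("column_b", "INTEGER", [1])]
    input_relation = lang.create("input_relation", input_cols, {1})
    # ... the multiply and aggregate steps shown above would follow here ...
    return {input_relation}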