Example #1
0
def protocol_local(suffix: str, pid: int):
    """
    Build the workflow one party evaluates over its own medication and diagnosis relations.

    Both relations are projected down to their patient-ID, code and date columns and passed
    through filter_by against the "a_<suffix>_shared_pids" relation with use_not_in=True
    (presumably keeping only patient IDs that are NOT shared -- confirm against filter_by).
    The two results are joined on patient ID, narrowed by three filters, and the distinct
    patient IDs are counted into the relation "actual_<suffix>".

    :param suffix: text used to build the relation names ("<suffix>_medication",
        "<suffix>_diagnosis", "a_<suffix>_shared_pids", "actual_<suffix>")
    :param pid: party ID used as the third argument of every column definition and as the
        only member of the set passed to each create call
    :return set containing the medication and diagnosis input relations
    """
    n_med_cols, n_diag_cols = 8, 13

    # columns are named after their stringified position; diagnosis columns are numbered
    # right after the medication columns
    meds_pid, meds_code, meds_date = "0", "4", "7"
    diags_pid, diags_code, diags_date = "8", "16", "18"

    med_schema = []
    for idx in range(n_med_cols):
        med_schema.append(defCol(str(idx), "INTEGER", pid))
    medication = cc.create(suffix + "_medication", med_schema, {pid})

    diag_schema = []
    for idx in range(n_med_cols, n_med_cols + n_diag_cols):
        diag_schema.append(defCol(str(idx), "INTEGER", pid))
    diagnosis = cc.create(suffix + "_diagnosis", diag_schema, {pid})

    shared_schema = [defCol(meds_pid, "INTEGER", pid)]
    shared_pids = cc.create("a_{}_shared_pids".format(suffix), shared_schema, {pid})

    # medication side: keep the three relevant columns, then filter against the shared PIDs
    meds_cols = [meds_pid, meds_code, meds_date]
    meds_slim = cc.project(medication, "medication_proj", meds_cols)
    meds_local = cc.filter_by(meds_slim, "medication_mine", meds_pid, shared_pids, use_not_in=True)

    # diagnosis side: same treatment
    diags_cols = [diags_pid, diags_code, diags_date]
    diags_slim = cc.project(diagnosis, "diagnosis_proj", diags_cols)
    diags_local = cc.filter_by(diags_slim, "diagnosis_mine", diags_pid, shared_pids, use_not_in=True)

    merged = cc.join(meds_local, diags_local, "joined", [meds_pid], [diags_pid])

    # diagnosis date < medication date, then medication code == 1, then diagnosis code == 1
    dated = cc.cc_filter(merged, "cases", diags_date, "<", other_col_name=meds_date)
    on_aspirin = cc.cc_filter(dated, "aspirin", meds_code, "==", scalar=1)
    with_heart_diag = cc.cc_filter(on_aspirin, "heart_patients", diags_code, "==", scalar=1)

    cc.distinct_count(with_heart_diag, "actual_" + suffix, meds_pid)

    return {medication, diagnosis}
Example #2
0
def protocol():
    """
    Two-party workflow that calls cc._pub_intersect on column "a" of each party's input.

    Party 1's relation "in_1" is intersected with the default role, party 2's relation
    "in_2" with is_server=False; the outputs are named "actual_1" and "actual_2".
    NOTE(review): presumably party 1 acts as the server side of the intersection -- confirm
    against the _pub_intersect implementation.

    :return set of input relations
    """
    col_names = ("a", "b")

    in_1 = cc.create("in_1", [defCol(name, "INTEGER", 1) for name in col_names], {1})
    in_2 = cc.create("in_2", [defCol(name, "INTEGER", 2) for name in col_names], {2})

    cc._pub_intersect(in_1, "actual_1", "a")
    cc._pub_intersect(in_2, "actual_2", "a", is_server=False)

    return {in_1, in_2}
Example #3
0
def protocol():
    """
    Single-party workflow: join two relations of party 1, then sum per group.

    "left" (columns a, b) and "right" (columns c, d) are joined with key columns "a" and
    "c"; the join result is grouped by "b" and column "d" is summed into "total" in the
    relation named "expected".

    :return set of input relations
    """
    left = cc.create("left", [defCol(name, "INTEGER", [1]) for name in ("a", "b")], {1})
    right = cc.create("right", [defCol(name, "INTEGER", [1]) for name in ("c", "d")], {1})

    merged = cc.join(left, right, "joined", ["a"], ["c"])
    cc.aggregate(merged, "expected", ["b"], "d", "sum", "total")

    return {left, right}
Example #4
0
def protocol():
    """
    Two-party workflow: join party 1's relation with party 2's, sum per group, collect.

    Each input is first passed through a projection that keeps both of its columns. The
    projections are joined with key columns "a" and "c", grouped by "b" with "d" summed
    into "total" (relation "actual"), and the aggregate is handed to cc.collect with
    target 1 (presumably the receiving party -- confirm).

    :return set of input relations
    """
    left = cc.create("left", [defCol(name, "INTEGER", [1]) for name in ("a", "b")], {1})
    left_passthrough = cc.project(left, "left_dummy", ["a", "b"])

    right = cc.create("right", [defCol(name, "INTEGER", [2]) for name in ("c", "d")], {2})
    right_passthrough = cc.project(right, "right_dummy", ["c", "d"])

    merged = cc.join(left_passthrough, right_passthrough, "joined", ["a"], ["c"])
    totals = cc.aggregate(merged, "actual", ["b"], "d", "sum", "total")
    cc.collect(totals, 1)

    return {left, right}
Example #5
0
def protocol():
    """
    Two-party workflow: concatenate per-party inputs on each side, join, and collect.

    Parties 1 and 2 each contribute a "left" relation (columns a, b) and a "right" relation
    (columns c, d). The first column of every relation is defined with the extra arguments
    1, 2, 3, the second only with the owning party. The concatenated sides are joined with
    key columns "a" and "c" into "actual", which is handed to cc.collect with target 1.

    :return set of the four input relations
    """

    def _make_input(rel_name, key_col, value_col, owner):
        # key column carries the arguments 1, 2, 3; the value column only the owner
        schema = [
            defCol(key_col, "INTEGER", 1, 2, 3),
            defCol(value_col, "INTEGER", owner)
        ]
        return cc.create(rel_name, schema, {owner})

    left_one = _make_input("left_one", "a", "b", 1)
    right_one = _make_input("right_one", "c", "d", 1)
    left_two = _make_input("left_two", "a", "b", 2)
    right_two = _make_input("right_two", "c", "d", 2)

    all_left = cc.concat([left_one, left_two], "left")
    all_right = cc.concat([right_one, right_two], "right")

    result = cc.join(all_left, all_right, "actual", ["a"], ["c"])
    cc.collect(result, 1)

    return {left_one, left_two, right_one, right_two}
Example #6
0
def protocol():
    """
    Single-party workflow: join two relations that both belong to party 1.

    "left" (columns a, b) and "right" (columns c, d) are joined with key columns "a" and
    "c"; the join result is the relation named "expected".

    :return set of input relations
    """
    input_columns_left = [
        defCol("a", "INTEGER", [1]),
        defCol("b", "INTEGER", [1])
    ]
    left = cc.create("left", input_columns_left, {1})
    input_columns_right = [
        defCol("c", "INTEGER", [1]),
        defCol("d", "INTEGER", [1])
    ]
    right = cc.create("right", input_columns_right, {1})
    # the returned node is never read afterwards, so the call is made for its effect only
    # and no local name is bound to it
    cc.join(left, right, "expected", ["a"], ["c"])

    return {left, right}
Example #7
0
def protocol():
    """
    Single-party workflow: concatenate two relations of party 1, sum per group, collect.

    "left" has columns column_a/column_b and "right" has columns column_a/column_c. Their
    concatenation "rel" is grouped by "column_a" with "column_b" summed into "total_b"
    (relation "expected"), which is handed to cc.collect with target 1.

    :return set of input relations
    """
    left = cc.create(
        "left",
        [defCol(name, "INTEGER", [1]) for name in ("column_a", "column_b")],
        {1}
    )
    right = cc.create(
        "right",
        [defCol(name, "INTEGER", [1]) for name in ("column_a", "column_c")],
        {1}
    )

    combined = cc.concat([left, right], "rel")
    totals = cc.aggregate(combined, "expected", ["column_a"], "column_b", "sum", "total_b")
    cc.collect(totals, 1)

    return {left, right}
Example #8
0
def protocol():
    """
    Single-party workflow: concat, filter, sort, and count distinct values.

    Two party-1 relations with columns column_a/column_b are concatenated into "rel". Rows
    with column_b == 1 are kept ("filtered"), the result is sorted by "column_a"
    ("in_order"), and distinct_count is applied to "column_a" to produce "expected".

    :return set of input relations
    """
    col_names = ("column_a", "column_b")

    left = cc.create("left", [defCol(name, "INTEGER", [1]) for name in col_names], {1})
    right = cc.create("right", [defCol(name, "INTEGER", [1]) for name in col_names], {1})

    combined = cc.concat([left, right], "rel")
    only_ones = cc.cc_filter(combined, "filtered", "column_b", "==", scalar=1)
    ordered = cc.sort_by(only_ones, "in_order", "column_a")
    cc.distinct_count(ordered, "expected", "column_a")

    return {left, right}
Example #9
0
def protocol():
    """
    Two-party workflow: concatenate, sum per group, project, and collect.

    Party 1 provides "left" (column_a, column_b); party 2 provides "right" (column_a,
    column_c), whose column_a is defined with the two extra arguments [1] and [2]. The
    concatenation "rel" is grouped by "column_a" with "column_b" summed into "total_b"
    (relation "actual"). A projection onto column_a/total_b ("actual_open") is handed to
    cc.collect with target 1.

    :return set of input relations
    """
    left_schema = [defCol(name, "INTEGER", [1]) for name in ("column_a", "column_b")]
    left = cc.create("left", left_schema, {1})

    right_schema = [
        defCol("column_a", "INTEGER", [1], [2]),
        defCol("column_c", "INTEGER", [1])
    ]
    right = cc.create("right", right_schema, {2})

    combined = cc.concat([left, right], "rel")
    totals = cc.aggregate(combined, "actual", ["column_a"], "column_b", "sum", "total_b")
    revealed = cc.project(totals, "actual_open", ["column_a", "total_b"])
    cc.collect(revealed, 1)

    return {left, right}
Example #10
0
def protocol():
    """
    Two-party workflow: count rows per diagnosis value and collect the sorted counts.

    Each party contributes a 13-column relation whose columns are named "0" .. "12". The
    concatenation "cohort" is passed to aggregate_count grouped by column "12", producing
    "counts" with the count column "total". The counts are sorted by "total" into "actual"
    and handed to cc.collect with target 1.

    :return set of input relations
    """
    n_cols = 13
    group_col = "12"

    left_schema = []
    for idx in range(n_cols):
        left_schema.append(defCol(str(idx), "INTEGER", 1))
    left_diagnosis = cc.create("left_diagnosis", left_schema, {1})

    right_schema = []
    for idx in range(n_cols):
        right_schema.append(defCol(str(idx), "INTEGER", 2))
    right_diagnosis = cc.create("right_diagnosis", right_schema, {2})

    everyone = cc.concat([left_diagnosis, right_diagnosis], "cohort")
    per_diagnosis = cc.aggregate_count(everyone, "counts", [group_col], "total")
    ranked = cc.sort_by(per_diagnosis, "actual", "total")
    cc.collect(ranked, 1)

    return {left_diagnosis, right_diagnosis}
Example #11
0
def protocol():
    """
    Two-party workflow: join party 1's relation with party 2's and collect the result.

    Both inputs are first passed through a projection that keeps all of their columns
    ("zzz_left_dummy" and "right_dummy"). Column "c" of the right relation is defined with
    the two extra arguments [1] and [2]. The projections are joined with key columns "a"
    and "c" into "actual", which is handed to cc.collect with target 1.

    :return set of input relations
    """
    # party 1 input
    left = cc.create("left", [defCol(name, "INTEGER", [1]) for name in ("a", "b")], {1})
    left_passthrough = cc.project(left, "zzz_left_dummy", ["a", "b"])

    # party 2 input
    right_schema = [
        defCol("c", "INTEGER", [1], [2]),
        defCol("d", "INTEGER", [2])
    ]
    right = cc.create("right", right_schema, {2})
    right_passthrough = cc.project(right, "right_dummy", ["c", "d"])

    result = cc.join(left_passthrough, right_passthrough, "actual", ["a"], ["c"])
    cc.collect(result, 1)

    # hand back the input relations
    return {left, right}
Example #12
0
def protocol():
    """
    Three-party workflow: join a registry relation with concatenated company relations.

    Party 1 provides "govreg" (columns a, b). Parties 2 and 3 provide "company0" and
    "company1" (columns c, d); each "c" column is defined with party 1 and the owning party
    as extra arguments. The concatenated companies are joined to govreg with key columns
    "a" and "c"; the join is grouped by "b" with "d" summed into "total" (relation
    "actual"), which is handed to cc.collect with target 1.

    :return set of the three input relations
    """
    govreg = cc.create("govreg", [defCol(name, "INTEGER", 1) for name in ("a", "b")], {1})

    company0_schema = [defCol("c", "INTEGER", 1, 2), defCol("d", "INTEGER", 2)]
    company0 = cc.create("company0", company0_schema, {2})

    company1_schema = [defCol("c", "INTEGER", 1, 3), defCol("d", "INTEGER", 3)]
    company1 = cc.create("company1", company1_schema, {3})

    all_companies = cc.concat([company0, company1], "companies")

    matched = cc.join(govreg, all_companies, "joined", ["a"], ["c"])
    totals = cc.aggregate(matched, "actual", ["b"], "d", "sum", "total")
    cc.collect(totals, 1)

    return {govreg, company0, company1}
Example #13
0
def protocol():
    """
    Demo workflow: multiply a column by itself, then aggregate it per group.

    The single input relation "input_relation" has the columns column_a and column_b. The
    workflow reads it from data/input_relation.csv and writes the final relation to
    data/aggregated.csv (paths as stated by the original description of this demo; they
    are not set anywhere in this function).
    :return set of input relations
    """
    # schema entries are (column name, type, trust set)
    schema = [defCol(name, "INTEGER", [1]) for name in ("column_a", "column_b")]
    # relation name, columns, and owner set
    source = lang.create("input_relation", schema, {1})
    # (column_a, column_b) -> (column_a, column_b * column_b)
    product = lang.multiply(source, "squared", "column_b", ["column_b", "column_b"])
    # group by column_a, aggregate column_b with "+", and name the aggregated column "summed";
    # the resulting leaf node is written to file automatically, so its handle is not kept
    lang.aggregate(product, "aggregated", ["column_a"], "column_b", "+", "summed")

    return {source}
Example #14
0
def protocol():
    """
    Single-party workflow: concatenate two left and two right relations, then join them.

    All four inputs belong to party 1. The "left_*" relations have columns a/b and the
    "right_*" relations have columns c/d. The concatenations "left" and "right" are joined
    with key columns "a" and "c" into the relation named "expected".

    :return set of the four input relations
    """
    layout = (
        ("left_one", ("a", "b")),
        ("right_one", ("c", "d")),
        ("left_two", ("a", "b")),
        ("right_two", ("c", "d")),
    )

    # relations are created in the listed order
    created = []
    for rel_name, col_names in layout:
        schema = [defCol(col, "INTEGER", 1) for col in col_names]
        created.append(cc.create(rel_name, schema, {1}))
    left_one, right_one, left_two, right_two = created

    all_left = cc.concat([left_one, left_two], "left")
    all_right = cc.concat([right_one, right_two], "right")

    cc.join(all_left, all_right, "expected", ["a"], ["c"])

    return {left_one, left_two, right_one, right_two}
Example #15
0
def protocol():
    """
    Single-party workflow: join a registry relation with concatenated company relations.

    All inputs belong to party 1: "govreg" (columns a, b) plus "company0" and "company1"
    (columns c, d). The concatenated companies are joined to govreg with key columns "a"
    and "c"; the join is grouped by "b" with "d" summed into "total" in the relation named
    "expected".

    :return set of the three input relations
    """
    company_cols = ("c", "d")

    govreg = cc.create("govreg", [defCol(name, "INTEGER", [1]) for name in ("a", "b")], {1})
    company0 = cc.create("company0", [defCol(name, "INTEGER", [1]) for name in company_cols], {1})
    company1 = cc.create("company1", [defCol(name, "INTEGER", [1]) for name in company_cols], {1})

    all_companies = cc.concat([company0, company1], "companies")
    matched = cc.join(govreg, all_companies, "joined", ["a"], ["c"])
    cc.aggregate(matched, "expected", ["b"], "d", "sum", "total")

    return {govreg, company0, company1}
Example #16
0
def protocol():
    """
    Three-party workflow that derives the relation "hhi" from per-company price sums.

    Each of the parties 1, 2 and 3 contributes a relation with the columns companyID and
    price. The steps, in order:

    1. concatenate the inputs and project onto companyID/price
    2. sum price per companyID into local_rev
    3. divide local_rev by 1000
    4. multiply companyID by 0
    5. multiply local_rev by 100 (branch A) and, separately, sum local_rev per companyID
       into global_rev (branch B)
    6. join branch A with branch B on companyID and divide local_rev by global_rev
    7. multiply local_rev by itself and by 1, then sum it per companyID into hhi

    The final relation is handed to cc.collect with target 1.
    NOTE(review): the name suggests a Herfindahl-Hirschman index over market shares --
    confirm with the workflow's author.

    :return set of input relations (the root nodes)
    """
    inputs = []
    for rel_name, party in (("in1", 1), ("in2", 2), ("in3", 3)):
        schema = [
            defCol("companyID", "INTEGER", [party]),
            defCol("price", "INTEGER", [party])
        ]
        inputs.append(cc.create(rel_name, schema, {party}))
    in1, in2, in3 = inputs

    all_rides = cc.concat([in1, in2, in3], "cab_data")
    relevant = cc.project(all_rides, "selected_input", ["companyID", "price"])

    revenue = cc.aggregate(relevant, "local_rev", ["companyID"], "price", "sum", "local_rev")
    revenue_small = cc.divide(revenue, "scaled_down", "local_rev", ["local_rev", 1000])

    # companyID is multiplied by 0 here; presumably this gives every row the same key so
    # the per-companyID sum below covers all rows -- confirm
    same_key = cc.multiply(revenue_small, "first_val_blank", "companyID", ["companyID", 0])

    # branch A: local_rev times 100
    revenue_pct = cc.multiply(same_key, "local_rev_scaled", "local_rev", ["local_rev", 100])
    # branch B: aggregated from the un-multiplied relation, not from branch A
    revenue_total = cc.aggregate(same_key, "total_rev", ["companyID"], "local_rev", "sum", "global_rev")

    side_by_side = cc.join(revenue_pct, revenue_total, "local_total_rev", ["companyID"], ["companyID"])
    share = cc.divide(side_by_side, "market_share", "local_rev", ["local_rev", "global_rev"])
    share_sq = cc.multiply(share, "market_share_squared", "local_rev", ["local_rev", "local_rev", 1])
    index = cc.aggregate(share_sq, "hhi", ["companyID"], "local_rev", "sum", "hhi")

    cc.collect(index, 1)

    return {in1, in2, in3}
Example #17
0
def protocol(all_pids: list):
    """
    Two-party workflow counting distinct patient IDs that pass three filters.

    Parties 1 and 2 each contribute a medication relation (columns "0" .. "7") and a
    diagnosis relation (columns "8" .. "20"). The first column of every relation (the
    patient ID) is defined with `all_pids` as its third argument instead of the owning
    party, which the original code labels a "public PID column".

    The per-party relations are concatenated, projected onto their patient-ID, code and
    date columns, joined on patient ID, and filtered three times (diagnosis date <
    medication date, medication code == 1, diagnosis code == 1). distinct_count over the
    patient ID yields "actual", which is handed to cc.collect with target 1.

    :param all_pids: value used as the third argument of the patient-ID column definitions
        (presumably the IDs of all participating parties -- confirm with callers)
    :return set of the four input relations
    """
    n_med_cols, n_diag_cols = 8, 13

    meds_pid, meds_code, meds_date = "0", "4", "7"
    diags_pid, diags_code, diags_date = "8", "16", "18"

    def _schema(first_idx, n_cols, owner, pid_col):
        # every column is first defined for the owner; the leading one is then replaced
        # by the patient-ID definition that uses all_pids
        cols = [
            defCol(str(idx), "INTEGER", owner)
            for idx in range(first_idx, first_idx + n_cols)
        ]
        cols[0] = defCol(pid_col, "INTEGER", all_pids)
        return cols

    left_medication = cc.create(
        "left_medication", _schema(0, n_med_cols, 1, meds_pid), {1})
    left_diagnosis = cc.create(
        "left_diagnosis", _schema(n_med_cols, n_diag_cols, 1, diags_pid), {1})
    right_medication = cc.create(
        "right_medication", _schema(0, n_med_cols, 2, meds_pid), {2})
    right_diagnosis = cc.create(
        "right_diagnosis", _schema(n_med_cols, n_diag_cols, 2, diags_pid), {2})

    all_meds = cc.concat([left_medication, right_medication], "medication")
    all_diags = cc.concat([left_diagnosis, right_diagnosis], "diagnosis")

    # drop every column the filters below do not need
    meds_slim = cc.project(all_meds, "medication_proj",
                           [meds_pid, meds_code, meds_date])
    diags_slim = cc.project(all_diags, "diagnosis_proj",
                            [diags_pid, diags_code, diags_date])

    merged = cc.join(meds_slim, diags_slim, "joined", [meds_pid], [diags_pid])

    dated = cc.cc_filter(merged, "cases", diags_date, "<",
                         other_col_name=meds_date)
    on_aspirin = cc.cc_filter(dated, "aspirin", meds_code, "==", scalar=1)
    with_heart_diag = cc.cc_filter(on_aspirin, "heart_patients", diags_code,
                                   "==", scalar=1)

    patient_count = cc.distinct_count(with_heart_diag, "actual", meds_pid)
    cc.collect(patient_count, 1)

    return {left_medication, left_diagnosis, right_medication, right_diagnosis}
Example #18
0
def protocol_mpc(all_pids: list):
    """
    Two-party workflow over the rows that pass each party's shared patient-ID filter.

    Parties 1 and 2 each contribute a medication relation (columns "0" .. "7") and a
    diagnosis relation (columns "8" .. "20"); the first column of each (the patient ID) is
    defined with `all_pids` as its third argument.

    "Manual slicing" phase: per party, the patient-ID columns of both relations are
    combined with cc.union, passed to cc._pub_intersect (party 2 with is_server=False) and
    persisted as "a_left_shared_pids" / "a_right_shared_pids". Each projected input is then
    passed through filter_by against its party's shared-PID relation (presumably keeping
    only shared patient IDs -- confirm against filter_by).

    The filtered relations are concatenated, joined on patient ID, filtered three times
    (diagnosis date < medication date, medication code == 1, diagnosis code == 1), and the
    distinct patient-ID count "actual_mpc" is handed to cc.collect with target 1.

    :param all_pids: value used as the third argument of the patient-ID column definitions
        (presumably the IDs of all participating parties -- confirm with callers)
    :return set of the four input relations
    """
    n_med_cols, n_diag_cols = 8, 13
    diag_stop = n_med_cols + n_diag_cols

    meds_pid, meds_code, meds_date = "0", "4", "7"
    diags_pid, diags_code, diags_date = "8", "16", "18"

    meds_keep = [meds_pid, meds_code, meds_date]
    diags_keep = [diags_pid, diags_code, diags_date]

    # --- inputs; in each schema the leading column is replaced by the public PID column ---
    l_med_schema = [defCol(str(idx), "INTEGER", 1) for idx in range(n_med_cols)]
    l_med_schema[0] = defCol(meds_pid, "INTEGER", all_pids)
    left_medication = cc.create("left_medication", l_med_schema, {1})

    l_diag_schema = [defCol(str(idx), "INTEGER", 1) for idx in range(n_med_cols, diag_stop)]
    l_diag_schema[0] = defCol(diags_pid, "INTEGER", all_pids)
    left_diagnosis = cc.create("left_diagnosis", l_diag_schema, {1})

    r_med_schema = [defCol(str(idx), "INTEGER", 2) for idx in range(n_med_cols)]
    r_med_schema[0] = defCol(meds_pid, "INTEGER", all_pids)
    right_medication = cc.create("right_medication", r_med_schema, {2})

    r_diag_schema = [defCol(str(idx), "INTEGER", 2) for idx in range(n_med_cols, diag_stop)]
    r_diag_schema[0] = defCol(diags_pid, "INTEGER", all_pids)
    right_diagnosis = cc.create("right_diagnosis", r_diag_schema, {2})

    # --- manual slicing: derive and persist each party's shared-PID relation ---
    l_pids = cc.union(left_medication, left_diagnosis, "left_pids", meds_pid, diags_pid)
    r_pids = cc.union(right_medication, right_diagnosis, "right_pids", meds_pid, diags_pid)

    l_shared = cc._pub_intersect(l_pids, "a_left_shared_pids", meds_pid)
    cc._persist(l_shared, "a_left_shared_pids")
    r_shared = cc._pub_intersect(r_pids, "a_right_shared_pids", meds_pid, is_server=False)
    cc._persist(r_shared, "a_right_shared_pids")

    # --- project each input, then filter it against its party's shared-PID relation ---
    l_med_slim = cc.project(left_medication, "left_medication_proj", meds_keep)
    l_med_shared = cc.filter_by(l_med_slim, "left_medication_shared", meds_pid, l_shared)

    l_diag_slim = cc.project(left_diagnosis, "left_diagnosis_proj", diags_keep)
    l_diag_shared = cc.filter_by(l_diag_slim, "left_diagnosis_shared", diags_pid, l_shared)

    r_med_slim = cc.project(right_medication, "right_medication_proj", meds_keep)
    r_med_shared = cc.filter_by(r_med_slim, "right_medication_shared", meds_pid, r_shared)

    r_diag_slim = cc.project(right_diagnosis, "right_diagnosis_proj", diags_keep)
    r_diag_shared = cc.filter_by(r_diag_slim, "right_diagnosis_shared", diags_pid, r_shared)

    # --- slicing done: combine both parties and run the query ---
    meds_shared = cc.concat([l_med_shared, r_med_shared], "medication_shared")
    diags_shared = cc.concat([l_diag_shared, r_diag_shared], "diagnosis_shared")

    merged = cc.join(meds_shared, diags_shared, "joined", [meds_pid], [diags_pid])
    dated = cc.cc_filter(merged, "cases", diags_date, "<", other_col_name=meds_date)
    on_aspirin = cc.cc_filter(dated, "aspirin", meds_code, "==", scalar=1)
    with_heart_diag = cc.cc_filter(on_aspirin, "heart_patients", diags_code, "==", scalar=1)

    patient_count = cc.distinct_count(with_heart_diag, "actual_mpc", meds_pid)
    cc.collect(patient_count, 1)

    return {left_medication, left_diagnosis, right_medication, right_diagnosis}