Exemple #1
0
def protocol():
    """Assemble the three-party "hhi" workflow with the ``sal`` API.

    Parties 1, 2 and 3 each contribute a (companyID, price) relation. The
    relations are concatenated, "price" is summed per "companyID", and the
    chain of divide / multiply / aggregate / join operators below ends in a
    relation named "hhi" that is handed to ``sal.collect(..., 1)``.

    :return: set of the root (input) nodes
    """
    # one input relation per party, created in party order
    roots = []
    for party, rel_name in ((1, "green1"), (2, "green2"), (3, "green3")):
        schema = [
            defCol("companyID", "INTEGER", [party]),
            defCol("price", "INTEGER", [party]),
        ]
        roots.append(sal.create(rel_name, schema, {party}))

    combined = sal.concat(list(roots), "cab_data")
    picked = sal.project(combined, "selected_input", ["companyID", "price"])

    # per-company revenue, then divided by the constant 1000
    revenue = sal.aggregate(
        picked, "local_rev", ["companyID"], "price", "+", "local_rev")
    revenue_small = sal.divide(
        revenue, "scaled_down", "local_rev", ["local_rev", 1000])

    # companyID is multiplied by the constant 0 before the second aggregation
    keyless = sal.multiply(
        revenue_small, "first_val_blank", "companyID", ["companyID", 0])
    revenue_pct = sal.multiply(
        keyless, "local_rev_scaled", "local_rev", ["local_rev", 100])
    overall = sal.aggregate(
        keyless, "total_rev", ["companyID"], "local_rev", "+", "global_rev")

    paired = sal.join(
        revenue_pct, overall, "local_total_rev",
        ["companyID"], ["companyID"])
    share = sal.divide(
        paired, "market_share", "local_rev", ["local_rev", "global_rev"])
    share_sq = sal.multiply(
        share, "market_share_squared", "local_rev",
        ["local_rev", "local_rev", 1])
    index = sal.aggregate(
        share_sq, "hhi", ["companyID"], "local_rev", "+", "hhi")

    sal.collect(index, 1)

    # return root nodes
    return set(roots)
        def protocol():
            """Assemble the three-party "hhi" workflow with the ``cc`` API.

            Parties 1, 2 and 3 each contribute a (companyID, price) relation.
            After concatenation the operator chain below produces a relation
            named "hhi", which is projected once more before being handed to
            ``cc.collect(..., 1)``.

            :return: set of the root (input) nodes
            """
            # one input relation per party, created in party order
            roots = []
            for party, rel_name in ((1, "yellow1"), (2, "yellow2"),
                                    (3, "yellow3")):
                schema = [
                    defCol("companyID", "INTEGER", [party]),
                    defCol("price", "INTEGER", [party]),
                ]
                roots.append(cc.create(rel_name, schema, {party}))

            combined = cc.concat(list(roots), "cab_data")
            picked = cc.project(
                combined, "selected_input", ["companyID", "price"])

            # per-company revenue, then divided by the constant 1000
            revenue = cc.aggregate(
                picked, "local_rev", ["companyID"], "price", "sum",
                "local_rev")
            revenue_small = cc.divide(
                revenue, "scaled_down", "local_rev", ["local_rev", 1000])

            # companyID is multiplied by the constant 0 before re-aggregating
            keyless = cc.multiply(
                revenue_small, "first_val_blank", "companyID",
                ["companyID", 0])
            revenue_pct = cc.multiply(
                keyless, "local_rev_scaled", "local_rev", ["local_rev", 100])
            overall = cc.aggregate(
                keyless, "total_rev", ["companyID"], "local_rev", "sum",
                "global_rev")

            paired = cc.join(
                revenue_pct, overall, "local_total_rev",
                ["companyID"], ["companyID"])
            share = cc.divide(
                paired, "market_share", "local_rev",
                ["local_rev", "global_rev"])
            share_sq = cc.multiply(
                share, "market_share_squared", "local_rev",
                ["local_rev", "local_rev", 1])
            index = cc.aggregate(
                share_sq, "hhi", ["companyID"], "local_rev", "sum", "hhi")

            # dummy projection to force non-mpc subdag
            index_only = cc.project(index, "hhi_only", ["companyID", "hhi"])

            cc.collect(index_only, 1)

            # return root nodes
            return set(roots)
Exemple #3
0
def protocol():
    """Assemble the "hhi" workflow with explicit ``_close`` / ``_open`` nodes.

    Each of the three input relations is passed through ``sal._close`` with
    the party set {1, 2, 3} before concatenation, and the final "hhi" relation
    is passed through ``sal._open`` with target 1.

    :return: set of the root (input) nodes
    """
    # define inputs: one (companyID, price) relation per party
    roots = []
    for party, rel_name in ((1, "in1"), (2, "in2"), (3, "in3")):
        schema = [
            defCol("companyID", "INTEGER", [party]),
            defCol("price", "INTEGER", [party]),
        ]
        roots.append(sal.create(rel_name, schema, {party}))

    # every input gets its own close node and its own party set
    closed = [
        sal._close(node, label, {1, 2, 3})
        for node, label in zip(roots, ("cl1", "cl2", "cl3"))
    ]
    combined = sal.concat(closed, "cab_data")

    picked = sal.project(combined, "selected_input", ["companyID", "price"])
    revenue = sal.aggregate(
        picked, "local_rev", ["companyID"], "price", "+", "local_rev")
    revenue_small = sal.divide(
        revenue, "scaled_down", "local_rev", ["local_rev", 1000])

    # companyID is multiplied by the constant 0 before the second aggregation
    keyless = sal.multiply(
        revenue_small, "first_val_blank", "companyID", ["companyID", 0])
    revenue_pct = sal.multiply(
        keyless, "local_rev_scaled", "local_rev", ["local_rev", 100])
    overall = sal.aggregate(
        keyless, "total_rev", ["companyID"], "local_rev", "+", "global_rev")

    paired = sal.join(
        revenue_pct, overall, "local_total_rev",
        ["companyID"], ["companyID"])
    share = sal.divide(
        paired, "market_share", "local_rev", ["local_rev", "global_rev"])
    share_sq = sal.multiply(
        share, "market_share_squared", "local_rev",
        ["local_rev", "local_rev", 1])
    index = sal.aggregate(
        share_sq, "hhi", ["companyID"], "local_rev", "+", "hhi")

    # the node returned by _open is not used any further in this function
    sal._open(index, "hhi_opened", 1)

    # return root nodes
    return set(roots)
        def protocol():
            """Build a three-party project/aggregate/divide/multiply chain.

            The parties' (a, b) relations are concatenated into "rel"; the
            chain then projects, aggregates "b" grouped by "a" into "total_b",
            and applies a divide and a multiply by the constant 1 to column
            "a" before calling ``cc.collect(..., 1)``.

            :return: set of the root (input) nodes
            """
            # define inputs: one (a, b) relation per party
            roots = []
            for party, rel_name in ((1, "in_1"), (2, "in2"), (3, "in_3")):
                schema = [
                    defCol("a", "INTEGER", [party]),
                    defCol("b", "INTEGER", [party]),
                ]
                roots.append(cc.create(rel_name, schema, {party}))

            # combine parties' inputs into one relation
            merged = cc.concat(list(roots), "rel")
            trimmed = cc.project(merged, "proj", ["a", "b"])
            summed = cc.aggregate(
                trimmed, "agg", ["a"], "b", "sum", "total_b")
            quotient = cc.divide(summed, "div", "a", ["a", 1])
            product = cc.multiply(quotient, "mult", "a", ["a", 1])

            cc.collect(product, 1)

            # return root nodes
            return set(roots)
Exemple #5
0
def mult_mixed():
    """Add a multiply over columns "a" and "b" and open its result.

    The relation from ``setup()`` is fed to ``sal.multiply`` with target
    column "a" and operands ["a", "b"]; the product node is then passed to
    ``sal._open`` with target 1.

    :return: the inputs returned by ``setup()``
    """
    roots, source = setup()
    product = sal.multiply(source, "res", "a", ["a", "b"])

    # the node returned by _open is not needed by the caller
    sal._open(product, "opened", 1)
    return roots
Exemple #6
0
        def protocol():
            """Multiply columns "a" and "b" of the setup relation and open it.

            :return: the inputs returned by ``setup()``
            """
            roots, source = setup()
            product = sal.multiply(source, 'mult1', 'a', ['a', 'b'])

            # the node returned by _open is not needed by the caller
            sal._open(product, "opened", 1)

            return roots
        def protocol():
            """Build a three-party project/aggregate/divide/multiply chain.

            The parties' (a, b) relations are concatenated into "rel"; the
            chain then projects, aggregates "b" grouped by "a" into "total_b",
            and applies a divide and a multiply by the constant 1 to column
            "a" before calling ``sal.collect(..., 1)``.

            :return: set of the root (input) nodes
            """
            # define inputs: one (a, b) relation per party
            roots = []
            for party, rel_name in ((1, "in_1"), (2, "in2"), (3, "in_3")):
                schema = [
                    defCol("a", "INTEGER", [party]),
                    defCol("b", "INTEGER", [party]),
                ]
                roots.append(sal.create(rel_name, schema, {party}))

            # combine parties' inputs into one relation
            merged = sal.concat(list(roots), "rel")
            trimmed = sal.project(merged, "proj", ["a", "b"])
            summed = sal.aggregate(
                trimmed, "agg", ["a"], "b", "+", "total_b")
            quotient = sal.divide(summed, "div", "a", ["a", 1])
            product = sal.multiply(quotient, "mult", "a", ["a", 1])

            sal.collect(product, 1)

            # return root nodes
            return set(roots)
Exemple #8
0
def mult_by_const():
    """Add a multiply of column "a" by the constant 10 and open its result.

    The relation from ``setup()`` is fed to ``sal.multiply`` with target
    column "a" and operands ["a", 10]; the product node is then passed to
    ``sal._open`` with target 1.

    :return: the inputs returned by ``setup()``
    """
    roots, source = setup()
    scaled = sal.multiply(source, "res", "a", ["a", 10])

    # the node returned by _open is not needed by the caller
    sal._open(scaled, "opened", 1)
    return roots
Exemple #9
0
        def protocol():
            """Multiply columns "a" and "b" of the first setup input.

            Only element 0 of what ``setup()`` returns is used; the product
            is handed to ``sal.collect(..., 1)``.

            :return: set containing that single input node
            """
            source = setup()[0]

            product = sal.multiply(source, "mult", "a", ["a", "b"])
            # collect's return value is not used
            sal.collect(product, 1)

            return {source}
Exemple #10
0
def protocol():
    """Assemble the "hhi" workflow with all three inputs held by party 1.

    Three (companyID, price) relations are created, every column and every
    relation being declared with party 1. They are concatenated and run
    through the operator chain below, ending in a relation named "hhi" that
    is handed to ``sal.collect(..., 1)``.

    :return: set of the root (input) nodes
    """
    # all three schemas are built before any relation is created
    schemas = [
        [
            defCol("companyID", "INTEGER", [1]),
            defCol("price", "INTEGER", [1]),
        ]
        for _ in range(3)
    ]
    roots = [
        sal.create(rel_name, schema, {1})
        for rel_name, schema in zip(("yellow1", "yellow2", "yellow3"),
                                    schemas)
    ]

    combined = sal.concat(list(roots), "cab_data")

    picked = sal.project(combined, "selected_input", ["companyID", "price"])
    revenue = sal.aggregate(
        picked, "local_rev", ["companyID"], "price", "+", "local_rev")
    revenue_small = sal.divide(
        revenue, "scaled_down", "local_rev", ["local_rev", 1000])

    # companyID is multiplied by the constant 0 before the second aggregation
    keyless = sal.multiply(
        revenue_small, "first_val_blank", "companyID", ["companyID", 0])
    revenue_pct = sal.multiply(
        keyless, "local_rev_scaled", "local_rev", ["local_rev", 100])
    overall = sal.aggregate(
        keyless, "total_rev", ["companyID"], "local_rev", "+", "global_rev")

    paired = sal.join(
        revenue_pct, overall, "local_total_rev",
        ["companyID"], ["companyID"])
    share = sal.divide(
        paired, "market_share", "local_rev", ["local_rev", "global_rev"])
    share_sq = sal.multiply(
        share, "market_share_squared", "local_rev",
        ["local_rev", "local_rev", 1])
    index = sal.aggregate(
        share_sq, "hhi", ["companyID"], "local_rev", "+", "hhi")

    sal.collect(index, 1)

    return set(roots)
Exemple #11
0
        def protocol():
            """Multiply columns "a" and "b" of the first setup input.

            Only element 0 of what ``setup()`` returns is used; the product
            is handed to ``cc.collect(..., 1)``.

            :return: set containing that single input node
            """
            source = setup()[0]

            product = cc.multiply(source, "mult", "a", ["a", "b"])
            # collect's return value is not used
            cc.collect(product, 1)

            return {source}
Exemple #12
0
    def protocol():
        """Create one party-1 relation and attach a multiply node to it.

        The relation "in1" has INTEGER columns 'a' and 'b'; ``sal.multiply``
        is called with target column 'a' and operands ['a', 'b'].

        :return: set containing the single input node
        """
        schema = [
            defCol(col_name, 'INTEGER', [1])
            for col_name in ('a', 'b')
        ]

        source = sal.create("in1", schema, {1})
        # the multiply node is created but not referenced again here
        sal.multiply(source, 'mult1', 'a', ['a', 'b'])

        return {source}
Exemple #13
0
        def protocol():
            """Join a multiplied first input with a projected second input.

            The first two elements of ``setup()`` are used. The first gets a
            multiply with target "a" and operands ["b", "c"], the second is
            projected onto ["a", "b"]; both are joined on ["a", "b"], column
            "c" is aggregated with "sum" into "agg_1", and the result is
            handed to ``sal.collect(..., 1)``.

            :return: set of the two input nodes
            """
            sources = setup()
            left = sources[0]
            right = sources[1]

            left_mult = sal.multiply(left, "mult", "a", ["b", "c"])
            right_proj = sal.project(right, "proj_2", ["a", "b"])
            joined = sal.join(
                left_mult, right_proj, "join", ["a", "b"], ["a", "b"])
            summed = sal.aggregate(
                joined, "agg", ["a", "b"], "c", "sum", "agg_1")
            # collect's return value is not used
            sal.collect(summed, 1)

            return {left, right}
Exemple #14
0
        def protocol():
            """Join a divided-then-projected input with a multiplied input.

            The first two elements of ``setup()`` are used. The first gets a
            divide (target "a", operands ["a", "b"]) followed by a projection
            onto ["a", "b"]; the second gets a multiply with the same target
            and operands. Both sides are joined on ["a", "b"], column "c" is
            aggregated with "sum" into "agg_1", and the result is handed to
            ``sal.collect(..., 1)``.

            :return: set of the two input nodes
            """
            sources = setup()
            left = sources[0]
            right = sources[1]

            # operators are created in this order: divide, multiply, project
            left_div = sal.divide(left, "div", "a", ["a", "b"])
            right_mult = sal.multiply(right, "mult", "a", ["a", "b"])
            left_proj = sal.project(left_div, "proj", ["a", "b"])
            joined = sal.join(
                left_proj, right_mult, "join", ["a", "b"], ["a", "b"])
            summed = sal.aggregate(
                joined, "agg", ["a", "b"], "c", "sum", "agg_1")
            # collect's return value is not used
            sal.collect(summed, 1)

            return {left, right}
Exemple #15
0
        def protocol():
            """Join a divided-then-projected input with a multiplied input.

            The first two elements of ``setup()`` are used. The first gets a
            divide (target "a", operands ["a", "b"]) followed by a projection
            onto ["a", "b"]; the second gets a multiply with the same target
            and operands. Both sides are joined on ["a", "b"], column "c" is
            aggregated with "sum" into "agg_1", and the result is handed to
            ``cc.collect(..., 1)``.

            :return: set of the two input nodes
            """
            sources = setup()
            left = sources[0]
            right = sources[1]

            # operators are created in this order: divide, multiply, project
            left_div = cc.divide(left, "div", "a", ["a", "b"])
            right_mult = cc.multiply(right, "mult", "a", ["a", "b"])
            left_proj = cc.project(left_div, "proj", ["a", "b"])
            joined = cc.join(
                left_proj, right_mult, "join", ["a", "b"], ["a", "b"])
            summed = cc.aggregate(
                joined, "agg", ["a", "b"], "c", "sum", "agg_1")
            cc.collect(summed, 1)

            return {left, right}
Exemple #16
0
def protocol():
    """Build the single-party workflow over the "movement" relation.

    A unit price is derived from 'price' and 'prmult', 'units' is summed per
    (store_code_uc, upc, week_end) into 'q', and a weighted unit price is
    aggregated into 'avg_unit_p'. The selected columns are handed to
    ``sal.collect(..., 1)``.

    :return: set containing the single input node
    """
    schema = [
        defCol(col_name, col_type, [1])
        for col_name, col_type in (
            ("store_code_uc", "STRING"),
            ("upc", "STRING"),
            ("week_end", "STRING"),
            ("units", "INTEGER"),
            ("prmult", "INTEGER"),
            ("price", "FLOAT"),
            ("retailer_code", "STRING"),
            ("store_zip3", "STRING"),
        )
    ]
    movement = sal.create("movement", schema, {1})

    # (store, product, week) key; every operator call gets its own list copy
    group_key = ('store_code_uc', 'upc', 'week_end')

    # 'unit_price' is 'price' divided by 'prmult'
    with_unit_price = sal.divide(
        movement, "w_unit_p", 'unit_price', ['price', 'prmult'])

    # sum 'units' over rows sharing the same (store, product, week) key
    units_per_key = sal.aggregate(
        with_unit_price, 'sum_units', list(group_key), 'units', '+', 'q')

    # bring the summed 'q' back next to each row's 'unit_price'
    with_totals = sal.join(
        with_unit_price, units_per_key, 'total_units',
        list(group_key), list(group_key))

    # weighted unit price: 'units' times 'unit_price' ...
    weighted = sal.multiply(
        with_totals, 'wghtd_total', 'wghtd_unit_p', ['units', 'unit_price'])

    # ... divided by 'q' (total units sold)
    weighted_share = sal.divide(
        weighted, 'wghtd_total_final', 'wghtd_unit_p', ['wghtd_unit_p', 'q'])

    avg_price = sal.aggregate(
        weighted_share, 'total_unit_wghts', list(group_key),
        'wghtd_unit_p', '+', 'avg_unit_p')

    # merge in avg_unit_p
    merged = sal.join(
        with_totals, avg_price, 'final_join',
        list(group_key), list(group_key))

    output = sal.project(merged, 'selected_cols', [
        'store_code_uc', 'upc', 'week_end', 'q', 'avg_unit_p',
        'retailer_code', 'store_zip3',
    ])

    # collect's return value is not used
    sal.collect(output, 1)

    return {movement}
        def protocol():
            """Concatenate two parties' (a, b) relations and multiply "a" by 1.

            The multiply result is handed to ``cc.collect(..., 1)``.

            :return: set of the root (input) nodes
            """
            # define inputs: one (a, b) relation per party
            roots = []
            for party, rel_name in ((1, "in_1"), (2, "in_2")):
                schema = [
                    defCol("a", "INTEGER", [party]),
                    defCol("b", "INTEGER", [party]),
                ]
                roots.append(cc.create(rel_name, schema, {party}))

            # combine parties' inputs into one relation
            merged = cc.concat(list(roots), "rel")

            # specify the workflow
            product = cc.multiply(merged, "mult", "a", ["a", 1])

            cc.collect(product, 1)

            # return root nodes
            return set(roots)
Exemple #18
0
def protocol():
    """
    Demo protocol: multiply column_b by itself, then aggregate it per column_a.

    Per the original notes, the input is read from data/input_relation.csv and
    the leaf relation "aggregated" is written to data/aggregated.csv.
    :return set of input relations
    """
    # input schema: each column is given a name, a type, and a trust set
    schema = [
        defCol(col_name, "INTEGER", [1])
        for col_name in ("column_a", "column_b")
    ]
    # input relation: relation name, columns, and owner set
    source = lang.create("input_relation", schema, {1})
    # (column_a, column_b) -> (column_a, column_b * column_b)
    product = lang.multiply(
        source, "squared", "column_b", ["column_b", "column_b"])
    # sum column_b grouped by column_a; the output column is named "summed"
    lang.aggregate(
        product, "aggregated", ["column_a"], "column_b", "+", "summed")
    # leaf nodes are written to file automatically, so nothing is collected

    # return all input relations
    return {source}
        def protocol():
            """Concatenate two parties' (a, b) relations and multiply "a" by 0.

            The multiply result is handed to ``sal.collect(..., 1)``.

            :return: set of the root (input) nodes
            """
            # define inputs: one (a, b) relation per party
            roots = []
            for party, rel_name in ((1, "in1"), (2, "in_2")):
                schema = [
                    defCol("a", "INTEGER", [party]),
                    defCol("b", "INTEGER", [party]),
                ]
                roots.append(sal.create(rel_name, schema, {party}))

            # combine parties' inputs into one relation
            merged = sal.concat(list(roots), "rel")

            # specify the workflow
            product = sal.multiply(merged, "mult", "a", ["a", 0])

            sal.collect(product, 1)

            # return root nodes
            return set(roots)
Exemple #20
0
def protocol():
    """Build the two-section brand-level price/quantity workflow.

    Two input relations are created ('concatenated_DFs' and
    'temp_UPC_brandBU_crspnd') and joined on 'upc'. Section 1 works at the
    (store_code_uc, brand_code_bu, week_end) level, section 2 at the
    (store_zip3, retailer_code, brand_code_bu, week_end) level. The final
    projection is handed to ``sal.collect(..., 1)``.

    :return: set of the two input nodes
    """
    sales_schema = [
        defCol(col_name, col_type, [1])
        for col_name, col_type in (
            ("store_code_uc", "STRING"),
            ('upc', 'STRING'),
            ('week_end', 'STRING'),
            ('q', 'INTEGER'),
            ('avg_unit_p', 'FLOAT'),
            ('retailer_code', 'STRING'),
            ('store_zip3', 'STRING'),
        )
    ]

    brand_schema = [
        defCol(col_name, col_type, [2])
        for col_name, col_type in (
            ('brand_code_bu', 'STRING'),
            ('brand_descr_bu', 'STRING'),
            ('upc', 'STRING'),
            ('size1_amount', 'FLOAT'),
        )
    ]

    # concatenated DFs from local_workflow.py
    sales = sal.create('concatenated_DFs', sales_schema, {1})

    # the output of preprocess_products.py
    # NOTE(review): these columns are declared with [2] while the relation is
    # created with {1} -- confirm this is intended.
    brands = sal.create('temp_UPC_brandBU_crspnd', brand_schema, {1})

    # grouping keys; every operator call receives its own list copy
    store_key = ('store_code_uc', 'brand_code_bu', 'week_end')
    geo_key = ('store_zip3', 'retailer_code', 'brand_code_bu', 'week_end')

    # SECTION 1
    # Compute the quantity weighted average price per unit
    # & total quantity sold at the store-brand level
    with_brand = sal.join(sales, brands, 'w_upc', ['upc'], ['upc'])
    with_oz_price = sal.divide(
        with_brand, 'w_avg_OZ_p', 'avg_OZ_p', ['avg_unit_p', 'size1_amount'])
    with_oz_qty = sal.multiply(
        with_oz_price, 'w_q_upd', 'q', ['q', 'size1_amount'])
    brand_qty = sal.aggregate(
        with_oz_qty, 'brand_OZq_sum', list(store_key), 'q', '+', 'brand_OZq')
    with_brand_qty = sal.join(
        with_oz_qty, brand_qty, 'total_brnd_OZq',
        list(store_key), list(store_key))
    weighted = sal.multiply(
        with_brand_qty, 'w_wghtd_OZ_brnd_p', 'wghtd_OZ_brnd_p',
        ['q', 'avg_OZ_p'])
    weighted_share = sal.divide(
        weighted, 'w_wghtd_OZ_brnd_p_final', 'wghtd_OZ_brnd_p',
        ['wghtd_OZ_brnd_p', 'brand_OZq'])
    brand_price = sal.aggregate(
        weighted_share, 'brnd_p_sum', list(store_key),
        'wghtd_OZ_brnd_p', '+', 'avg_OZ_brnd_p')
    store_level = sal.join(
        brand_price, weighted_share, 'result',
        list(store_key), list(store_key))
    section_one = sal.project(store_level, 'section_one_result', [
        "avg_OZ_brnd_p", "week_end", "store_code_uc", "brand_code_bu",
        "brand_descr_bu", "brand_OZq", 'retailer_code', 'store_zip3', 'q',
    ])

    # SECTION 2
    # Compute the average price per OZ & total OZs sold for each brand at the
    # retailer-$geo_unit level, by compiling the store level data that
    # comprises each retailer-$geo_unit. Compute the total quantity sold by
    # each retailer-$geo_unit
    geo_qty = sal.aggregate(
        section_one, 'temp_sum', list(geo_key),
        'brand_OZq', '+', 'brand_OZq')
    with_geo_qty = sal.join(
        section_one, geo_qty, 'result_brnd_sum',
        list(geo_key), list(geo_key))
    geo_weighted = sal.multiply(
        with_geo_qty, 'wghtd_p_mult', 'wghtd_p',
        ['brand_OZq', 'avg_OZ_brnd_p'])
    geo_weighted_share = sal.divide(
        geo_weighted, 'wghtd_p_final', 'wghtd_p', ['wghtd_p', 'q'])
    geo_price = sal.aggregate(
        geo_weighted_share, 'wghtd_p_sum', list(geo_key),
        'wghtd_p', '+', 'p')
    geo_level = sal.join(
        geo_weighted_share, geo_price, 'sec_4_result',
        list(geo_key), list(geo_key))

    # TODO: filter out sec_4_result rows where 'store_zip3' cell is empty

    output = sal.project(geo_level, 'final', [
        'store_zip3', 'retailer_code', 'week_end', 'brand_code_bu',
        'brand_descr_bu', 'q', 'p',
    ])

    # collect's return value is not used
    sal.collect(output, 1)

    return {sales, brands}