Ejemplo n.º 1
0
        def protocol():
            """Three-party workflow: concat -> project -> aggregate -> divide -> multiply.

            Parties 1, 2 and 3 each supply a relation with INTEGER columns
            ``a`` and ``b``.  The final ``mult`` relation is handed to
            ``cc.collect`` with argument 1.

            :return: set holding the three input (root) relations
            """
            # NOTE(review): the second relation is registered as "in2" while its
            # siblings use "in_1"/"in_3" -- confirm the missing underscore is
            # intended.
            roots = []
            for party, rel_name in ((1, "in_1"), (2, "in2"), (3, "in_3")):
                schema = [defCol(col, "INTEGER", [party]) for col in ("a", "b")]
                roots.append(cc.create(rel_name, schema, {party}))

            # merge all parties' inputs, then run the operator chain on the result
            merged = cc.concat(list(roots), "rel")
            projected = cc.project(merged, "proj", ["a", "b"])
            summed = cc.aggregate(projected, "agg", ["a"], "b", "sum", "total_b")
            # both arithmetic steps take column "a" and the constant 1 as
            # operands and write the result back into column "a"
            divided = cc.divide(summed, "div", "a", ["a", 1])
            multiplied = cc.multiply(divided, "mult", "a", ["a", 1])

            cc.collect(multiplied, 1)

            return set(roots)
Ejemplo n.º 2
0
        def protocol():
            """Join a regulator relation with two company relations and sum ``d`` per ``b``.

            Party 1 owns ``a_govreg`` (columns ``a``, ``b``); parties 2 and 3
            own ``company0`` / ``company1`` (columns ``c``, ``d``).  Every input
            is first passed through a projection that keeps all of its columns.

            :return: set of the three input (root) relations
            """
            regulator = cc.create(
                "a_govreg",
                [defCol(col, "INTEGER", [1]) for col in ("a", "b")],
                {1})
            regulator_view = cc.project(regulator, "govreg_dummy", ["a", "b"])

            firms = []
            firm_views = []
            for party, rel_name, view_name in (
                    (2, "company0", "company0_dummy"),
                    (3, "company1", "company1_dummy")):
                schema = [
                    # the join column "c" is declared with two trust sets:
                    # [1] and the owning party's own
                    defCol("c", "INTEGER", [1], [party]),
                    defCol("d", "INTEGER", [party])
                ]
                firm = cc.create(rel_name, schema, {party})
                firms.append(firm)
                firm_views.append(cc.project(firm, view_name, ["c", "d"]))

            all_firms = cc.concat(firm_views, "companies")

            matched = cc.join(regulator_view, all_firms, "joined", ["a"], ["c"])
            totals = cc.aggregate(matched, "actual", ["b"], "d", "sum", "total")
            cc.collect(totals, 1)

            return {regulator, *firms}
Ejemplo n.º 3
0
        def protocol():
            """Two-party workflow with projections on both sides of an aggregation.

            Parties 1 and 2 each supply a relation with INTEGER columns ``a``
            and ``b``.  The concatenated input is projected twice, summed over
            ``b`` per ``a`` into ``total_b``, projected once more and handed to
            ``cc.collect`` with argument 1.

            :return: set of the two input (root) relations
            """
            roots = []
            for party, rel_name in ((1, "in_1"), (2, "in_2")):
                schema = [defCol(col, "INTEGER", [party]) for col in ("a", "b")]
                roots.append(cc.create(rel_name, schema, {party}))

            # single relation holding both parties' rows
            merged = cc.concat(list(roots), "rel")

            # the workflow itself: project, project, aggregate, project
            first_view = cc.project(merged, "proj_a", ["a", "b"])
            second_view = cc.project(first_view, "proj_b", ["a", "b"])
            summed = cc.aggregate(second_view, "agg", ["a"], "b", "sum", "total_b")
            result_view = cc.project(summed, "proj_c", ["a", "total_b"])

            cc.collect(result_view, 1)

            return set(roots)
Ejemplo n.º 4
0
def protocol():
    """Join the regulator's relation with both companies' data and sum ``d`` per ``b``.

    ``govreg`` (columns ``a``, ``b``) is created with owner set {1};
    ``company0`` and ``company1`` (columns ``c``, ``d``) with owner sets {2}
    and {3}.  The aggregate is handed to ``cc.collect`` with argument 1.

    :return: set of the three input relations
    """
    # NOTE(review): defCol receives a bare party id here (not a list) as its
    # third argument -- kept exactly as in the original call sites.
    govreg = cc.create(
        "govreg",
        [defCol(col, "INTEGER", 1) for col in ("a", "b")],
        {1})

    firms = []
    for party, rel_name in ((2, "company0"), (3, "company1")):
        schema = [defCol(col, "INTEGER", party) for col in ("c", "d")]
        firms.append(cc.create(rel_name, schema, {party}))

    companies = cc.concat(list(firms), "companies")

    matched = cc.join(govreg, companies, "joined", ["a"], ["c"])
    totals = cc.aggregate(matched, "actual", ["b"], "d", "sum", "total")
    cc.collect(totals, 1)

    return {govreg, *firms}
Ejemplo n.º 5
0
        def protocol():
            """Three-party market-share / HHI style workflow over ``companyID`` and ``price``.

            Parties 1, 2 and 3 own ``yellow1``, ``yellow2`` and ``yellow3``.
            The operator chain ends in an aggregate named ``hhi``, followed by
            a projection that is handed to ``sal.collect`` with argument 1.

            :return: set of the three input (root) relations
            """
            sources = []
            for party, rel_name in ((1, "yellow1"), (2, "yellow2"), (3, "yellow3")):
                schema = [
                    defCol(col, "INTEGER", [party])
                    for col in ("companyID", "price")
                ]
                sources.append(sal.create(rel_name, schema, {party}))

            rides = sal.concat(list(sources), "cab_data")

            picked = sal.project(rides, "selected_input", ["companyID", "price"])
            # per-company sum of price
            revenue = sal.aggregate(
                picked, "local_rev", ["companyID"], "price", "+", "local_rev")
            revenue_small = sal.divide(
                revenue, "scaled_down", "local_rev", ["local_rev", 1000])
            # companyID * 0: every row ends up with the same companyID value
            keyless = sal.multiply(
                revenue_small, "first_val_blank", "companyID", ["companyID", 0])
            revenue_scaled = sal.multiply(
                keyless, "local_rev_scaled", "local_rev", ["local_rev", 100])
            overall = sal.aggregate(
                keyless, "total_rev", ["companyID"], "local_rev", "+", "global_rev")
            paired = sal.join(
                revenue_scaled, overall, "local_total_rev",
                ["companyID"], ["companyID"])
            share = sal.divide(
                paired, "market_share", "local_rev", ["local_rev", "global_rev"])
            share_sq = sal.multiply(
                share, "market_share_squared", "local_rev",
                ["local_rev", "local_rev", 1])
            index = sal.aggregate(
                share_sq, "hhi", ["companyID"], "local_rev", "+", "hhi")
            # dummy projection to force non-mpc subdag
            index_view = sal.project(index, "hhi_only", ["companyID", "hhi"])

            sal.collect(index_view, 1)

            return set(sources)
Ejemplo n.º 6
0
        def protocol():
            """Sum column ``c`` grouped by ``a`` and ``b`` over the first input from ``setup()``.

            :return: set containing that single input relation
            """
            in_1 = setup()[0]

            grouped = cc.aggregate(in_1, "agg", ["a", "b"], "c", "sum", "agg_1")
            # the value returned by collect is not used afterwards
            cc.collect(grouped, 1)

            return {in_1}
Ejemplo n.º 7
0
def protocol():
    """Single-owner variant of the market-share / HHI style workflow.

    All three inputs (``yellow1``..``yellow3``, columns ``companyID`` and
    ``price``) are created with owner set {1}.  The final ``hhi`` aggregate is
    handed to ``sal.collect`` with argument 1.

    :return: set of the three input relations
    """
    # build all three schemas first, then the relations
    schemas = [
        [defCol(col, "INTEGER", [1]) for col in ("companyID", "price")]
        for _ in range(3)
    ]
    yellow1, yellow2, yellow3 = (
        sal.create(rel_name, schema, {1})
        for rel_name, schema in zip(("yellow1", "yellow2", "yellow3"), schemas)
    )

    rides = sal.concat([yellow1, yellow2, yellow3], "cab_data")

    picked = sal.project(rides, "selected_input", ["companyID", "price"])
    # per-company sum of price
    revenue = sal.aggregate(
        picked, "local_rev", ["companyID"], "price", "+", "local_rev")
    revenue_small = sal.divide(
        revenue, "scaled_down", "local_rev", ["local_rev", 1000])
    # companyID * 0: every row ends up with the same companyID value
    keyless = sal.multiply(
        revenue_small, "first_val_blank", "companyID", ["companyID", 0])
    revenue_scaled = sal.multiply(
        keyless, "local_rev_scaled", "local_rev", ["local_rev", 100])
    overall = sal.aggregate(
        keyless, "total_rev", ["companyID"], "local_rev", "+", "global_rev")
    paired = sal.join(
        revenue_scaled, overall, "local_total_rev",
        ["companyID"], ["companyID"])
    share = sal.divide(
        paired, "market_share", "local_rev", ["local_rev", "global_rev"])
    share_sq = sal.multiply(
        share, "market_share_squared", "local_rev",
        ["local_rev", "local_rev", 1])
    index = sal.aggregate(
        share_sq, "hhi", ["companyID"], "local_rev", "+", "hhi")

    sal.collect(index, 1)

    return {yellow1, yellow2, yellow3}
Ejemplo n.º 8
0
        def protocol():
            """Sum column ``c`` grouped by ``a`` and ``b`` over the first input from ``setup()``.

            :return: set containing that single input relation
            """
            in_1 = setup()[0]

            grouped = sal.aggregate(in_1, "agg", ["a", "b"], "c", "sum", "agg_1")
            # the value returned by collect is not used afterwards
            sal.collect(grouped, 1)

            return {in_1}
Ejemplo n.º 9
0
def protocol():
    """Three-party market-share / HHI style workflow built with ``cc``.

    Parties 1, 2 and 3 own ``in1``, ``in2`` and ``in3`` (columns ``companyID``
    and ``price``).  The final ``hhi`` aggregate is handed to ``cc.collect``
    with argument 1.

    :return: set of the three input (root) relations
    """
    roots = []
    for party, rel_name in ((1, "in1"), (2, "in2"), (3, "in3")):
        schema = [
            defCol(col, "INTEGER", [party]) for col in ("companyID", "price")
        ]
        roots.append(cc.create(rel_name, schema, {party}))

    rides = cc.concat(list(roots), "cab_data")

    picked = cc.project(rides, "selected_input", ["companyID", "price"])
    # per-company sum of price
    revenue = cc.aggregate(
        picked, "local_rev", ["companyID"], "price", "sum", "local_rev")
    revenue_small = cc.divide(
        revenue, "scaled_down", "local_rev", ["local_rev", 1000])
    # companyID * 0: every row ends up with the same companyID value
    keyless = cc.multiply(
        revenue_small, "first_val_blank", "companyID", ["companyID", 0])
    revenue_scaled = cc.multiply(
        keyless, "local_rev_scaled", "local_rev", ["local_rev", 100])
    overall = cc.aggregate(
        keyless, "total_rev", ["companyID"], "local_rev", "sum", "global_rev")
    paired = cc.join(
        revenue_scaled, overall, "local_total_rev",
        ["companyID"], ["companyID"])
    share = cc.divide(
        paired, "market_share", "local_rev", ["local_rev", "global_rev"])
    share_sq = cc.multiply(
        share, "market_share_squared", "local_rev",
        ["local_rev", "local_rev", 1])
    index = cc.aggregate(
        share_sq, "hhi", ["companyID"], "local_rev", "sum", "hhi")

    cc.collect(index, 1)

    return set(roots)
Ejemplo n.º 10
0
def agg():
    """Sum column ``c`` grouped by ``a`` and ``b`` over the first input from ``setup()``.

    :return: set containing that single input relation
    """
    source = setup()[0]

    # local name deliberately differs from the function's own name
    grouped = sal.aggregate(source, "agg", ["a", "b"], "c", "sum", "agg1")

    # the value returned by collect is not used afterwards
    sal.collect(grouped, 1)

    return {source}
Ejemplo n.º 11
0
        def protocol():
            """Multiply on one input, project the other, then join and aggregate.

            The first two relations returned by ``setup()`` are used.  The
            result of the aggregation is handed to ``sal.collect`` with
            argument 1.

            :return: set of the two input relations
            """
            sources = setup()
            left_src = sources[0]
            right_src = sources[1]

            # column "a" of the left input becomes b * c
            product = sal.multiply(left_src, "mult", "a", ["b", "c"])
            right_view = sal.project(right_src, "proj_2", ["a", "b"])
            matched = sal.join(
                product, right_view, "join", ["a", "b"], ["a", "b"])
            grouped = sal.aggregate(
                matched, "agg", ["a", "b"], "c", "sum", "agg_1")
            sal.collect(grouped, 1)

            return {left_src, right_src}
Ejemplo n.º 12
0
    def protocol():
        """Single-party aggregation: sum ``b`` per ``a`` over relation ``in1``.

        The aggregate node ``agg1`` is created but not collected here.

        :return: set containing the input relation
        """
        schema = [defCol(col, 'INTEGER', [1]) for col in ('a', 'b')]

        source = sal.create("in1", schema, {1})
        # the summed column keeps the name 'b'
        sal.aggregate(source, 'agg1', ['a'], 'b', '+', 'b')

        return {source}
Ejemplo n.º 13
0
        def protocol():
            """Divide/project one input, multiply the other, then join and aggregate.

            The first two relations returned by ``setup()`` are used.  The
            aggregate is handed to ``cc.collect`` with argument 1.

            :return: set of the two input relations
            """
            sources = setup()
            left_src = sources[0]
            right_src = sources[1]

            # left branch: a / b written to "a"; right branch: a * b written to "a"
            quotient = cc.divide(left_src, "div", "a", ["a", "b"])
            product = cc.multiply(right_src, "mult", "a", ["a", "b"])
            left_view = cc.project(quotient, "proj", ["a", "b"])
            matched = cc.join(
                left_view, product, "join", ["a", "b"], ["a", "b"])
            grouped = cc.aggregate(
                matched, "agg", ["a", "b"], "c", "sum", "agg_1")
            cc.collect(grouped, 1)

            return {left_src, right_src}
Ejemplo n.º 14
0
        def protocol():
            """Divide/project one input, multiply the other, then join and aggregate.

            The first two relations returned by ``setup()`` are used.  The
            aggregate is handed to ``sal.collect`` with argument 1.

            :return: set of the two input relations
            """
            sources = setup()
            left_src = sources[0]
            right_src = sources[1]

            # left branch: a / b written to "a"; right branch: a * b written to "a"
            quotient = sal.divide(left_src, "div", "a", ["a", "b"])
            product = sal.multiply(right_src, "mult", "a", ["a", "b"])
            left_view = sal.project(quotient, "proj", ["a", "b"])
            matched = sal.join(
                left_view, product, "join", ["a", "b"], ["a", "b"])
            grouped = sal.aggregate(
                matched, "agg", ["a", "b"], "c", "sum", "agg_1")
            # the value returned by collect is not used afterwards
            sal.collect(grouped, 1)

            return {left_src, right_src}
Ejemplo n.º 15
0
def protocol():
    """Two-party join on ``a`` = ``c`` followed by a sum of ``d`` per ``b``.

    ``left`` (columns ``a``, ``b``) is created with owner set {1} and
    ``right`` (columns ``c``, ``d``) with owner set {2}.  Each side passes
    through a projection that keeps all of its columns before the join.

    :return: set of the two input relations
    """
    left = cc.create(
        "left",
        [defCol(col, "INTEGER", [1]) for col in ("a", "b")],
        {1})
    left_view = cc.project(left, "left_dummy", ["a", "b"])

    right = cc.create(
        "right",
        [defCol(col, "INTEGER", [2]) for col in ("c", "d")],
        {2})
    right_view = cc.project(right, "right_dummy", ["c", "d"])

    matched = cc.join(left_view, right_view, "joined", ["a"], ["c"])
    totals = cc.aggregate(matched, "actual", ["b"], "d", "sum", "total")
    cc.collect(totals, 1)

    return {left, right}
Ejemplo n.º 16
0
def protocol():
    """
    Demo protocol: square column_b of the input relation, then sum it per column_a.

    Data is read from data/input_relation.csv and the aggregated result is
    stored under data/aggregated.csv.
    :return set of input relations
    """
    # each schema entry gives column name, type, and trust set
    schema = [
        defCol(name, "INTEGER", [1]) for name in ("column_a", "column_b")
    ]
    # the relation is declared with its name, columns, and owner set
    source = lang.create("input_relation", schema, {1})
    # (column_a, column_b) -> (column_a, column_b * column_b)
    squared = lang.multiply(
        source, "squared", "column_b", ["column_b", "column_b"])
    # group by column_a, sum column_b, and name the summed column "summed";
    # being a leaf node, "aggregated" is written to ./data/aggregated.csv
    # automatically, so no explicit output step is needed
    lang.aggregate(
        squared, "aggregated", ["column_a"], "column_b", "+", "summed")

    # every input relation of the workflow
    return {source}
Ejemplo n.º 17
0
def protocol():
    """Concatenate two party-1 relations and sum ``column_b`` per ``column_a``.

    ``left`` has columns ``column_a``/``column_b`` and ``right`` has
    ``column_a``/``column_c``; both are created with owner set {1}.  The
    aggregate ``expected`` is handed to ``cc.collect`` with argument 1.

    :return: set of the two input relations
    """
    left = cc.create(
        "left",
        [defCol(name, "INTEGER", [1]) for name in ("column_a", "column_b")],
        {1})
    right = cc.create(
        "right",
        [defCol(name, "INTEGER", [1]) for name in ("column_a", "column_c")],
        {1})

    merged = cc.concat([left, right], "rel")
    totals = cc.aggregate(
        merged, "expected", ["column_a"], "column_b", "sum", "total_b")
    cc.collect(totals, 1)
    return {left, right}
Ejemplo n.º 18
0
 def protocol():
     """Two-party concat followed by a sum of ``b`` per ``a``.

     ``in_1`` is created with owner set {1} and ``in_2`` with owner set {2};
     column ``a`` of ``in_2`` is declared with two trust sets, [1] and [2].
     The aggregate is handed to ``cc.collect`` with argument 1.

     :return: set of the two input relations
     """
     in_1 = cc.create(
         "in_1",
         [defCol(col, "INTEGER", [1]) for col in ("a", "b")],
         {1})
     second_schema = [
         defCol("a", "INTEGER", [1], [2]),
         defCol("b", "INTEGER", [2])
     ]
     in_2 = cc.create("in_2", second_schema, {2})

     merged = cc.concat([in_1, in_2], "rel")
     totals = cc.aggregate(merged, "agg", ["a"], "b", "sum", "total_b")
     cc.collect(totals, 1)
     return {in_1, in_2}
Ejemplo n.º 19
0
def protocol():
    """Two-party concat, sum of ``column_b`` per ``column_a``, then a projection.

    ``left`` is created with owner set {1} and ``right`` with owner set {2};
    ``column_a`` of ``right`` is declared with two trust sets, [1] and [2].
    The projection ``actual_open`` is handed to ``cc.collect`` with
    argument 1.

    :return: set of the two input relations
    """
    left = cc.create(
        "left",
        [defCol(name, "INTEGER", [1]) for name in ("column_a", "column_b")],
        {1})
    right_schema = [
        defCol("column_a", "INTEGER", [1], [2]),
        defCol("column_c", "INTEGER", [1])
    ]
    right = cc.create("right", right_schema, {2})

    merged = cc.concat([left, right], "rel")
    totals = cc.aggregate(
        merged, "actual", ["column_a"], "column_b", "sum", "total_b")
    totals_view = cc.project(totals, "actual_open", ["column_a", "total_b"])
    cc.collect(totals_view, 1)
    return {left, right}
Ejemplo n.º 20
0
def protocol():
    """Three-party workflow using explicit ``_close`` / ``_open`` operators.

    Each input is passed through ``sal._close`` with the set {1, 2, 3}; the
    closed relations are concatenated and summed over ``b`` per ``a``, and the
    aggregate is passed to ``sal._open`` with argument 1.

    :return: set of the three input (root) relations
    """
    # (party, relation name, column names) for every input
    specs = (
        (1, "in1", ("a", "b")),
        (2, "in2", ("c", "d")),
        (3, "in3", ("e", "f")),
    )
    roots = []
    for party, rel_name, col_names in specs:
        schema = [defCol(col, "INTEGER", [party]) for col in col_names]
        roots.append(sal.create(rel_name, schema, {party}))

    # close every input for the same three-party set
    closed = [
        sal._close(root, label, {1, 2, 3})
        for root, label in zip(roots, ("cl1", "cl2", "cl3"))
    ]
    merged = sal.concat(closed, "rel")
    totals = sal.aggregate(merged, "agg", ["a"], "b", "+", "total")

    # the value returned by _open is not used afterwards
    sal._open(totals, "opened", 1)

    return set(roots)
Ejemplo n.º 21
0
def protocol():
    """Three-party join/aggregate workflow using explicit ``_close`` / ``_open``.

    ``govreg`` (columns ``a``, ``b``, owner set {1}) is closed and projected;
    ``company0`` and ``company1`` (columns ``c``, ``d``, owner sets {2} and
    {3}) are closed, concatenated and projected.  The two projections are
    joined on ``a`` = ``c``, ``d`` is summed per ``b``, and the aggregate is
    passed to ``sal._open`` with argument 1.

    :return: set of the three input (root) relations
    """

    # define inputs
    colsIn1 = [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])]
    in1 = sal.create("govreg", colsIn1, set([1]))
    colsIn2 = [defCol("c", "INTEGER", [2]), defCol("d", "INTEGER", [2])]
    in2 = sal.create("company0", colsIn2, set([2]))
    colsIn3 = [defCol("c", "INTEGER", [3]), defCol("d", "INTEGER", [3])]
    in3 = sal.create("company1", colsIn3, set([3]))

    # left side of the join: closed regulator data, projected
    cl1 = sal._close(in1, "cl1", set([1, 2, 3]))
    projA = sal.project(cl1, "projA", ["a", "b"])
    # right side of the join: both companies closed, concatenated, projected
    cl2 = sal._close(in2, "cl2", set([1, 2, 3]))
    cl3 = sal._close(in3, "cl3", set([1, 2, 3]))
    right_rel = sal.concat([cl2, cl3], "right_rel")
    projB = sal.project(right_rel, "projB", ["c", "d"])

    # join the two projections; previously right_rel was passed here, which
    # left projB as an unused dangling node next to the real workflow
    joined = sal.join(projA, projB, "joined", ["a"], ["c"])
    agg = sal.aggregate(joined, "agg", ["b"], "d", "+", "total")

    sal._open(agg, "opened", 1)
    return set([in1, in2, in3])
Ejemplo n.º 22
0
        def protocol():
            """Two-party concat followed by a sum of ``b`` per ``a``.

            Parties 1 and 2 each supply a relation with INTEGER columns ``a``
            and ``b``.  The aggregate is handed to ``sal.collect`` with
            argument 1.

            :return: set of the two input (root) relations
            """
            roots = []
            for party, rel_name in ((1, "in_1"), (2, "in_2")):
                schema = [defCol(col, "INTEGER", [party]) for col in ("a", "b")]
                roots.append(sal.create(rel_name, schema, {party}))

            # single relation holding both parties' rows
            merged = sal.concat(list(roots), "rel")

            # the workflow itself: one grouped sum
            totals = sal.aggregate(merged, "agg", ["a"], "b", "+", "total_b")

            sal.collect(totals, 1)

            return set(roots)
Ejemplo n.º 23
0
def protocol():
    """Two-section pricing workflow over store sales joined with UPC/brand data.

    Section 1 computes the quantity weighted average price per unit and the
    total quantity sold at the store-brand level.  Section 2 computes the
    average price per OZ and total OZs sold for each brand at the
    retailer-$geo_unit level.  The ``final`` projection is handed to
    ``sal.collect`` with argument 1.

    :return: set containing the two input relations
    """
    sales_schema = [
        defCol("store_code_uc", "STRING", [1]),
        defCol('upc', 'STRING', [1]),
        defCol('week_end', 'STRING', [1]),
        defCol('q', 'INTEGER', [1]),
        defCol('avg_unit_p', 'FLOAT', [1]),
        defCol('retailer_code', 'STRING', [1]),
        defCol('store_zip3', 'STRING', [1])
    ]
    brand_schema = [
        defCol('brand_code_bu', 'STRING', [2]),
        defCol('brand_descr_bu', 'STRING', [2]),
        defCol('upc', 'STRING', [2]),
        defCol('size1_amount', 'FLOAT', [2]),
    ]

    # concatenated DFs from local_workflow.py
    sales = sal.create('concatenated_DFs', sales_schema, {1})

    # the output of preprocess_products.py
    # NOTE(review): the brand columns are declared with [2] while the relation
    # itself is created with owner set {1} -- confirm this is intended.
    brands = sal.create('temp_UPC_brandBU_crspnd', brand_schema, {1})

    # grouping / join keys shared by several operators below; every call gets
    # its own list copy
    store_keys = ('store_code_uc', 'brand_code_bu', 'week_end')
    region_keys = ('store_zip3', 'retailer_code', 'brand_code_bu', 'week_end')

    # SECTION 1
    # Compute the quantity weighted average price per unit
    # & total quantity sold at the store-brand level
    with_upc = sal.join(sales, brands, 'w_upc', ['upc'], ['upc'])
    with_oz_price = sal.divide(
        with_upc, 'w_avg_OZ_p', 'avg_OZ_p', ['avg_unit_p', 'size1_amount'])
    with_oz_qty = sal.multiply(
        with_oz_price, 'w_q_upd', 'q', ['q', 'size1_amount'])
    brand_qty = sal.aggregate(
        with_oz_qty, 'brand_OZq_sum', list(store_keys), 'q', '+', 'brand_OZq')
    with_brand_qty = sal.join(
        with_oz_qty, brand_qty, 'total_brnd_OZq',
        list(store_keys), list(store_keys))
    weighted = sal.multiply(
        with_brand_qty, 'w_wghtd_OZ_brnd_p', 'wghtd_OZ_brnd_p',
        ['q', 'avg_OZ_p'])
    weighted_norm = sal.divide(
        weighted, 'w_wghtd_OZ_brnd_p_final', 'wghtd_OZ_brnd_p',
        ['wghtd_OZ_brnd_p', 'brand_OZq'])
    brand_price = sal.aggregate(
        weighted_norm, 'brnd_p_sum', list(store_keys),
        'wghtd_OZ_brnd_p', '+', 'avg_OZ_brnd_p')
    store_level = sal.join(
        brand_price, weighted_norm, 'result',
        list(store_keys), list(store_keys))
    section_one = sal.project(store_level, 'section_one_result', [
        "avg_OZ_brnd_p", "week_end", "store_code_uc", "brand_code_bu",
        "brand_descr_bu", "brand_OZq", 'retailer_code', 'store_zip3', 'q'
    ])

    # SECTION 2
    # Compute the average price per OZ & total OZs sold for each brand at the
    # retailer-$geo_unit level, by compiling the store level data that
    # comprises each retailer-$geo_unit. Compute the total quantity sold by
    # each retailer-$geo_unit
    region_qty = sal.aggregate(
        section_one, 'temp_sum', list(region_keys),
        'brand_OZq', '+', 'brand_OZq')
    with_region_qty = sal.join(
        section_one, region_qty, 'result_brnd_sum',
        list(region_keys), list(region_keys))
    region_weighted = sal.multiply(
        with_region_qty, 'wghtd_p_mult', 'wghtd_p',
        ['brand_OZq', 'avg_OZ_brnd_p'])
    region_weighted_norm = sal.divide(
        region_weighted, 'wghtd_p_final', 'wghtd_p', ['wghtd_p', 'q'])
    region_price = sal.aggregate(
        region_weighted_norm, 'wghtd_p_sum', list(region_keys),
        'wghtd_p', '+', 'p')
    region_level = sal.join(
        region_weighted_norm, region_price, 'sec_4_result',
        list(region_keys), list(region_keys))

    # TODO: filter out sec_4_result rows where 'store_zip3' cell is empty

    final = sal.project(region_level, 'final', [
        'store_zip3', 'retailer_code', 'week_end', 'brand_code_bu',
        'brand_descr_bu', 'q', 'p'
    ])

    # the value returned by collect is not used afterwards
    sal.collect(final, 1)

    return {sales, brands}