Example #1
0
def calculate_workload_error_marginals(wk, strategy, eps):
    """Summarise the expected HDMM error of ``strategy`` on ``wk``.

    Note carried over from the original: strategies that are a union still
    need special treatment.

    Every element of ``wk.matrices`` is printed, scored with
    ``hdmm_error.expected_error`` on its ``base``, and reported as a
    root-mean-squared error over its own queries.  The overall figure
    weights each sub-workload by ``weight**2``.  No density information is
    produced, so ``'pdf'`` and ``'pdf_x'`` are always ``False``.

    Args:
        wk: workload exposing ``matrices`` and ``shape``; each element of
            ``matrices`` exposes ``weight``, ``shape`` and ``base``.
        strategy: strategy passed through to ``hdmm_error.expected_error``.
        eps: privacy parameter passed through as ``eps``.

    Returns:
        ``(table, graph)``: a list with one result dict per sub-workload,
        and a single result dict for the workload as a whole.
    """
    per_workload_rows = []
    weighted_error_sum = 0
    weighted_query_count = 0

    for sub in wk.matrices:
        print(sub)
        weight_sq = sub.weight**2
        n_queries = sub.shape[0]
        weighted_query_count += weight_sq * n_queries
        sub_error = hdmm_error.expected_error(sub.base, strategy, eps=eps)
        weighted_error_sum += weight_sq * sub_error

        per_workload_rows.append(dict(
            pdf=False,
            pdf_x=False,
            method='HDMM',
            expected_error=np.sqrt(sub_error / n_queries),
            num_query=int(n_queries),
        ))

    overall = dict(
        pdf=False,
        pdf_x=False,
        message="Optimized strategy is marginals, doesn't support density plots.",
        method='HDMM',
        expected_error=np.sqrt(weighted_error_sum / weighted_query_count),
        num_query=int(wk.shape[0]),
    )

    return per_workload_rows, overall
Example #2
0
def experiment5():
    """Compare solo half-budget runs with a joint ``fairtemplates`` run.

    Original intent, as noted by the author: show that sharing incentive
    can be satisfied by running the entire workload together while
    weighting each analyst's workload in inverse proportion to its
    sensitivity ("this time we try scaling the big workload matrix").
    NOTE(review): no explicit scaling happens in this function; presumably
    ``fairtemplates.PIdentity`` handles it -- confirm.

    Two random 32 x n workloads are each optimized alone and scored at
    eps=0.5, then optimized together (as a list) and scored at eps=1.
    Everything is reported via ``print``; nothing is returned.
    """
    print('experiment5')
    n = 256
    p = max(1, n // 16)

    #using experiment 1 as an example
    # Alternatives tried previously: workload.Prefix(n) for W1 and
    # workload.Total(n) / workload.Identity(n) for W2.
    W1 = matrix.EkteloMatrix(np.random.rand(32, n))
    W2 = matrix.EkteloMatrix(np.random.rand(32, n))
    W = [W1, W2]

    # Baseline: each analyst optimizes alone with half the budget.
    solo_errors = []
    solo_runs = (
        ("Workload 1 with half the budget", W1),
        ("Workload 2 with half the budget", W2),
    )
    for banner, analyst_workload in solo_runs:
        solo = templates.PIdentity(p, n)
        solo.optimize(analyst_workload)
        print(banner)
        solo_error = error.expected_error(analyst_workload,
                                          solo.strategy(),
                                          eps=0.5)
        print(solo_error)
        solo_errors.append(solo_error)
    err1, err2 = solo_errors

    # Joint run: both workloads together with the whole budget.
    joint = fairtemplates.PIdentity(p, n)
    joint.optimize(W)
    print("Both workloads with all the budgets")
    err1all = error.expected_error(W1, joint.strategy(), eps=1)
    err2all = error.expected_error(W2, joint.strategy(), eps=1)
    for tag, joint_error in (("W1", err1all), ("W2", err2all)):
        print(tag)
        print(joint_error)

    print("Are either of the analysts violating sharing incentive")
    first_violates = err1all >= err1
    print(first_violates or (err2all >= err2))
    """Optimizing on egalitarian doesn't work either. it seems to be spending most of 
    its time optimizing the more difficult queries. In fact it seems to be getting almost
    the exact same error should it have just optimized on the first one """
    """ note results change if it's just total vs identity for the second workload.
Example #3
0
def calculate_workload_error_default(wk, strategy, eps):
    """Summarise expected and per-query HDMM error of ``strategy`` on ``wk``.

    Note carried over from the original: strategies that are a union still
    need special treatment.

    Every element of ``wk.matrices`` is printed, scored with
    ``hdmm_error.expected_error`` and sampled with
    ``hdmm_error.per_query_error_sampling`` (``normalize=True``).  A density
    histogram of the samples is attached unless their variance is below
    1e-2, in which case ``'pdf'`` and ``'pdf_x'`` are ``False``.

    Args:
        wk: workload exposing ``matrices`` and ``shape``; each element of
            ``matrices`` exposes ``weight``, ``shape`` and ``base``.
        strategy: strategy passed through to the ``hdmm_error`` helpers.
        eps: privacy parameter passed through as ``eps``.

    Returns:
        ``(table, graph)``: a list with one result dict per sub-workload,
        and a single result dict for the workload as a whole.
    """

    def _density(samples, **histogram_options):
        # Near-constant samples give no useful plot; signal that with False.
        if np.var(samples) < 1e-2:
            return False, False
        heights, edges = np.histogram(samples,
                                      density=True,
                                      **histogram_options)
        # Report the left edge of every bin alongside its height.
        return heights.tolist(), edges[0:-1].tolist()

    per_workload_rows = []
    weighted_error_sum = 0
    weighted_query_count = 0
    # Seed both lists with an empty float array so the concatenated result
    # has the same dtype promotion as repeated np.append on np.array([]).
    sample_chunks = [np.array([])]
    weight_chunks = [np.array([])]

    for sub in wk.matrices:
        print(sub)
        weight_sq = sub.weight**2
        n_queries = sub.shape[0]
        weighted_query_count += weight_sq * n_queries
        sub_error = hdmm_error.expected_error(sub.base, strategy, eps=eps)
        weighted_error_sum += weight_sq * sub_error
        samples = hdmm_error.per_query_error_sampling(sub.base,
                                                      strategy,
                                                      eps=eps,
                                                      normalize=True)
        sample_chunks.append(np.ravel(samples))
        # Every sample of this sub-workload carries the same weight.
        weight_chunks.append(np.ones(samples.size) * weight_sq * n_queries)

        sub_pdf, sub_pdf_x = _density(samples, bins='auto')
        per_workload_rows.append(dict(
            pdf=sub_pdf,
            pdf_x=sub_pdf_x,
            method='HDMM',
            expected_error=np.sqrt(sub_error / n_queries),
            num_query=int(n_queries),
        ))

    overall_rootmse = np.sqrt(weighted_error_sum / weighted_query_count)

    all_samples = np.concatenate(sample_chunks)
    all_weights = np.concatenate(weight_chunks)
    overall_pdf, overall_pdf_x = _density(all_samples,
                                          bins=100,
                                          weights=all_weights)

    overall = dict(
        pdf=overall_pdf,
        pdf_x=overall_pdf_x,
        method='HDMM',
        expected_error=overall_rootmse,
        num_query=int(wk.shape[0]),
    )

    return per_workload_rows, overall
Example #4
0
def experiment3():
    """Test sharing incentive with per-workload sensitivity scaling.

    Original intent, as noted by the author: show that sharing incentive
    can be satisfied by running the entire workload together while
    weighting each analyst's workload in inverse proportion to the
    sensitivity of that workload.

    Each workload's ``matrix`` is multiplied by ``1 / sensitivity()`` and
    the scaled matrices are stacked for the joint optimization.  The solo
    baselines use the unscaled workloads at eps=0.5; the joint strategy is
    scored on the unscaled workloads at eps=1.  Everything is reported via
    ``print``; nothing is returned.
    """
    print('experiment3')
    n = 256
    p = max(1, n // 16)

    #using experiment 1 as an example
    # Alternative tried previously for W2: workload.Total(n).
    W1 = workload.AllRange(n)
    W2 = workload.Identity(n)
    W = workload.VStack([
        matrix.EkteloMatrix(np.multiply(w.matrix, (1 / w.sensitivity())))
        for w in (W1, W2)
    ])

    # Baseline: each analyst optimizes alone with half the budget.
    solo_errors = []
    solo_runs = (
        ("Workload 1 with half the budget", W1),
        ("Workload 2 with half the budget", W2),
    )
    for banner, analyst_workload in solo_runs:
        solo = templates.PIdentity(p, n)
        solo.optimize(analyst_workload)
        print(banner)
        solo_error = error.expected_error(analyst_workload,
                                          solo.strategy(),
                                          eps=0.5)
        print(solo_error)
        solo_errors.append(solo_error)
    err1, err2 = solo_errors

    # Joint run: optimize on the scaled stack with the whole budget.
    joint = templates.PIdentity(p, n)
    joint.optimize(W)
    print("Both workloads with all the budgets")
    err1all = error.expected_error(W1, joint.strategy(), eps=1)
    err2all = error.expected_error(W2, joint.strategy(), eps=1)
    for tag, joint_error in (("W1", err1all), ("W2", err2all)):
        print(tag)
        print(joint_error)

    print("Are either of the analysts violating sharing incentive")
    first_violates = err1all >= err1
    print(first_violates or (err2all >= err2))
    """ Issue when you independently scale each matrix then merge you some of the
Example #5
0
def experiment4():
    """Test sharing incentive with the stacked workload scaled as a whole.

    Original intent, as noted by the author: show that sharing incentive
    can be satisfied by running the entire workload together while
    weighting by inverse sensitivity -- this time by scaling the big
    (stacked) workload matrix rather than each workload separately.

    The two workloads are stacked, and the stack's ``matrix`` is multiplied
    by ``1 / sensitivity()`` of the stack.  The solo baselines use the
    unscaled workloads at eps=0.5; the joint strategy is scored on the
    unscaled workloads at eps=1.  Everything is reported via ``print``;
    nothing is returned.
    """
    print('experiment4')
    n = 256
    p = max(1, n // 16)

    #using experiment 1 as an example
    # Alternative tried previously for W2: workload.Total(n).
    W1 = workload.AllRange(n)
    W2 = workload.Identity(n)
    stacked = workload.VStack([W1, W2])
    W = matrix.EkteloMatrix(
        np.multiply(stacked.matrix, (1 / stacked.sensitivity())))

    # Baseline: each analyst optimizes alone with half the budget.
    solo_errors = []
    solo_runs = (
        ("Workload 1 with half the budget", W1),
        ("Workload 2 with half the budget", W2),
    )
    for banner, analyst_workload in solo_runs:
        solo = templates.PIdentity(p, n)
        solo.optimize(analyst_workload)
        print(banner)
        solo_error = error.expected_error(analyst_workload,
                                          solo.strategy(),
                                          eps=0.5)
        print(solo_error)
        solo_errors.append(solo_error)
    err1, err2 = solo_errors

    # Joint run: optimize on the scaled stack with the whole budget.
    joint = templates.PIdentity(p, n)
    joint.optimize(W)
    print("Both workloads with all the budgets")
    err1all = error.expected_error(W1, joint.strategy(), eps=1)
    err2all = error.expected_error(W2, joint.strategy(), eps=1)
    for tag, joint_error in (("W1", err1all), ("W2", err2all)):
        print(tag)
        print(joint_error)

    print("Are either of the analysts violating sharing incentive")
    first_violates = err1all >= err1
    print(first_violates or (err2all >= err2))
    """Doesn't work either. May work when the number of analysts scales too high.
Example #6
0
def example3():
    """Optimize a union-of-Kronecker-products workload two ways.

    The workload stacks a 4-way Kronecker product of ``Prefix(64)`` on a
    4-way Kronecker product of ``AllRange(64)``.  It is optimized first
    with the Kronecker parameterization (``KronPIdentity``) and then with
    the marginals parameterization (``Marginals``).  The expected error of
    each optimized strategy is printed, followed by the expected error of
    a plain Kronecker-of-identities strategy for comparison.
    """
    print('Example 3')
    dims = 4
    size = 64

    prefix_factors = [workload.Prefix(size) for _ in range(dims)]
    range_factors = [workload.AllRange(size) for _ in range(dims)]
    W1 = workload.Kronecker(prefix_factors)
    W2 = workload.Kronecker(range_factors)
    W = workload.VStack([W1, W2])

    # Build each template lazily so construction and optimization happen
    # in the same order as a straight-line script would run them.
    template_builders = (
        lambda: templates.KronPIdentity([4] * dims, [size] * dims),
        lambda: templates.Marginals([size] * dims),
    )
    for build in template_builders:
        template = build()
        template.optimize(W)
        print(error.expected_error(W, template.strategy()))

    identity = workload.Kronecker(
        [workload.Identity(size) for _ in range(dims)])
    print(error.expected_error(W, identity))
Example #7
0
def experiment1():
    """Look for a sharing-incentive violation under naive joint HDMM.

    Original intent, as noted by the author: find an example showing that
    naively running HDMM on the entire workload, ignoring the identities
    of the individual analysts, does not satisfy sharing incentive.
    Intuitively this should happen when one analyst has a much
    smaller/easier workload than the others, such that their errors
    dominate the optimization.

    ``AllRange(n)`` and ``Total(n)`` are each optimized alone and scored
    at eps=0.5, then optimized as one stacked workload and scored
    individually at eps=1.  Everything is reported via ``print``; nothing
    is returned.
    """
    print('Experiment1')
    n = 256
    p = max(1, n // 16)

    # Alternative tried previously for W2: workload.IdentityTotal(n).
    W1 = workload.AllRange(n)
    W2 = workload.Total(n)
    W = workload.VStack([W1, W2])

    # Baseline: each analyst optimizes alone with half the budget.
    solo_errors = []
    solo_runs = (
        ("Workload 1 with half the budget", W1),
        ("Workload 2 with half the budget", W2),
    )
    for banner, analyst_workload in solo_runs:
        solo = templates.PIdentity(p, n)
        solo.optimize(analyst_workload)
        print(banner)
        solo_error = error.expected_error(analyst_workload,
                                          solo.strategy(),
                                          eps=0.5)
        print(solo_error)
        solo_errors.append(solo_error)
    err1, err2 = solo_errors

    # Joint run: both workloads together with the whole budget.
    joint = templates.PIdentity(p, n)
    joint.optimize(W)
    print("Both workloads with all the budgets")
    err1all = error.expected_error(W1, joint.strategy(), eps=1)
    err2all = error.expected_error(W2, joint.strategy(), eps=1)
    for tag, joint_error in (("W1", err1all), ("W2", err2all)):
        print(tag)
        print(joint_error)

    print("Are either of the analysts violating sharing incentive")
    first_violates = err1all >= err1
    print(first_violates or (err2all >= err2))
Example #8
0
def experiment2():
    """Check whether splitting the privacy budget hurts both analysts.

    Original intent, as noted by the author: show that fixing the
    sharing-incentive problem by partitioning the privacy budget and
    running each workload independently can make all of the agents worse
    off in terms of error (should be easy to see when the analysts have
    similar workloads).

    Two near-identical workloads (``Total(n)`` scaled by 1.1, and
    ``Total(n)``) are each optimized alone and scored at eps=0.5, then
    optimized as one stacked workload and scored individually at eps=1.
    Everything is reported via ``print``; nothing is returned.
    """
    print("experiment2")
    n = 256
    W1 = workload.Total(n)
    # NOTE(review): np.multiply is applied to the workload object itself,
    # not to a dense array -- confirm the result is still a workload that
    # VStack / optimize / expected_error accept.
    W1 = np.multiply(W1, 1.1)
    W2 = workload.Total(n)
    W = workload.VStack([W1, W2])

    #workload 1 with half the budget
    pid = templates.PIdentity(max(1, n // 16), n)
    pid.optimize(W1)
    print("Workload 1 with half the budget")
    err1 = error.expected_error(W1, pid.strategy(), eps=0.5)
    print(err1)

    #workload 2 with half the budget
    pid = templates.PIdentity(max(1, n // 16), n)
    pid.optimize(W2)
    print("Workload 2 with half the budget")
    err2 = error.expected_error(W2, pid.strategy(), eps=0.5)
    print(err2)

    #both workloads together
    pid = templates.PIdentity(max(1, n // 16), n)
    pid.optimize(W)
    print("Both workloads with all the budgets")
    err1all = error.expected_error(W1, pid.strategy(), eps=1)
    err2all = error.expected_error(W2, pid.strategy(), eps=1)
    print("W1")
    print(err1all)
    print("W2")
    print(err2all)
    print("Are both agents worse off by seperating their strategy")
    # Each analyst must be compared against their OWN half-budget baseline:
    # analyst 1 via (err1all, err1) and analyst 2 via (err2all, err2).
    print((err1all < err1) and (err2all < err2))
Example #9
0
def compute_error_matrix(W, A):
    """Return the expected error of every sub-workload under every sub-strategy.

    Entry ``(i, j)`` of the result is ``error.expected_error(Wi, Aj.base)``
    for the i-th element of ``W.matrices`` and the j-th element of
    ``A.matrices``, or ``inf`` when ``Wi`` is not supported by that
    strategy.

    The result is cached in ``error_matrix.pckl`` in the current working
    directory.  If that file already exists it is loaded and returned
    without looking at ``W`` or ``A`` at all.

    Args:
        W: workload exposing ``matrices`` (the sub-workloads).
        A: strategy exposing ``matrices``; each element exposes ``base``.

    Returns:
        A ``len(W.matrices)`` x ``len(A.matrices)`` float array (or whatever
        object the cache file holds, when it exists).
    """
    cache_path = 'error_matrix.pckl'

    if os.path.isfile(cache_path):
        # NOTE(review): the cache is not keyed on W or A, so a stale file is
        # returned as-is.  pickle.load can execute arbitrary code; only use
        # this with a cache file you wrote yourself.
        # Pickle data is binary, so the file must be opened in 'rb' mode.
        with open(cache_path, 'rb') as cache_file:
            return pickle.load(cache_file)

    error_matrix = np.zeros((len(W.matrices), len(A.matrices)))
    for i, Wi in enumerate(W.matrices):
        print(i)
        for j, Aj in enumerate(A.matrices):
            # Check support for this particular pair, against the same
            # strategy (Aj.base) whose error is computed below.
            if error.strategy_supports_workload(Wi, Aj.base):
                error_matrix[i, j] = error.expected_error(Wi, Aj.base)
            else:
                error_matrix[i, j] = float('inf')
        print(error_matrix[i, :])

    # Write the cache only after the matrix is complete, so a failure above
    # cannot leave an empty cache file behind for the next call to load.
    with open(cache_path, 'wb') as cache_file:
        pickle.dump(error_matrix, cache_file)
    return error_matrix