Example #1
0
def grouped_workload(W, A, error_matrix, match_type='BEST'):
    """Partition the sub-workloads of W into groups, one per strategy in A.

    :param W: union workload; W.matrices are the sub-workloads
    :param A: strategy collection; A.matrices are the candidate strategies
    :param error_matrix: 2D array where error_matrix[i, j] is the error of
        strategy j on sub-workload i (float('inf') means j does not support i)
    :param match_type: grouping rule — 'BEST' (each workload goes to its
        single best strategy), 'TOP' (redundantly to its top-2 supporting
        strategies), or 'SUPPORTED' (to every supporting strategy)
    :return: list of workload.VStack objects, one per strategy that received
        at least one sub-workload
    :raises ValueError: if match_type is not one of the recognized rules
    """
    best_idx = np.argmin(error_matrix, axis=1)  # best strategy per workload
    order = np.argsort(error_matrix, axis=1)    # strategies ranked per workload

    def best(i, j):  # partition workloads into groups assoc. with best supporting strategy
        return j == best_idx[i]

    def supported(i, j):  # form redundant groups based on all supporting strategies
        return error_matrix[i, j] < float('inf')

    def top(i, j, lim=2):
        # BUGFIX: the original tested `j in np.argsort(...)[0:lim]`, i.e.
        # membership in the first `lim` ROWS of the argsort matrix (each row
        # contains every index, so the test degenerated to `supported`).
        # The intent is: j is among the top-`lim` strategies for workload i.
        return supported(i, j) and (j in order[i, :lim])

    matchers = {'BEST': best, 'TOP': top, 'SUPPORTED': supported}
    if match_type not in matchers:
        # raise instead of `assert False`: asserts are stripped under -O
        raise ValueError('unknown match_type: %s' % match_type)
    match = matchers[match_type]

    groups = [[] for _ in range(len(A.matrices))]

    for i in range(len(W.matrices)):
        for j in range(len(A.matrices)):
            if match(i, j):
                groups[j].append(W.matrices[i])

    return [workload.VStack(g) for g in groups if len(g) > 0]
Example #2
0
def census():
    """Build two census workload unions from the SF1 Persons tables.

    Both keep Kronecker factors 0, 1, 2 and 4 of every term (factor 3 is
    dropped); W2 additionally crosses each term with an IdentityTotal of
    size 51 (presumably the state/geography dimension — TODO confirm).
    """
    keep = (0, 1, 2, 4)
    terms = [K.matrices for K in SF1_Persons().matrices]

    W1 = workload.VStack([
        workload.Kronecker([t[i] for i in keep]) for t in terms
    ])

    W2 = workload.VStack([
        workload.Kronecker([t[i] for i in keep] + [workload.IdentityTotal(51)])
        for t in terms
    ])

    return W1, W2
Example #3
0
    def dict2workload(workload_dict: Dict[str, AbstractLinearQuery]):
        """Convert a dict of queries into an HDMM workload (union of Krons).

        Each query contributes one Kronecker product whose factors are its
        kronFactors() wrapped as EkteloMatrix; the results are stacked.
        """
        krons: List[workload.Kron] = [
            workload.Kronecker(
                [workload.EkteloMatrix(factor) for factor in query.kronFactors()]
            )
            for query in workload_dict.values()
        ]
        return workload.VStack(krons)
Example #4
0
def SmallKrons(blocks, size=5000):
    """Union of Kronecker workloads over all attribute subsets whose
    projected domain size is small enough.

    For each subset of attributes, a Kronecker product is formed using the
    given block on chosen attributes and a Total query on the rest; the
    product is kept only if the product of the chosen blocks' column counts
    is at most `size`.

    :param blocks: list of workload matrices, one per attribute
    :param size: upper bound on the product of column counts of the chosen
        blocks (default 5000)
    :return: workload.VStack of the qualifying Kronecker workloads
    """
    base = [workload.Total(W.shape[1]) for W in blocks]
    d = len(blocks)
    concat = []
    for attr in powerset(range(d)):
        # PERF: compute the domain size first so we only construct the
        # Kronecker products we actually keep (original built every one).
        tmp = reduce(lambda x, y: x * y, [blocks[i].shape[1] for i in attr], 1)
        if tmp <= size:
            subs = [blocks[i] if i in attr else base[i] for i in range(d)]
            concat.append(workload.Kronecker(subs))
    return workload.VStack(concat)
Example #5
0
def DimKKrons(workloads, k=1):
    """Union of Kronecker workloads that keep exactly k attributes.

    For every size-k combination of attributes, build a Kronecker product
    using the given workload on the chosen attributes and a Total query on
    all the others, then stack the results.
    """
    totals = [workload.Total(B.shape[1]) for B in workloads]
    dims = range(len(workloads))

    kron_terms = []
    for chosen in itertools.combinations(dims, k):
        factors = [workloads[i] if i in chosen else totals[i] for i in dims]
        kron_terms.append(workload.Kronecker(factors))

    return workload.VStack(kron_terms)
Example #6
0
def experiment3():
    print('experiment3')
    n = 256
    """Want to show that we can satisfy sharing incentive by running
    the entire workload together, but weighting the workload of each 
    analyst in inverse proportion to the sensitivity of their workload """

    #using experiment 1 as an example
    W1 = workload.AllRange(n)
    #W2 = workload.Total(n)
    W2 = workload.Identity(n)
    W = workload.VStack([
        matrix.EkteloMatrix(np.multiply(W1.matrix, (1 / W1.sensitivity()))),
        matrix.EkteloMatrix(np.multiply(W2.matrix, (1 / W2.sensitivity())))
    ])

    #workload 1 with half the budget
    pid = templates.PIdentity(max(1, n // 16), n)
    pid.optimize(W1)
    print("Workload 1 with half the budget")
    err1 = error.expected_error(W1, pid.strategy(), eps=0.5)
    print(err1)

    pid = templates.PIdentity(max(1, n // 16), n)
    pid.optimize(W2)
    print("Workload 2 with half the budget")
    err2 = error.expected_error(W2, pid.strategy(), eps=0.5)
    print(err2)

    #both workloads together
    pid = templates.PIdentity(max(1, n // 16), n)
    pid.optimize(W)
    print("Both workloads with all the budgets")
    err1all = error.expected_error(W1, pid.strategy(), eps=1)
    err2all = error.expected_error(W2, pid.strategy(), eps=1)
    print("W1")
    print(err1all)
    print("W2")
    print(err2all)
    print("Are either of the analysts violating sharing incentive")
    print((err1all >= err1) or (err2all >= err2))
    """ Issue when you independently scale each matrix then merge you some of the
Example #7
0
def experiment4():
    print('experiment4')
    n = 256
    """Want to show that we can satisfy sharing incentive by running
    the entire workload together, but weighting the workload of each 
    analyst in inverse proportion to the sensitivity of their workload """
    """ This time we try scaling the big workload matrix"""

    #using experiment 1 as an example
    W1 = workload.AllRange(n)
    #W2 = workload.Total(n)
    W2 = workload.Identity(n)
    W = workload.VStack([W1, W2])
    W = matrix.EkteloMatrix((np.multiply(W.matrix, (1 / W.sensitivity()))))

    #workload 1 with half the budget
    pid = templates.PIdentity(max(1, n // 16), n)
    pid.optimize(W1)
    print("Workload 1 with half the budget")
    err1 = error.expected_error(W1, pid.strategy(), eps=0.5)
    print(err1)

    pid = templates.PIdentity(max(1, n // 16), n)
    pid.optimize(W2)
    print("Workload 2 with half the budget")
    err2 = error.expected_error(W2, pid.strategy(), eps=0.5)
    print(err2)

    #both workloads together
    pid = templates.PIdentity(max(1, n // 16), n)
    pid.optimize(W)
    print("Both workloads with all the budgets")
    err1all = error.expected_error(W1, pid.strategy(), eps=1)
    err2all = error.expected_error(W2, pid.strategy(), eps=1)
    print("W1")
    print(err1all)
    print("W2")
    print(err2all)
    print("Are either of the analysts violating sharing incentive")
    print((err1all >= err1) or (err2all >= err2))
    """Doesn't work either. May work when the number of analysts scales too high.
Example #8
0
def example3():
    """ Optimize Union-of-Kronecker product workload using kronecker parameterization
    and marginals parameterization """
    print('Example 3')
    dims, dom = 4, 64

    # union of two Kronecker workloads over a 64^4 domain
    W = workload.VStack([
        workload.Kronecker([workload.Prefix(dom) for _ in range(dims)]),
        workload.Kronecker([workload.AllRange(dom) for _ in range(dims)]),
    ])

    # Kronecker parameterization (P-Identity per dimension)
    kron_template = templates.KronPIdentity([4] * dims, [dom] * dims)
    kron_template.optimize(W)
    print(error.expected_error(W, kron_template.strategy()))

    # Marginals parameterization
    marg_template = templates.Marginals([dom] * dims)
    marg_template.optimize(W)
    print(error.expected_error(W, marg_template.strategy()))

    # baseline: the identity strategy
    identity = workload.Kronecker([workload.Identity(dom) for _ in range(dims)])
    print(error.expected_error(W, identity))
Example #9
0
def experiment1():
    # We want an example to show that naively running HDMM on the entire
    # workload, ignoring identities of individual analysts, does not satisfy
    # sharing incentive. Intuitively this should happen when one analyst has
    # a much smaller/easier workload than the other analysts, so that their
    # errors dominate the optimization.
    print('Experiment1')
    n = 256

    W1 = workload.AllRange(n)
    W2 = workload.Total(n)
    W = workload.VStack([W1, W2])

    def fit(target):
        # optimize a P-Identity strategy for the given workload
        tmpl = templates.PIdentity(max(1, n // 16), n)
        tmpl.optimize(target)
        return tmpl.strategy()

    # each workload alone, with half the budget
    print("Workload 1 with half the budget")
    err1 = error.expected_error(W1, fit(W1), eps=0.5)
    print(err1)

    print("Workload 2 with half the budget")
    err2 = error.expected_error(W2, fit(W2), eps=0.5)
    print(err2)

    # both workloads together, full budget
    A = fit(W)
    print("Both workloads with all the budgets")
    err1all = error.expected_error(W1, A, eps=1)
    err2all = error.expected_error(W2, A, eps=1)
    print("W1")
    print(err1all)
    print("W2")
    print(err2all)
    print("Are either of the analysts violating sharing incentive")
    print((err1all >= err1) or (err2all >= err2))
Example #10
0
def experiment2():
    """Show that partitioning the privacy budget and running each workload
    independently can make all of the agents worse off in terms of error
    (easiest to see when the analysts have similar workloads)."""
    print("experiment2")
    n = 256
    W1 = workload.Total(n)
    # slightly perturb analyst 1's workload so the two are similar but not equal
    W1 = np.multiply(W1, 1.1)
    W2 = workload.Total(n)
    W = workload.VStack([W1, W2])

    #workload 1 with half the budget
    pid = templates.PIdentity(max(1, n // 16), n)
    pid.optimize(W1)
    print("Workload 1 with half the budget")
    err1 = error.expected_error(W1, pid.strategy(), eps=0.5)
    print(err1)

    #workload 2 with half the budget
    pid = templates.PIdentity(max(1, n // 16), n)
    pid.optimize(W2)
    print("Workload 2 with half the budget")
    err2 = error.expected_error(W2, pid.strategy(), eps=0.5)
    print(err2)

    #both workloads together
    pid = templates.PIdentity(max(1, n // 16), n)
    pid.optimize(W)
    print("Both workloads with all the budgets")
    err1all = error.expected_error(W1, pid.strategy(), eps=1)
    err2all = error.expected_error(W2, pid.strategy(), eps=1)
    print("W1")
    print(err1all)
    print("W2")
    print(err2all)
    print("Are both agents worse off by seperating their strategy")
    # BUGFIX: the second conjunct was `err1all < err2`, comparing analyst 1's
    # combined error against analyst 2's *separate* error. "Both worse off"
    # requires each analyst's combined error to beat their own separate error.
    print((err1all < err1) and (err2all < err2))
Example #11
0
# this is a 2d example:
# domain sizes of the two attributes (10 x 25 grid)
domain = (10, 25)

# densely represented sub-workloads in each of the dimensions
identity1 = workload.EkteloMatrix(np.eye(10))    # every unit query on attribute 1
identity2 = workload.EkteloMatrix(np.eye(25))    # every unit query on attribute 2
total = workload.EkteloMatrix(np.ones((1, 10)))  # single sum query over attribute 1
prefix = workload.EkteloMatrix(np.tril(np.ones((25, 25))))  # prefix sums over attribute 2

# form the kron products in each dimension
W1 = workload.Kronecker([identity1, identity2])  # full identity over the 2d domain
W2 = workload.Kronecker([total, prefix])         # prefix sums marginalized over attr 1

# form the union of krons
W = workload.VStack([W1, W2])

# find a Kronecker product strategy by optimizing the workload
ps = [2, 2]  # parameter for P-Identity strategies
template = templates.KronPIdentity(ps, domain)

# run optimization
template.optimize(W)

# get the sparse, explicit representation of the optimized strategy
A = template.strategy().sparse_matrix().tocsr()

# Round for Geometric Mechanism (skip this if using Laplace Mechanism)
# NOTE(review): rounding a scipy CSR matrix via np.round relies on scalar
# multiply/divide keeping the sparse type — confirm against scipy version
A = np.round(A * 1000) / 1000.0

# Extract diagonal and non-diagonal portion of strategy