예제 #1
0
    def test_pidentity(self):
        """Check the PIdentity template's gradient against ``check_grad``.

        A ``PIdentity(2, 8)`` template is given a ``Prefix(8)`` workload, and
        the loss and gradient returned by ``_loss_and_grad`` are compared by
        ``check_grad`` at a random 16-element starting point.  The reported
        error must not exceed 1e-5.
        """
        pid = templates.PIdentity(2, 8)
        pid._set_workload(workload.Prefix(8))

        x0 = self.prng.rand(16)

        def func(p):
            # First component of _loss_and_grad: the loss value.
            return pid._loss_and_grad(p)[0]

        def grad(p):
            # Second component of _loss_and_grad: the gradient.
            return pid._loss_and_grad(p)[1]

        err = check_grad(func, grad, x0)
        print(err)
        # assertLessEqual reports both operands on failure, whereas
        # assertTrue(err <= tol) would only say "False is not true".
        self.assertLessEqual(err, 1e-5)
예제 #2
0
    def test_default(self):
        """Print the ``check_grad`` error for the Default template.

        No assertion is made here; see the TODO below.
        """
        # TODO(ryan): test fails, but we don't really use this parameterization anyway
        template = templates.Default(10, 8)
        template._set_workload(workload.Prefix(8))

        start = self.prng.rand(80)
        start[0] = 10

        def loss(params):
            # Loss half of the (loss, gradient) pair.
            return template._loss_and_grad(params)[0]

        def gradient(params):
            # Gradient half of the (loss, gradient) pair.
            return template._loss_and_grad(params)[1]

        discrepancy = check_grad(loss, gradient, start)
        print(discrepancy)
예제 #3
0
def adult_benchmark():
    """Load the Adult dataset and build one Kronecker workload per projection.

    Every attribute in the dataset's domain gets an ``Identity`` workload
    sized by ``data.domain.size(attr)``; five attributes are then overridden
    with ``Prefix`` workloads of fixed size.

    Returns:
        A ``(data, workloads)`` tuple, where ``workloads`` is a list of
        ``(projection, workload)`` pairs in the order the projections are
        listed below.
    """
    data = Dataset.load('../data/adult.csv', '../data/adult-domain.json')

    projections = [
        ('occupation', 'race', 'capital-loss'),
        ('occupation', 'sex', 'native-country'),
        ('marital-status', 'relationship', 'income>50K'),
        ('age', 'education-num', 'sex'),
        ('workclass', 'education-num', 'occupation'),
        ('marital-status', 'occupation', 'income>50K'),
        ('race', 'native-country', 'income>50K'),
        ('occupation', 'capital-gain', 'income>50K'),
        ('marital-status', 'hours-per-week', 'income>50K'),
        ('workclass', 'race', 'capital-gain'),
        ('marital-status', 'relationship', 'capital-gain'),
        ('workclass', 'education-num', 'capital-gain'),
        ('education-num', 'relationship', 'race'),
        ('fnlwgt', 'hours-per-week', 'income>50K'),
        ('workclass', 'sex', 'native-country'),
    ]

    # Default per-attribute workload: Identity over the attribute's domain.
    lookup = {
        attr: workload.Identity(data.domain.size(attr))
        for attr in data.domain
    }

    # These attributes use Prefix workloads instead of the Identity default.
    lookup['age'] = workload.Prefix(85)
    for attr, size in (('fnlwgt', 100),
                       ('capital-gain', 100),
                       ('capital-loss', 100),
                       ('hours-per-week', 99)):
        lookup[attr] = workload.Prefix(size)

    workloads = [
        (proj, workload.Kronecker([lookup[a] for a in proj]))
        for proj in projections
    ]

    return data, workloads
예제 #4
0
    def test_augmented_identity(self):
        """Check gradients of IdTotal and AugmentedIdentity templates.

        Each template is given a ``Prefix(8)`` workload and checked with
        ``check_grad`` at a random point whose length is ``pid._params.size``.
        The reported error must not exceed 1e-5 for either template.
        """
        pid1 = templates.IdTotal(8)
        imatrix = self.prng.randint(0, 5, (3, 8))
        pid2 = templates.AugmentedIdentity(imatrix)
        strats = [pid1, pid2]

        for pid in strats:
            pid._set_workload(workload.Prefix(8))
            x0 = self.prng.rand(pid._params.size)
            # These closures are consumed within the same iteration, so
            # capturing the loop variable `pid` is safe here.
            func = lambda p: pid._loss_and_grad(p)[0]
            grad = lambda p: pid._loss_and_grad(p)[1]
            err = check_grad(func, grad, x0)
            print(err)
            # assertLessEqual reports both operands on failure, whereas
            # assertTrue(err <= tol) would only say "False is not true".
            self.assertLessEqual(err, 1e-5)
예제 #5
0
 def setUp(self):
     """Seed the PRNG and build the list of matrices used by the tests."""
     self.prng = np.random.RandomState(10)
     size = 8
     eye = matrix.Identity(size)
     ones = matrix.Ones(size, size)
     prefix = workload.Prefix(size)
     vstacked = matrix.VStack([eye, -0.5 * ones])
     hstacked = matrix.HStack([eye, -0.5 * ones])
     # NOTE(review): this Sum is constructed but never added to
     # self.matrices, while `ones` appears there twice -- possibly the
     # second occurrence was meant to be this matrix; confirm.
     summed = matrix.Sum([eye, -0.5 * ones])
     kron = matrix.Kronecker([eye, vstacked, prefix])
     weighted = matrix.Weighted(kron, 3.0)
     marginals = workload.DimKMarginals((2, 3, 4), 2)
     disjuncts = workload.Disjuncts([prefix, eye])
     norm_k = workload.AllNormK(size, 2)
     self.matrices = [
         eye,
         ones,
         prefix,
         vstacked,
         hstacked,
         kron,
         ones,
         weighted,
         disjuncts,
         norm_k,
         marginals,
     ]
예제 #6
0
    def test_marginals(self):
        """Gradient checks for the Marginals template on a (4, 4) domain.

        The full-rank case only prints the ``check_grad`` error.  The
        low-rank case asserts that the gradient from ``_loss_and_grad``
        agrees with a central finite-difference estimate to ``atol=1e-5``.
        """
        # full rank case
        full_rank_workload = workload.Range2D(4)

        full_template = templates.Marginals((4, 4))
        full_template._set_workload(full_rank_workload)

        start = self.prng.rand(4)

        def full_loss(p):
            return full_template._loss_and_grad(p)[0]

        def full_grad(p):
            return full_template._loss_and_grad(p)[1]

        err = check_grad(full_loss, full_grad, start)
        print(err)
        # The `err <= 1e-5` assertion is disabled for this case; the error
        # is only printed.

        # low rank case
        prefix = workload.Prefix(4)
        total = workload.Total(4)
        prefix_by_total = workload.Kronecker([prefix, total])
        total_by_prefix = workload.Kronecker([total, prefix])
        low_rank_workload = workload.VStack([prefix_by_total, total_by_prefix])

        low_template = templates.Marginals((4, 4))
        low_template._set_workload(low_rank_workload)
        point = np.array([1, 1, 1, 0.0])

        def low_loss(p):
            return low_template._loss_and_grad(p)[0]

        def low_grad(p):
            return low_template._loss_and_grad(p)[1]

        # The loss is evaluated once here (result unused) before the gradient.
        low_loss(point)
        analytic = low_grad(point)

        # Central differences, perturbing `point` in place one coordinate at
        # a time and restoring it afterwards.
        step = 0.00001
        numeric = np.zeros(4)
        for i in range(4):
            point[i] -= step
            below = low_loss(point)
            point[i] += 2 * step
            above = low_loss(point)
            point[i] -= step
            numeric[i] = (above - below) / (2 * step)

        print(analytic)
        print(numeric)

        np.testing.assert_allclose(analytic, numeric, atol=1e-5)
예제 #7
0
def example3():
    """Optimize a union-of-Kronecker-products workload two ways.

    The workload stacks a Kronecker product of four ``Prefix(64)`` workloads
    on one of four ``AllRange(64)`` workloads.  It is optimized with the
    ``KronPIdentity`` and ``Marginals`` templates, and the expected error of
    each resulting strategy is printed, followed by the expected error of a
    Kronecker product of ``Identity(64)`` matrices.
    """
    print('Example 3')
    prefix_factors = [workload.Prefix(64) for _ in range(4)]
    range_factors = [workload.AllRange(64) for _ in range(4)]
    prefix_product = workload.Kronecker(prefix_factors)
    range_product = workload.Kronecker(range_factors)
    union = workload.VStack([prefix_product, range_product])

    # Kronecker parameterization.
    kron_template = templates.KronPIdentity([4] * 4, [64] * 4)
    kron_template.optimize(union)
    kron_error = error.expected_error(union, kron_template.strategy())
    print(kron_error)

    # Marginals parameterization.
    marg_template = templates.Marginals([64] * 4)
    marg_template.optimize(union)
    marg_error = error.expected_error(union, marg_template.strategy())
    print(marg_error)

    # Identity strategy, printed after the two optimized strategies.
    identity = workload.Kronecker([workload.Identity(64) for _ in range(4)])
    identity_error = error.expected_error(union, identity)
    print(identity_error)
예제 #8
0
    def setUp(self):
        """Set experiment constants and build CPS datasets, workloads and engine."""
        super(TestData, self).setUp()

        reduced_shape = (10, 10, 7, 4, 2)  # numpy shape tuple
        self.expr_seed = 12345
        self.expr_eps = 0.1
        self.delimiter = ','

        cps_relation = data.RelationHelper('CPS').load()

        # Full bin dataset
        self.X1 = DatasetFromRelation(cps_relation, 'CPS-CSV')

        # Reduced bin dataset
        self.X2 = DatasetFromRelation(
            cps_relation, 'CPS-CSV', reduce_to_dom_shape=reduced_shape)

        # Workload and Algorithms
        self.W1 = workload.Prefix(20)
        self.W2 = workload.RandomRange(None, (64, ), 25)
        self.A2 = ahp.ahpND_engine()
예제 #9
0
    def setUp(self):
        """Build the stacked workload and five candidate strategies on a (2, 3, 4) domain."""
        self.prng = np.random.RandomState(0)
        self.domain = (2, 3, 4)

        def identity(n):
            return workload.Identity(n)

        def total(n):
            return workload.Total(n)

        def prefix(n):
            return workload.Prefix(n)

        def random_square(n):
            # n-by-n matrix drawn from the fixture's seeded PRNG.
            return matrix.EkteloMatrix(self.prng.rand(n, n))

        first = workload.Kronecker([identity(2), total(3), prefix(4)])
        second = workload.Kronecker([total(2), total(3), identity(4)])
        third = workload.Kronecker([identity(2), identity(3), total(4)])

        self.W = workload.VStack([first, second, third])

        # three representations of Identity matrix
        self.A1 = identity(2 * 3 * 4)
        self.A2 = workload.Kronecker([identity(2), identity(3), identity(4)])
        self.A3 = workload.Marginals.fromtuples(self.domain, {(0, 1, 2): 1.0})

        # PRNG draws happen in this order: the 8 marginal weights first,
        # then the 2x2, 3x3 and 4x4 random factors.
        self.A4 = workload.Marginals(self.domain, self.prng.rand(8))
        self.A5 = workload.Kronecker(
            [random_square(2), random_square(3), random_square(4)])
예제 #10
0
def Client(kernel_service, domain, eta, ratio, n):
    """ Client-side plan run against a protected data source.

        The client wraps the kernel service in a protected data source,
        filters and vectorizes the data, partitions and reduces the vector,
        measures the reduced vector with Laplace noise, infers an estimate
        of the original vector, and prints the answers to a Prefix workload
        of size n. Public operators are applied locally to the data that
        the protected source returns.

        NOTE(review): eps_total is read below but is not a parameter of this
        function -- presumably a module-level global; confirm.
    """
    # Protected data source mediates client-side access to kernel service
    source = cservice.ProtectedDataSource(kernel_service)

    # Filter data
    filtered = source.where('age >= 30 and age <= 39')
    projected = filtered.project(['income'])

    # Transform relation to vector
    vector = projected.vectorize(domain)

    # Use fraction "ratio" of budget to determine reduced mapping
    partition = vector.ahp_partition(n, ratio, eta, eps_total)

    # Reduce the vector according to this mapping
    reduced = vector.reduce_by_partition(partition)

    # Use remaining budget to get a noisy answer on the reduced domain;
    # the measurement is an identity over the distinct partition labels.
    group_count = len(set(partition))
    measurement = identity((group_count, ))
    remaining_eps = eps_total * (1 - ratio)
    noisy_reduced = reduced.laplace(measurement, remaining_eps)

    # Infer the reduced vector from the noisy answer
    reduced_estimate = non_negative_least_squares(measurement, noisy_reduced)

    # Project the inferred vector back to the original domain
    estimate = support.expansion_matrix(partition) * reduced_estimate

    # A Prefix workload of queries
    queries = workload.Prefix(n)

    # Report query results
    print(queries.matrix * estimate)