def test_pidentity(self):
    """Check the PIdentity template's gradient with ``check_grad``.

    A ``templates.PIdentity(2, 8)`` template is given an 8-element Prefix
    workload.  The loss and gradient returned by ``_loss_and_grad`` are
    passed to ``check_grad`` at a random 16-element point drawn from
    ``self.prng``; the reported discrepancy is printed and must not
    exceed 1e-5.
    """
    template = templates.PIdentity(2, 8)
    template._set_workload(workload.Prefix(8))
    x0 = self.prng.rand(16)

    def loss(params):
        # First element of the (loss, gradient) pair.
        return template._loss_and_grad(params)[0]

    def gradient(params):
        # Second element of the (loss, gradient) pair.
        return template._loss_and_grad(params)[1]

    err = check_grad(loss, gradient, x0)
    print(err)
    # assertLessEqual reports both operands on failure, whereas
    # assertTrue(err <= 1e-5) would only say "False is not true".
    self.assertLessEqual(err, 1e-5)
def test_default(self):
    """Run ``check_grad`` on the Default template and print the result.

    No assertion is made on the reported discrepancy (see the TODO below).
    """
    # TODO(ryan): test fails, but we don't really use this parameterization anyway
    template = templates.Default(10, 8)
    template._set_workload(workload.Prefix(8))

    start = self.prng.rand(80)
    start[0] = 10

    def loss(params):
        return template._loss_and_grad(params)[0]

    def gradient(params):
        return template._loss_and_grad(params)[1]

    print(check_grad(loss, gradient, start))
def adult_benchmark():
    """Load the adult dataset and build one workload per projection.

    Returns:
        A ``(data, workloads)`` pair.  ``workloads`` is a list of
        ``(projection, W)`` tuples where ``W`` is the ``workload.Kronecker``
        product of the per-attribute workloads of that projection.
    """
    data = Dataset.load('../data/adult.csv', '../data/adult-domain.json')

    projections = [
        ('occupation', 'race', 'capital-loss'),
        ('occupation', 'sex', 'native-country'),
        ('marital-status', 'relationship', 'income>50K'),
        ('age', 'education-num', 'sex'),
        ('workclass', 'education-num', 'occupation'),
        ('marital-status', 'occupation', 'income>50K'),
        ('race', 'native-country', 'income>50K'),
        ('occupation', 'capital-gain', 'income>50K'),
        ('marital-status', 'hours-per-week', 'income>50K'),
        ('workclass', 'race', 'capital-gain'),
        ('marital-status', 'relationship', 'capital-gain'),
        ('workclass', 'education-num', 'capital-gain'),
        ('education-num', 'relationship', 'race'),
        ('fnlwgt', 'hours-per-week', 'income>50K'),
        ('workclass', 'sex', 'native-country'),
    ]

    # Start with an Identity workload sized to each attribute's domain ...
    per_attribute = {
        attr: workload.Identity(data.domain.size(attr))
        for attr in data.domain
    }

    # ... then replace selected attributes with Prefix workloads.
    prefix_sizes = (
        ('age', 85),
        ('fnlwgt', 100),
        ('capital-gain', 100),
        ('capital-loss', 100),
        ('hours-per-week', 99),
    )
    for attr, size in prefix_sizes:
        per_attribute[attr] = workload.Prefix(size)

    workloads = [
        (proj, workload.Kronecker([per_attribute[a] for a in proj]))
        for proj in projections
    ]
    return data, workloads
def test_augmented_identity(self):
    """Check the gradients of IdTotal and AugmentedIdentity templates.

    Each template is given an 8-element Prefix workload.  Its loss and
    gradient (from ``_loss_and_grad``) are passed to ``check_grad`` at a
    random point with ``_params.size`` entries; the reported discrepancy
    is printed and must not exceed 1e-5.
    """
    id_total = templates.IdTotal(8)
    # Random 3x8 integer matrix with entries drawn from [0, 5).
    imatrix = self.prng.randint(0, 5, (3, 8))
    augmented = templates.AugmentedIdentity(imatrix)

    for template in (id_total, augmented):
        template._set_workload(workload.Prefix(8))
        x0 = self.prng.rand(template._params.size)

        # Defined per iteration and consumed immediately by check_grad,
        # so each pair always refers to the current template.
        def loss(params):
            return template._loss_and_grad(params)[0]

        def gradient(params):
            return template._loss_and_grad(params)[1]

        err = check_grad(loss, gradient, x0)
        print(err)
        # assertLessEqual reports both operands on failure, whereas
        # assertTrue(err <= 1e-5) would only say "False is not true".
        self.assertLessEqual(err, 1e-5)
def setUp(self):
    """Seed ``self.prng`` and build ``self.matrices`` for the tests."""
    self.prng = np.random.RandomState(10)
    size = 8

    eye = matrix.Identity(size)
    ones = matrix.Ones(size, size)
    prefix = workload.Prefix(size)
    vstacked = matrix.VStack([eye, -0.5 * ones])
    hstacked = matrix.HStack([eye, -0.5 * ones])
    # NOTE(review): this Sum is constructed but never added to
    # self.matrices, while `ones` appears there twice -- confirm whether
    # the second `ones` entry was meant to be this matrix.
    summed = matrix.Sum([eye, -0.5 * ones])
    kron = matrix.Kronecker([eye, vstacked, prefix])
    weighted = matrix.Weighted(kron, 3.0)
    marginals = workload.DimKMarginals((2, 3, 4), 2)
    disjuncts = workload.Disjuncts([prefix, eye])
    norm_k = workload.AllNormK(size, 2)

    self.matrices = [
        eye,
        ones,
        prefix,
        vstacked,
        hstacked,
        kron,
        ones,
        weighted,
        disjuncts,
        norm_k,
        marginals,
    ]
def test_marginals(self):
    """Exercise the Marginals template's loss/gradient in two settings.

    * Full rank case: ``check_grad`` is run on a ``Range2D(4)`` workload
      and its result is only printed.
    * Low rank case: the gradient returned by ``_loss_and_grad`` is
      compared against a central-difference estimate of the loss.
    """
    # full rank case
    full_template = templates.Marginals((4, 4))
    full_template._set_workload(workload.Range2D(4))
    start = self.prng.rand(4)

    def full_loss(params):
        return full_template._loss_and_grad(params)[0]

    def full_grad(params):
        return full_template._loss_and_grad(params)[1]

    err = check_grad(full_loss, full_grad, start)
    print(err)
    # NOTE: an `err <= 1e-5` assertion is disabled for this case; the
    # value is printed only.

    # low rank case
    prefix = workload.Prefix(4)
    total = workload.Total(4)
    prefix_by_total = workload.Kronecker([prefix, total])
    total_by_prefix = workload.Kronecker([total, prefix])
    stacked = workload.VStack([prefix_by_total, total_by_prefix])

    low_template = templates.Marginals((4, 4))
    low_template._set_workload(stacked)
    point = np.array([1, 1, 1, 0.0])

    def low_loss(params):
        return low_template._loss_and_grad(params)[0]

    def low_grad(params):
        return low_template._loss_and_grad(params)[1]

    # The loss at the starting point is evaluated but not used afterwards.
    _, analytic = low_loss(point), low_grad(point)

    # Central differences: perturb one coordinate down, then up, then
    # restore it, evaluating the loss at both perturbed points.
    delta = 0.00001
    span = 0.00002
    numeric = np.zeros(4)
    for idx in range(4):
        point[idx] -= delta
        loss_below = low_loss(point)
        point[idx] += span
        loss_above = low_loss(point)
        point[idx] -= delta
        numeric[idx] = (loss_above - loss_below) / span

    print(analytic)
    print(numeric)
    np.testing.assert_allclose(analytic, numeric, atol=1e-5)
def example3():
    """Optimize a union-of-Kronecker-products workload in two ways.

    The workload stacks a Kronecker product of four Prefix(64) workloads on
    a Kronecker product of four AllRange(64) workloads.  It is optimized
    with the Kronecker parameterization (``KronPIdentity``) and with the
    marginals parameterization; the expected error of each resulting
    strategy, and of the identity strategy, is printed.
    """
    print('Example 3')

    dims = 4
    size = 64

    prefix_factors = [workload.Prefix(size) for _ in range(dims)]
    range_factors = [workload.AllRange(size) for _ in range(dims)]
    W = workload.VStack([
        workload.Kronecker(prefix_factors),
        workload.Kronecker(range_factors),
    ])

    kron_template = templates.KronPIdentity([4] * dims, [size] * dims)
    kron_template.optimize(W)
    print(error.expected_error(W, kron_template.strategy()))

    marginals_template = templates.Marginals([size] * dims)
    marginals_template.optimize(W)
    print(error.expected_error(W, marginals_template.strategy()))

    # Baseline: the identity strategy over the same domain.
    identity = workload.Kronecker(
        [workload.Identity(size) for _ in range(dims)]
    )
    print(error.expected_error(W, identity))
def setUp(self):
    """Set experiment constants and build the CPS datasets, workloads and engine."""
    super(TestData, self).setUp()

    self.expr_seed = 12345
    self.expr_eps = 0.1
    self.delimiter = ','

    reduced_shape = (10, 10, 7, 4, 2)  # numpy shape tuple
    cps_relation = data.RelationHelper('CPS').load()

    # Full bin dataset
    self.X1 = DatasetFromRelation(cps_relation, 'CPS-CSV')

    # Reduced bin dataset
    self.X2 = DatasetFromRelation(
        cps_relation,
        'CPS-CSV',
        reduce_to_dom_shape=reduced_shape,
    )

    # Workload and Algorithms
    self.W1 = workload.Prefix(20)
    self.W2 = workload.RandomRange(None, (64,), 25)
    self.A2 = ahp.ahpND_engine()
def setUp(self):
    """Seed ``self.prng`` and build the workload ``self.W`` and strategies A1-A5."""
    self.prng = np.random.RandomState(0)
    self.domain = (2, 3, 4)

    identity = workload.Identity
    total = workload.Total
    prefix = workload.Prefix

    def random_square(n):
        # n x n matrix with entries drawn from self.prng.
        return matrix.EkteloMatrix(self.prng.rand(n, n))

    self.W = workload.VStack([
        workload.Kronecker([identity(2), total(3), prefix(4)]),
        workload.Kronecker([total(2), total(3), identity(4)]),
        workload.Kronecker([identity(2), identity(3), total(4)]),
    ])

    # three representations of Identity matrix
    self.A1 = identity(2 * 3 * 4)
    self.A2 = workload.Kronecker([identity(2), identity(3), identity(4)])
    self.A3 = workload.Marginals.fromtuples(self.domain, {(0, 1, 2): 1.0})

    # The random draws below must stay in this order: the weights for A4
    # are drawn from self.prng before the three random factors of A5.
    self.A4 = workload.Marginals(self.domain, self.prng.rand(8))
    self.A5 = workload.Kronecker(
        [random_square(2), random_square(3), random_square(4)]
    )
def Client(kernel_service, domain, eta, ratio, n):
    """Client-side example: query a protected source and report results.

    The client wraps the kernel service in a protected data source, which
    it queries from time to time, and applies public operators locally to
    whatever the protected source returns.  The answers to a Prefix
    workload on the inferred data vector are printed.
    """
    # Protected data source mediates client-side access to kernel service
    source = cservice.ProtectedDataSource(kernel_service)

    # Filter data
    incomes = source.where('age >= 30 and age <= 39').project(['income'])

    # Transform relation to vector
    x = incomes.vectorize(domain)

    # Use fraction "ratio" of budget to determine reduced mapping
    # NOTE(review): eps_total is neither a parameter nor a local here;
    # presumably it is a module-level value -- confirm.
    mapping = x.ahp_partition(n, ratio, eta, eps_total)

    # Reduce x according to this mapping
    x_bar = x.reduce_by_partition(mapping)

    # Use remaining budget to get noisy x from reduced domain
    reduced_size = len(set(mapping))
    M_bar = identity((reduced_size, ))
    remaining_eps = eps_total * (1 - ratio)
    y_bar = x_bar.laplace(M_bar, remaining_eps)

    # Infer actual x from noisy answer
    x_bar_hat = non_negative_least_squares(M_bar, y_bar)

    # project inferred x back to original domain
    x_hat = support.expansion_matrix(mapping) * x_bar_hat

    # A Prefix workload of queries
    queries = workload.Prefix(n)

    # Report query results
    print(queries.matrix * x_hat)