def Run(self, W, x, eps, seed):
    x = x.flatten()
    prng = np.random.RandomState(seed)

    if self.workload_based:
        mapping = mapper.WorkloadBased(W).mapping()
        reducer = transformation.ReduceByPartition(mapping)
        x = reducer.transform(x)
        # Reduce workload
        # W = support.reduce_queries(mapping, W)
        W = W * support.expansion_matrix(mapping)

    # The orange AHPpartition (PA) operator in the paper can be expressed
    # as the following sequence of simpler operators
    M = selection.Identity(x.shape).select()
    y = measurement.Laplace(M, self.ratio * eps).measure(x, prng)
    xest = inference.AHPThresholding(self.eta, self.ratio).infer(M, y, eps)
    mapping = mapper.AHPCluster(xest, (1 - self.ratio) * eps).mapping()

    # TR
    reducer = transformation.ReduceByPartition(mapping)
    x_bar = reducer.transform(x)

    # SI LM LS
    M_bar = selection.Identity(x_bar.shape).select()
    y_bar = measurement.Laplace(M_bar, eps * (1 - self.ratio)).measure(x_bar, prng)
    x_bar_hat = inference.LeastSquares().infer(M_bar, y_bar)
    x_hat = support.expansion_matrix(mapping) * x_bar_hat

    return x_hat
def Run(self, W, x, eps):
    if self.workload_based:
        x, W = workload_based(x=x, W=W)

    if len(self.domain_shape) == 2:
        # apply hilbert transform to convert 2d domain_shape into 1d
        hilbert_mapping = hilbert(self.domain_shape)
        x = x.reduce_by_partition(hilbert_mapping)
        W = W * support.expansion_matrix(hilbert_mapping)

    mapping = x.dawa(self.ratio, self.approx, eps)
    x_bar = x.reduce_by_partition(mapping)
    W_bar = W * support.expansion_matrix(mapping)

    M_bar = greedyH((len(set(mapping)), ), W_bar)
    y = x_bar.laplace(M_bar, eps * (1 - self.ratio))
    x_bar_hat = least_squares(M_bar, y)

    x_hat = support.expansion_matrix(mapping) * x_bar_hat

    if len(self.domain_shape) == 2:
        return support.expansion_matrix(hilbert_mapping) * x_hat

    return x_hat
def Run(self, W, x, eps):
    W = get_matrix(W)

    striped_mapping = striped(self.domain_shape, self.stripe_dim)
    x_sub_list = x.split_by_partition(striped_mapping)

    Ms = []
    ys = []
    scale_factors = []
    group_idx = sorted(set(striped_mapping))

    for i in group_idx:
        x_i = x_sub_list[group_idx.index(i)]
        P_i = support.projection_matrix(striped_mapping, i)
        W_i = W * P_i.T

        mapping = x_i.dawa(self.ratio, self.approx, eps)
        x_bar = x_i.reduce_by_partition(mapping)
        W_bar = W_i * support.expansion_matrix(mapping)

        M_bar = greedyH((len(set(mapping)), ), W_bar)
        y_i = x_bar.laplace(M_bar, eps * (1 - self.ratio))

        M_i = (M_bar * support.reduction_matrix(mapping)) * P_i
        Ms.append(M_i)
        ys.append(y_i)
        # scale factor must match the budget actually used by the measurement
        scale_factors.append(laplace_scale_factor(M_bar, eps * (1 - self.ratio)))

    x_hat = least_squares(Ms, ys, scale_factors)

    return x_hat
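# A minimal, self-contained sketch of the striped partition the plan above
# consumes (striped_mapping_sketch and its semantics are an assumption, not
# the library's striped()/mapper.Striped implementation): cells of a
# d-dimensional domain share a group iff they agree on every coordinate
# except stripe_dim, so each group is a 1D stripe along that axis.
import numpy as np

def striped_mapping_sketch(domain_shape, stripe_dim):
    # one group id per combination of the non-striped coordinates
    group_shape = tuple(s for i, s in enumerate(domain_shape) if i != stripe_dim)
    groups = np.arange(int(np.prod(group_shape))).reshape(group_shape)
    # broadcast the group ids along the striped axis and flatten
    return np.expand_dims(groups, stripe_dim).repeat(
        domain_shape[stripe_dim], axis=stripe_dim).flatten()

# e.g. striped_mapping_sketch((2, 3), stripe_dim=1) -> [0 0 0 1 1 1]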
def Run(self, W, x, eps, seed):
    x = x.flatten()
    prng = np.random.RandomState(seed)

    if len(self.domain_shape) == 2:
        # apply hilbert transform to convert 2d domain into 1d
        hilbert_mapping = mapper.HilbertTransform(self.domain_shape).mapping()
        domain_reducer = transformation.ReduceByPartition(hilbert_mapping)
        x = domain_reducer.transform(x)

        W = W.get_matrix() * support.expansion_matrix(hilbert_mapping)

        dawa = pmapper.Dawa(eps, self.ratio, self.approx)
        mapping = dawa.mapping(x, prng)
    elif len(self.domain_shape) == 1:
        W = W.get_matrix()
        dawa = pmapper.Dawa(eps, self.ratio, self.approx)
        mapping = dawa.mapping(x, prng)

    reducer = transformation.ReduceByPartition(mapping)
    x_bar = reducer.transform(x)
    W_bar = W * support.expansion_matrix(mapping)

    M_bar = selection.GreedyH(x_bar.shape, W_bar).select()
    y = measurement.Laplace(M_bar, eps * (1 - self.ratio)).measure(x_bar, prng)
    x_bar_hat = inference.LeastSquares().infer(M_bar, y)

    x_bar_hat_exp = support.expansion_matrix(mapping) * x_bar_hat

    if len(self.domain_shape) == 1:
        return x_bar_hat_exp
    elif len(self.domain_shape) == 2:
        return support.expansion_matrix(hilbert_mapping) * x_bar_hat_exp
def Run(self, W, x, eps):
    mapping = x.dawa(self.ratio, self.approx, eps)
    x_bar = x.reduce_by_partition(mapping)
    W_bar = W.get_matrix() * support.expansion_matrix(mapping)

    M_bar = greedyH((len(set(mapping)), ), W_bar)
    # measure with the remaining (1 - ratio) share; dawa consumed ratio * eps
    y = x_bar.laplace(M_bar, eps * (1 - self.ratio))
    x_bar_hat = least_squares(M_bar, y)

    x_hat = support.expansion_matrix(mapping) * x_bar_hat

    return x_hat
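# All DAWA-style plans above split the budget the same way: a ratio
# fraction of eps is consumed by the data-dependent partition selection
# (x.dawa / ahp_partition), and the remaining (1 - ratio) * eps pays for
# the Laplace measurement. A standalone sketch of that accounting,
# assuming unit L1 sensitivity per measurement row (illustrative values
# only, not part of any plan above):

def budget_split_sketch(eps=1.0, ratio=0.25, sensitivity=1.0):
    eps_partition = ratio * eps                # spent choosing the partition
    eps_measure = (1 - ratio) * eps            # spent on the noisy measurement
    laplace_scale = sensitivity / eps_measure  # scale b of Lap(b) per query answer
    return eps_partition, eps_measure, laplace_scale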
def Run(self, W, x, eps, seed):
    domain_dimension = len(self.domain_shape)
    eps_share = util.old_div(float(eps), domain_dimension)

    x = x.flatten()
    prng = np.random.RandomState(seed)

    Ms = []
    ys = []
    scale_factors = []
    for i in range(domain_dimension):
        # Reduce domain to get marginals
        marginal_mapping = mapper.MarginalPartition(
            domain_shape=self.domain_shape, proj_dim=i).mapping()
        reducer = transformation.ReduceByPartition(marginal_mapping)
        x_i = reducer.transform(x)

        if self.domain_shape[i] < 50:
            # run identity subplan
            M_i = selection.Identity(x_i.shape).select()
            y_i = measurement.Laplace(M_i, eps_share).measure(x_i, prng)
            noise_scale_factor = laplace_scale_factor(M_i, eps_share)
        else:
            # run dawa subplan
            W = get_matrix(W)
            W_i = W * support.expansion_matrix(marginal_mapping)

            dawa = pmapper.Dawa(eps_share, self.ratio, self.approx)
            mapping = dawa.mapping(x_i, prng)

            reducer = transformation.ReduceByPartition(mapping)
            x_bar = reducer.transform(x_i)
            W_bar = W_i * support.expansion_matrix(mapping)

            M_bar = selection.GreedyH(x_bar.shape, W_bar).select()
            y_i = measurement.Laplace(
                M_bar, eps_share * (1 - self.ratio)).measure(x_bar, prng)
            noise_scale_factor = laplace_scale_factor(
                M_bar, eps_share * (1 - self.ratio))

            # expand the dawa reduction
            M_i = M_bar * support.reduction_matrix(mapping)

        MM = M_i * support.reduction_matrix(marginal_mapping)
        Ms.append(MM)
        ys.append(y_i)
        scale_factors.append(noise_scale_factor)

    x_hat = inference.LeastSquares(method='lsmr').infer(Ms, ys, scale_factors)

    return x_hat
def Run(self, W, x, eps, seed):
    x = x.flatten()
    prng = np.random.RandomState(seed)

    if self.workload_based:
        W = get_matrix(W)
        mapping = mapper.WorkloadBased(W).mapping()
        reducer = transformation.ReduceByPartition(mapping)
        x = reducer.transform(x)
        # Reduce workload
        # W = support.reduce_queries(mapping, W)
        W = W * support.expansion_matrix(mapping)
        self.domain_shape = x.shape

    if len(self.domain_shape) == 2:
        # apply hilbert transform to convert 2d domain into 1d
        hilbert_mapping = mapper.HilbertTransform(self.domain_shape).mapping()
        domain_reducer = transformation.ReduceByPartition(hilbert_mapping)
        x = domain_reducer.transform(x)

        W = get_matrix(W)
        W = W * support.expansion_matrix(hilbert_mapping)

        dawa = pmapper.Dawa(eps, self.ratio, self.approx)
        mapping = dawa.mapping(x, prng)
    elif len(self.domain_shape) == 1:
        W = get_matrix(W)
        dawa = pmapper.Dawa(eps, self.ratio, self.approx)
        mapping = dawa.mapping(x, prng)

    reducer = transformation.ReduceByPartition(mapping)
    x_bar = reducer.transform(x)
    W_bar = W * support.expansion_matrix(mapping)

    M_bar = selection.GreedyH(x_bar.shape, W_bar).select()
    if not isinstance(M_bar, np.ndarray):
        M_bar = M_bar.toarray()
    y = measurement.Laplace(M_bar, eps * (1 - self.ratio)).measure(x_bar, prng)
    x_bar_hat = inference.LeastSquares().infer(M_bar, y)

    x_bar_hat_exp = support.expansion_matrix(mapping) * x_bar_hat

    if len(self.domain_shape) == 1:
        return x_bar_hat_exp
    elif len(self.domain_shape) == 2:
        return support.expansion_matrix(hilbert_mapping) * x_bar_hat_exp
def Run(self, W, x, eps):
    domain_dimension = len(self.domain_shape)
    eps_share = util.old_div(float(eps), domain_dimension)

    Ms = []
    ys = []
    scale_factors = []
    for i in range(domain_dimension):
        # Reduce domain to get marginals
        marginal_mapping = marginal_partition(self.domain_shape, i)
        x_i = x.reduce_by_partition(marginal_mapping)

        if self.domain_shape[i] < 50:
            # run identity subplan
            M_i = identity((self.domain_shape[i], ))
            y_i = x_i.laplace(M_i, eps_share)
            noise_scale_factor = laplace_scale_factor(M_i, eps_share)
        else:
            # run dawa subplan
            W_i = W * support.expansion_matrix(marginal_mapping)

            mapping = x_i.dawa(self.ratio, self.approx, eps_share)
            x_bar = x_i.reduce_by_partition(mapping)
            W_bar = W_i * support.expansion_matrix(mapping)

            M_bar = greedyH((len(set(mapping)), ), W_bar)
            y_i = x_bar.laplace(M_bar, eps_share * (1 - self.ratio))
            noise_scale_factor = laplace_scale_factor(
                M_bar, eps_share * (1 - self.ratio))

            # expand the dawa reduction
            M_i = M_bar * support.reduction_matrix(mapping)

        # TODO: Ideally this would be just M_i * support.reduction_matrix(marginal_mapping)
        # but currently that returns an int type matrix
        # because the type of P_i is int
        MM = (support.reduction_matrix(marginal_mapping).T * M_i.T).T
        Ms.append(MM)
        ys.append(y_i)
        scale_factors.append(noise_scale_factor)

    x_hat = least_squares(Ms, ys, scale_factors)

    return x_hat
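# The two marginal plans above rely on sequential composition: each of the
# d = len(domain_shape) marginals gets an equal share eps / d, so the
# shares sum back to the total budget. A sketch of that accounting
# (marginal_budget_shares is a hypothetical helper mirroring eps_share):

def marginal_budget_shares(eps, domain_shape):
    d = len(domain_shape)
    shares = [float(eps) / d] * d   # one share per marginal
    assert abs(sum(shares) - eps) < 1e-9
    return shares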
def workload_based(x, W=None):
    '''workload-based domain reduction'''
    mapping = mapper.WorkloadBased(W).mapping()
    x = x.reduce_by_partition(mapping)
    if W is not None:
        W = support.get_matrix(W)
        W = W * support.expansion_matrix(mapping)
    return x, W
def Run(self, W, x, eps, seed):
    x = x.flatten()
    prng = np.random.RandomState(seed)

    striped_vectors = mapper.Striped(self.domain, self.stripe_dim).partitions()
    hd_vector = support.combine_all(striped_vectors)
    striped_mapping = hd_vector.flatten()

    x_sub_list = meta.SplitByPartition(striped_mapping).transform(x)

    Ms = []
    ys = []
    scale_factors = []
    group_idx = sorted(set(striped_mapping))

    # Given a group id on the full vector, recover the group id for each partition
    # put back in loop to save memory
    self.subgroups = {}
    for i in group_idx:
        selected_idx = np.where(hd_vector == i)
        ans = [p[idx[0]] for p, idx in zip(striped_vectors, selected_idx)]
        self.subgroups[i] = ans

    for i in group_idx:
        x_i = x_sub_list[group_idx.index(i)]
        # overwriting standard projection for efficiency
        W_i = self.project_workload(W, striped_vectors, hd_vector, i)

        dawa = pmapper.Dawa(eps, self.ratio, self.approx)
        mapping = dawa.mapping(x_i, prng)
        reducer = transformation.ReduceByPartition(mapping)
        x_bar = reducer.transform(x_i)
        W_bar = W_i * support.expansion_matrix(mapping)

        M_bar = selection.GreedyH(x_bar.shape, W_bar).select()
        if not isinstance(M_bar, np.ndarray):
            M_bar = M_bar.toarray()
        y_i = measurement.Laplace(
            M_bar, eps * (1 - self.ratio)).measure(x_bar, prng)
        noise_scale_factor = laplace_scale_factor(
            M_bar, eps * (1 - self.ratio))

        # convert the measurement back to the original domain for inference
        P_i = support.projection_matrix(striped_mapping, i)
        M_i = (M_bar * support.reduction_matrix(mapping)) * P_i

        Ms.append(M_i)
        ys.append(y_i)
        scale_factors.append(noise_scale_factor)

    x_hat = inference.LeastSquares().infer(Ms, ys, scale_factors)

    return x_hat
def Run(self, W, x, eps):
    mapping = x.ahp_partition(self.n, self.ratio, self.eta, eps)
    x_bar = x.reduce_by_partition(mapping)

    M_bar = identity((len(set(mapping)), ))
    y_bar = x_bar.laplace(M_bar, eps * (1 - self.ratio))
    x_bar_hat = least_squares(M_bar, y_bar)

    x_hat = support.expansion_matrix(mapping) * x_bar_hat

    return x_hat
def Run(self, W, x, eps):
    striped_vectors = striped_partition(self.domain_shape, self.stripe_dim)
    hd_vector = support.combine_all(striped_vectors)
    striped_mapping = hd_vector.flatten()

    x_sub_list = x.split_by_partition(striped_mapping)

    Ms = []
    ys = []
    scale_factors = []
    group_idx = sorted(set(striped_mapping))

    # Given a group id on the full vector, recover the group id for each partition
    # put back in loop to save memory
    self.subgroups = {}
    for i in group_idx:
        selected_idx = np.where(hd_vector == i)
        ans = [p[idx[0]] for p, idx in zip(striped_vectors, selected_idx)]
        self.subgroups[i] = ans

    for i in group_idx:
        x_i = x_sub_list[group_idx.index(i)]
        # overwriting standard projection for efficiency
        W_i = self.project_workload(W, striped_vectors, hd_vector, i)

        mapping = x_i.dawa(self.ratio, self.approx, eps)
        x_bar = x_i.reduce_by_partition(mapping)
        W_bar = W_i * support.expansion_matrix(mapping)

        M_bar = greedyH((len(set(mapping)), ), W_bar)
        y_i = x_bar.laplace(M_bar, eps * (1 - self.ratio))
        noise_scale_factor = laplace_scale_factor(M_bar, eps * (1 - self.ratio))

        # convert the measurement back to the original domain for inference
        P_i = support.projection_matrix(striped_mapping, i)
        M_i = (M_bar * support.reduction_matrix(mapping)) * P_i

        Ms.append(M_i)
        ys.append(y_i)
        # use the scale factor matching the measurement budget above
        scale_factors.append(noise_scale_factor)

    x_hat = least_squares(Ms, ys, scale_factors)

    return x_hat
def Run(self, W, x, eps, seed):
    x = x.flatten()
    prng = np.random.RandomState(seed)

    if self.workload_based:
        mapping = mapper.WorkloadBased(W).mapping()
        reducer = transformation.ReduceByPartition(mapping)
        x = reducer.transform(x)
        # Reduce workload
        # W = support.reduce_queries(mapping, W)
        W = W * support.expansion_matrix(mapping)

    M = selection.Identity(x.shape).select()
    y = measurement.Laplace(M, eps).measure(x, prng)
    x_hat = inference.LeastSquares().infer(M, y)

    return x_hat
def Run(self, W, x, eps, seed):
    x = x.flatten()
    prng = np.random.RandomState(seed)

    striped_mapping = mapper.Striped(self.domain, self.stripe_dim).mapping()
    x_sub_list = meta.SplitByPartition(striped_mapping).transform(x)

    Ms = []
    ys = []
    scale_factors = []
    group_idx = sorted(set(striped_mapping))

    W = get_matrix(W)
    for i in group_idx:
        x_i = x_sub_list[group_idx.index(i)]
        P_i = support.projection_matrix(striped_mapping, i)
        W_i = W * P_i.T

        dawa = pmapper.Dawa(eps, self.ratio, self.approx)
        mapping = dawa.mapping(x_i, prng)
        reducer = transformation.ReduceByPartition(mapping)
        x_bar = reducer.transform(x_i)
        W_bar = W_i * support.expansion_matrix(mapping)

        M_bar = selection.GreedyH(x_bar.shape, W_bar).select()
        if not isinstance(M_bar, np.ndarray):
            M_bar = M_bar.toarray()
        y_i = measurement.Laplace(
            M_bar, eps * (1 - self.ratio)).measure(x_bar, prng)
        noise_scale_factor = laplace_scale_factor(
            M_bar, eps * (1 - self.ratio))

        M_i = (M_bar * support.reduction_matrix(mapping)) * P_i

        Ms.append(M_i)
        ys.append(y_i)
        scale_factors.append(noise_scale_factor)

    x_hat = inference.LeastSquares().infer(Ms, ys, scale_factors)

    return x_hat
def Run(self, W, x, eps, seed):
    x = x.flatten()
    prng = np.random.RandomState(seed)

    if self.workload_based:
        W = get_matrix(W)
        mapping = mapper.WorkloadBased(W).mapping()
        reducer = transformation.ReduceByPartition(mapping)
        x = reducer.transform(x)
        # Reduce workload
        # W = support.reduce_queries(mapping, W)
        W = W * support.expansion_matrix(mapping)
        self.domain_shape = x.shape

    M = selection.HB(self.domain_shape).select()
    if not isinstance(M, np.ndarray):
        M = M.toarray()
    y = measurement.Laplace(M, eps).measure(x, prng)
    x_hat = inference.LeastSquares().infer(M, y)

    return x_hat
def Client(kernel_service, domain, eta, ratio, n, eps_total):
    """ This is the code that would run on the client side.
        The client creates a protected data source, which it queries
        from time to time. The client also manipulates data returned
        from the protected data source by applying public operators
        locally.
    """
    # Protected data source mediates client-side access to kernel service
    R = cservice.ProtectedDataSource(kernel_service)

    # Filter data
    R = R.where('age >= 30 and age <= 39')
    R = R.project(['income'])

    # Transform relation to vector
    x = R.vectorize(domain)

    # Use fraction "ratio" of budget to determine reduced mapping
    mapping = x.ahp_partition(n, ratio, eta, eps_total)

    # Reduce x according to this mapping
    x_bar = x.reduce_by_partition(mapping)

    # Use remaining budget to get noisy x from reduced domain
    M_bar = identity((len(set(mapping)), ))
    y_bar = x_bar.laplace(M_bar, eps_total * (1 - ratio))

    # Infer actual x from noisy answer
    x_bar_hat = non_negative_least_squares(M_bar, y_bar)

    # Project inferred x back to original domain
    x_hat = support.expansion_matrix(mapping) * x_bar_hat

    # A Prefix workload of queries
    W = workload.Prefix1D(n)

    # Report query results
    print(W.get_matrix() * x_hat)
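# The Prefix1D workload used by the client answers the n prefix sums of a
# length-n vector. A sketch of its query matrix under the usual definition
# of 1D prefix queries (assumption: workload.Prefix1D is the lower-
# triangular all-ones matrix; prefix1d_matrix is a hypothetical stand-in):
import numpy as np

def prefix1d_matrix(n):
    return np.tril(np.ones((n, n)))   # row i sums bins 0..i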
def test_complementary_reduction_expansion(self):
    R = support.reduction_matrix(self.mapping)
    E = support.expansion_matrix(self.mapping)
    # reducing after expanding recovers the reduced domain exactly
    np.testing.assert_array_equal((R * E).toarray(), np.eye(5))
def test_expansion_matrix(self):
    M = support.expansion_matrix(self.mapping).toarray()
    self.assertEqual(M.shape, (10, 5))
    np.testing.assert_array_equal(np.nonzero(M)[1], self.mapping)
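# A self-contained numpy sketch of the matrices the two tests above
# exercise, for a hypothetical mapping that collapses 10 bins into 5
# groups (the tests' self.mapping is not shown, so this mapping and the
# averaging normalization of R are assumptions consistent with R * E = I):
import numpy as np

mapping = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
n, k = len(mapping), len(set(mapping))

E = np.zeros((n, k))
E[np.arange(n), mapping] = 1.0            # expansion: one 1 per row, group -> bins
R = E.T / E.sum(axis=0, keepdims=True).T  # reduction: averages the bins of each group

assert E.shape == (10, 5)
np.testing.assert_array_equal(np.nonzero(E)[1], mapping)
np.testing.assert_allclose(R @ E, np.eye(5))  # reduction undoes expansion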