# NOTE: these Run methods assume the enclosing module imports numpy as np and
# scipy.sparse as sparse, along with the library's mapper, transformation,
# selection, measurement, pmapper, pselection, meta, inference, support,
# workload, and util modules and its laplace_scale_factor / get_matrix /
# get_A / get_y helpers.

def Run(self, W, x, eps, seed):
    domain_dimension = len(self.domain_shape)
    eps_share = util.old_div(float(eps), domain_dimension)

    x = x.flatten()
    prng = np.random.RandomState(seed)

    Ms = []
    ys = []
    scale_factors = []
    for i in range(domain_dimension):
        # Reduce the domain to get the i-th marginal
        marginal_mapping = mapper.MarginalPartition(
            domain_shape=self.domain_shape, proj_dim=i).mapping()
        reducer = transformation.ReduceByPartition(marginal_mapping)
        x_i = reducer.transform(x)

        if self.domain_shape[i] < 50:
            # run identity subplan
            M_i = selection.Identity(x_i.shape).select()
            y_i = measurement.Laplace(M_i, eps_share).measure(x_i, prng)
            noise_scale_factor = laplace_scale_factor(M_i, eps_share)
        else:
            # run dawa subplan
            W = get_matrix(W)
            W_i = W * support.expansion_matrix(marginal_mapping)

            dawa = pmapper.Dawa(eps_share, self.ratio, self.approx)
            mapping = dawa.mapping(x_i, prng)
            reducer = transformation.ReduceByPartition(mapping)
            x_bar = reducer.transform(x_i)
            W_bar = W_i * support.expansion_matrix(mapping)

            M_bar = selection.GreedyH(x_bar.shape, W_bar).select()
            y_i = measurement.Laplace(
                M_bar, eps_share * (1 - self.ratio)).measure(x_bar, prng)
            noise_scale_factor = laplace_scale_factor(
                M_bar, eps_share * (1 - self.ratio))

            # expand the dawa reduction
            M_i = M_bar * support.reduction_matrix(mapping)

        MM = M_i * support.reduction_matrix(marginal_mapping)
        Ms.append(MM)
        ys.append(y_i)
        scale_factors.append(noise_scale_factor)

    x_hat = inference.LeastSquares(method='lsmr').infer(Ms, ys, scale_factors)

    return x_hat
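# All of the plans in this file record the value returned by
# laplace_scale_factor so that inference can down-weight noisier measurements.
# A minimal sketch of such a helper is shown below. It is an illustration, not
# the library's implementation: it assumes the standard Laplace calibration in
# which the noise scale is the L1 sensitivity of the measurement matrix (its
# maximum absolute column sum) divided by the epsilon spent on it.

import numpy as np

def laplace_scale_factor_sketch(M, eps):
    """Hypothetical helper: per-query Laplace noise scale for matrix M."""
    M = np.asarray(M, dtype=float)
    sensitivity = np.abs(M).sum(axis=0).max()  # L1 sensitivity of M
    return sensitivity / float(eps)

# Example: an identity measurement over 4 cells has sensitivity 1, so the
# per-query noise scale is 1 / eps.
assert laplace_scale_factor_sketch(np.eye(4), eps=0.5) == 2.0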
def Run(self, W, x, eps, seed):
    x = x.flatten()
    prng = np.random.RandomState(seed)

    striped_mapping = mapper.Striped(self.domain, self.stripe_dim).mapping()
    x_sub_list = meta.SplitByPartition(striped_mapping).transform(x)

    Ms = []
    ys = []
    scale_factors = []
    group_idx = sorted(set(striped_mapping))

    for i in group_idx:
        x_i = x_sub_list[group_idx.index(i)]
        P_i = support.projection_matrix(striped_mapping, i)
        M_bar = selection.HB(x_i.shape).select()
        y_i = measurement.Laplace(M_bar, eps).measure(x_i, prng)
        noise_scale_factor = laplace_scale_factor(M_bar, eps)

        M_i = M_bar * P_i

        Ms.append(M_i)
        ys.append(y_i)
        scale_factors.append(noise_scale_factor)

    x_hat = inference.LeastSquares().infer(Ms, ys, scale_factors)

    return x_hat
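# Each striped subplan contributes a measurement matrix M_i (mapped back to
# the full domain via P_i), a noisy answer vector y_i, and a noise scale, and
# LeastSquares().infer(Ms, ys, scale_factors) combines them. A minimal sketch
# of that combination, assuming dense inputs and that each measurement is
# weighted by the reciprocal of its noise scale before solving (the library's
# solver may differ):

import numpy as np
from scipy.sparse.linalg import lsmr

def least_squares_infer_sketch(Ms, ys, scale_factors):
    """Hypothetical noise-weighted least-squares inference."""
    A_blocks, b_blocks = [], []
    for M, y, scale in zip(Ms, ys, scale_factors):
        A_blocks.append(np.asarray(M, dtype=float) / scale)
        b_blocks.append(np.asarray(y, dtype=float).flatten() / scale)
    A = np.vstack(A_blocks)
    b = np.concatenate(b_blocks)
    return lsmr(A, b)[0]  # minimizes ||A x - b||_2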
def Run(self, W, x, eps, seed):
    x = x.flatten()
    prng = np.random.RandomState(seed)

    striped_vectors = mapper.Striped(self.domain, self.stripe_dim).partitions()
    hd_vector = support.combine_all(striped_vectors)
    striped_mapping = hd_vector.flatten()

    x_sub_list = meta.SplitByPartition(striped_mapping).transform(x)

    Ms = []
    ys = []
    scale_factors = []
    group_idx = sorted(set(striped_mapping))

    # Given a group id on the full vector, recover the group id for each
    # partition (kept in its own loop to save memory).
    self.subgroups = {}
    for i in group_idx:
        selected_idx = np.where(hd_vector == i)
        ans = [p[idx[0]] for p, idx in zip(striped_vectors, selected_idx)]
        self.subgroups[i] = ans

    for i in group_idx:
        x_i = x_sub_list[group_idx.index(i)]

        # overriding the standard projection for efficiency
        W_i = self.project_workload(W, striped_vectors, hd_vector, i)

        dawa = pmapper.Dawa(eps, self.ratio, self.approx)
        mapping = dawa.mapping(x_i, prng)
        reducer = transformation.ReduceByPartition(mapping)
        x_bar = reducer.transform(x_i)
        W_bar = W_i * support.expansion_matrix(mapping)

        M_bar = selection.GreedyH(x_bar.shape, W_bar).select()
        if not isinstance(M_bar, np.ndarray):
            M_bar = M_bar.toarray()

        y_i = measurement.Laplace(
            M_bar, eps * (1 - self.ratio)).measure(x_bar, prng)
        noise_scale_factor = laplace_scale_factor(
            M_bar, eps * (1 - self.ratio))

        # convert the measurement back to the original domain for inference
        P_i = support.projection_matrix(striped_mapping, i)
        M_i = (M_bar * support.reduction_matrix(mapping)) * P_i

        Ms.append(M_i)
        ys.append(y_i)
        scale_factors.append(noise_scale_factor)

    x_hat = inference.LeastSquares().infer(Ms, ys, scale_factors)

    return x_hat
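# The striped plans lean on three mapping-derived matrices from the support
# module: reduction_matrix (sums the cells of each group), expansion_matrix
# (broadcasts a group value back to its cells), and projection_matrix (selects
# the cells of one group). Plausible constructions are sketched below, assuming
# a mapping is an array with one integer group id per cell; these mirror how
# the matrices are used above, not necessarily how the library builds them.

import numpy as np
from scipy import sparse

def reduction_matrix_sketch(mapping):
    """R (groups x cells): R @ x sums x within each group."""
    mapping = np.asarray(mapping)
    groups = np.unique(mapping)
    rows = np.searchsorted(groups, mapping)
    cols = np.arange(mapping.size)
    return sparse.csr_matrix((np.ones(mapping.size), (rows, cols)),
                             shape=(groups.size, mapping.size))

def expansion_matrix_sketch(mapping):
    """E = R.T (cells x groups): E @ x_bar copies group values to their cells."""
    return reduction_matrix_sketch(mapping).T.tocsr()

def projection_matrix_sketch(mapping, group):
    """P_i (group cells x all cells): P_i @ x extracts the cells of `group`."""
    mapping = np.asarray(mapping)
    idx = np.flatnonzero(mapping == group)
    return sparse.csr_matrix((np.ones(idx.size), (np.arange(idx.size), idx)),
                             shape=(idx.size, mapping.size))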
def Run(self, W, x, eps, seed):
    x = x.flatten()
    prng = np.random.RandomState(seed)
    domain_size = np.prod(self.domain_shape)

    # Start with a uniform estimate of x
    x_hat = np.array([self.data_scale / float(domain_size)] * domain_size)

    # non-zero regularization to avoid very long convergence times
    nnls = inference.NonNegativeLeastSquares(l1_reg=1e-6, l2_reg=1e-6)

    measuredQueries = []
    M_history = []
    y_history = []
    noise_scales = []

    if self.total_noise_scale != 0:
        M_history.append(workload.Total(domain_size))
        y_history.append(np.array([self.data_scale]))
        noise_scales.append(self.total_noise_scale)

    for i in range(1, self.rounds + 1):
        eps_round = eps / float(self.rounds)

        # SW + SH2: privately select the worst-approximated workload query,
        # then measure it with equi-width interval queries
        worst_approx = pselection.WorstApprox(W,
                                              measuredQueries,
                                              x_hat,
                                              eps_round * self.ratio)
        W_next = worst_approx.select(x, prng)
        measuredQueries.append(W_next.mwem_index)

        M = selection.AddEquiWidthIntervals(W_next, i).select()

        laplace = measurement.Laplace(M, eps_round * (1 - self.ratio))
        y = laplace.measure(x, prng)

        # default: use the full measurement history for inference
        M_history.append(M)
        y_history.append(y)
        noise_scales.append(laplace_scale_factor(M, eps_round * (1 - self.ratio)))

        x_hat = nnls.infer(M_history, y_history, noise_scales)

    return x_hat
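# Budget accounting for the loop above: each round spends eps / rounds, split
# between private selection (eps_round * ratio, inside WorstApprox) and the
# Laplace measurement (eps_round * (1 - ratio)). Under sequential composition
# the rounds sum back to the total budget; a quick check of that arithmetic:

rounds, eps, ratio = 10, 1.0, 0.5
eps_round = eps / float(rounds)
spent = sum(eps_round * ratio + eps_round * (1 - ratio) for _ in range(rounds))
assert abs(spent - eps) < 1e-12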
def Run(self, W, x, eps, seed):
    x = x.flatten()
    prng = np.random.RandomState(seed)
    domain_size = np.prod(self.domain_shape)

    # Start with a uniform estimate of x
    x_hat = np.array([self.data_scale / float(domain_size)] * domain_size)

    W = get_matrix(W)
    if not isinstance(W, np.ndarray):
        W = W.toarray()

    measuredQueries = []
    nnls = inference.NonNegativeLeastSquares(method='new')

    M_history = np.empty((0, domain_size))
    y_history = []

    for i in range(1, self.rounds + 1):
        eps_round = eps / float(self.rounds)

        # SW + SH2
        worst_approx = pselection.WorstApprox(W,
                                              measuredQueries,
                                              x_hat,
                                              eps_round * self.ratio)
        W_next = worst_approx.select(x, prng)
        measuredQueries.append(W_next.mwem_index)

        M = selection.AddEquiWidthIntervals(W_next, i).select()
        if not isinstance(M, np.ndarray):
            M = M.toarray()

        laplace = measurement.Laplace(M, eps_round * (1 - self.ratio))
        y = laplace.measure(x, prng)

        # default: use history
        M_history = np.vstack([M_history, M])
        y_history.extend(y)

        if self.total_noise_scale != 0:
            total_query = sparse.csr_matrix([1] * domain_size)
            noise_scale = laplace_scale_factor(M, eps_round * (1 - self.ratio))
            x_hat = nnls.infer([total_query, M_history],
                               [[self.data_scale], y_history],
                               [self.total_noise_scale, noise_scale])
        else:
            x_hat = nnls.infer(M, y)

    return x_hat
def Run(self, W, x, eps, seed):
    x = x.flatten()
    prng = np.random.RandomState(seed)

    striped_mapping = mapper.Striped(self.domain, self.stripe_dim).mapping()
    x_sub_list = meta.SplitByPartition(striped_mapping).transform(x)

    Ms = []
    ys = []
    scale_factors = []
    group_idx = sorted(set(striped_mapping))

    W = get_matrix(W)
    for i in group_idx:
        x_i = x_sub_list[group_idx.index(i)]
        P_i = support.projection_matrix(striped_mapping, i)
        W_i = W * P_i.T

        dawa = pmapper.Dawa(eps, self.ratio, self.approx)
        mapping = dawa.mapping(x_i, prng)
        reducer = transformation.ReduceByPartition(mapping)
        x_bar = reducer.transform(x_i)
        W_bar = W_i * support.expansion_matrix(mapping)

        M_bar = selection.GreedyH(x_bar.shape, W_bar).select()
        if not isinstance(M_bar, np.ndarray):
            M_bar = M_bar.toarray()

        y_i = measurement.Laplace(
            M_bar, eps * (1 - self.ratio)).measure(x_bar, prng)
        noise_scale_factor = laplace_scale_factor(
            M_bar, eps * (1 - self.ratio))

        M_i = (M_bar * support.reduction_matrix(mapping)) * P_i

        Ms.append(M_i)
        ys.append(y_i)
        scale_factors.append(noise_scale_factor)

    x_hat = inference.LeastSquares().infer(Ms, ys, scale_factors)

    return x_hat
def Run(self, W, x, eps, seed):
    prng = np.random.RandomState(seed)
    domain_size = np.prod(self.domain_shape)

    # Start with a uniform estimate of x
    x_hat = np.array([self.data_scale / float(domain_size)] * domain_size)

    W = get_matrix(W)
    W_partial = sparse.csr_matrix(W.shape)

    nnls = inference.NonNegativeLeastSquares()

    M_history = np.empty((0, domain_size))
    y_history = []

    for i in range(1, self.rounds + 1):
        eps_round = eps / float(self.rounds)

        # SW + SH2
        worst_approx = pselection.WorstApprox(sparse.csr_matrix(W),
                                              W_partial,
                                              x_hat,
                                              eps_round * self.ratio)
        W_next = worst_approx.select(x, prng)
        M = selection.AddEquiWidthIntervals(W_next, i).select()
        W_partial += W_next

        laplace = measurement.Laplace(M, eps_round * (1 - self.ratio))
        y = laplace.measure(x, prng)

        # default: use history
        M_history = sparse.vstack([M_history, M])
        y_history.extend(y)

        if self.total_noise_scale != 0:
            total_query = sparse.csr_matrix([1] * domain_size)
            noise_scale = laplace_scale_factor(M, eps_round * (1 - self.ratio))
            x_hat = nnls.infer([total_query, M_history],
                               [[self.data_scale], y_history],
                               [self.total_noise_scale, noise_scale])
        else:
            x_hat = nnls.infer(M, y)

    return x_hat
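# In the two variants above, inference is *non-negative* least squares, and
# when total_noise_scale is set a single all-ones "total" query anchors the
# estimate's sum at data_scale. A minimal sketch of that solve, assuming the
# same reciprocal-noise-scale weighting as the unconstrained case and dense
# inputs (the library's NonNegativeLeastSquares may differ):

import numpy as np
from scipy.optimize import nnls

def nnls_with_total_sketch(M_history, y_history, noise_scale,
                           data_scale, total_noise_scale):
    """Hypothetical weighted NNLS over the history plus a total-count anchor."""
    M_history = np.asarray(M_history, dtype=float)
    total_query = np.ones((1, M_history.shape[1]))
    A = np.vstack([total_query / total_noise_scale,
                   M_history / noise_scale])
    b = np.concatenate([[data_scale / total_noise_scale],
                        np.asarray(y_history, dtype=float) / noise_scale])
    return nnls(A, b)[0]  # solves min ||A x - b||_2 subject to x >= 0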
def test_get_y(self):
    y = Laplace(self.A, self.eps_share).measure(self.X, self.prng)
    noise_scales = [laplace_scale_factor(self.A, self.eps_share)] * len(y)

    np.testing.assert_array_equal(np.diag(y * get_A(self.A, noise_scales)),
                                  get_y(y, noise_scales).flatten())
def test_get_A(self):
    y = Laplace(self.A, self.eps_share).measure(self.X, self.prng)
    noise_scales = [laplace_scale_factor(self.A, self.eps_share)] * len(y)

    np.testing.assert_array_equal(np.array(noise_scales),
                                  1 / np.diag(get_A(self.A, noise_scales)))
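# The two tests above pin down the contract of get_A and get_y: get_A rescales
# each row of the measurement matrix by the reciprocal of its noise scale, and
# get_y applies the same rescaling to the answers, so every row of the
# resulting system carries unit noise. A sketch consistent with both
# assertions (which additionally rely on self.A having a unit diagonal, as
# with identity-style selections):

import numpy as np

def get_A_sketch(A, noise_scales):
    """Rows of A divided by their noise scales."""
    scales = np.asarray(noise_scales, dtype=float)
    return np.asarray(A, dtype=float) / scales[:, None]

def get_y_sketch(y, noise_scales):
    """Answers divided by their noise scales, as a column vector."""
    scales = np.asarray(noise_scales, dtype=float)
    return (np.asarray(y, dtype=float) / scales).reshape(-1, 1)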