def numeric_sparse(data, query_list, T, c, epsilon, delta):
    if delta == 0:
        epsilon_1 = 8 * epsilon / 9
        epsilon_2 = 2 * epsilon / 9

        def sigma(eps):
            return 2 * c / eps
    else:
        epsilon_1 = np.sqrt(512) * epsilon / (np.sqrt(512) + 1)
        epsilon_2 = 2 * epsilon / (np.sqrt(512) + 1)

        def sigma(eps):
            return np.sqrt(32 * c * np.log(2 / delta)) / eps

    T_hat_count = T + random.laplace(scale=sigma(epsilon_1))
    count = 0
    answer_list = []
    for query in query_list:
        v_query_1 = random.laplace(scale=2 * sigma(epsilon_1))
        query_response = data.evaluate(query)
        if query_response + v_query_1 >= T_hat_count:
            v_query_2 = random.laplace(scale=sigma(epsilon_2))
            answer_list.append(query_response + v_query_2)
            count = count + 1
            T_hat_count = T + random.laplace(scale=sigma(epsilon_1))
        else:
            answer_list.append(STOP_SIGN)
        if count >= c:
            return answer_list
    return answer_list
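# A minimal usage sketch for the numeric_sparse routine above (a sparse-vector-style
# mechanism); this is not part of the original source. It assumes numpy is imported as
# np, that `random` refers to numpy.random, that STOP_SIGN is any sentinel object, and
# it fakes the `data.evaluate(query)` interface with a small counting-query wrapper.
# Parameter values are illustrative only.
import numpy as np
from numpy import random

STOP_SIGN = None  # sentinel appended for queries that fall below the noisy threshold


class CountingData:
    """Hypothetical dataset wrapper exposing the evaluate(query) interface."""

    def __init__(self, values):
        self.values = np.asarray(values)

    def evaluate(self, query):
        # each query is a predicate; the answer is how many records satisfy it
        return float(np.sum(query(self.values)))


data = CountingData(np.random.randint(0, 100, size=1000))
queries = [lambda v, t=t: v > t for t in (10, 30, 50, 70, 90)]
answers = numeric_sparse(data, queries, T=400, c=2, epsilon=1.0, delta=0)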
def find(data, domain, goal_number, failure, eps, sparse=True):
    """
    Based on "Locating a Small Cluster Privately"
    by Kobbi Nissim, Uri Stemmer, and Salil Vadhan. PODS 2016.
    Given a data set, finds the radius of an approximately minimal cluster of points
    with approximately the desired amount of points
    :param data: list of points in R^dimension
    :param domain: tuple(absolute value of domain's end as int, minimum intervals in domain as float)
    :param goal_number: the number of desired points in the resulting cluster
    :param failure: 0 < float < 1. chances that the procedure will fail to return an answer
    :param eps: float > 0. privacy parameter
    :param sparse: bool. whether to use the sparse domain construction (__sparse_domain__) instead of the regular one
    :return: the radius of the resulting cluster
    """
    # max(abs(np.min(data)), np.max(data))
    all_distances = distances(data)  # TODO change variable name
    # 'a' needs to be greater than - log(domain[0] / failure) / eps
    a = 2 * log(domain[0] / failure) / eps
    thresh = goal_number - a - log(1 / failure) / eps
    # TODO verify that the noise addition is correct
    if __max_average_ball__(0, all_distances, goal_number) + laplace(0, 1 / eps, 1) > thresh:
        return 0
    dimension = data.shape[1]
    # TODO maybe a little less sparse?
    if sparse:
        new_domain = __sparse_domain__(domain, dimension)
    else:
        new_domain = __create_regular_domain__(domain, dimension)

    def quality(d, r):
        return min(goal_number - __max_average_ball__(r / 2, all_distances, goal_number),
                   __max_average_ball__(r, all_distances, goal_number) - goal_number + 2 * a) / 2

    return exponential_mechanism_big(data, new_domain, quality, eps / 2)
def sanitize(samples, alpha, beta, eps, delta):
    """
    :param samples:
    :param alpha:
    :param beta:
    :param eps:
    :param delta:
    :return:
    """
    max_sample = max(samples)
    dim = ceil(log2(max_sample + 1))
    end_domain = 2**int(dim)
    remaining_samples = set(range(end_domain))
    est = dict.fromkeys(remaining_samples, 0)
    new_beta = alpha * beta / 4
    new_eps = eps / sqrt(32 * log(5 / delta) / alpha)
    new_delta = alpha * delta / 5
    for i in range(int(2 / alpha)):
        q = __point_choice_quality__(remaining_samples)
        b = choosing_mechanism_big(samples, remaining_samples, q, 1, alpha / 2, new_beta, new_eps, new_delta)
        if b != 'bottom':
            remaining_samples.remove(b)
            # remaining_samples[b] -= 1
            # remaining_samples += Counter()
            est[b] = concept_query(samples, point_concept(b)) + laplace(0, 1 / eps / len(samples), 1)[0]
    return est
def get_noisy_distribution_of_attributes(attributes, encoded_dataset, epsilon=0.1):
    data = encoded_dataset.copy().loc[:, attributes]
    data['count'] = 1
    stats = data.groupby(attributes).sum()

    iterables = [range(int(encoded_dataset[attr].max()) + 1) for attr in attributes]
    full_space = DataFrame(columns=attributes, data=list(product(*iterables)))
    stats.reset_index(inplace=True)
    stats = merge(full_space, stats, how='left')
    stats.fillna(0, inplace=True)

    if epsilon:
        k = len(attributes) - 1
        num_tuples, num_attributes = encoded_dataset.shape
        noise_para = laplace_noise_parameter(k, num_attributes, num_tuples, epsilon)
        laplace_noises = laplace(0, scale=noise_para, size=stats.index.size)
        stats['count'] += laplace_noises
        stats.loc[stats['count'] < 0, 'count'] = 0

    return stats
def _get_attribute_frequency_counts(self, attributes, encoded_dataset):
    """ Differentially private mechanism to get attribute frequency counts"""
    # Get attribute counts for category combinations present in data
    counts = encoded_dataset.groupby(attributes).size()
    counts.name = 'count'
    counts = counts.reset_index()

    # Get all possible attribute combinations
    attr_combs = [range(self.DataDescriber.attr_dict[attr].domain_size) for attr in attributes]
    full_space = DataFrame(columns=attributes, data=list(product(*attr_combs)))
    full_counts = merge(full_space, counts, how='left')
    full_counts.fillna(0, inplace=True)

    # Get Laplace noise sample
    noise_sample = laplace(0, scale=self.laplace_noise_scale, size=full_counts.index.size)

    full_counts['count'] += noise_sample
    full_counts.loc[full_counts['count'] < 0, 'count'] = 0

    return full_counts
def noise_iid(size, noise_scale, noise_tail="normal", **kwargs):
    if noise_tail == "normal":
        return npr.normal(0, 1, size=size) * noise_scale
    elif noise_tail == "laplace":
        return npr.laplace(0, 1, size=size) * noise_scale
    elif noise_tail == "cauchy":
        return npr.standard_cauchy(size=size) * noise_scale
def randomize(self, oh_data, private=True):
    """ Add Laplace noise. """
    for row in oh_data:
        for i in range(len(row)):
            b = laplace(scale=1.0 / self.epsilon) if private else 0
            row[i] += b
    return oh_data
def histograms(data, dimension, shift, side, eps, delta):
    """
    Based on Theorem 2.5 from "Locating a Small Cluster Privately"
    by Kobbi Nissim, Uri Stemmer, and Salil Vadhan. PODS 2016.
    find parts of R^d that contain a lot of data-points
    when partitioning R^d into boxes of the same size.
    the boxes partitioning is given by the shift and the size of the 'boxes'
    :param data: list of points in R^dimension
    :param dimension: the dimension of the space which the points are taken from
    :param shift: the partition's shift, the i-th value represents the shift in the i-th axis
    :param side: the side-length of each 'box' in the partition
    :param eps: privacy parameter
    :param delta: privacy parameter
    :return: parts of the partition that contain a lot of data-points
    """
    my_box = partial(__box_containing_point__, partition=shift, dimension=dimension, side_length=side)
    # those are the parts of the partition that have at least one point,
    # where each box appears as many times as the number of points in it
    boxes = [my_box(point) for point in data]
    boxes_quality = Counter(boxes)
    non_zero = False
    for b in boxes_quality:
        boxes_quality[b] += laplace(0, 2 / eps, 1)[0]
        if boxes_quality[b] < 2 * np.log(2 / delta) / eps:
            boxes_quality[b] = 0
        # the current boxes_quality won't be '0' so the process can return an answer
        elif not non_zero:
            non_zero = True
    if not non_zero:
        raise ValueError('No high quality box')
    return max(boxes_quality)
def variance_operation(elist, elist_sq, auths, dp):
    G = EcGroup(nid=conf.EC_GROUP)

    # E(x^2) = (S(ri^2)/N)
    plain_sum_sqs = list_sum_decryption(elist_sq, auths)
    # print "plain_sum_sqs: " + str(plain_sum_sqs)
    first = plain_sum_sqs / len(elist)

    # E(x)^2 = (S(ri)/N)^2
    esum = Classes.Ct.sum(elist)
    plain_sum = collective_decryption(esum, auths)
    tmp = float(plain_sum) / float(len(elist))
    second = tmp * tmp
    # print "second: " + str(second)

    variance = first - second
    if (conf.DP and dp):
        # deltaf = 1/relays * 80 (80 is an arbitrary value for our data.)
        d = float(200) / float(len(elist))
        from numpy.random import laplace
        scale = float(d) / conf.EPSILON
        noise = laplace(0, scale)
        print "noise " + str(noise)
        noisy_var = variance + noise
    else:
        noisy_var = variance
    return str(noisy_var)
def median_operation(sk_sum, auths, dp):
    from numpy.random import laplace

    proto = Classes.get_median(sk_sum, min_b=0, max_b=1000, steps=20)  # Compute Median
    plain = None
    total_noise = 0
    while True:
        v = proto.send(plain)
        if isinstance(v, int):
            break
        plain = collective_decryption(v, auths)
        if (conf.DP and dp):
            # print "sksum:" + str(sk_sum.epsilon)
            if sk_sum.epsilon != 0 and sk_sum.delta != 0:
                noise = 0
                # print conf.DP and dp
                scale = float(sk_sum.d) / float(sk_sum.epsilon)
                # print sk_sum.d
                # print sk_sum.epsilon
                noise = int(round(laplace(0, scale)))
                # print "noise: " + str(noise)
                plain += noise
                total_noise += noise
                # print "*: " + str(plain)
    # print "Estimated median: " + str(v)
    # print "Total Noise Added: " + str(total_noise)
    return str(v)
def slaplace(nSources, nSamples):
    '''
    Generates an nSources x nSamples array of sources which are Laplace distributed.
    p(x) = (1/sqrt(2))*exp(-sqrt(2)*|x|)
    '''
    s = laplace(size=(nSources, nSamples))
    return s / s.std(axis=1)[:, newaxis]
def get_noise(yDim, xDim, white_noise_func_val):
    if white_noise_func_val == "Uniform":
        return rnd.random(size=(yDim, xDim))
    elif white_noise_func_val == "Beta":
        return rnd.beta(2, 2, size=(yDim, xDim))
    elif white_noise_func_val == "Normal":
        return rnd.normal(size=(yDim, xDim))
    elif white_noise_func_val == "Exponential":
        return rnd.exponential(size=(yDim, xDim))
    else:
        return rnd.laplace(size=(yDim, xDim))
def get_cropping_params(bb_loc_laplace_b_param=0.2, bb_size_laplace_b_param=0.06,
                        bbox_shrinkage_limit=0.6, bbox_expansion_limit=1.4):
    '''Get random draws for parameters involved in random crops. Output those params

    Args:
    ----
    bb_loc_laplace_b_param: Scale parameter in the laplace distribution that determines
        where to center the hypothetical bounding box used to create crop.
        Applied for draws of both x and y shifts.
    bb_size_laplace_b_param: Scale parameter in the laplace distribution that determines
        the size of the hypothetical bounding box used to create crop.
        Applied for draws of both x and y scaling.
    bbox_shrinkage_limit: The minimum size in each dimension of hypothetical bounding box
        for cropping, as a fraction of previous bounding box size in that dimension
    bbox_expansion_limit: Maximum size in each dimension of hypothetical bounding box
        for cropping, as a fraction of previous bounding box size in that dimension.

    Output:
    ------
    x_center_shift: fraction of box width to shift center of bbox for next crop
    y_center_shift: fraction of box height to shift center of bbox for next crop
    x_size_shift: bbox width used to make crop, as frac of previous bbox width
    y_size_shift: bbox height used to make crop, as frac of previous bbox height
    '''
    x_center_shift = laplace(scale=bb_loc_laplace_b_param)  # delta_x in paper
    y_center_shift = laplace(scale=bb_loc_laplace_b_param)  # delta_y in paper
    x_size_shift = (1 - laplace(scale=bb_size_laplace_b_param))
    x_size_shift = np.clip(x_size_shift, bbox_shrinkage_limit, bbox_expansion_limit)
    y_size_shift = (1 - laplace(scale=bb_size_laplace_b_param))
    y_size_shift = np.clip(y_size_shift, bbox_shrinkage_limit, bbox_expansion_limit)
    return x_center_shift, y_center_shift, x_size_shift, y_size_shift
def compute_average_BF(n, m, nsim, Xmean, Xstd, Ymean, Ystd, Ydist='Normal',
                       thetaval=np.linspace(0.001, 60, 100), rdseed=12231):
    if thetaval is None:
        ntheta = 1
    else:
        ntheta = np.shape(thetaval)[0]
    BFmat = np.zeros((nsim, ntheta))
    ProbM1mat = np.zeros((nsim, ntheta))
    for ll in range(nsim):
        np.random.seed(rdseed)
        X = np.reshape(normal(Xmean, Xstd, n), (n, 1))
        Zx = normal(Xmean, Xstd, int(m / 2))
        if Ydist == 'Normal':
            Y = np.reshape(normal(Ymean, Ystd, n), (n, 1))
            Zy = normal(Ymean, Ystd, int(m / 2))
        elif Ydist == 'Laplace':
            Y = np.reshape(laplace(Ymean, Ystd, n), (n, 1))
            Zy = laplace(Ymean, Ystd, int(m / 2))
        else:
            raise NotImplementedError
        Z = np.reshape(np.concatenate((Zx, Zy)), (m, 1))
        if thetaval is None:
            K = GaussianKernel()
            XY = np.reshape(np.concatenate((X, Y)), (2 * n, 1))
            median_heuristic_theta = K.get_sigma_median_heuristic(XY)
            BF_val, prob_M1_val = compute_ProbM1(X, Y, Z, np.array([median_heuristic_theta]), Independent=True)
        else:
            BF_val, prob_M1_val = compute_ProbM1(X, Y, Z, thetaval, Independent=True)
            median_heuristic_theta = None
        BFmat[ll, :] = BF_val.reshape(-1)
        ProbM1mat[ll, :] = prob_M1_val.reshape(-1)
        rdseed += 1
    return BFmat, ProbM1mat, median_heuristic_theta
def varRange(n):
    for i in range(1000):
        fillTable(normal(0, 1, n))
        addRow("normal", n)
    for i in range(1000):
        fillTable(standard_cauchy(n))
        addRow("cauchy", n)
    for i in range(1000):
        fillTable(laplace(0, 2**(-0.5), n))
        addRow("laplace", n)
    for i in range(1000):
        fillTable(poisson(10, n))
        addRow("poisson", n)
    for i in range(1000):
        fillTable(uniform(-1 * (3**0.5), 3**0.5, n))
        addRow("uniform", n)
def find(data, goal_number, failure, eps, delta, promise=-1):
    # TODO docstring
    """
    :param data:
    :param goal_number:
    :param failure:
    :param eps:
    :param delta:
    :param promise:
    :return:
    """
    domain = abs(max(np.max(data, axis=0)) - min(np.min(data, axis=0)))
    if promise == -1:
        promise = __promise__(data, domain, eps, delta, failure)
    all_distances = distances(data)
    if __max_average_ball__(0, all_distances, goal_number) + laplace(0, 4 / eps, 1) > \
            goal_number - 2 * promise - 4 / eps * log(2 / failure):
        return 0
    extended_domain = 2**int(ceil(log2(domain)))
    max_averages_by_radius = [__max_average_ball__(r, all_distances, goal_number)
                              for r in arange(0, extended_domain, 0.5)]

    def quality(d, r):
        try:
            return min(goal_number - max_averages_by_radius[r],
                       max_averages_by_radius[2 * r] - goal_number + 4 * promise) / 2
        except IndexError:
            raise IndexError('error while trying to qualify %f' % r)

    # TODO must complete those two
    def radius_interval_bounding(data_set, domain_end, j):
        return max(min(quality(data_set, i) for i in xrange(a, a + 2**j))
                   for a in xrange(domain_end - 2**j))

    def max_radius_in_interval(data_set, i):
        return max(quality(data_set, r) for r in i)

    return evaluate(data, domain, quality, promise, 0.5, eps, delta,
                    radius_interval_bounding, max_radius_in_interval)
def sub_sampling(point, num_of_samples, l_b=-1, u_b=1, scale=0.3):
    '''
    :param point: one point
    :param num_of_samples: number of samples generated
    :param l_b: lower bound of data
    :param u_b: upper bound of data
    :param scale: Laplace scale lambda in the density 1/(2*lambda) * exp(-|x - mu| / lambda)
    :return:
    '''
    point_dimension = len(point)
    data = np.zeros((num_of_samples, point_dimension))
    for i in range(num_of_samples):
        within_range = False
        while not within_range:
            cur_sample = laplace(point, scale * np.ones((point_dimension,)))
            if all([e <= u_b and e >= l_b for e in cur_sample]):
                within_range = True
                data[i, :] = cur_sample
    return data
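# Hedged usage sketch for sub_sampling above (illustrative, not from the original source):
# assumes numpy is imported as np and that `laplace` is numpy.random.laplace, so each
# sample is drawn around `point` with per-coordinate Laplace noise and redrawn until it
# falls inside [l_b, u_b] in every coordinate.
import numpy as np
from numpy.random import laplace

center = np.array([0.2, -0.1, 0.4])
neighbours = sub_sampling(center, num_of_samples=5, l_b=-1, u_b=1, scale=0.3)
print(neighbours.shape)  # (5, 3): five perturbed copies of the 3-dimensional point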
def add_laplacian_noise(image, loc=0.0, scale=1.0):
    """Add Laplacian noise to image.

    It has probability density function
        f(x; mu, lambda) = 1 / (2 * lambda) * exp(-|x - mu| / lambda)

    Parameters
    ----------
    image : numpy 2D array
        Input image
    loc : float
        The position, mu, of the distribution peak
    scale : float
        lambda, the exponential decay

    Returns
    -------
    numpy 2D array
        Image with Laplacian noise
    """
    noise = laplace(loc, scale, image.shape)
    return array(clip(image + noise, 0, 255), dtype=int)
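# Illustrative call to add_laplacian_noise above (an assumption about the intended use,
# not code from the original source): the function expects `laplace`, `array`, and `clip`
# to come from numpy, and it clips the noisy image back into the 8-bit range [0, 255].
import numpy as np
from numpy import array, clip
from numpy.random import laplace

image = np.full((4, 4), 128)                    # flat grey test image
noisy = add_laplacian_noise(image, loc=0.0, scale=10.0)
print(noisy.min() >= 0 and noisy.max() <= 255)  # True: values stay in [0, 255]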
def test_laplaces():
    # laplace seems to work
    dist_list = []
    param_list = []
    myvar1 = 1
    H = RA.uniform(-10, 10, (m, n))  # H is observed
    G = H + RA.laplace(0, myvar1, (m, n))
    for j in range(n):
        dist_list.append('laplace')
        param_list.append([myvar1])
    y = G @ x + RA.randn(m)
    myvar2 = 1
    dist_list.append('laplace')
    param_list.append([myvar2])
    lf = loglikelihood(H, y, dist_list, param_list)
    f, g = lf.func_and_grad(x)
    f1, _ = lf.func_and_grad(x + eps * e1)
    f2, _ = lf.func_and_grad(x + eps * e2)
    approx_grad = (1 / eps) * np.array([f1 - f, f2 - f])
    print(g[0])
    print(approx_grad)
def sample_and_encode(X, scale=1.):
    #X = diabetes.data
    #truth = numpy.zeros_like(X[0])
    #y = diabetes.target
    truth = rng.laplace(0, scale, size=X.shape[1])
    y = numpy.dot(X, truth)
    yc = y - y.mean()
    print
    print ' T |', '%6.3f |' % abs(truth).sum(),
    print ' '.join('%6.3f' % b for b in truth)
    guess = numpy.zeros_like(yc)
    for i, (guess, beta) in enumerate(lmj.lars.lars(yc, X)):
        print '%2d |' % i, '%6.3f |' % abs(beta).sum(),
        print ' '.join('%6.3f' % b for b in beta),
        print '| yerr %.5f' % numpy.linalg.norm(yc - guess),
        print ' xerr %.5f' % numpy.linalg.norm(truth - beta)
    return yc, guess
def mean_operation(elist, auths, dp):
    G = EcGroup(nid=conf.EC_GROUP)
    esum = Classes.Ct.sum(elist)
    plain_sum = collective_decryption(esum, auths)
    mean = float(plain_sum) / float(len(elist))
    if (conf.DP and dp):
        # deltaf = 1/relays * 80 (80 is an arbitrary value for our data.)
        d = float(100) / float(len(elist))
        from numpy.random import laplace
        scale = float(d) / conf.EPSILON
        noise = laplace(0, scale)
        noisy_mean = mean + noise
        # print "noise " + str(noise)
    else:
        noisy_mean = mean
    return str(noisy_mean)
def laplace_mechanism(x: Union[int, float, ndarray], sensitivity: float,
                      privacy_budget: PrivacyBudget) -> Union[float, ndarray]:
    """Differentially private Laplace mechanism.

    Add Laplacian noise to the value:

    .. math:: x + Laplace\left(\mu=0, \sigma=\\frac{\Delta f}{\epsilon}\\right)

    The result guarantees :math:`(\epsilon,0)`-differential privacy.

    :param x: Sensitive input data
    :param sensitivity: The global L1-sensitivity :math:`\Delta f` of `x`
    :param privacy_budget: The privacy budget :math:`(\epsilon,0)` used for the outputs
    :return: Input data protected by noise
    """
    check_positive(privacy_budget.epsilon)
    check_positive(sensitivity)
    shape = None if isinstance(x, (int, float)) else x.shape
    noise = laplace(loc=0., scale=sensitivity / privacy_budget.epsilon, size=shape)
    return x + noise
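# A minimal sketch of calling laplace_mechanism above. PrivacyBudget and check_positive
# are stand-ins invented here for illustration; the original module presumably supplies
# its own versions, along with `laplace` from numpy.random.
from collections import namedtuple
from numpy.random import laplace

PrivacyBudget = namedtuple('PrivacyBudget', ['epsilon', 'delta'])


def check_positive(value):
    # illustrative validator; the real one may raise a library-specific error
    if value <= 0:
        raise ValueError("expected a positive value")


true_count = 42  # a counting query has L1-sensitivity 1
noisy_count = laplace_mechanism(true_count, sensitivity=1.0,
                                privacy_budget=PrivacyBudget(epsilon=0.5, delta=0.0))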
def generate_data(D_x, D_y, N_int, N_ext, beta=[], sigma=0.1, DP_eps=1.0):
    if len(beta) == 0:
        beta = npr.randn(D_x, D_y)
    x_int = npr.randn(N_int, D_x)
    x_ext = npr.randn(N_ext, D_x)
    y_int = x_int.dot(beta) + sigma * npr.randn(N_int, D_y)
    y_ext = x_ext.dot(beta) + sigma * npr.randn(N_ext, D_y)
    xx_int = x_int.T.dot(x_int) / N_int
    xx_ext = x_ext.T.dot(x_ext) / N_ext
    xy_int = x_int.T.dot(y_int) / N_int
    xy_ext = x_ext.T.dot(y_ext) / N_ext
    DP_xx_scale = 1. / (DP_eps / 2.)
    DP_xy_scale = 2. * D_x**2 / (DP_eps / 2.)
    xx_ext_DP = xx_ext + sps.wishart.rvs(df=D_x + 1, scale=np.eye(D_x) * DP_xx_scale / N_ext)
    xy_ext_DP = xy_ext + npr.laplace(scale=DP_xy_scale / N_ext, size=(D_x, D_y))
    return ({
        'N_ext': N_ext,
        'N_int': N_int,
        'D_x': D_x,
        'D_y': D_y,
        'DP_xx_scale': DP_xx_scale,
        'DP_xy_scale': DP_xy_scale,
        'xx_int': xx_int,
        'xx_ext_DP': xx_ext_DP,
        'xy_int': xy_int,
        'xy_ext_DP': xy_ext_DP,
    }, {
        'x_int': x_int,
        'x_ext': x_ext,
        'y_int': y_int,
        'y_ext': y_ext,
        'xx_ext': xx_ext,
        'xy_ext': xy_ext,
        'beta': beta
    })
def sample(self, size=None):
    """Samples from the distribution

    Keyword Arguments:
        size {int or tuple of ints} -- Output shape. If the given shape is, e.g., (m, n, k),
            then m * n * k samples are drawn. If size is None (default), a single value is
            returned if loc and scale are both scalars. Otherwise, np.broadcast(loc, scale).size
            samples are drawn. (default: {None})

    Returns:
        ndarray or scalar -- Drawn samples from the parameterized distribution.
    """
    if self.distribution_type == DistributionType.UNIFORM:
        return random.uniform(self.low, self.high, size)
    elif self.distribution_type == DistributionType.GAUSSIAN:
        return random.normal(self.average, self.standard_deviation, size)
    elif self.distribution_type == DistributionType.LAPLACIAN:
        return random.laplace(self.average, self.standard_deviation, size)
    elif self.distribution_type == DistributionType.EXPONENTIAL:
        return int(math.ceil(random.exponential(self.beta)))
    elif self.distribution_type == DistributionType.CONSTANT:
        return self.value
    else:
        raise NotImplementedError("Distribution type not yet implemented")
def thresholdout(train, holdout, threshold, tolerance):
    if np.abs(train - holdout) < threshold + laplace(scale=tolerance):
        return train
    else:
        return holdout + laplace(scale=tolerance)
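# Worked example of thresholdout above (illustrative only): when the training and
# holdout estimates of a statistic agree to within the noisy threshold, the training
# value is released; otherwise a noisy holdout value is released. Assumes numpy is
# imported as np and `laplace` is numpy.random.laplace.
import numpy as np
from numpy.random import laplace

train_acc, holdout_acc = 0.81, 0.64              # a large gap suggests overfitting
released = thresholdout(train_acc, holdout_acc, threshold=0.04, tolerance=0.01)
# `released` will typically be close to holdout_acc here, since |0.81 - 0.64| exceeds
# the (noisy) threshold with high probability.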
def __rec_sanitize__(samples, domain_range, alpha, beta, eps, delta, dimension):
    # print domain_range
    # print calls
    global calls
    global san_data
    # step 1
    if calls == 0:
        return
    calls -= 1
    # step 2
    # the use of partial is redundant
    samples_domain_points = partial(points_in_subset, samples)
    noisy_points_in_range = samples_domain_points(subset=domain_range) + laplace(0, 1 / eps, 1)
    sample_size = len(samples)
    # step 3
    if noisy_points_in_range < alpha * sample_size / 8:
        base_range = domain_range
        san_data.extend(base_range[1] * noisy_points_in_range)
        return san_data
    # step 4
    domain_size = domain_range[1] - domain_range[0] + 1
    log_size = int(ceil(log(domain_size, 2)))
    # not needed
    # size_tag = 2**log_size

    # step 6
    def quality(data, j):
        return min(point_count_intervals_bounding(data, domain_range, j) - alpha * sample_size / 32,
                   3 * alpha * sample_size / 32 - point_count_intervals_bounding(data, domain_range, j - 1))

    # not needed if using exponential_mechanism
    # step 7
    # promise = alpha * sample_size / 32
    # step 8
    new_eps = eps / 3 / log_star(dimension)
    # new_delta = delta/3/log_star(dimension)
    # note the use of exponential_mechanism instead of rec_concave
    z_tag = exponential_mechanism(samples, range(log_size + 1), quality, new_eps)
    z = 2 ** z_tag
    # step 9
    if z_tag == 0:
        point_counter = Counter(samples)

        def special_quality(data, b):
            return point_counter[b]

        b = choosing_mechanism(samples, range(domain_range[0], domain_range[1] + 1),
                               special_quality, 1, alpha / 64., beta, eps, delta)
        a = b
    # step 10
    else:
        first_intervals = __build_intervals_set__(samples, 2 * z, domain_range[0], domain_range[1] + 1)
        second_intervals = __build_intervals_set__(samples, 2 * z_tag, domain_range[0], domain_range[1] + 1, True)
        intervals = [(i, i + 2 * z - 1) for i in first_intervals + second_intervals]
        a, b = choosing_mechanism(samples, intervals, points_in_subset, 2, alpha / 64., beta, eps, delta)
    if type(a) == str:
        raise ValueError("stability problem - choosing_mechanism returned 'bottom'")
    # step 11
    # although not mentioned I assume the noisy value should be rounded
    noisy_count_ab = int(samples_domain_points((a, b)) + laplace(0, 1 / eps, 1))
    san_data.extend([b] * noisy_count_ab)
    # step 12
    if a > domain_range[0]:
        rec_range = (domain_range[0], a - 1)
        __rec_sanitize__(samples, rec_range, alpha, beta, eps, delta, dimension)
    if b < domain_range[1]:
        rec_range = (b + 1, domain_range[1])
        __rec_sanitize__(samples, rec_range, alpha, beta, eps, delta, dimension)
    return san_data
def laplace(self, mean, scale):
    '''Parameters:\n
    mean: float.
    scale: float, >=0.
    '''
    return r.laplace(mean, scale, self.size)
    return out


if __name__ == "__main__":
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D

    # Generate random samples for testing the algorithm
    Ndim = 3
    Nsamples = 1000

    # Generate random points . . . some Gaussian some Laplacian
    mean = np.array(Ndim * [5.0, ])
    cov = sp.eye(Ndim) + 5.0
    samples = random.multivariate_normal(mean, cov, Nsamples)
    samples[:50] = random.laplace(5, scale=0.0000001, size=(50, mean.size))

    # Make a 3D figure
    fig = plt.figure(0)
    fig.clear()
    ax = fig.add_subplot(1, 1, 1, projection='3d')
    ax.scatter(samples[:, 0], samples[:, 1], samples[:, 2], marker='.')
def no_test_DP_median():
    d, w = 25, 7
    print("Sketch: d=%s w=%s (Cmp. size: %s%%)" % (d, w, (float(100 * d * w) / 1000)))

    # Setup the crypto
    G = EcGroup()
    sec = G.order().random()
    y = sec * G.generator()

    # Get some test data
    narrow_vals = 1000
    wide_vals = 200

    from numpy.random import laplace
    from collections import defaultdict
    datapoints = defaultdict(list)

    eps = ["Inf", 1, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001]
    for epsilon in eps:
        print(epsilon)
        for _ in range(40):
            vals = [gauss(300, 25) for _ in range(narrow_vals)]
            vals += [gauss(500, 200) for _ in range(wide_vals)]
            vals = sorted([int(v) for v in vals])
            median = vals[int(len(vals) / 2)]

            # Add each sample to the sketch
            cs = CountSketchCt.epsilondelta(0.05, 0.05, y)  # CountSketchCt(d, w, y)
            for x in vals:
                cs.insert("%s" % x)

            # Now test the median function
            proto = get_median(cs, min_b=0, max_b=1000, steps=20)
            plain = None
            no_decryptions = 0
            while True:
                v = proto.send(plain)
                if isinstance(v, int):
                    break
                no_decryptions += 1
                noise = 0
                if isinstance(epsilon, float):
                    scale = float(d) / epsilon
                    noise = int(round(laplace(0, scale)))
                plain = v.dec(sec) + noise

            print("Estimated median: %s\t\tAbs. Err: %s" % (v, abs(v - median)))
            datapoints[epsilon] += [abs(v - median)]

    import matplotlib.pyplot as plt
    from numpy import mean, std

    upper_err = []
    core_err = []
    lower_err = []
    for e in eps:
        samples = sorted(datapoints[e])
        core_err.append(mean(samples))
        upper_err.append(mean(samples) + std(samples) / (len(samples)**0.5))
        lower_err.append(mean(samples) - std(samples) / (len(samples)**0.5))

    eps_lab = range(len(eps))
    eps = ["Inf"] + [e * 10 for e in eps][1:]
    plt.plot(eps_lab, core_err, label="Absolute Error")
    plt.yscale('log')
    # v = v_issue / (len(cnt_issue)**0.5)
    plt.fill_between(x=eps_lab, y1=lower_err, y2=upper_err, alpha=0.2, color="b")
    plt.xticks(eps_lab, eps)
    plt.xlabel(r'Differential Privacy parameter (epsilon)')
    plt.ylabel('Absolute Error (mean & std. of mean)')
    plt.title(r'Median Estimation - Quality vs. Protection')
    # plt.axis([1, 10, 0, 1700])
    # plt.grid(True)
    plt.savefig("Quality.pdf")
    # plt.show()
    plt.close()
total = 0  # this is a global variable


# a function description can be written here
def sum2(arg1, arg2):
    # returns the sum of the 2 arguments.
    total = arg1 + arg2  # total is a local variable here.
    print("local variable inside the function: ", total)
    return total


# call the sum function
total = sum2(10, 20)
print("global variable outside the function: ", total)

# the global and nonlocal keywords can change the scope
print(nmr.laplace(1.1))

# Python 3 data structures
# lists can be modified; tuples and strings cannot
A = [12, 3, 45.6]
print(A.count('str'))
A.append(17.9)
print(A)
A.sort(reverse=False)
print(A)
A.sort(reverse=True)
print(A)

queue_me = deque(['1', 'aeee', 'dugu'])
print(queue_me.popleft())
queue_me.append('ssss')
queue_me.append('sad')
def Laplace_noise(privacy_budget, L1_sensitivity, dim):
    return rn.laplace(0, L1_sensitivity / float(privacy_budget), dim)
def gen_gaussian_linear_data(n=10, d=100, norm_beta=1, beta=None, var_eps=0.1,
                             s=None, seed=1, shift_type='None', shift_val=0.1,
                             logistic=False):
    '''Generate data

    n : int
        number of samples
    d : int
        dimension
    norm_beta: float
        norm of beta
    var_eps: float
        variance of epsilon
    snr = norm_beta^2 / var_eps
    '''
    npr.seed(seed=seed)

    # x
    x = npr.randn(n, d)
    if 'shift' in shift_type:
        x += shift_val
    elif 'scale' in shift_type:
        S2 = np.cumsum(np.ones(d))
        S2 /= np.sum(S2)
        S2 = np.diag(np.sqrt(S2 * d))
        x = x @ S2
    elif 'spike' in shift_type:
        v = np.ones(d)
        v /= npl.norm(v)
        S = np.eye(d) + (np.sqrt(shift_val) - 1) * np.outer(v, v)
        x = x @ S
    elif shift_type == 'lap':
        x = npr.laplace(size=(n, d))

    # beta
    if s == None:
        s = d
    if beta is None:
        beta = np.zeros(d)
        beta[:s] = npr.randn(s)
        beta[:s] /= npl.norm(beta[:s])
        if norm_beta == 'd':
            norm_beta = d
        beta[:s] *= norm_beta

    var_mult = 0 if var_eps == 0 else np.sqrt(var_eps)
    eps = var_mult * npr.randn(n)
    y = x @ beta + eps

    if logistic:
        # pass through an inv-logit function
        pr = 1 / (1 + np.exp(-y))
        # binomial distr (bernoulli response var)
        # n trials, probability p
        z = np.random.uniform(size=n)  # random number 0-1
        y = (z <= pr).astype(np.int32)

    return x, y, beta
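# Illustrative call to gen_gaussian_linear_data above (not from the original source):
# it assumes the module-level aliases npr = numpy.random and npl = numpy.linalg used
# inside the function. With shift_type='lap' the design matrix is drawn from a Laplace
# distribution instead of a standard Gaussian.
import numpy as np
import numpy.random as npr
import numpy.linalg as npl

x, y, beta = gen_gaussian_linear_data(n=50, d=10, norm_beta=1, var_eps=0.1,
                                       seed=3, shift_type='lap')
print(x.shape, y.shape, beta.shape)  # (50, 10) (50,) (10,)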
def Lap(mu, lamb):
    return rand.laplace(mu, lamb)
def slaplace(nSources, nSamples):
    """
    Generates an nSources x nSamples array of sources which are Laplace distributed.
    """
    s = laplace(size=(nSources, nSamples))
    return s / s.std(axis=1)[:, newaxis]