def mean_estimation_experiment():
    # generate data in [0, 1]
    data = np.clip(np.random.normal(loc=0.5, scale=0.2, size=[100000]), 0, 1)
    print("this is the generated data\n", data)

    discretized_data = [dp.discretization(value=value, lower=0, upper=1) for value in data]
    print("this is the discretized data\n", discretized_data)

    mean = np.average(data)
    print("the mean of the original data is: ", mean)
    mean_d = np.average(discretized_data)
    print("the mean of the discretized data is: ", mean_d)

    epsilon = 1
    # dp_data = [dp.random_response_old(B=value, p=dp.eps2p(epsilon)) for value in discretized_data]
    dp_data = [dp.random_response(bits=value, p=dp.eps2p(epsilon)) for value in discretized_data]

    est_one = dp.random_response_reverse(data_list=np.asarray(dp_data), p=dp.eps2p(epsilon))
    est_mean = est_one / len(dp_data)
    print("the estimated mean is: ", est_mean)
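# The experiment above relies on helpers from the `dp` module that are not shown here.
# Below is a self-contained sketch of the same pipeline in plain NumPy; the helper names
# (eps2p, stochastic_discretize, randomized_response, estimate_mean) are assumptions for
# illustration, not the library's API, and the debiasing uses the standard
# randomized-response correction (observed + p - 1) / (2p - 1).
import numpy as np

def eps2p(epsilon):
    # standard "keep" probability for randomized response: e^eps / (e^eps + 1)
    return np.e ** epsilon / (np.e ** epsilon + 1)

def stochastic_discretize(values, lower=0.0, upper=1.0):
    # round each value in [lower, upper] to {0, 1} so the bit's expectation equals
    # the value's relative position in the interval
    prob_one = (values - lower) / (upper - lower)
    return (np.random.random(values.shape) < prob_one).astype(int)

def randomized_response(bits, p):
    # keep each bit with probability p, flip it with probability 1 - p
    keep = np.random.random(bits.shape) < p
    return np.where(keep, bits, 1 - bits)

def estimate_mean(perturbed_bits, p):
    # invert E[observed mean] = p * true_mean + (1 - p) * (1 - true_mean)
    return (np.mean(perturbed_bits) + p - 1) / (2 * p - 1)

if __name__ == "__main__":
    data = np.clip(np.random.normal(loc=0.5, scale=0.2, size=100000), 0, 1)
    p = eps2p(1.0)
    noisy_bits = randomized_response(stochastic_discretize(data), p)
    print("true mean:     ", np.mean(data))
    print("estimated mean:", estimate_mean(noisy_bits, p))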
def random_response_for_hist(user_vector, epsilon):
    """
    Each user applies random response to their own one-hot encoded data.
    :param user_vector: [0,0,0,0,...,0,0,1,0,0,...]
    :param epsilon: privacy budget
    :return: [1,0,0,1,1,...]
    """
    return ldplib.random_response(bit_array=user_vector, p=ldplib.eps2p(epsilon=epsilon / 2))
def random_response_for_hist(user_vector, epsilon):
    """
    Each user applies random response to their own one-hot encoded data.
    :param user_vector: [0,0,0,0,...,0,0,1,0,0,...]
    :param epsilon: privacy budget
    :return: [1,0,0,1,1,...]
    """
    return dp.random_response(data=user_vector, p=dp.epsilon2probability(epsilon=epsilon / 2))
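# The two variants above differ only in which helper module they call (ldplib vs. dp);
# both spend epsilon/2 per bit because changing one user's category flips exactly two
# positions of the one-hot vector. A self-contained aggregation sketch in plain NumPy,
# with assumed helper names (eps2p, random_response, hist_estimation), could look like:
import numpy as np

def eps2p(epsilon):
    return np.e ** epsilon / (np.e ** epsilon + 1)

def random_response(bit_array, p):
    keep = np.random.random(bit_array.shape) < p
    return np.where(keep, bit_array, 1 - bit_array)

def hist_estimation(categories, domain_size, epsilon):
    # one-hot encode, perturb every bit with budget epsilon/2, then debias the column sums
    p = eps2p(epsilon / 2)
    onehot = np.eye(domain_size, dtype=int)[categories]
    noisy = np.vstack([random_response(row, p) for row in onehot])
    n = len(categories)
    return (noisy.sum(axis=0) - n * (1 - p)) / (2 * p - 1)

if __name__ == "__main__":
    cats = np.random.choice(4, size=50000, p=[0.1, 0.2, 0.3, 0.4])
    print("true counts:     ", np.bincount(cats, minlength=4))
    print("estimated counts:", np.round(hist_estimation(cats, domain_size=4, epsilon=1.0)))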
def run(data):
    """ This is a frequency estimation example. """
    frequency = np.sum(data) / len(data)
    print("baseline frequency = ", frequency)

    epsilon = 1
    p = ldplib.eps2p(epsilon)
    perturbed_data = ldplib.random_response(bit_array=data, p=p)

    f = np.sum(perturbed_data) / len(perturbed_data)
    estimated_frequency = (f + p - 1) / (2 * p - 1)
    print("estimated frequency = ", estimated_frequency)
    print("estimation error = ", np.fabs(estimated_frequency - frequency))
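# The correction (f + p - 1) / (2p - 1) inverts E[f] = p * freq + (1 - p) * (1 - freq),
# the expected frequency of ones after randomized response. A possible way to exercise
# run(), assuming ldplib is importable and the input is a 0/1 NumPy array (both are
# assumptions about the surrounding project):
if __name__ == "__main__":
    run(np.random.binomial(1, 0.3, size=100000))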
def my_test_for_random_response_pq():
    """
    Test the functions @random_response_pq and @random_response_pq_reverse.
    :return:
    """
    original_data_list = np.random.binomial(1, 0.8, size=[1000000]).reshape([100000, 10])
    original_sum = np.sum(original_data_list, axis=0)
    print(original_sum)

    p, q = 0.9, 0.1
    perturbed_data_list = [dp.random_response(data=original_data_list[i], p=p, q=q)
                           for i in range(len(original_data_list))]
    perturbed_sum = np.sum(np.asarray(perturbed_data_list), axis=0)
    print(perturbed_sum)

    adjust_sum = dp.random_response_reverse(data_list=np.asarray(perturbed_data_list), p=p, q=q)
    print(adjust_sum)
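# The asymmetric (p, q) variant reports a 1 with probability p when the true bit is 1
# and with probability q when it is 0. Below is a self-contained sketch of both directions
# in plain NumPy; the function names mirror the ones referenced in the docstring, but the
# bodies here are illustrative assumptions, not the library's implementation.
import numpy as np

def random_response_pq(bits, p, q):
    # report 1 with probability p if the true bit is 1, with probability q if it is 0
    rnd = np.random.random(bits.shape)
    return np.where(bits == 1, (rnd < p).astype(int), (rnd < q).astype(int))

def random_response_pq_reverse(perturbed, p, q):
    # unbiased column counts: E[sum] = c * p + (n - c) * q  =>  c = (sum - n * q) / (p - q)
    n = perturbed.shape[0]
    return (perturbed.sum(axis=0) - n * q) / (p - q)

if __name__ == "__main__":
    data = np.random.binomial(1, 0.8, size=[100000, 10])
    noisy = random_response_pq(data, p=0.9, q=0.1)
    print("true column sums:    ", data.sum(axis=0))
    print("adjusted column sums:", np.round(random_response_pq_reverse(noisy, p=0.9, q=0.1)))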
def kv_en_onehot(kv, epsilon):
    """
    Encode a key-value pair kv into a one-hot vector [a, b, c], where:
        a = 1 if k == 0
        b = 1 if the discretized v == -1
        c = 1 if the discretized v == 1
    """
    k, v = int(kv[0]), kv[1]
    onehot = np.zeros([3])
    if k == 0:
        onehot[0] = 1
    else:
        d_v = ldplib.discretization(v, -1, 1)
        if d_v == -1:
            onehot[1] = 1
        else:
            onehot[2] = 1
    return ldplib.random_response(bit_array=onehot, p=ldplib.eps2p(epsilon / 2))
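# The encoder above turns each key-value pair into a perturbed 3-bit one-hot vector.
# A hypothetical aggregation counterpart (kv_de_onehot is an assumed name, not part of
# the library) would debias the three column sums and recover the key frequency and the
# mean of the discretized values:
import numpy as np

def kv_de_onehot(perturbed_onehots, epsilon):
    # perturbed_onehots: an (n, 3) array of reports produced by kv_en_onehot
    p = np.e ** (epsilon / 2) / (np.e ** (epsilon / 2) + 1)
    n = perturbed_onehots.shape[0]
    counts = (perturbed_onehots.sum(axis=0) - n * (1 - p)) / (2 * p - 1)
    n_zero, n_neg, n_pos = counts            # estimated counts of k == 0, v == -1, v == +1
    f_k = (n_neg + n_pos) / n                # estimated frequency of k == 1
    m_v = (n_pos - n_neg) / max(n_neg + n_pos, 1e-9)  # estimated mean of v among k == 1 users
    return f_k, m_v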
def encode(self, v):
    bv = np.where(v >= self.r, 1, 0)
    return ldplib.random_response(bit_array=bv, p=(self.p + 1) / 2)
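# This encode() appears to belong to a comparison-based encoder: each value is reduced
# to one bit by comparing it against a reference point self.r, and the bit is kept with
# probability (self.p + 1) / 2. A minimal hypothetical wrapper (the constructor and the
# plain-NumPy flip below are assumptions, not the original class) is:
import numpy as np

class ComparisonEncoder:
    def __init__(self, r, p):
        self.r = r      # comparison threshold
        self.p = p      # controls the flip rate: keep probability is (p + 1) / 2

    def encode(self, v):
        bv = np.where(v >= self.r, 1, 0)
        keep = np.random.random(np.shape(bv)) < (self.p + 1) / 2
        return np.where(keep, bv, 1 - bv)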
def kv_en_f2m(kv, epsilon_k, epsilon_v, method, set_value=0):
    v = kv[1] if kv[0] == 1 else set_value
    p_k = ldplib.random_response(data=int(kv[0]), p=ldplib.epsilon2probability(epsilon_k))
    p_v = method(v, epsilon_v)
    return p_k, p_v
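# kv_en_f2m perturbs the key bit with epsilon_k and delegates the value to a caller-supplied
# mechanism with budget epsilon_v; absent keys are replaced by set_value first. A possible
# usage, where duchi_style is a hypothetical stand-in for the `method` argument (an unbiased
# 1-D mean mechanism for v in [-1, 1]), assuming ldplib is importable:
import numpy as np

def duchi_style(v, epsilon):
    # report +/- C so that the expectation of the report equals v
    c = (np.e ** epsilon + 1) / (np.e ** epsilon - 1)
    prob_pos = 0.5 + v * (np.e ** epsilon - 1) / (2 * (np.e ** epsilon + 1))
    return c if np.random.random() < prob_pos else -c

print(kv_en_f2m(kv=(1, 0.3), epsilon_k=1.0, epsilon_v=1.0, method=duchi_style))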