Esempio n. 1
0
def mean_estimation_experiment():
    """Demo: estimate the mean of synthetic data from randomized-response reports.

    Draws 100000 samples from a normal distribution (loc=0.5, scale=0.2)
    clipped to [0, 1], discretizes each sample with ``dp.discretization``,
    perturbs every discretized value with ``dp.random_response`` and finally
    divides the value returned by ``dp.random_response_reverse`` by the number
    of reports. Intermediate results are printed; nothing is returned.
    """
    # generated data
    data = np.clip(np.random.normal(loc=0.5, scale=0.2, size=[100000]), 0, 1)
    print("this is generated data\n", data)

    discretized_data = [
        dp.discretization(value=value, lower=0, upper=1) for value in data
    ]
    print("this is discretized data\n", discretized_data)

    mean = np.average(data)
    print("the mean_solutions of original data is: ", mean)

    mean_d = np.average(discretized_data)
    print("the mean_solutions of discretized data is: ", mean_d)

    epsilon = 1
    # The probability depends only on epsilon, so derive it once instead of
    # once per report inside the comprehension.
    # NOTE(review): assumes dp.eps2p is a pure function of epsilon - confirm.
    p = dp.eps2p(epsilon)

    dp_data = [dp.random_response(bits=value, p=p) for value in discretized_data]
    est_one = dp.random_response_reverse(data_list=np.asarray(dp_data), p=p)
    est_mean = est_one / len(dp_data)

    print("the estimated mean_solutions is: ", est_mean)
Esempio n. 2
0
def kv_de_f2m(p_kv_list: np.ndarray, epsilon_k, set_value=0):
    """Decode perturbed key-value reports into a (frequency, mean) estimate.

    :param p_kv_list: 2-D numpy array; column 0 is averaged as the reported
        key, column 1 is averaged as the reported value
    :param epsilon_k: privacy budget passed to ``ldplib.eps2p`` for the key
    :param set_value: value subtracted out, weighted by ``1 - f``, before the
        mean is normalised by ``f``
    :return: tuple ``(f, m)`` with the corrected frequency and the mean
    :raises TypeError: if ``p_kv_list`` is not a ``numpy.ndarray``
    """
    if not isinstance(p_kv_list, np.ndarray):
        # TypeError is still caught by ``except Exception`` in existing callers.
        raise TypeError(
            "p_kv_list must be a numpy.ndarray, got {}".format(
                type(p_kv_list).__name__))
    observed_f = np.average(p_kv_list[:, 0])
    p = ldplib.eps2p(epsilon=epsilon_k)
    # Undo the randomized-response bias on the averaged key column.
    f = (p - 1 + observed_f) / (2 * p - 1)
    m_all = np.average(p_kv_list[:, 1])
    m = (m_all - (1 - f) * set_value) / f
    return f, m
Esempio n. 3
0
def random_response_for_hist(user_vector, epsilon):
    """
    Apply the random response operation to one user's own data.
    :param user_vector: the user's vector, e.g. [0,0,0,0,...,0,0,1,0,0,...]
    :param epsilon: privacy budget
    :return: the perturbed vector, e.g. [1,0,0,1,1,...]
    """
    # eps2p is fed half of the budget.
    rr_p = ldplib.eps2p(epsilon=epsilon / 2)
    return ldplib.random_response(bit_array=user_vector, p=rr_p)
Esempio n. 4
0
def kv_de_bisample(p_kv_list: np.ndarray, epsilon):
    """Decode bi-sampled key-value reports into a (frequency, mean) estimate.

    Rows are split by the flag in column 0 (1 or 0); column 1 of each group
    is averaged and the two averages are combined with the probability
    returned by ``ldplib.eps2p(epsilon)``.

    :param p_kv_list: 2-D numpy array of ``[flag, bit]`` rows
    :param epsilon: privacy budget passed to ``ldplib.eps2p``
    :return: tuple ``(f, m)``
    """
    flags = p_kv_list[:, 0]
    f_pos = np.average(p_kv_list[flags == 1][:, 1])
    f_neg = np.average(p_kv_list[flags == 0][:, 1])

    p = ldplib.eps2p(epsilon)
    frequency = (2 * p - 2 + f_pos + f_neg) / (2 * p - 1)
    mean = (f_pos - f_neg) / (f_pos + f_neg + 2 * p - 2)
    return frequency, mean
Esempio n. 5
0
def run(data):
    """
    Frequency estimation example.

    Prints the baseline frequency of ``data``, perturbs the data with
    ``ldplib.random_response`` (epsilon fixed to 1), then prints the
    corrected frequency estimate and its absolute error.
    """
    baseline = np.sum(data) / len(data)
    print("baseline frequency = ", baseline)

    epsilon = 1
    p = ldplib.eps2p(epsilon)
    noisy = ldplib.random_response(bit_array=data, p=p)

    # Raw frequency of the perturbed data, then the bias correction.
    f = np.sum(noisy) / len(noisy)
    corrected = (f + p - 1) / (2 * p - 1)

    print("estimated frequency = ", corrected)
    print("estimation error = ", np.fabs(corrected - baseline))
Esempio n. 6
0
def bisample_decode(p_val_lst, epsilon):
    """Decode bi-sampled reports into ``(mr, m)``.

    :param p_val_lst: sequence of ``[flag, bit]`` rows; converted with
        ``np.asarray`` and split on whether the flag equals 1 or 0
    :param epsilon: privacy budget passed to ``ldplib.eps2p``
    :return: tuple ``(mr, m)`` computed from the per-group averages of
        column 1 and the probability ``p``
    """
    rows = np.asarray(p_val_lst)
    flags = rows[:, 0]

    def _second_column_mean(group):
        # Built-in sum/len on purpose: keeps the original accumulation order.
        column = group[:, 1]
        return 1.0 * sum(column) / len(column)

    f_pos = _second_column_mean(rows[flags == 1])
    f_neg = _second_column_mean(rows[flags == 0])

    p = ldplib.eps2p(epsilon)
    m = (f_pos - f_neg) / (f_pos + f_neg + 2 * p - 2)
    mr = (1 - f_pos - f_neg) / (2 * p - 1)
    return mr, m
Esempio n. 7
0
def kv_en_onehot(kv, epsilon):
    """
    Encode a kv pair as a 3-slot one-hot vector [a, b, c] and perturb it:
        a is set when k == 0
        b is set when the discretized v == -1
        c is set otherwise (k != 0 and the discretized v is not -1)
    The vector is passed through ldplib.random_response with
    p = ldplib.eps2p(epsilon / 2).
    """
    key = int(kv[0])
    value = kv[1]

    if key == 0:
        hot_index = 0
    elif ldplib.discretization(value, -1, 1) == -1:
        hot_index = 1
    else:
        hot_index = 2

    encoded = np.zeros([3])
    encoded[hot_index] = 1
    return ldplib.random_response(bit_array=encoded,
                                  p=ldplib.eps2p(epsilon / 2))
Esempio n. 8
0
def kv_en_f2m(kv, epsilon_k, epsilon_v, method, set_value=0):
    """Perturb a key-value pair; the key and the value are handled separately.

    :param kv: pair ``(k, v)``; ``v`` is only read when ``k == 1``
    :param epsilon_k: budget passed to ``ldplib.eps2p`` for the key
    :param epsilon_v: budget forwarded to ``method`` for the value
    :param method: callable ``method(value, epsilon_v)`` perturbing the value
    :param set_value: value used in place of ``v`` when ``k != 1``
    :return: tuple ``(perturbed_key, perturbed_value)``
    """
    if kv[0] == 1:
        value = kv[1]
    else:
        value = set_value

    # Key first, value second: same call order as before.
    perturbed_key = ldplib.random_response_old(bits=int(kv[0]),
                                               p=ldplib.eps2p(epsilon_k))
    perturbed_value = method(value, epsilon_v)
    return perturbed_key, perturbed_value
Esempio n. 9
0
    每个用户对自己的数据进行random response操作
    :param user_vector: [0,0,0,0,...,0,0,1,0,0,...]
    :param epsilon: privacy budget
    :return: [1,0,0,1,1,...]
    """
    return ldplib.random_response(bit_array=user_vector,
                                  p=ldplib.eps2p(epsilon=epsilon / 2))


if __name__ == '__main__':
    # Generate the data
    n = 1000000
    data_list = generate_data(user_number=n)
    # Privacy parameters
    epsilon = 1
    p = ldplib.eps2p(epsilon / 2)

    # Compute the histogram of the original data
    original_hist = np.sum(data_list, axis=0)
    print("this is original hist: \n", original_hist)
    # guessed_hist = original_hist * (2*p-1) + n * (1-p)
    # print("this is guessed hist: \n", guessed_hist)

    # The aggregator collects and processes the data: every user's vector is
    # perturbed individually, then the perturbed vectors are summed column-wise
    perturbed_data = np.asarray(
        [random_response_for_hist(user, epsilon) for user in data_list])
    perturbed_hist = np.sum(perturbed_data, axis=0)
    print("this is the hist by the aggregator: \n", perturbed_hist)

    # The aggregator corrects the data
    '''
Esempio n. 10
0
    # Compute the histogram of the original data
    original_hist = np.sum(data_list, axis=0)
    print("this is original hist: \n", original_hist)

    # Privacy parameters
    epsilon = 1

    # The aggregator collects and processes the data
    perturbed_data_list = np.asarray(
        [random_response_for_hist(user, epsilon) for user in data_list])
    print("this is the hist by the aggregator: \n",
          np.sum(perturbed_data_list, axis=0))

    # The aggregator corrects the data
    # estimate_hist = [dp.random_response_adjust(rr_sum, len(data_list), epsilon / 2) for rr_sum in rr_sums]
    # print(np.sum(estimate_hist))

    estimate_hist = dp.random_response_reverse(data_list=perturbed_data_list,
                                               p=dp.eps2p(epsilon=epsilon / 2))

    # Show the estimated histogram
    print("this is estimated hist: \n", estimate_hist)

    # Plot: original histogram on the left, estimated histogram on the right
    fig = plt.figure(figsize=[12, 5])
    ax1 = fig.add_subplot(121)  # 1x2 grid of subplots, first position
    ax1.bar(range(len(original_hist)), original_hist)
    ax2 = fig.add_subplot(122)
    ax2.bar(range(len(estimate_hist)), estimate_hist)
    plt.show()