Example #1
def optimal_factor(dist1,
                   dist2,
                   number_of_buckets,
                   x_range=50,
                   infty_budget=None):
    ## By default, only tolerate factors for which the infinity bucket plus the distinguishing events is <= delta(20), i.e., delta at eps = 20.
    ## To find a better factor, we sometimes want to tolerate more than delta(20), e.g., for Gaussians.
    if infty_budget is None:
        infty_budget = true_delta(dist1, dist2, e_eps=np.exp(20))
    dist1 = np.asarray(dist1)
    dist2 = np.asarray(dist2)
    (dist1_sanitized, dist2_sanitized) = sanitize_distributions(dist1, dist2)
    ## We can compute the mu and sigma without calling PB.
    mu = np.sum(
        np.multiply(np.log(dist1_sanitized / dist2_sanitized),
                    dist1_sanitized))
    sigma = np.sqrt(
        np.sum(
            np.multiply(
                np.power((np.log(dist1_sanitized / dist2_sanitized) - mu), 2),
                dist1_sanitized)))
    ## Look for the factor that causes the smallest error: delta / true_delta
    factor_candidate = lambda x: np.exp(
        (mu + x * sigma) / float(number_of_buckets))
    module_params = {
        'dist1_array': dist1_sanitized,
        'dist2_array': dist2_sanitized,
        'number_of_buckets': number_of_buckets,
        'error_correction': False,
    }
    errors = []
    ## Try 20 different values of x, from 1/x_range to x_range.
    for x in np.linspace(1 / float(x_range), x_range, 20, endpoint=True):
        module_params['factor'] = factor_candidate(x)
        instance = PB(**module_params)
        ## Only consider factors where the infinity bucket plus distinguishing events stays within the budget.
        if instance.infty_bucket + instance.distinguishing_events <= infty_budget:
            errors.append((instance.delta_of_eps_upper_bound(0) /
                           true_delta(dist1_sanitized, dist2_sanitized), x))
        del instance
    errors.sort(key=lambda x: x[0])
    if not errors:
        raise ValueError(
            "No factor found. Infinity bucket + mass of distinguishing events > delta(20) (eps = 20). "
            "Try a higher number of buckets (keyword 'number_of_buckets' in the PB constructor) "
            "or a wider range for x (keyword 'x_range', default is 50).")
    x = errors[0][1]
    return factor_candidate(x)
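
A usage sketch (an addition, not part of the original example): it assumes the helpers used above, PB, true_delta, and sanitize_distributions, are importable, and builds two hypothetical discretized Gaussians as input.

import numpy as np

# Hypothetical inputs: two shifted, discretized Gaussians on a common grid.
grid = np.linspace(-10, 10, 2001)
dist_a = np.exp(-grid**2 / 2.0)
dist_b = np.exp(-(grid - 1.0)**2 / 2.0)
dist_a /= dist_a.sum()  # each input distribution must sum to exactly 1
dist_b /= dist_b.sum()

factor = optimal_factor(dist_a, dist_b, number_of_buckets=100000)
print("chosen factor: 1 + %.2e" % (factor - 1.0))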
Example #2
def compose(distribution_a, distribution_b, compositions=1):
    privacybuckets = ProbabilityBuckets(
        number_of_buckets=100000,  # number of buckets; more buckets give a finer resolution
        factor=1 + 1e-5,  # depends on number_of_buckets; the multiplicative constant between two buckets
        dist1_array=distribution_a,  # distribution A
        dist2_array=distribution_b,  # distribution B
        caching_directory="./pb-cache",  # caching makes re-evaluations faster. Can be turned off for some cases.
        free_infty_budget=10 ** (-20),  # how much we can put in the infty bucket before first squaring
        error_correction=True,  # error correction. See publication for details
    )

    # Now we evaluate how the distribution looks after the given number of independent compositions.
    # The input can be an arbitrary positive integer, but powers of 2 are numerically the most stable.
    composed = privacybuckets.compose(compositions)
    composed.print_state()
    return composed
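
A minimal usage sketch (added; the grid and distributions below are hypothetical, and each input must sum to exactly 1 as the library requires):

import numpy as np

grid = np.linspace(-10, 10, 2001)
gauss_a = np.exp(-grid**2 / 2.0)
gauss_b = np.exp(-(grid - 1.0)**2 / 2.0)
gauss_a /= gauss_a.sum()  # normalize so each distribution sums to 1
gauss_b /= gauss_b.sum()

composed = compose(gauss_a, gauss_b, compositions=2**10)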
Example #3
def compute_initial_bounds(module_params):
    verbose = module_params.get('verbose', False)
    instance = PB(**module_params)
    true_delta_value = true_delta(module_params['dist1_array'],
                                  module_params['dist2_array'])
    error = instance.delta_of_eps_upper_bound(0) / true_delta_value
    infty_bucket = instance.infty_bucket + instance.distinguishing_events
    # infty_bucket_ratio = infty_bucket/true_delta_value
    if verbose:
        print("[*] FACTOR:\t\t\t\t 1 + %.20f" %
              (module_params['factor'] - 1.0))
        print("[*] INFTY:\t\t\t\t %.20f" % infty_bucket)
        print("[*] ERROR:\t\t\t\t %.20f" % error)
    return error, infty_bucket, true_delta_value
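
A hedged usage sketch (added): module_params mirrors the keyword arguments of the PB constructor used in the other examples; dist_a and dist_b stand in for normalized numpy arrays on a common support.

module_params = {
    'number_of_buckets': 100000,
    'factor': 1 + 1e-4,
    'dist1_array': dist_a,  # hypothetical normalized distribution
    'dist2_array': dist_b,  # hypothetical normalized distribution
    'error_correction': True,
    'verbose': True,
}
error, infty_bucket, true_delta_value = compute_initial_bounds(module_params)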
Example #4
# plt.plot(x_axis, truncatedBinA, label='Truncated Binomial A')
# plt.plot(x_axis, truncatedBinB, label='Truncated Binomial B')

plt.title("Input distributions (near mean)")
plt.xlabel("Noise")
plt.ylabel("mass")
plt.xlim(-2 * scale, 2 * scale)
plt.legend()
plt.show()

# Initialize privacy buckets for the Gaussian distributions.
privacybucketsG = ProbabilityBuckets(
    number_of_buckets=100000,  # number of buckets; more buckets give a finer resolution
    factor=1 + 1e-4,  # depends on number_of_buckets; the multiplicative constant between two buckets
    dist1_array=gaussA,  # distribution A
    dist2_array=gaussB,  # distribution B
    caching_directory="./pb-cacheGauss",  # caching makes re-evaluations faster. Can be turned off for some cases.
    free_infty_budget=10 ** (-20),  # how much we can put in the infty bucket before first squaring
    error_correction=True,  # error correction. See publication for details
)

# Now we evaluate how the distribution looks after the given number of independent compositions.
# The input can be an arbitrary positive integer, but powers of 2 are numerically the most stable.
privacybuckets_composedG = privacybucketsG.compose(compositions)

# Print status summary
privacybuckets_composedG.print_state()

# Initialize privacy buckets for the binomial distributions.
privacybucketsB2 = ProbabilityBuckets(
    number_of_buckets=100000,  # number of buckets; more buckets give a finer resolution
Example #5
distribution_A = np.array([0, 1. / norm, expeps / norm, delta])
distribution_B = np.array([delta, expeps / norm, 1. / norm, 0])

# Important: each input distribution needs to sum up to exactly 1!
distribution_A = distribution_A / np.sum(distribution_A)
distribution_B = distribution_B / np.sum(distribution_B)

# Initialize privacy buckets.
privacybuckets = ProbabilityBuckets(
    number_of_buckets=100000,  # number of buckets; more buckets give a finer resolution
    factor=1 + 1e-4,  # depends on number_of_buckets; the multiplicative constant between two buckets
    dist1_array=distribution_A,  # distribution A
    dist2_array=distribution_B,  # distribution B
    caching_directory="./pb-cache",  # caching makes re-evaluations faster. Can be turned off for some cases.
    free_infty_budget=10**(-20),  # how much we can put in the infty bucket before first squaring
    error_correction=True,  # error correction. See publication for details
)

# Now we evaluate how the distribution looks after 2**k independent compositions
k = 13
# input can be arbitrary positive integer, but exponents of 2 are numerically the most stable
privacybuckets_composed = privacybuckets.compose(2**k)

# Print status summary
privacybuckets_composed.print_state()
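
With the composed buckets in hand, the delta(eps) bounds can be queried via the same delta_of_eps_upper_bound / delta_of_eps_lower_bound methods that Example #1 and Example #6 use; a short added sketch:

for eps in (0.0, 0.5, 1.0):
    upper = privacybuckets_composed.delta_of_eps_upper_bound(eps)
    lower = privacybuckets_composed.delta_of_eps_lower_bound(eps)
    print("eps = %.1f: %.3e <= delta <= %.3e" % (eps, lower, upper))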
Example #6
def constructPB(module_params, n, endpoint=None):
    module_params = module_params.copy()

    # Construct numerical ADP bounds.

    module_params.update({
        'free_infty_budget': 10**(-20),
        'caching_directory': "./pb-cache",
        'error_correction': True,
    })

    if 'factor' not in module_params:
        module_params['factor'] = get_sufficient_big_factor(
            module_params,
            max_comps=n,
            target_delta=3**np.log2(n) * delta_dist_events(
                module_params['dist1_array'], module_params['dist2_array']))

    pb = ProbabilityBuckets(**module_params)
    pbn = pb.compose(n)

    # Prepare the data for plots

    plots = {
        'pbup': {
            'name': "PB upper bound",
            'color': "brown",
            'linestyle': "solid"
        },
        'pblow': {
            'name': "PB lower bound",
            'color': "blue",
            'linestyle': "dashed"
        },
        'cdp': {
            'name': "CDP",
            'color': "green",
            'linestyle': "dotted"
        },
        'rdp': {
            'name': "RDP",
            'color': "red",
            'linestyle': "dashdot"
        }
    }

    central_moment = lambda p, mean, lam: np.log(module_params['factor']) * np.sum(
        np.multiply(np.power(np.arange(len(p)) - len(p) / 2 - mean, lam), p))

    pbn_mean = central_moment(pbn.bucket_distribution, 0, 1)
    pbn_sigma = central_moment(pbn.bucket_distribution, pbn_mean, 2)**(1 / 2.0)

    eps_granularity = 2000
    eps_vector = np.linspace(0,
                             pbn_mean + 1 * pbn_sigma,
                             eps_granularity,
                             endpoint=True)

    # If no endpoint was given, find one automatically: scan the delta(eps) upper
    # bound and stop where the curve has flattened out (or once eps exceeds 1).
    if endpoint is None:
        plots['pbup']['ydata'] = [
            pbn.delta_of_eps_upper_bound(eps) for eps in eps_vector
        ]

        xlast = 1
        count = 0
        for index in range(len(plots['pbup']['ydata'])):
            x = plots['pbup']['ydata'][index]
            if xlast / x < 1.005:
                count += 1
            else:
                count = 0
            if count >= eps_granularity // 30 or eps_vector[index] > 1:
                endpoint = eps_vector[index]
                break
            xlast = x

    eps_vector = np.linspace(0, endpoint, 100, endpoint=True)

    # PB
    plots['pbup']['ydata'] = np.asarray(
        [pbn.delta_of_eps_upper_bound(eps) for eps in eps_vector])
    plots['pblow']['ydata'] = np.asarray(
        [pbn.delta_of_eps_lower_bound(eps) for eps in eps_vector])

    # # Renyi-DP
    # plots['rdp']['ydata'] = reny_delta_of_eps_efficient(eps_vector, n, pb.bucket_distribution, pb.log_factor, None)

    return plots, eps_vector, pb, pbn
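
An added sketch of how the returned plots dict might be rendered with matplotlib; only the 'pbup' and 'pblow' entries receive 'ydata' in the function above (the 'cdp' and 'rdp' curves are left unfilled), and module_params is assumed to be set up as in Example #3.

import matplotlib.pyplot as plt

plots, eps_vector, pb, pbn = constructPB(module_params, n=2**10)
for key in ('pbup', 'pblow'):  # only these entries carry 'ydata'
    curve = plots[key]
    plt.plot(eps_vector, curve['ydata'], label=curve['name'],
             color=curve['color'], linestyle=curve['linestyle'])
plt.xlabel("eps")
plt.ylabel("delta(eps)")
plt.yscale("log")
plt.legend()
plt.show()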
Example #7
# infinity bucket and the minus-n bucket. Therefore, with the threshold
#
#       T = log(factor) * number_of_buckets / 2
#
# the mass Pr[ L_A/B(o) > T, o <- distribution_A ] is put into the infinity bucket. The infinity bucket grows
# exponentially with the number of compositions. Choose the factor according to the probability mass you want
# to tolerate in the infinity bucket. For this example, we set it magically to
factor = 1 + 1e-4
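# Added sanity check: with number_of_buckets = 100000 (the value used throughout
# these examples), the largest privacy loss the buckets can represent is
#     log(factor) * number_of_buckets / 2 ~= 1e-4 * 50000 = 5.0
# so any outcome with a loss above ~5 falls into the infinity bucket.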

# Initialize privacy buckets.
privacybuckets = ProbabilityBuckets(
    number_of_buckets=number_of_buckets,
    factor=factor,
    dist1_array=distribution_A,  # distribution A
    dist2_array=distribution_B,  # distribution B
    caching_directory="./pb-cache",  # caching makes re-evaluations faster. Can be turned off for some cases.
    free_infty_budget=10**(-20),  # how much we can put in the infty bucket before first squaring
    error_correction=True,  # error correction. See publication for details
)

# Print status summary
privacybuckets.print_state()

# Now we build the RDP(alpha) graphs from the computed distribution.
alpha_vec = np.linspace(1, 100, 200)
upper_bound = [
    privacybuckets.renyi_divergence_upper_bound(alpha) for alpha in alpha_vec
]
analytic = [
Example #8
# The infinity bucket grows exponentially with the number of compositions. Choose the factor according to the
# probability mass you want to tolerate in the infinity bucket. For this example, the minimal factor should
# satisfy
#
#       log(factor) * number_of_buckets / 2 > eps
#
# as for randomized response, there is no privacy loss L_A/B greater than epsilon (excluding delta/infinity-bucket).
# We set the factor to
factor = 1 + 1e-4

# Initialize privacy buckets.
privacybuckets = ProbabilityBuckets(
    number_of_buckets=number_of_buckets,
    factor=factor,
    dist1_array=distribution_A,  # distribution A
    dist2_array=distribution_B,  # distribution B
    caching_directory="./pb-cache",  # caching makes re-evaluations faster. Can be turned off for some cases.
    free_infty_budget=10**(-20),  # how much we can put in the infty bucket before first squaring
    error_correction=True,  # error correction. See publication for details
)

# Now we evaluate how the distribution looks after 2**k independent compositions
k = 13
# input can be arbitrary positive integer, but exponents of 2 are numerically the most stable
privacybuckets_composed = privacybuckets.compose(2**k)

# Print status summary
privacybuckets_composed.print_state()

# Next, we set up a second set of buckets whose delta function we will use to reconstruct the privacy loss
# distribution. Choose the factor according to the probability mass you want to tolerate in the infinity
# bucket. For this example, we set it magically to
factor = 1 + 1e-3

# Initialize privacy buckets.
kwargs = {
    'number_of_buckets': number_of_buckets,
    'factor': factor,
    'caching_directory': "./pb-cache",  # caching makes re-evaluations faster. Can be turned off for some cases.
    'free_infty_budget': 10**(-20),  # how much we can put in the infty bucket before first squaring
    'error_correction': False,  # True is not supported.
}

pb = ProbabilityBuckets(
    dist1_array=distribution_A,  # distribution A
    dist2_array=distribution_B,  # distribution B
    **kwargs)

# Print status summary
pb.print_state()

# The delta function we are going to reconstruct the privacy loss distribution from.
if DP_TYPE == 'adp':
    delta_func = pb.delta_ADP
elif DP_TYPE == 'pdp':
    delta_func = pb.delta_PDP

print("[*] reconstructing..")

pb_reconstructed = ProbabilityBuckets_fromDelta(delta_func=delta_func,
                                                DP_type=DP_TYPE,