Exemple #1
0
def __rec_concave_basis__(range_max_value,
                          quality_function,
                          eps,
                          data,
                          bulk=False):
    """Base case of the reconcave recursion: run the exponential mechanism directly.

    The solution set is every integer from 0 up to ``int(range_max_value)``
    (inclusive).  Unlike the recursive step, this base case takes no
    r, alpha, delta or N parameters.

    :param range_max_value: largest solution; truncated with ``int`` before use
    :param quality_function: quality function (per the original note, expected
        to have sensitivity 1); forwarded unchanged
    :param eps: privacy parameter, forwarded unchanged
    :param data: the data set, forwarded as the first argument
    :param bulk: forwarded unchanged to ``basicdp.exponential_mechanism``
    :return: whatever ``basicdp.exponential_mechanism`` returns
    """
    largest_solution = int(range_max_value)
    solution_set = range(largest_solution + 1)
    return basicdp.exponential_mechanism(data, solution_set, quality_function,
                                         eps, bulk)
Exemple #2
0
def __rec_sanitize__(samples, domain_range, alpha, beta, eps, delta, dimension):
    """Recursively sanitize ``samples`` restricted to the inclusive ``domain_range``.

    Every call chooses a sub-interval ``(a, b)`` of ``domain_range``, appends a
    noisy number of copies of ``b`` to the module-level list ``san_data`` and
    then recurses on the parts of the range to the left of ``a`` and to the
    right of ``b``.  The module-level counter ``calls`` is decremented on every
    invocation and ends the recursion once it reaches zero.

    :param samples: the input points
    :param domain_range: ``(low, high)`` pair, both ends inclusive
    :param alpha: scales the count thresholds and (as ``alpha/64.``) is
        forwarded to ``choosing_mechanism``
    :param beta: forwarded to ``choosing_mechanism``
    :param eps: privacy parameter for the Laplace noise and the mechanisms
    :param delta: forwarded to ``choosing_mechanism``
    :param dimension: only used through ``log_star(dimension)`` to scale the
        eps given to the exponential mechanism
    :return: the global ``san_data`` list, or ``None`` when the ``calls``
        budget was already exhausted on entry
    :raises ValueError: if ``choosing_mechanism`` returns a string ('bottom')
    """
    global calls
    global san_data

    # step 1 - stop once the global call budget is used up
    if calls == 0:
        return
    calls -= 1

    # step 2 - noisy number of samples that fall inside the current range
    # (1.0 / eps keeps the scale a float even if eps is an int on Python 2;
    #  laplace is presumably numpy.random.laplace - verify against the imports)
    noisy_points_in_range = points_in_subset(samples, domain_range) + laplace(0, 1.0 / eps, 1)
    sample_size = len(samples)

    # step 3 - too few points: emit the noisy count as copies of the upper end
    # of the range, in the same "[point] * count" form used in step 11
    if noisy_points_in_range < alpha * sample_size / 8:
        san_data.extend([domain_range[1]] * int(noisy_points_in_range))
        return san_data

    # step 4
    domain_size = domain_range[1] - domain_range[0] + 1
    log_size = int(ceil(log(domain_size, 2)))

    # step 6
    def quality(data, j):
        return min(point_count_intervals_bounding(data, domain_range, j) - alpha * sample_size / 32,
                   3 * alpha * sample_size / 32 - point_count_intervals_bounding(data, domain_range, j - 1))

    # step 7 (the explicit quality promise) is not needed with exponential_mechanism

    # step 8
    # note the use of exponential_mechanism instead of rec_concave
    new_eps = eps / 3 / log_star(dimension)
    z_tag = exponential_mechanism(samples, range(log_size + 1), quality, new_eps)
    z = 2 ** z_tag

    single_point = z_tag == 0
    if single_point:
        # step 9 - choose a single point by its multiplicity in the samples
        point_counter = Counter(samples)

        def special_quality(data, b):
            return point_counter[b]

        chosen = choosing_mechanism(samples, range(domain_range[0], domain_range[1] + 1), special_quality,
                                    1, alpha / 64., beta, eps, delta)
    else:
        # step 10 - choose among intervals of length 2*z
        first_intervals = __build_intervals_set__(samples, 2 * z, domain_range[0], domain_range[1] + 1)
        # NOTE(review): the line below passes 2*z_tag while the line above
        # passes 2*z - looks like a typo, confirm against __build_intervals_set__
        second_intervals = __build_intervals_set__(samples, 2 * z_tag, domain_range[0], domain_range[1] + 1, True)
        intervals = [(i, i + 2 * z - 1) for i in first_intervals + second_intervals]
        chosen = choosing_mechanism(samples, intervals, points_in_subset, 2, alpha / 64., beta, eps, delta)

    # check for the failure marker *before* unpacking, otherwise a string
    # result would fail with an unrelated unpacking error in the interval case
    if isinstance(chosen, str):
        raise ValueError("stability problem - choosing_mechanism returned 'bottom'")

    if single_point:
        a = b = chosen
    else:
        a, b = chosen

    # step 11
    # although not mentioned I assume the noisy value should be rounded
    noisy_count_ab = int(points_in_subset(samples, (a, b)) + laplace(0, 1.0 / eps, 1))
    san_data.extend([b] * noisy_count_ab)

    # step 12 - recurse on what is left on both sides of (a, b)
    if a > domain_range[0]:
        rec_range = (domain_range[0], a - 1)
        __rec_sanitize__(samples, rec_range, alpha, beta, eps, delta, dimension)
    if b < domain_range[1]:
        rec_range = (b + 1, domain_range[1])
        __rec_sanitize__(samples, rec_range, alpha, beta, eps, delta, dimension)
    return san_data
Exemple #3
0
def __rec_concave_basis__(range_max_value, quality_function, eps, data, bulk=False):
    """Recursion basis of the reconcave procedure.

    Runs ``basicdp.exponential_mechanism`` over the integer solutions
    ``0 .. int(range_max_value)``.  The r, alpha, delta and N parameters of the
    recursive step have no counterpart here.

    :param range_max_value: upper end of the solution set (converted with ``int``)
    :param quality_function: quality function (the original note expects
        sensitivity 1); passed on as is
    :param eps: privacy parameter, passed on as is
    :param data: the data set handed to the mechanism
    :param bulk: passed on as is
    :return: the value returned by ``basicdp.exponential_mechanism``
    """
    upper_end = int(range_max_value)
    candidates = range(upper_end + 1)
    chosen = basicdp.exponential_mechanism(data, candidates, quality_function, eps, bulk)
    return chosen
Exemple #4
0
def evaluate(data, range_max_value, quality_function, quality_promise,
             approximation, eps, delta, recursion_bound, bulk=False):
    """Recursive step of the reconcave procedure.

    Small inputs (``range_max_value <= 32``) or an exhausted recursion budget
    (``recursion_bound == 1``) are delegated to ``__rec_concave_basis__``.
    Otherwise the function recurses on the logarithm of the range to pick an
    interval length, selects one interval from each of two overlapping
    partitions with ``basicdp.a_dist`` and finally runs
    ``basicdp.exponential_mechanism`` on the union of the two intervals.

    :param data: the data set, passed to the quality function and to basicdp
    :param range_max_value: the solutions are ``0 .. int(range_max_value)``
    :param quality_function: called as ``quality_function(data, i)``, or once
        with the whole list of solutions when ``bulk`` is true
    :param quality_promise: quality promise used by the recursive quality function
    :param approximation: approximation parameter used by the recursive
        quality function and the recursive promise
    :param eps: privacy parameter forwarded to the basicdp mechanisms
    :param delta: privacy parameter forwarded to ``basicdp.a_dist``
    :param recursion_bound: remaining recursion depth
    :param bulk: whether ``quality_function`` evaluates a list of solutions at once
    :return: the value returned by the final mechanism call
    :raises ValueError: ('stability problem') if ``basicdp.a_dist`` does not
        return a list for either partition
    """
    # TODO fix so it will work
    # TODO go through variables names
    if recursion_bound == 1 or range_max_value <= 32:
        return __rec_concave_basis__(range_max_value, quality_function, eps, data, bulk)
    recursion_bound -= 1

    # range_max_value is used below as a list index and as an xrange bound
    range_max_value = int(range_max_value)

    # step 2
    print("step 2")
    log_of_range = int(math.ceil(math.log(range_max_value, 2)))
    range_max_value_tag = 2 ** log_of_range

    solutions = range(range_max_value + 1)
    if bulk:
        # list(...) so that a non-list result can still be extended below
        qualities = list(quality_function(data, solutions))
    else:
        qualities = [quality_function(data, i) for i in solutions]
    # pad the quality table so that it covers indexes 0..range_max_value_tag
    padding_quality = min(0, qualities[range_max_value])
    qualities.extend([padding_quality] * (range_max_value_tag - range_max_value))

    # quality lookup with the signature the exponential mechanism expects (used in step 10)
    def extended_quality_function_for_exponential_mechanism(data_set, j):
        return qualities[j]

    # step 3
    print("step 3")

    # every level is requested twice (as j and as j + 1), so remember the results
    bounding_cache = {}

    def intervals_bounding(j):
        if j == log_of_range + 1:
            return min(0, intervals_bounding(log_of_range))
        if j not in bounding_cache:
            length = 2 ** j
            # an interval starting at a holds exactly `length` solutions
            # (the previous bound a+2**j-1 dropped the last one and left the
            #  interval empty for j == 0, which made min() raise)
            bounding_cache[j] = max(min(qualities[a:a + length])
                                    for a in xrange(0, range_max_value_tag - length + 1))
        return bounding_cache[j]

    # step 4
    print("step 4")

    def recursive_quality_function(data_base, range_element):
        return min(intervals_bounding(range_element) - (1 - approximation) * quality_promise,
                   quality_promise - intervals_bounding(range_element + 1))

    # step 5
    print("step 5")
    recursive_quality_promise = quality_promise * approximation / 2

    # step 6 - recursion call
    print("step 6 - recursive call")
    # 0.25 instead of 1/4: on Python 2 the latter is integer division, i.e. 0.
    # bulk is False because recursive_quality_function scores one element per
    # call and cannot take the whole solution list the way step 2 does in bulk mode.
    recursion_returned = evaluate(data, log_of_range, recursive_quality_function, recursive_quality_promise, 0.25,
                                  eps, delta, recursion_bound, False)

    good_interval = 8 * (2 ** recursion_returned)
    print("good interval: %d" % good_interval)

    # step 7 - two partitions into intervals of length good_interval, the
    # second one shifted by half an interval; both are clipped at range_max_value_tag
    print("step 7")
    half_interval = good_interval // 2
    first_intervals = [list(range(start, min(start + good_interval, range_max_value_tag)))
                       for start in xrange(0, range_max_value_tag, good_interval)]

    second_intervals = [list(range(start, min(start + good_interval, range_max_value_tag)))
                        for start in xrange(half_interval, range_max_value_tag, good_interval)]

    # step 8
    print("step 8")

    def interval_quality(data_base, interval):
        return max(qualities[j] for j in interval)

    # TODO temp - remove later
    # plotting for testing (note: plt.show() is called on every non-base call)
    fq = [interval_quality(data, i) for i in first_intervals]
    plt.plot(range(len(fq)), fq, 'bo', range(len(fq)), fq, 'r')
    lower_bound = max(fq) - math.log(1.0 / delta) / eps
    plt.axhspan(lower_bound, lower_bound, color='green', alpha=0.5)
    plt.show()

    # step 9 ( using 'dist' algorithm)
    print("step 9")
    first_chosen_interval = basicdp.a_dist(data, first_intervals, interval_quality, eps, delta)
    second_chosen_interval = basicdp.a_dist(data, second_intervals, interval_quality, eps, delta)

    print("first A_dist returned: %s" % str(type(first_chosen_interval)))
    print("second A_dist returned: %s" % str(type(second_chosen_interval)))

    if not (isinstance(first_chosen_interval, list) and isinstance(second_chosen_interval, list)):
        raise ValueError('stability problem')

    # step 10
    print("step 10")
    return basicdp.exponential_mechanism(data, first_chosen_interval + second_chosen_interval,
                                         extended_quality_function_for_exponential_mechanism, eps, False)
Exemple #5
0
def evaluate(data,
             range_max_value,
             quality_function,
             quality_promise,
             approximation,
             eps,
             delta,
             recursion_bound,
             bulk=False):
    """Recursive step of the reconcave procedure.

    When ``recursion_bound == 1`` or ``range_max_value <= 32`` the work is
    handed to ``__rec_concave_basis__``.  Otherwise the function recurses on
    the logarithm of the range to choose an interval length, picks one
    interval out of each of two overlapping partitions with
    ``basicdp.a_dist`` and runs ``basicdp.exponential_mechanism`` on the union
    of the two chosen intervals.

    :param data: the data set, passed to the quality function and to basicdp
    :param range_max_value: the solutions are ``0 .. int(range_max_value)``
    :param quality_function: called as ``quality_function(data, i)``, or once
        with the whole list of solutions when ``bulk`` is true
    :param quality_promise: quality promise used by the recursive quality function
    :param approximation: approximation parameter used by the recursive
        quality function and the recursive promise
    :param eps: privacy parameter forwarded to the basicdp mechanisms
    :param delta: privacy parameter forwarded to ``basicdp.a_dist``
    :param recursion_bound: remaining recursion depth
    :param bulk: whether ``quality_function`` evaluates a list of solutions at once
    :return: the value returned by the final mechanism call
    :raises ValueError: ('stability problem') if ``basicdp.a_dist`` does not
        return a list for either partition
    """
    # TODO fix so it will work
    # TODO go through variables names
    if recursion_bound == 1 or range_max_value <= 32:
        return __rec_concave_basis__(range_max_value, quality_function, eps,
                                     data, bulk)
    recursion_bound -= 1

    # range_max_value doubles as a list index and an xrange bound further down
    range_max_value = int(range_max_value)

    # step 2
    print("step 2")
    log_of_range = int(math.ceil(math.log(range_max_value, 2)))
    range_max_value_tag = 2**log_of_range

    solutions = range(range_max_value + 1)
    if bulk:
        # list(...) so that a non-list result can still be extended below
        qualities = list(quality_function(data, solutions))
    else:
        qualities = [quality_function(data, i) for i in solutions]
    # pad the quality table so that it covers indexes 0..range_max_value_tag
    padding_quality = min(0, qualities[range_max_value])
    qualities.extend(
        [padding_quality] * (range_max_value_tag - range_max_value))

    # quality lookup with the signature the exponential mechanism expects (used in step 10)
    def extended_quality_function_for_exponential_mechanism(data_set, j):
        return qualities[j]

    # step 3
    print("step 3")

    # each level is asked for twice (as j and as j + 1): keep the results
    bounding_cache = {}

    def intervals_bounding(j):
        if j == log_of_range + 1:
            return min(0, intervals_bounding(log_of_range))
        if j not in bounding_cache:
            length = 2**j
            # an interval starting at a holds exactly `length` solutions
            # (the previous bound a + 2**j - 1 dropped the last one and left
            #  the interval empty for j == 0, which made min() raise)
            bounding_cache[j] = max(
                min(qualities[a:a + length])
                for a in xrange(0, range_max_value_tag - length + 1))
        return bounding_cache[j]

    # step 4
    print("step 4")

    def recursive_quality_function(data_base, range_element):
        return min(
            intervals_bounding(range_element) -
            (1 - approximation) * quality_promise,
            quality_promise - intervals_bounding(range_element + 1))

    # step 5
    print("step 5")
    recursive_quality_promise = quality_promise * approximation / 2

    # step 6 - recursion call
    print("step 6 - recursive call")
    # 0.25 instead of 1 / 4: on Python 2 the latter is integer division, i.e. 0.
    # bulk is False because recursive_quality_function scores one element per
    # call and cannot take the whole solution list the way step 2 does in bulk mode.
    recursion_returned = evaluate(data, log_of_range,
                                  recursive_quality_function,
                                  recursive_quality_promise, 0.25, eps, delta,
                                  recursion_bound, False)

    good_interval = 8 * (2**recursion_returned)
    print("good interval: %d" % good_interval)

    # step 7 - two partitions into intervals of length good_interval, the
    # second one shifted by half an interval; both are clipped at range_max_value_tag
    print("step 7")
    half_interval = good_interval // 2
    first_intervals = [
        list(range(start, min(start + good_interval, range_max_value_tag)))
        for start in xrange(0, range_max_value_tag, good_interval)
    ]

    second_intervals = [
        list(range(start, min(start + good_interval, range_max_value_tag)))
        for start in xrange(half_interval, range_max_value_tag, good_interval)
    ]

    # step 8
    print("step 8")

    def interval_quality(data_base, interval):
        return max(qualities[j] for j in interval)

    # TODO temp - remove later
    # plotting for testing (note: plt.show() is called on every non-base call)
    fq = [interval_quality(data, i) for i in first_intervals]
    plt.plot(range(len(fq)), fq, 'bo', range(len(fq)), fq, 'r')
    lower_bound = max(fq) - math.log(1.0 / delta) / eps
    plt.axhspan(lower_bound, lower_bound, color='green', alpha=0.5)
    plt.show()

    # step 9 ( using 'dist' algorithm)
    print("step 9")
    first_chosen_interval = basicdp.a_dist(data, first_intervals,
                                           interval_quality, eps, delta)
    second_chosen_interval = basicdp.a_dist(data, second_intervals,
                                            interval_quality, eps, delta)

    print("first A_dist returned: %s" % str(type(first_chosen_interval)))
    print("second A_dist returned: %s" % str(type(second_chosen_interval)))

    both_are_lists = (isinstance(first_chosen_interval, list)
                      and isinstance(second_chosen_interval, list))
    if not both_are_lists:
        raise ValueError('stability problem')

    # step 10
    print("step 10")
    return basicdp.exponential_mechanism(
        data, first_chosen_interval + second_chosen_interval,
        extended_quality_function_for_exponential_mechanism, eps, False)