def __rec_concave_basis__(range_max_value, quality_function, eps, data, bulk=False):
    """base case of the rec-concave recursion - a single exponential mechanism call

    The candidate solutions are the integers 0..int(range_max_value) inclusive.
    The r, alpha, delta and N parameters of the full procedure play no role here.

    :param range_max_value: largest candidate solution (truncated with int())
    :param quality_function: quality function, forwarded as-is to the mechanism
    :param eps: privacy parameter forwarded to the mechanism
    :param data: the data set the quality function is evaluated on
    :param bulk: forwarded unchanged to basicdp.exponential_mechanism
    :return: whatever basicdp.exponential_mechanism returns
    """
    upper_bound = int(range_max_value)
    candidates = range(upper_bound + 1)
    return basicdp.exponential_mechanism(data, candidates, quality_function, eps, bulk)
def __rec_sanitize__(samples, domain_range, alpha, beta, eps, delta, dimension):
    """recursive step of the sanitizer - handles the sub-domain ``domain_range``

    Works through the module-level globals ``calls`` (remaining recursion
    budget, decremented on every invocation) and ``san_data`` (the list of
    sanitized points, extended in place).

    :param samples: the sample points being sanitized
    :param domain_range: (low, high) inclusive bounds of the current sub-domain
    :param alpha: approximation parameter (scales the count thresholds)
    :param beta: forwarded to choosing_mechanism
    :param eps: privacy parameter
    :param delta: privacy parameter, forwarded to choosing_mechanism
    :param dimension: argument of log_star, used to split the eps budget
    :return: the global ``san_data`` list
    :raises ValueError: when choosing_mechanism returns a string ('bottom')
    """
    global calls
    global san_data

    # step 1 - recursion budget exhausted; return the accumulated list so that
    # every exit path has the same return type
    if calls == 0:
        return san_data
    calls -= 1

    # step 2 - noisy count of the samples inside the current range
    # 1.0 keeps the noise scale a float even when eps is an int (Python 2)
    noise_scale = 1.0 / eps
    # laplace is presumably numpy.random.laplace (size-1 array) - TODO confirm
    noisy_points_in_range = (points_in_subset(samples, subset=domain_range)
                             + laplace(0, noise_scale, 1))
    sample_size = len(samples)

    # step 3 - too few points: emit (noisy count) copies of the range's upper
    # end. The original extended san_data with the *product*
    # base_range[1] * noisy_points_in_range instead of repeated copies;
    # this now mirrors step 11.
    if noisy_points_in_range < alpha * sample_size / 8:
        san_data.extend([domain_range[1]] * int(noisy_points_in_range))
        return san_data

    # step 4
    domain_size = domain_range[1] - domain_range[0] + 1
    log_size = int(ceil(log(domain_size, 2)))

    # step 6
    def quality(data, j):
        return min(point_count_intervals_bounding(data, domain_range, j) - alpha * sample_size / 32,
                   3 * alpha * sample_size / 32 - point_count_intervals_bounding(data, domain_range, j - 1))

    # step 7 (promise = alpha * sample_size / 32) is not needed because the
    # exponential mechanism is used instead of rec_concave

    # step 8
    new_eps = eps / 3.0 / log_star(dimension)
    z_tag = exponential_mechanism(samples, range(log_size + 1), quality, new_eps)

    if z_tag == 0:
        # step 9 - choose a single heavy point
        point_counter = Counter(samples)

        def special_quality(data, b):
            return point_counter[b]

        chosen = choosing_mechanism(samples, range(domain_range[0], domain_range[1] + 1),
                                    special_quality, 1, alpha / 64., beta, eps, delta)
    else:
        # step 10 - choose a heavy interval of length 2*z out of two interval sets
        z = 2 ** z_tag
        first_intervals = __build_intervals_set__(samples, 2 * z, domain_range[0], domain_range[1] + 1)
        # NOTE(review): the original passed 2*z_tag here; every interval below
        # is built with length 2*z, so the second (presumably shifted) set is
        # built with the same length - confirm against __build_intervals_set__
        second_intervals = __build_intervals_set__(samples, 2 * z, domain_range[0], domain_range[1] + 1, True)
        intervals = [(i, i + 2 * z - 1) for i in first_intervals + second_intervals]
        chosen = choosing_mechanism(samples, intervals, points_in_subset, 2, alpha / 64., beta, eps, delta)

    # check for the failure string *before* unpacking, otherwise the interval
    # branch fails with an unrelated "too many values to unpack" error
    if isinstance(chosen, str):
        raise ValueError("stability problem - choosing_mechanism returned 'bottom'")
    if z_tag == 0:
        a = b = chosen
    else:
        a, b = chosen

    # step 11
    # although not mentioned I assume the noisy value should be rounded
    noisy_count_ab = int(points_in_subset(samples, (a, b)) + laplace(0, noise_scale, 1))
    san_data.extend([b] * noisy_count_ab)

    # step 12 - recurse on what is left of the range on each side of [a, b]
    if a > domain_range[0]:
        rec_range = (domain_range[0], a - 1)
        __rec_sanitize__(samples, rec_range, alpha, beta, eps, delta, dimension)
    if b < domain_range[1]:
        rec_range = (b + 1, domain_range[1])
        __rec_sanitize__(samples, rec_range, alpha, beta, eps, delta, dimension)
    return san_data
def __rec_concave_basis__(range_max_value, quality_function, eps, data, bulk=False):
    """recursion basis of the rec-concave procedure

    Runs the exponential mechanism over the solution set
    0, 1, ..., int(range_max_value).
    Note: r, alpha, delta and N of the general procedure are not used at this level.

    arguments: (solution set size, quality function of sensitivity 1,
                eps privacy parameter, data set, bulk flag passed through to the mechanism)
    returns: the value returned by basicdp.exponential_mechanism
    """
    solution_set = range(1 + int(range_max_value))
    chosen = basicdp.exponential_mechanism(data, solution_set, quality_function, eps, bulk)
    return chosen
def evaluate(data, range_max_value, quality_function, quality_promise, approximation, eps, delta,
             recursion_bound, bulk=False):
    """recursive rec-concave style search for a high quality solution

    The solution set is 0..int(range_max_value). When ``recursion_bound`` is 1
    or the range is at most 32 the exponential mechanism is applied directly
    (``__rec_concave_basis__``). Otherwise the procedure recurses on the
    logarithm of the range to find a good interval length, picks one interval
    from each of two interval partitions with ``basicdp.a_dist`` and runs the
    exponential mechanism on their union.

    :param data: the data set, passed to the quality function and to basicdp
    :param range_max_value: largest solution in the solution set
    :param quality_function: called as quality_function(data, solution); when
        ``bulk`` is true it is called once with the list of all solutions and
        must return their qualities in order
    :param quality_promise: promised quality used by the recursive quality function
    :param approximation: approximation parameter used by the recursive quality function
    :param eps: privacy parameter forwarded to the basicdp mechanisms
    :param delta: privacy parameter forwarded to basicdp.a_dist
    :param recursion_bound: remaining recursion depth; 1 forces the base case
    :param bulk: whether quality_function accepts the whole solution list at once
    :return: whatever basicdp.exponential_mechanism returns
    :raises ValueError: ('stability problem') when a_dist does not return a list
    """
    # TODO go through variables names
    if recursion_bound == 1 or range_max_value <= 32:
        return __rec_concave_basis__(range_max_value, quality_function, eps, data, bulk)
    recursion_bound -= 1

    # step 2 - evaluate the qualities once and pad the range to a power of two
    print("step 2")
    max_index = int(range_max_value)
    log_of_range = int(math.ceil(math.log(range_max_value, 2)))
    range_max_value_tag = 2 ** log_of_range
    if bulk:
        # copy, so the padding below never mutates an object owned by the caller
        qualities = list(quality_function(data, range(max_index + 1)))
    else:
        qualities = [quality_function(data, i) for i in range(max_index + 1)]
    qualities.extend([min(0, qualities[max_index])] * (range_max_value_tag - max_index))

    # signature that fits the exponential mechanism requirements (used in step 10)
    def extended_quality_function_for_exponential_mechanism(data_set, j):
        return qualities[j]

    # step 3
    print("step 3")
    bounding_cache = {}  # each level is queried twice per candidate, so memoize

    def intervals_bounding(j):
        """best (max) over all windows of 2**j consecutive solutions of the worst (min) quality"""
        if j not in bounding_cache:
            if j == log_of_range + 1:
                level_value = min(0, intervals_bounding(log_of_range))
            else:
                width = 2 ** j
                # the original scanned only 2**j - 1 elements per window, which
                # made min() fail on an empty sequence for j == 0
                level_value = max(min(qualities[a:a + width])
                                  for a in range(range_max_value_tag - width + 1))
            bounding_cache[j] = level_value
        return bounding_cache[j]

    # step 4
    print("step 4")

    def recursive_quality_function(data_base, range_element):
        return min(intervals_bounding(range_element) - (1 - approximation) * quality_promise,
                   quality_promise - intervals_bounding(range_element + 1))

    # step 5
    print("step 5")
    recursive_quality_promise = quality_promise * approximation / 2.0

    # step 6 - recursion call
    print("step 6 - recursive call")
    # 0.25 instead of 1/4: under Python 2 integer division 1/4 evaluates to 0.
    # bulk is False because recursive_quality_function scores a single element.
    recursion_returned = evaluate(data, log_of_range, recursive_quality_function,
                                  recursive_quality_promise, 0.25, eps, delta, recursion_bound, False)
    good_interval = 8 * (2 ** recursion_returned)
    print("good interval: %d" % good_interval)

    # step 7 - two partitions into intervals of length good_interval, the
    # second one shifted by half an interval
    print("step 7")
    half_interval = good_interval // 2
    first_intervals = [list(range(start, min(start + good_interval, range_max_value_tag)))
                       for start in range(0, range_max_value_tag, good_interval)]
    second_intervals = [list(range(start, min(start + good_interval, range_max_value_tag)))
                        for start in range(half_interval, range_max_value_tag, good_interval)]

    # step 8
    print("step 8")

    def interval_quality(data_base, interval):
        return max([qualities[j] for j in interval])

    # TODO temp - remove later
    # plotting for testing
    fq = [interval_quality(data, i) for i in first_intervals]
    plt.plot(range(len(fq)), fq, 'bo', range(len(fq)), fq, 'r')
    lower_bound = max(fq) - math.log(1.0 / delta) / eps
    plt.axhspan(lower_bound, lower_bound, color='green', alpha=0.5)
    plt.show()

    # step 9 ( using 'dist' algorithm)
    print("step 9")
    first_chosen_interval = basicdp.a_dist(data, first_intervals, interval_quality, eps, delta)
    second_chosen_interval = basicdp.a_dist(data, second_intervals, interval_quality, eps, delta)
    print("first A_dist returned: %s" % str(type(first_chosen_interval)))
    print("second A_dist returned: %s" % str(type(second_chosen_interval)))

    # anything other than one of the candidate lists means a_dist failed
    if not (isinstance(first_chosen_interval, list) and isinstance(second_chosen_interval, list)):
        raise ValueError('stability problem')

    # step 10
    print("step 10")
    return basicdp.exponential_mechanism(data, first_chosen_interval + second_chosen_interval,
                                         extended_quality_function_for_exponential_mechanism, eps, False)
def evaluate(data, range_max_value, quality_function, quality_promise, approximation, eps, delta,
             recursion_bound, bulk=False):
    """recursive rec-concave style search for a high quality solution

    The solution set is 0..int(range_max_value). When ``recursion_bound`` is 1
    or the range is at most 32 the exponential mechanism is applied directly
    (``__rec_concave_basis__``). Otherwise the procedure recurses on the
    logarithm of the range to find a good interval length, picks one interval
    from each of two interval partitions with ``basicdp.a_dist`` and runs the
    exponential mechanism on their union.

    :param data: the data set, passed to the quality function and to basicdp
    :param range_max_value: largest solution in the solution set
    :param quality_function: called as quality_function(data, solution); when
        ``bulk`` is true it is called once with the list of all solutions and
        must return their qualities in order
    :param quality_promise: promised quality used by the recursive quality function
    :param approximation: approximation parameter used by the recursive quality function
    :param eps: privacy parameter forwarded to the basicdp mechanisms
    :param delta: privacy parameter forwarded to basicdp.a_dist
    :param recursion_bound: remaining recursion depth; 1 forces the base case
    :param bulk: whether quality_function accepts the whole solution list at once
    :return: whatever basicdp.exponential_mechanism returns
    :raises ValueError: ('stability problem') when a_dist does not return a list
    """
    # TODO go through variables names
    if recursion_bound == 1 or range_max_value <= 32:
        return __rec_concave_basis__(range_max_value, quality_function, eps, data, bulk)
    recursion_bound -= 1

    # step 2 - evaluate the qualities once and pad the range to a power of two
    print("step 2")
    max_index = int(range_max_value)
    log_of_range = int(math.ceil(math.log(range_max_value, 2)))
    range_max_value_tag = 2 ** log_of_range
    if bulk:
        # copy, so the padding below never mutates an object owned by the caller
        qualities = list(quality_function(data, range(max_index + 1)))
    else:
        qualities = [quality_function(data, i) for i in range(max_index + 1)]
    qualities.extend([min(0, qualities[max_index])] * (range_max_value_tag - max_index))

    # signature that fits the exponential mechanism requirements (used in step 10)
    def extended_quality_function_for_exponential_mechanism(data_set, j):
        return qualities[j]

    # step 3
    print("step 3")
    bounding_cache = {}  # each level is queried twice per candidate, so memoize

    def intervals_bounding(j):
        """best (max) over all windows of 2**j consecutive solutions of the worst (min) quality"""
        if j not in bounding_cache:
            if j == log_of_range + 1:
                level_value = min(0, intervals_bounding(log_of_range))
            else:
                width = 2 ** j
                # the original scanned only 2**j - 1 elements per window, which
                # made min() fail on an empty sequence for j == 0
                level_value = max(min(qualities[a:a + width])
                                  for a in range(range_max_value_tag - width + 1))
            bounding_cache[j] = level_value
        return bounding_cache[j]

    # step 4
    print("step 4")

    def recursive_quality_function(data_base, range_element):
        return min(intervals_bounding(range_element) - (1 - approximation) * quality_promise,
                   quality_promise - intervals_bounding(range_element + 1))

    # step 5
    print("step 5")
    recursive_quality_promise = quality_promise * approximation / 2.0

    # step 6 - recursion call
    print("step 6 - recursive call")
    # 0.25 instead of 1/4: under Python 2 integer division 1/4 evaluates to 0.
    # bulk is False because recursive_quality_function scores a single element.
    recursion_returned = evaluate(data, log_of_range, recursive_quality_function,
                                  recursive_quality_promise, 0.25, eps, delta, recursion_bound, False)
    good_interval = 8 * (2 ** recursion_returned)
    print("good interval: %d" % good_interval)

    # step 7 - two partitions into intervals of length good_interval, the
    # second one shifted by half an interval
    print("step 7")
    half_interval = good_interval // 2
    first_intervals = [list(range(start, min(start + good_interval, range_max_value_tag)))
                       for start in range(0, range_max_value_tag, good_interval)]
    second_intervals = [list(range(start, min(start + good_interval, range_max_value_tag)))
                        for start in range(half_interval, range_max_value_tag, good_interval)]

    # step 8
    print("step 8")

    def interval_quality(data_base, interval):
        return max([qualities[j] for j in interval])

    # TODO temp - remove later
    # plotting for testing
    fq = [interval_quality(data, i) for i in first_intervals]
    plt.plot(range(len(fq)), fq, 'bo', range(len(fq)), fq, 'r')
    lower_bound = max(fq) - math.log(1.0 / delta) / eps
    plt.axhspan(lower_bound, lower_bound, color='green', alpha=0.5)
    plt.show()

    # step 9 ( using 'dist' algorithm)
    print("step 9")
    first_chosen_interval = basicdp.a_dist(data, first_intervals, interval_quality, eps, delta)
    second_chosen_interval = basicdp.a_dist(data, second_intervals, interval_quality, eps, delta)
    print("first A_dist returned: %s" % str(type(first_chosen_interval)))
    print("second A_dist returned: %s" % str(type(second_chosen_interval)))

    # anything other than one of the candidate lists means a_dist failed
    if not (isinstance(first_chosen_interval, list) and isinstance(second_chosen_interval, list)):
        raise ValueError('stability problem')

    # step 10
    print("step 10")
    return basicdp.exponential_mechanism(data, first_chosen_interval + second_chosen_interval,
                                         extended_quality_function_for_exponential_mechanism, eps, False)