def test_valid_inclusion_probabilities(self):
    """Checks basic properties of keys-only private inclusion probabilities.

    Three properties of
    `PrivateThresholdSampleKeysOnly.compute_inclusion_prob` are verified:

    1. With a small delta (0.5**30), a key of frequency 1 is included with
       probability exactly delta.
    2. With delta equal to 1.0, the inclusion probability is the same as in
       a non-private sample (0.5 for priority sampling with threshold 0.5
       and frequency 1).
    3. Every inclusion probability lies in [0.0, 1.0], and the
       probabilities never decrease as the frequency grows.
    """
    # Property 1: tiny delta, frequency 1.
    tiny_delta = 0.5**30
    low_delta_sample = private_sampling.PrivateThresholdSampleKeysOnly(
        threshold=1, eps=0.1, delta=tiny_delta)
    self.assertEqual(low_delta_sample.compute_inclusion_prob(1), tiny_delta)

    # Property 2: delta of 1.0 matches the non-private probability.
    no_privacy_sample = private_sampling.PrivateThresholdSampleKeysOnly(
        threshold=0.5,
        eps=0.1,
        delta=1.0,
        sampling_method=private_sampling.PrioritySamplingMethod)
    self.assertEqual(no_privacy_sample.compute_inclusion_prob(1), 0.5)

    # Property 3: valid range and monotonicity over frequencies 0, 10, ..., 990.
    sampler = private_sampling.PrivateThresholdSampleKeysOnly(
        threshold=1, eps=0.1, delta=0.5**10)
    probs = [
        sampler.compute_inclusion_prob(freq) for freq in range(0, 1000, 10)
    ]

    for prob in probs:
        self.assertGreaterEqual(prob, 0.0)
        self.assertLessEqual(prob, 1.0)

    # Compare each probability with its successor.
    for earlier, later in zip(probs, probs[1:]):
        self.assertGreaterEqual(later, earlier)
def inclusion_prob_vec_for_private_sampling_keys_only(
        max_freq, threshold, eps, delta, sampling_method):
    """Returns the stored inclusion probabilities of a keys-only sampler.

    A `PrivateThresholdSampleKeysOnly` is built with `store_every=1`, its
    inclusion probability for `max_freq` is computed, and a copy of the
    sampler's internal `_inclusion_prob` container is returned.

    Args:
      max_freq: Frequency passed to `compute_inclusion_prob`.
      threshold: Sampling threshold forwarded to the sampler.
      eps: Privacy parameter epsilon forwarded to the sampler.
      delta: Privacy parameter delta forwarded to the sampler.
      sampling_method: Sampling method forwarded to the sampler.

    Returns:
      A copy of the sampler's `_inclusion_prob` attribute.
    """
    sampler = private_sampling.PrivateThresholdSampleKeysOnly(
        threshold, eps, delta, sampling_method, store_every=1)
    # The return value is discarded; the call is made for its effect on the
    # sampler's internal state (presumably filling the stored probabilities
    # up to max_freq -- confirm against private_sampling).
    sampler.compute_inclusion_prob(max_freq)
    stored_probs = sampler._inclusion_prob  # pylint: disable=protected-access
    return stored_probs.copy()
def compute_fraction_reported_pws(
        freq_vec,
        eps,
        delta,
        sampling_method=private_sampling.AlwaysIncludeSamplingMethod,
        threshold=1.0):
    """Returns the expected fraction of keys reported in a private sample.

    The inclusion probability of every frequency in `freq_vec` is computed
    with a `PrivateThresholdSampleKeysOnly` sampler; the probabilities are
    summed (the expected number of reported keys) and divided by the number
    of keys.

    Args:
      freq_vec: Sized iterable of key frequencies.
      eps: Privacy parameter epsilon forwarded to the sampler.
      delta: Privacy parameter delta forwarded to the sampler.
      sampling_method: Sampling method forwarded to the sampler.
      threshold: Sampling threshold forwarded to the sampler.

    Returns:
      The sum of the inclusion probabilities divided by `len(freq_vec)`.

    Raises:
      ZeroDivisionError: If `freq_vec` is empty.
    """
    sampler = private_sampling.PrivateThresholdSampleKeysOnly(
        threshold, eps, delta, sampling_method)
    # Plain left-to-right accumulation (rather than sum()) so the floating
    # point result does not depend on the interpreter's summation strategy.
    expected_reported = 0.0
    for key_freq in freq_vec:
        expected_reported = (
            expected_reported + sampler.compute_inclusion_prob(key_freq))
    num_keys = len(freq_vec)
    return expected_reported / num_keys
def _log10_exponent_for_label(value):
    """Returns log10(value), as an int when the logarithm is a whole number."""
    exponent = math.log10(value)
    if exponent == int(exponent):
        return int(exponent)
    return exponent


def plot_inclusion_prob_using_precompute(max_freq, sample, output_path):
    """Plots inclusion probability against frequency and saves the figure.

    On a log-log scale, the plot shows the inclusion probability for the
    frequencies 1..max_freq under private weighted sampling ("PWS"), under
    the private-histogram approach ("SbH") and, when a real sampling step is
    involved, under the non-private sampling method.

    Args:
      max_freq: Largest frequency to plot. It is converted with `int()` once
        so that every curve shares the same frequency axis.
      sample: Object whose `eps`, `delta`, `sampling_method` and `threshold`
        attributes configure the plot. Only these attributes are read; the
        probabilities are computed with a freshly built
        `PrivateThresholdSampleKeysOnly`.
      output_path: Destination passed to `plt.savefig`.

    Raises:
      NotImplementedError: If the sampling method is not one of
        `AlwaysIncludeSamplingMethod`, `PrioritySamplingMethod` or
        `PpsworSamplingMethod`.
    """
    eps = sample.eps
    delta = sample.delta
    sampling_method = sample.sampling_method
    threshold = sample.threshold

    log_threshold = _log10_exponent_for_label(threshold)
    plt.clf()
    log1_delta = _log10_exponent_for_label(delta)

    # Priority sampling with threshold 1.0 is titled and plotted like the
    # always-include case: no separate non-private curve is drawn.
    include_non_private = True
    if sampling_method == private_sampling.AlwaysIncludeSamplingMethod or (
            sampling_method == private_sampling.PrioritySamplingMethod and
            threshold == 1.0):
        include_non_private = False
        title = ("Inclusion Probability: No Sampling, $\\varepsilon=%s, "
                 "\\delta=10^{%s}$") % (eps, log1_delta)
    elif sampling_method == private_sampling.PrioritySamplingMethod:
        title = ("Inclusion Probability: Priority Sampling $\\tau=10^{%s}, "
                 "\\varepsilon=%s, \\delta=10^{%s}$") % (log_threshold, eps,
                                                         log1_delta)
    elif sampling_method == private_sampling.PpsworSamplingMethod:
        title = (
            "Inclusion Probability: PPSWOR $\\tau=10^{%s}, \\varepsilon=%s, "
            "\\delta=10^{%s}$") % (log_threshold, eps, log1_delta)
    else:
        raise NotImplementedError("Sampling method not supported")

    plt.xlabel("Frequency")
    plt.ylabel("Inclusion Probability")

    # One integer frequency axis shared by every curve, so the x and y
    # vectors handed to each loglog() call are built from the same range.
    top_freq = int(max_freq)
    frequencies = range(1, top_freq + 1)

    sampler = private_sampling.PrivateThresholdSampleKeysOnly(
        threshold, eps, delta, sampling_method)
    prob_vec_our = [sampler.compute_inclusion_prob(freq) for freq in frequencies]
    prob_vec_histogram = inclusion_prob_vec_using_private_histogram(
        top_freq, threshold, eps, delta, sampling_method)

    if include_non_private:
        prob_vec_non_private = [
            sampling_method.inclusion_prob(freq, threshold)
            for freq in frequencies
        ]
        plt.loglog(
            frequencies,
            prob_vec_non_private,
            color="tab:green",
            label="Non-private",
            marker="d",
            markevery=0.25)
    plt.loglog(
        frequencies,
        prob_vec_our,
        color="tab:blue",
        label="PWS",
        marker="s",
        markevery=0.25)
    plt.loglog(
        frequencies,
        prob_vec_histogram,
        color="tab:orange",
        label="SbH",
        marker=".",
        markevery=0.25)

    plt.title(title)
    plt.legend()
    plt.savefig(output_path)