def test_valid_inclusion_probabilities(self):
        """Checks basic properties of keys-only inclusion probabilities.

        The following properties of the inclusion probabilities computed by
        PrivateThresholdSampleKeysOnly are verified:
        1. With a very small delta (0.5**30) and threshold 1, a key with
           frequency 1 is included with probability exactly delta.
        2. With delta equal to 1.0, the inclusion probability matches the
           non-private sample (expected to be 0.5 for priority sampling with
           threshold 0.5 and frequency 1).
        3. All inclusion probabilities lie in [0.0, 1.0] and never decrease
           as the frequency grows.
        """
        # Property 1: tiny delta, frequency 1.
        tiny_delta = 0.5**30
        low_delta_sample = private_sampling.PrivateThresholdSampleKeysOnly(
            threshold=1, eps=0.1, delta=tiny_delta)
        self.assertEqual(low_delta_sample.compute_inclusion_prob(1),
                         tiny_delta)

        # Property 2: delta of 1.0 reproduces the non-private probability.
        no_privacy_sample = private_sampling.PrivateThresholdSampleKeysOnly(
            threshold=0.5,
            eps=0.1,
            delta=1.0,
            sampling_method=private_sampling.PrioritySamplingMethod)
        self.assertEqual(no_privacy_sample.compute_inclusion_prob(1), 0.5)

        # Property 3: valid range and monotonicity over a grid of frequencies.
        grid_sample = private_sampling.PrivateThresholdSampleKeysOnly(
            threshold=1, eps=0.1, delta=0.5**10)
        probs = [
            grid_sample.compute_inclusion_prob(freq)
            for freq in range(0, 1000, 10)
        ]
        for prob in probs:
            self.assertGreaterEqual(prob, 0.0)
            self.assertLessEqual(prob, 1.0)
        # Compare each probability with its successor on the frequency grid.
        for lower_freq_prob, higher_freq_prob in zip(probs, probs[1:]):
            self.assertGreaterEqual(higher_freq_prob, lower_freq_prob)
def inclusion_prob_vec_for_private_sampling_keys_only(max_freq, threshold, eps,
                                                      delta, sampling_method):
    """Returns the stored inclusion probabilities of a keys-only sample.

    A PrivateThresholdSampleKeysOnly is built with store_every=1, its
    inclusion probability is evaluated at max_freq, and a copy of the
    sampler's internal `_inclusion_prob` container is returned.

    Args:
      max_freq: Frequency at which the inclusion probability is evaluated.
      threshold: Sampling threshold forwarded to the sampler.
      eps: Privacy parameter epsilon forwarded to the sampler.
      delta: Privacy parameter delta forwarded to the sampler.
      sampling_method: Sampling method forwarded to the sampler.

    Returns:
      A copy of the sampler's `_inclusion_prob` attribute.
    """
    sampler = private_sampling.PrivateThresholdSampleKeysOnly(
        threshold, eps, delta, sampling_method, store_every=1)
    # The return value is discarded; the call is made for its effect on the
    # sampler's internal state (presumably it fills the cached probabilities
    # up to max_freq -- confirm against PrivateThresholdSampleKeysOnly).
    sampler.compute_inclusion_prob(max_freq)
    cached_probs = sampler._inclusion_prob  # pylint: disable=protected-access
    return cached_probs.copy()
def compute_fraction_reported_pws(
        freq_vec,
        eps,
        delta,
        sampling_method=private_sampling.AlwaysIncludeSamplingMethod,
        threshold=1.0):
    """Computes the expected fraction of keys reported in a private sample.

    The inclusion probability of every frequency in freq_vec is summed (giving
    the expected number of reported keys) and the sum is divided by the number
    of keys.

    Args:
      freq_vec: Sized collection with the frequency of each key.
      eps: Privacy parameter epsilon.
      delta: Privacy parameter delta.
      sampling_method: Sampling method handed to the private sample.
      threshold: Sampling threshold handed to the private sample.

    Returns:
      The sum of the inclusion probabilities divided by len(freq_vec).

    Raises:
      ZeroDivisionError: If freq_vec is empty.
    """
    sampler = private_sampling.PrivateThresholdSampleKeysOnly(
        threshold, eps, delta, sampling_method)
    expected_reported = 0.0
    # Accumulate sequentially so the floating-point rounding order is fixed.
    for key_freq in freq_vec:
        expected_reported = (expected_reported +
                             sampler.compute_inclusion_prob(key_freq))
    num_keys = len(freq_vec)
    return expected_reported / num_keys
def plot_inclusion_prob_using_precompute(max_freq, sample, output_path):
    """Plots inclusion probability against frequency and saves the figure.

    The parameters eps, delta, sampling method and threshold are read from
    `sample`. A fresh PrivateThresholdSampleKeysOnly built from them supplies
    the "PWS" curve, and inclusion_prob_vec_using_private_histogram supplies
    the "SbH" curve. Unless the configuration amounts to no sampling
    (always-include, or priority sampling with threshold 1.0), the sampling
    method's own inclusion probabilities are drawn as a "Non-private" curve.

    Args:
      max_freq: Largest frequency plotted; frequencies 1..max_freq are used.
      sample: Object exposing eps, delta, sampling_method and threshold.
      output_path: Destination handed to plt.savefig.

    Raises:
      NotImplementedError: If the sampling method is not supported.
    """

    def whole_number_as_int(value):
        """Returns int(value) if value is integral, else value unchanged."""
        truncated = int(value)
        return truncated if truncated == value else value

    eps, delta = sample.eps, sample.delta
    sampling_method, threshold = sample.sampling_method, sample.threshold

    # Exponents are shown as plain integers in the title when they are whole.
    threshold_exponent = whole_number_as_int(math.log10(threshold))
    plt.clf()
    delta_exponent = whole_number_as_int(math.log10(delta))

    # Always-include, or priority sampling with threshold 1.0, is titled as
    # "No Sampling"; the non-private curve is omitted in that case.
    no_sampling = (
        sampling_method == private_sampling.AlwaysIncludeSamplingMethod
        or (sampling_method == private_sampling.PrioritySamplingMethod
            and threshold == 1.0))
    if no_sampling:
        title = ("Inclusion Probability: No Sampling, $\\varepsilon=%s, "
                 "\\delta=10^{%s}$") % (eps, delta_exponent)
    elif sampling_method == private_sampling.PrioritySamplingMethod:
        title = ("Inclusion Probability: Priority Sampling $\\tau=10^{%s}, "
                 "\\varepsilon=%s, \\delta=10^{%s}$") % (
                     threshold_exponent, eps, delta_exponent)
    elif sampling_method == private_sampling.PpsworSamplingMethod:
        title = (
            "Inclusion Probability: PPSWOR $\\tau=10^{%s}, \\varepsilon=%s, "
            "\\delta=10^{%s}$") % (threshold_exponent, eps, delta_exponent)
    else:
        raise NotImplementedError("Sampling method not supported")

    plt.xlabel("Frequency")
    plt.ylabel("Inclusion Probability")

    # The probabilities come from a newly built keys-only sampler, not from
    # the `sample` argument itself.
    keys_only_sampler = private_sampling.PrivateThresholdSampleKeysOnly(
        threshold, eps, delta, sampling_method)
    frequencies = range(1, max_freq + 1)
    pws_probs = [
        keys_only_sampler.compute_inclusion_prob(freq) for freq in frequencies
    ]
    sbh_probs = inclusion_prob_vec_using_private_histogram(
        max_freq, threshold, eps, delta, sampling_method)

    # Each entry is (y-values, color, legend label, marker), in drawing order.
    curves = []
    if not no_sampling:
        non_private_probs = [
            sampling_method.inclusion_prob(freq, threshold)
            for freq in frequencies
        ]
        curves.append((non_private_probs, "tab:green", "Non-private", "d"))
    curves.append((pws_probs, "tab:blue", "PWS", "s"))
    curves.append((sbh_probs, "tab:orange", "SbH", "."))

    for probs, color, label, marker in curves:
        plt.loglog(frequencies,
                   probs,
                   color=color,
                   label=label,
                   marker=marker,
                   markevery=0.25)

    plt.title(title)
    plt.legend()
    plt.savefig(output_path)