コード例 #1
0
    def by_impressions(self,
                       impressions: List[int],
                       max_frequency: int = 1) -> ReachPoint:
        """Estimates k+ reach for a hypothetical number of impressions.

        Args:
          impressions: list holding exactly one int, the hypothetical number
            of impressions that are shown.
          max_frequency: int, the number of k+ reach values to report.
        Returns:
          A ReachPoint with the estimated k+ reaches for this number of
          impressions.  When self._cpi is truthy the point also carries a
          spend of impressions[0] * self._cpi.
        Raises:
          ValueError: if impressions does not have exactly one element.
        """
        if len(impressions) != 1:
            raise ValueError("Impressions vector must have a length of 1.")

        num_impressions = impressions[0]
        # NOTE(review): _fit receives one more than the requested impression
        # count; the reason is not visible from this method -- confirm in _fit.
        self._fit(num_impressions + 1)

        # Ratio of requested impressions to self._max_impressions, capped at 1.
        fraction = min(num_impressions / self._max_impressions, 1.0)
        below_max = self._max_reach * self._dist.kreach(
            np.arange(1, max_frequency), fraction)
        at_or_above_max = self._max_reach * self._dist.kplusreach(
            max_frequency, fraction)
        histogram = [*below_max, at_or_above_max]
        # Accumulating from the tail converts per-bucket values into
        # cumulative (k+) values.
        kplus_reaches = np.cumsum(histogram[::-1])[::-1]

        if not self._cpi:
            return ReachPoint(impressions, kplus_reaches)
        return ReachPoint(impressions, kplus_reaches,
                          [num_impressions * self._cpi])
    def by_impressions(
        self, impressions: List[int], max_frequency: int = 1
    ) -> ReachPoint:
        """Estimates k+ reach for a hypothetical number of impressions.

        Args:
          impressions: list holding exactly one int, the hypothetical number
            of impressions that are shown.
          max_frequency: int, the number of k+ reach values to report.
        Returns:
          A ReachPoint with the first max_frequency k+ reach values.  When
          self._cpi is truthy the point also carries a spend of
          impressions[0] * self._cpi.
        Raises:
          ValueError: if impressions does not have exactly one element.
        """
        if len(impressions) != 1:
            raise ValueError("Impressions vector must have a length of 1.")

        # The requested count is capped just below self._max_impressions
        # before it is handed to the histogram computation.
        capped_impressions = min(impressions[0], self._max_impressions - 1)
        histogram = self._expected_histogram(
            capped_impressions,
            self._max_impressions,
            self.max_reach,
            self._alpha,
            self._beta,
            MAXIMUM_COMPUTATIONAL_FREQUENCY,
        )
        # Only the first max_frequency entries are reported to the caller.
        reported = self._kplus_reaches_from_frequencies(histogram)[:max_frequency]

        if not self._cpi:
            return ReachPoint(impressions, reported)
        return ReachPoint(impressions, reported, [impressions[0] * self._cpi])
def _shuffle_distance(xpoint: ReachPoint, ypoint: ReachPoint, k=5) -> float:
    """Computes the shuffle distance over the first k frequency buckets.

    The frequencies 1..k of each point are normalized to sum to one, and the
    result is half the sum of absolute differences between the two
    normalized vectors.

    Returns 1.0 when either point has max_frequency <= k, and 0.5 when the
    first k frequencies of either point sum to zero.
    """
    if not (xpoint.max_frequency > k and ypoint.max_frequency > k):
        return 1.0

    buckets = range(1, k + 1)
    x_hist = np.array([xpoint.frequency(bucket) for bucket in buckets])
    y_hist = np.array([ypoint.frequency(bucket) for bucket in buckets])

    # A zero total cannot be normalized, so a fixed value is returned instead.
    x_total = sum(x_hist)
    if x_total == 0:
        return 0.5
    y_total = sum(y_hist)
    if y_total == 0:
        return 0.5

    return 0.5 * np.sum(np.abs(x_hist / x_total - y_hist / y_total))
    def true_reach_by_spend(self, spend: float, max_frequency: int = 1) -> ReachPoint:
        """Returns the true reach obtained for a given spend.

        Args:
            spend:  The hypothetical amount spent.
            max_frequency:  The maximum frequency for which to report reach.
        Returns:
            A ReachPoint built from the per-user counts that the publisher
            data reports for this spend.  Its impression count is the sum of
            those counts, and its spend is the given amount.
        """
        counts_by_user = self._publisher_data.user_counts_by_spend(spend)
        total_impressions = sum(counts_by_user.values())
        kplus_reaches = ReachPoint.user_counts_to_kplus_reaches(
            counts_by_user, max_frequency
        )
        return ReachPoint([total_impressions], kplus_reaches, [spend])
コード例 #5
0
    def test_fit_exponential_poisson_model(self):
        """Checks _fit_exponential_poisson_model on two reach points.

        The first point has a single reach value, the second has ten.  For
        each, the returned pair is compared against reference values for the
        first element and for the distribution's _alpha and _beta.
        """
        single_value_point = ReachPoint([20000], [10000])
        first_model = KInflatedGammaPoissonModel([single_value_point])
        n_first, dist_first = first_model._fit_exponential_poisson_model(
            single_value_point)
        self.assertAlmostEqual(n_first, 13333.33, delta=1)
        self.assertAlmostEqual(dist_first._alpha, 1.0)
        self.assertAlmostEqual(dist_first._beta, 2.0, delta=0.1)

        ten_value_point = ReachPoint(
            [19971],
            [7993, 4815, 2914, 1759, 1011, 604, 355, 214, 122, 75])
        # NOTE(review): a second model is constructed here, but the fit below
        # is still invoked on the first model -- confirm whether that is
        # intended.
        second_model = KInflatedGammaPoissonModel([ten_value_point])
        n_second, dist_second = first_model._fit_exponential_poisson_model(
            ten_value_point)
        self.assertAlmostEqual(n_second, 8564, delta=1)
        self.assertAlmostEqual(dist_second._alpha, 1.0)
        self.assertAlmostEqual(dist_second._beta, 1.8, delta=0.1)
    def reach_by_spend(self,
                       spends: Iterable[float],
                       max_frequency: int = 10) -> ReachPoint:
        """Number of people reached for a given spend.

        Args:
          spends:  The spend amounts.  The number of amounts must equal the
            value of publisher_count.  spends[i] is the amount spent with
            publisher i.
          max_frequency: int, The maximum frequency that should be counted.  All
            frequencies above this amount will be grouped into a single bucket.
        Returns:
          A ReachPoint object representing the k+ reach for each frequency
          in the range 1..max_frequency.
        Raises:
          ValueError: if the number of spends differs from publisher_count.
        """
        # The annotation admits any iterable, but the length check below needs
        # a sized object.  Unsized inputs (e.g. generators) are materialized;
        # sized inputs are passed through untouched.
        if not hasattr(spends, "__len__"):
            spends = list(spends)
        if len(spends) != self.publisher_count:
            raise ValueError(
                "Invalid spends vector length.  Got {}, expected {}".format(
                    len(spends), self.publisher_count))
        # Per-user impression totals accumulated across all publishers.
        counts = defaultdict(int)
        impressions = []
        for i, publisher_spend in enumerate(spends):
            user_counts = self._data[i].user_counts_by_spend(publisher_spend)
            impressions.append(sum(user_counts.values()))
            for user_id, freq in user_counts.items():
                counts[user_id] += freq
        kplus_reaches = self._counts_to_histogram(counts, max_frequency)
        return ReachPoint(impressions, kplus_reaches, spends)
コード例 #7
0
 def test_expected_histogram(self):
     """Checks the three values returned by _expected_histogram(4, 12, 16, 1, 1, max_freq=3)."""
     model = GammaPoissonModel([ReachPoint([20], [10])])
     histogram = model._expected_histogram(4, 12, 16, 1, 1, max_freq=3)
     self.assertLen(histogram, 3)
     for position, expected in enumerate((6, 3 / 2, 3 / 8)):
         self.assertAlmostEqual(histogram[position], expected)
コード例 #8
0
    def test_logpmf(self):
        """Checks _logpmf against a numerically integrated Gamma-Poisson pmf.

        Each case compares gpm._logpmf(n, alpha, beta) with the log of the
        directly integrated Gamma-Poisson pmf evaluated at n - 1.
        """
        # The coded implementation of the Gamma-Poisson makes use of the fact
        # that a Gamma-Poisson with parameters (alpha, beta) is equivalent
        # to a negative binomial with parameters (p, r) =
        # (beta / (1 + beta), alpha).  In this test, we compute the
        # Gamma-Poisson directly through numerical integration and compare
        # it to the values computed via the negative binomial.
        def gamma_poisson_integrand(k, mu, alpha, beta):
            return scipy.stats.poisson.pmf(k, mu) * scipy.stats.gamma.pdf(
                mu, alpha, scale=1.0 / beta)

        def gamma_poisson_pmf(k, alpha, beta):
            # np.inf is used because the np.Inf alias was removed in
            # NumPy 2.0.
            return scipy.integrate.quad(
                lambda x: gamma_poisson_integrand(k, x, alpha, beta), 0.0,
                np.inf)[0]

        gpm = GammaPoissonModel([ReachPoint([20], [10])])
        cases = (
            (1, 1.0, 1.0),
            (2, 1.0, 1.0),
            (2, 3.0, 1.0),
            (2, 1.0, 4.0),
        )
        for n, alpha, beta in cases:
            self.assertAlmostEqual(
                gpm._logpmf(n, alpha, beta),
                np.log(gamma_poisson_pmf(n - 1, alpha, beta)))
コード例 #9
0
 def test_by_spend(self):
     """Checks by_spend reach values of a fitted FakeReachSurface."""
     surface = FakeReachSurface([ReachPoint([1000, 2000], [1000])])
     surface._fit()

     # Default max_frequency and default reach() argument.
     self.assertEqual(surface.by_spend([1, 2]).reach(), 300)

     # Explicit max_frequency of 3, queried at individual frequencies.
     cases = (
         ([1, 2], 2, 150),
         ([1, 2], 3, 100),
         ([0.5, 1], 3, 50),
     )
     for spends, frequency, expected in cases:
         self.assertEqual(surface.by_spend(spends, 3).reach(frequency), expected)
 def by_impressions(self, impressions: [int], max_frequency: int = 1) -> ReachPoint:
     """Returns the estimated reach for a given impression vector.

     For each frequency k in 1..max_frequency the reported value is
     min(sum(impressions), self.max_reach) // k.
     """
     kplus_frequencies = []
     for frequency in range(1, max_frequency + 1):
         capped_total = min(sum(impressions), self.max_reach)
         kplus_frequencies.append(capped_total // frequency)
     return ReachPoint(impressions, kplus_frequencies)
コード例 #11
0
 def test_exponential_poisson_N(self):
     """Checks _exponential_poisson_N against two reference values (within 1)."""
     kgpm = KInflatedGammaPoissonModel([ReachPoint([100], [100])])
     # Each case is (first argument, second argument, expected result).
     cases = (
         (19971, 7992, 9769),
         (20000, 10000, 13333.33),
     )
     for first_arg, second_arg, expected in cases:
         self.assertAlmostEqual(
             kgpm._exponential_poisson_N(first_arg, second_arg),
             expected,
             delta=1,
         )
コード例 #12
0
 def test_fit_variable_N(self, mock_gamma_poisson_model):
     """Checks _max_reach and _alpha after _fit with the injected mock.

     The mock is configured to return (30000, 10000, 1.0, 2.0).
     """
     mock_gamma_poisson_model.return_value = (30000, 10000, 1.0, 2.0)
     observed = [2853, 813, 230, 64, 17, 4, 1, 0, 0, 0]
     model = GammaPoissonModel([ReachPoint([4000], observed)])
     model._fit()
     self.assertAlmostEqual(model._max_reach, 10000, delta=1)
     self.assertAlmostEqual(model._alpha, 1.0, delta=0.01)
コード例 #13
0
 def test_kreach(self):
     """Checks individual elements of _kreach results against reference values."""
     gpm = GammaPoissonModel([ReachPoint([20], [10])])
     # Each case is (positional arguments, index into the result, expected).
     cases = (
         (([0], 1, 2, 1, 1), 0, 1 / 3),
         (([0], 1, 3, 1, 1), 0, 1 / 2),
         (([1], 1, 3, 1, 1), 0, 3 / 8),
         (([0, 1, 2], 1, 3, 1, 1), 0, 1 / 2),
         (([0, 1, 2], 1, 3, 1, 1), 1, 3 / 8),
         (([0, 1, 2], 1, 3, 1, 1), 2, 3 / 32),
     )
     for args, position, expected in cases:
         self.assertAlmostEqual(gpm._kreach(*args)[position], expected)
コード例 #14
0
 def test_exponential_poisson_reach(self):
     """Checks _exponential_poisson_reach against two reference values."""
     kgpm = KInflatedGammaPoissonModel([ReachPoint([100], [100])])

     # First case uses the default assertAlmostEqual precision.
     first_reach = kgpm._exponential_poisson_reach(20000, 10000, 3.0)
     self.assertAlmostEqual(first_reach, 8000.0)

     # Second case only has to match the reference to within 1.
     second_reach = kgpm._exponential_poisson_reach(19971, 12560, 26.19)
     self.assertAlmostEqual(second_reach, 7888.78, delta=1)
コード例 #15
0
 def test_exponential_poisson_N_from_beta(self):
     """Checks _exponential_poisson_N_from_beta against two reference values."""
     kgpm = KInflatedGammaPoissonModel([ReachPoint([100], [100])])

     # First case only has to match the reference to within 1.
     first_n = kgpm._exponential_poisson_N_from_beta(19971, 7992, 2.41)
     self.assertAlmostEqual(first_n, 9416.0, delta=1)

     # Second case uses the default assertAlmostEqual precision.
     second_n = kgpm._exponential_poisson_N_from_beta(30000, 10000, 2)
     self.assertAlmostEqual(second_n, 10000.0)
コード例 #16
0
 def by_spend(self, spend: [float], max_frequency: int = 1) -> ReachPoint:
     """Returns the estimated reach for a given spend vector.

     Every spend amount is converted to 100 impressions per unit of spend.
     For each frequency k in 1..max_frequency the reported value is
     min(sum(impressions), self.max_reach) // k.
     """
     impressions = []
     for amount in spend:
         impressions.append(100 * amount)

     kplus_frequencies = []
     for frequency in range(1, max_frequency + 1):
         capped_total = min(sum(impressions), self.max_reach)
         kplus_frequencies.append(capped_total // frequency)
     return ReachPoint(impressions, kplus_frequencies, spend)
コード例 #17
0
 def test_by_impressions(self, mock_gamma_poisson_model):
     """Checks by_impressions of a fitted GammaPoissonModel.

     The injected mock is configured to return (25000, 5.0, 2.0).  The model
     is fitted to one training point and then queried at 10000 impressions
     for frequencies 1..5.  The spend must be 100.0, and each k+ reach must
     satisfy (actual - expected)**2 / actual < 0.1.
     """
     mock_gamma_poisson_model.return_value = (25000, 5.0, 2.0)
     # Imax = 25000, N = 10000, alpha = 5, beta = 2
     h_training = [8124, 5464, 3191, 1679, 815, 371, 159, 64, 23, 6, 0]
     rp = ReachPoint([20000], h_training, [200.0])
     gpm = GammaPoissonModel([rp], max_reach=10000)
     gpm._fit()
     rp = gpm.by_impressions([10000], max_frequency=5)
     h_expected = np.array([9682, 8765, 7353, 5750, 4233])
     h_actual = np.array([int(rp.reach(i)) for i in range(1, 6)])
     self.assertAlmostEqual(rp.spends[0], 100.0)
     # Each position is checked on its own so that a failure names the
     # offending frequency.
     for i, (actual, expected) in enumerate(zip(h_actual, h_expected)):
         self.assertTrue(
             (actual - expected)**2 / actual < 0.1,
             f"Discrepancy found at position {i}. "
             f"Got {actual} Expected {expected}",
         )
 def generate_sample_reach_curves(self, num_publishers, decay_rate, universe_size):
     """Builds one fitted LinearCappedReachCurve per publisher.

     Publisher number i (starting at 0) is given a maximum reach of
     universe_size * decay_rate ** i.  Its curve is built from a single
     ReachPoint whose impression count and reach both equal that value.

     Returns:
       A list of fitted curves, one per publisher, in publisher order.
     """

     def fitted_curve(max_reach):
         # One training point where impressions and reach are both max_reach.
         curve = LinearCappedReachCurve([ReachPoint([max_reach], (max_reach,))])
         curve._fit()
         return curve

     max_reaches = []
     for pub_num in range(num_publishers):
         max_reaches.append(universe_size * (decay_rate ** pub_num))
     return [fitted_curve(max_reach) for max_reach in max_reaches]
 def generate_true_reach_independent_two_pubs(self, universe_size,
                                              reach_curves, impressions):
     """Returns the combined reach of the given curves as a ReachPoint.

     Each curve is evaluated at its own impression count, and the combined
     reach is computed as

         sum(reaches) - product(reaches) / universe_size

     Args:
       universe_size: Divisor applied to the product of the reaches.
       reach_curves: Reach curves, paired positionally with impressions.
       impressions: One impression count per reach curve.
     Returns:
       A ReachPoint holding the impressions and the single combined reach.
     """
     reach_vector = [
         reach_curve.by_impressions([impression]).reach()
         for reach_curve, impression in zip(reach_curves, impressions)
     ]
     overlap = reduce(operator.mul, reach_vector) / universe_size
     reach = sum(reach_vector) - overlap
     return ReachPoint(impressions, [reach])
コード例 #20
0
 def test_overspend(self, mock_gamma_poisson_model):
     """Checks that 1+ reach is 10000 (within 0.1) at 30000 impressions and at a spend of 300."""
     # Imax = 25000, N = 10000, alpha = 5, beta = 2
     training_point = ReachPoint(
         [20000],
         [8124, 5464, 3191, 1679, 815, 371, 159, 64, 23, 6, 0],
         [200.0],
     )
     model = GammaPoissonModel([training_point], max_reach=10000)
     model._fit()

     reach_from_impressions = model.by_impressions([30000]).reach(1)
     self.assertAlmostEqual(reach_from_impressions, 10000.0, delta=0.1)

     reach_from_spend = model.by_spend([300]).reach(1)
     self.assertAlmostEqual(reach_from_spend, 10000.0, delta=0.1)
 def test_user_counts_to_frequencies(self):
     """Checks ReachPoint.user_counts_to_frequencies with a maximum of 3."""
     # Each case is (user counts, expected result).
     cases = (
         ({}, [0, 0, 0]),
         ({3: 1}, [1, 0, 0]),
         ({3: 1, 2: 1}, [2, 0, 0]),
         ({3: 1, 2: 1, 1: 2, 4: 3, 5: 4}, [2, 1, 2]),
     )
     for user_counts, expected in cases:
         self.assertEqual(
             ReachPoint.user_counts_to_frequencies(user_counts, 3), expected
         )
コード例 #22
0
 def test_by_impressions(self, mock_fit_point):
     """Checks by_impressions of a fitted KInflatedGammaPoissonModel.

     The injected mock is configured to return 10000 together with a
     KInflatedGammaPoissonDistribution(5.0, 2.0, []).  The model is fitted to
     one training point and then queried at 10000 impressions for
     frequencies 1..5.  The spend must be 133.0 within 1, and each reach
     value must satisfy (actual - expected)**2 / actual < 0.1.
     """
     mock_fit_point.return_value = (
         10000,
         KInflatedGammaPoissonDistribution(5.0, 2.0, []),
     )
     # Imax = 25000, N = 10000, alpha = 5, beta = 2
     h_training = [7412, 4233, 2014, 842, 320, 112, 37, 11, 2]
     rp = ReachPoint([15000], h_training, [200.0])
     kgpm = KInflatedGammaPoissonModel([rp])
     kgpm._fit()
     rp = kgpm.by_impressions([10000], max_frequency=5)
     h_expected = np.array([6056, 2629, 925, 283, 78])
     h_actual = np.array([int(rp.reach(i)) for i in range(1, 6)])
     self.assertAlmostEqual(rp.spends[0], 133.0, delta=1)
     # Each position is checked on its own so that a failure names the
     # offending frequency.
     for i, (actual, expected) in enumerate(zip(h_actual, h_expected)):
         self.assertTrue(
             (actual - expected)**2 / actual < 0.1,
             f"Discrepancy found at position {i}. "
             f"Got {actual} Expected {expected}",
         )
 def generate_true_reach(self, a, reach_curves, impressions, spends=None):
     """Returns the combined reach of the given curves as a ReachPoint.

     Each curve is evaluated at its own impression count, giving reaches r.
     With p = len(reach_curves), the combined reach is

         sum(r) - sum over i, j of
             a[i * p + j] * r[i] * r[j] / (2 * max(max_reach_i, max_reach_j))

     Args:
       a: Flat sequence of coefficients, indexed as a[i * p + j].
       reach_curves: Reach curves, paired positionally with impressions.
       impressions: One impression count per reach curve.
       spends: Optional spends that are passed through to the ReachPoint.
     Returns:
       A ReachPoint holding the impressions, the single combined reach and
       the spends.
     """
     p = len(reach_curves)
     reach_vector = []
     for reach_curve, impression in zip(reach_curves, impressions):
         reach_vector.append(reach_curve.by_impressions([impression]).reach())

     reach = sum(reach_vector)
     for i, curve_i in enumerate(reach_curves):
         for j, curve_j in enumerate(reach_curves):
             numerator = a[i * p + j] * reach_vector[i] * reach_vector[j]
             denominator = max(curve_i.max_reach, curve_j.max_reach) * 2
             reach -= numerator / denominator
     return ReachPoint(impressions, [reach], spends)
コード例 #24
0
 def test_knreach(self):
     """Checks _knreach with scalar and array second arguments."""
     gpm = GammaPoissonModel([ReachPoint([20], [10])])

     # Scalar arguments: each case is (positional arguments, expected).
     scalar_cases = (
         ((1, 1, 1, 2, 1.0, 1.0), 0.25),
         ((2, 1, 1, 2, 1.0, 1.0), 0),
         ((1, 1, 1, 2, 1.0, 2.0), 1.0 / 6.0),
     )
     for args, expected in scalar_cases:
         self.assertAlmostEqual(gpm._knreach(*args), expected)

     # Array second argument: each case is (array, index checked, expected).
     array_cases = (
         (np.array([1]), 0, 1 / 4),
         (np.array([1, 2]), 0, 1 / 4),
         (np.array([1, 2]), 1, 1 / 8),
     )
     for second_arg, position, expected in array_cases:
         result = gpm._knreach(1, second_arg, 1, 2, 1.0, 1.0)
         self.assertAlmostEqual(result[position], expected)

     self.assertAlmostEqual(
         gpm._knreach(1, 3, 1, 3, 1.0, 1.0),
         3 * (1 / 3) * (2 / 3)**2 * (1 / 8),
     )
 def test_user_counts_to_kplus_reaches(self):
     """Checks ReachPoint.user_counts_to_kplus_reaches with a maximum of 3."""
     # Each case is (user counts, expected result).
     cases = (
         ({}, [0, 0, 0]),
         ({3: 1}, [1, 0, 0]),
         ({3: 2}, [1, 1, 0]),
         ({3: 1, 2: 1}, [2, 0, 0]),
         ({3: 1, 2: 1, 1: 2, 4: 3, 5: 4}, [5, 3, 2]),
     )
     for user_counts, expected in cases:
         self.assertEqual(
             ReachPoint.user_counts_to_kplus_reaches(user_counts, 3), expected
         )
コード例 #26
0
 def fit(self, halo: HaloSimulator, params: SystemParameters,
         budget: PrivacyBudget) -> ReachSurface:
     """Returns a GoergModel fitted to one fixed reach point.

     None of the arguments is consulted.  The model is always built from a
     single ReachPoint with 2 impressions, a reach of 2 and a spend of 2.0.
     """
     fixed_point = ReachPoint([2], [2], [2.0])
     model = GoergModel([fixed_point])
     model._fit()
     return model
    def by_impressions(self,
                       impressions: Iterable[int],
                       max_frequency: int = 1) -> ReachPoint:
        """Returns the estimated reach for an impression vector.

        With r = self.get_reach_vector(impressions), the reach is

            N * sum over j < c of (1 - prod over i < p of
                                   (1 - a[j + i * c] * r[i] / N))

        where N, c and p are self._N, self._c and self._p.

        Args:
          impressions: Impression counts, passed to get_reach_vector.
          max_frequency: Accepted but not used; a single reach value is
            always returned.
        Returns:
          A ReachPoint holding the impressions and the single reach value.
        """
        reach_vector = self.get_reach_vector(impressions)
        total = 0
        for j in range(self._c):
            complement_product = 1
            for i in range(self._p):
                coefficient = self.a[j + i * self._c]
                complement_product *= 1 - (
                    (coefficient * reach_vector[i]) / self._N)
            total += 1 - complement_product
        return ReachPoint(impressions, [total * self._N])
コード例 #28
0
    def simulated_reach_by_spend(
        self,
        spends: List[float],
        budget: PrivacyBudget,
        privacy_budget_split: float = 0.5,
        max_frequency: int = 1,
    ) -> ReachPoint:
        """Returns a reach point that adds up two single-spend evaluations.

        self.curve.by_spend is evaluated separately for spends[0] and
        spends[1], and the reach values of the two results are added
        frequency by frequency.

        Args:
            spends:  Spend vector; only the first two entries are evaluated.
            budget:  Accepted but not used by this implementation.
            privacy_budget_split:  Accepted but not used by this
              implementation.
            max_frequency:  The number of frequencies for which reach is
              reported.
        Returns:
            A ReachPoint holding the two impression counts, the summed reach
            for each frequency in 1..max_frequency, and the given spends.
        """
        rp1 = self.curve.by_spend([spends[0]], max_frequency=max_frequency)
        rp2 = self.curve.by_spend([spends[1]], max_frequency=max_frequency)

        summed_reaches = [
            rp1.reach(k) + rp2.reach(k) for k in range(1, max_frequency + 1)
        ]
        return ReachPoint(
            [rp1.impressions[0], rp2.impressions[0]], summed_reaches, spends
        )
    def _generate_reach_points_from_venn_diagram(
        self, spends: List[float], primitive_regions: Dict[int, int]
    ) -> List[ReachPoint]:
        """Return one reach point per non-empty subset of active publishers.

        A publisher is active when its spend is truthy.  For every non-empty
        subset of the active publishers, the reach of the users reached by at
        least one publisher of the subset is aggregated from the primitive
        regions.  The generated reach points contain 1+ reaches only.

        Args:
            spends:  The hypothetical spend vector, equal in length to the
              number of publishers.  spends[i] is the amount that is spent with
              publisher i.
            primitive_regions:  A dictionary in which each key is the binary
              representation of a primitive region of the Venn diagram, and
              each value is the reach in the corresponding region.
              The bits of the key identify the publishers forming that
              primitive region.  For example, primitive_regions[key] with
              key = 5 = bin('101') is the region which belongs to pub_id-0
              and pub_id-2.
        Returns:
            A list of ReachPoint, ordered by increasing subset size.  In each
            point, the impressions and spends of publishers outside the
            subset are zeroed out.
        """
        num_publishers = len(spends)
        active_pubs = [pub_id for pub_id, amount in enumerate(spends) if amount]
        nonempty_subsets = chain.from_iterable(
            combinations(active_pubs, size)
            for size in range(1, len(active_pubs) + 1)
        )
        impressions = self._data_set.impressions_by_spend(spends)

        reach_points = []
        for subset in nonempty_subsets:
            subset_reach = self._aggregate_reach_in_primitive_venn_diagram_regions(
                subset, primitive_regions
            )

            # 0/1 mask that keeps only the publishers belonging to the subset.
            members = set(subset)
            mask = np.array(
                [int(pub_id in members) for pub_id in range(num_publishers)]
            )
            masked_imps = (np.array(impressions) * mask).tolist()
            masked_spends = (np.array(spends) * mask).tolist()

            reach_points.append(
                ReachPoint(masked_imps, [subset_reach], masked_spends)
            )

        return reach_points
    def by_impressions(self, impressions: [int], max_frequency: int = 1) -> ReachPoint:
        """Estimates k+ reach for a hypothetical number of impressions.

        For each k in 1..max_frequency the k+ reach is

            self._rho * (I / (I + self._beta)) ** k

        where I is impressions[0].

        Args:
          impressions: list holding exactly one int, the hypothetical number
            of impressions that are shown.
          max_frequency: int, the number of k+ reach values to report.
        Returns:
          A ReachPoint with the estimated k+ reaches.  When self._cpi is
          truthy the point also carries a spend of impressions[0] * self._cpi.
        Raises:
          ValueError: if impressions does not have exactly one element.
        """
        if len(impressions) != 1:
            raise ValueError("Impressions vector must have a length of 1.")

        kplus_reach_list = [
            self._rho * (impressions[0] / (impressions[0] + self._beta)) ** k
            for k in range(1, max_frequency + 1)
        ]

        if not self._cpi:
            return ReachPoint(impressions, kplus_reach_list)
        return ReachPoint(
            impressions, kplus_reach_list, [impressions[0] * self._cpi]
        )