Example 1
    def test_m3_strategy_with_ground_truth(self):
        data1 = HeterogeneousImpressionGenerator(1000, gamma_shape=1.0, gamma_scale=2)()
        publisher1 = PublisherData(FixedPriceGenerator(0.1)(data1))
        data2 = HeterogeneousImpressionGenerator(1000, gamma_shape=1.0, gamma_scale=3)()
        publisher2 = PublisherData(FixedPriceGenerator(0.05)(data2))
        dataset = DataSet([publisher1, publisher2], "dataset")

        params = SystemParameters(
            [100.0, 100.0], LiquidLegionsParameters(), np.random.default_rng(seed=1)
        )
        halo = HaloSimulator(dataset, params, PrivacyTracker())

        budget = PrivacyBudget(1.0, 1e-5)
        m3strategy = M3Strategy(
            GammaPoissonModel,
            {},
            RestrictedPairwiseUnionReachSurface,
            {},
            use_ground_truth_for_reach_curves=True,
        )
        surface = m3strategy.fit(halo, params, budget)

        expected0 = surface.by_spend([10.0, 0.0]).reach(1)
        actual0 = dataset.reach_by_spend([10.0, 0.0]).reach(1)
        self.assertAlmostEqual(expected0, actual0, delta=1)

        expected1 = surface.by_spend([0.0, 10.0]).reach(1)
        actual1 = dataset.reach_by_spend([0.0, 10.0]).reach(1)
        self.assertAlmostEqual(expected1, actual1, delta=1)

        expected2 = surface.by_spend([10.0, 10.0]).reach(1)
        actual2 = dataset.reach_by_spend([10.0, 10.0]).reach(1)
        self.assertAlmostEqual(expected2, actual2, delta=10)
Example 2
    def test_simulated_venn_diagram_reach_by_spend_without_active_pub(self):
        pdfs = [
            PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                          "pdf1"),
            PublisherData([(2, 0.03), (4, 0.06)], "pdf2"),
            PublisherData([(2, 0.01), (3, 0.03), (4, 0.05)], "pdf3"),
        ]
        data_set = DataSet(pdfs, "test")
        params = SystemParameters(
            [0.4, 0.5, 0.4],
            LiquidLegionsParameters(),
            FakeRandomGenerator(),
        )
        privacy_tracker = PrivacyTracker()
        halo = HaloSimulator(data_set, params, privacy_tracker)

        spends = [0, 0, 0]
        budget = PrivacyBudget(0.2, 0.4)
        privacy_budget_split = 0.5
        max_freq = 1

        reach_points = halo.simulated_venn_diagram_reach_by_spend(
            spends, budget, privacy_budget_split, max_freq)

        # With zero spend on every publisher there are no active publishers,
        # so no reach points are generated and no privacy budget is consumed.
        expected_reach_points = []

        self.assertEqual(expected_reach_points, reach_points)
        self.assertEqual(halo.privacy_tracker.privacy_consumption.epsilon, 0)
        self.assertEqual(halo.privacy_tracker.privacy_consumption.delta, 0)
        self.assertEqual(len(halo.privacy_tracker._noising_events), 0)
Example 3
    def test_compute_trial_results_path(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design = DataDesign(join(d, "data_design"))
            data_design.add(data_set)

            msd = ModelingStrategyDescriptor("strategy", {},
                                             "single_pub_model", {},
                                             "multi_pub_model", {})
            sparams = SystemParameters(
                [0.03, 0.05],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "tps")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial("edir", data_design, "dataset",
                                      trial_descriptor)

            actual = trial._compute_trial_results_path()
            expected = "{}/{}/{},{},{},{}".format(
                "edir",
                "dataset",
                "strategy,single_pub_model,multi_pub_model",
                "spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0",
                "epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5",
                "test_point_strategy=tps.csv",
            )
            self.assertEqual(actual, expected)
Example 4
def generate_experimental_design_config(
        seed: int = 1) -> Iterable[TrialDescriptor]:
    """Generates a list of TrialDescriptors for a single publisher model."""
    keys = list(LEVELS.keys())
    levels = [LEVELS[k] for k in keys]
    for sample in itertools.product(*levels):
        design_parameters = dict(zip(keys, sample))
        mstrategy = ModelingStrategyDescriptor(
            "single_publisher",
            {},
            design_parameters["modeling_strategy"][0],
            design_parameters["modeling_strategy"][1],
            "none",
            {},
        )
        sparams = SystemParameters(
            [design_parameters["campaign_spend_fraction"]],
            LiquidLegionsParameters(
                design_parameters["liquid_legions_decay_rate"],
                design_parameters["liquid_legions_sketch_size"],
            ),
            np.random.default_rng(seed=seed),
        )
        eparams = ExperimentParameters(
            PrivacyBudget(
                design_parameters["privacy_budget_epsilon"],
                design_parameters["privacy_budget_delta"],
            ),
            design_parameters["replica_id"],
            design_parameters["max_frequency"],
            "grid",
            {"grid_size": design_parameters["test_points"]},
        )
        yield TrialDescriptor(mstrategy, sparams, eparams)
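
For context, this full-factorial generator assumes a module-level LEVELS dict mapping each design dimension to its candidate values. Below is a minimal sketch of what such a dict could look like: the keys are exactly the ones the function reads, but the concrete values are illustrative assumptions, not taken from the source.

# Hypothetical LEVELS for the full-factorial generator above.  Only the keys
# are grounded in the code; the values are illustrative placeholders.
LEVELS = {
    "modeling_strategy": [("goerg", {}), ("gamma_poisson", {})],
    "campaign_spend_fraction": [0.1, 0.5],
    "liquid_legions_decay_rate": [13],
    "liquid_legions_sketch_size": [1e6],
    "privacy_budget_epsilon": [1.0],
    "privacy_budget_delta": [0.01],
    "replica_id": [1, 2, 3],
    "max_frequency": [5],
    "test_points": [5],
}
# itertools.product walks every combination of levels, so this sketch would
# yield 2 * 2 * 3 = 12 TrialDescriptors.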
Example 5
def generate_experimental_design_config(
        seed: int = 1) -> Iterable[TrialDescriptor]:
    """Generates a list of TrialDescriptors.

    This example illustrates a Latin hypercube sampling strategy.
    """
    keys = LEVELS.keys()
    levels = [len(LEVELS[k]) for k in keys]
    np.random.seed(seed)
    for i, sample in enumerate(
        lhs(n=len(levels), samples=NUM_TRIALS_PER_DATASET, criterion="maximin")
    ):
        design_parameters = {}
        for key, level in zip(keys, sample):
            # Map each LHS coordinate in [0, 1] to a level index; the min()
            # guards against an index overflow when a coordinate equals 1.0.
            index = min(int(level * len(LEVELS[key])), len(LEVELS[key]) - 1)
            design_parameters[key] = LEVELS[key][index]
        mstrategy = design_parameters["modeling_strategies"]
        sparams = SystemParameters(
            design_parameters["campaign_spend_fractions"],
            design_parameters["liquid_legions_params"],
            np.random.default_rng(seed=seed),
        )
        test_point_generator, test_point_params = design_parameters[
            "test_point_strategies"
        ]
        eparams = ExperimentParameters(
            design_parameters["privacy_budgets"],
            design_parameters["replica_ids"],
            design_parameters["max_frequencies"],
            test_point_generator,
            test_point_params,
        )
        yield TrialDescriptor(mstrategy, sparams, eparams)
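
The index arithmetic above deserves a worked example. lhs (presumably pyDOE's Latin hypercube sampler) returns one row per trial, with one coordinate in [0, 1] per design dimension; multiplying a coordinate by the number of levels and truncating selects a level. A standalone sketch with made-up levels:

# Standalone sketch of the coordinate-to-level mapping used above.
levels = ["low", "medium", "high"]
for coordinate in (0.0, 0.34, 0.99):
    index = min(int(coordinate * len(levels)), len(levels) - 1)
    print(coordinate, "->", levels[index])
# Prints: 0.0 -> low, 0.34 -> medium, 0.99 -> high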
Example 6
    def test_class_setup_with_campaign_spend_fractions_generator(self):
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                             "pdf1")
        pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
        data_set = DataSet([pdf1, pdf2], "test")
        params = SystemParameters(
            liquid_legions=LiquidLegionsParameters(),
            generator=np.random.default_rng(1),
            campaign_spend_fractions_generator=(
                lambda dataset: [0.2] * dataset.publisher_count),
        )
        params = params.update_from_dataset(data_set)
        privacy_tracker = PrivacyTracker()
        halo = HaloSimulator(data_set, params, privacy_tracker)
        # The generator requests 20% of each publisher's total spend:
        # 0.2 * 0.05 = 0.01 for pdf1 and 0.2 * 0.06 = 0.012 for pdf2;
        # assertAlmostEqual absorbs the floating-point rounding error.
        self.assertAlmostEqual(halo._campaign_spends[0], 0.01, 7)
        self.assertAlmostEqual(halo._campaign_spends[1], 0.012, 7)
Example 7
    @classmethod
    def setUpClass(cls):
        cls.simulator = PlannerSimulator(
            FakeHaloSimulator(),
            FakeModelingStrategy(),
            SystemParameters([], LiquidLegionsParameters(),
                             np.random.default_rng(1)),
            PrivacyTracker(),
        )
Example 8
    def test_sample_venn_diagram(self, regions, sample_size, expected):
        params = SystemParameters([0], LiquidLegionsParameters(),
                                  FakeRandomGenerator())
        halo = HaloSimulator(DataSet([], "test"), params, PrivacyTracker())
        self.assertEqual(
            halo._sample_venn_diagram(regions, sample_size),
            expected,
        )
Example 9
    @classmethod
    def setUpClass(cls):
        pdf = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                            "pdf1")
        cls.params = SystemParameters([1.0, 0.5, 3.0],
                                      LiquidLegionsParameters(),
                                      np.random.default_rng(1))
        cls.privacy_tracker = PrivacyTracker()
        cls.publisher = Publisher(pdf, 1, cls.params, cls.privacy_tracker)
Example 10
    def test_evaluate(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            MODELING_STRATEGIES["fake"] = FakeModelingStrategy
            TEST_POINT_STRATEGIES["fake_tps"] = (
                lambda ds, rng: FakeTestPointGenerator().test_points())

            msd = ModelingStrategyDescriptor("fake", {"x": 1}, "goerg", {},
                                             "pairwise_union", {})
            sparams1 = SystemParameters(
                [0.03, 0.05],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            sparams2 = SystemParameters(
                [0.05, 0.03],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams1 = ExperimentParameters(PrivacyBudget(1.0, 0.01), 1, 5,
                                            "fake_tps")
            eparams2 = ExperimentParameters(PrivacyBudget(0.5, 0.001), 1, 5,
                                            "fake_tps")

            trial_descriptors = [
                TrialDescriptor(msd, sparams1, eparams1),
                TrialDescriptor(msd, sparams1, eparams2),
                TrialDescriptor(msd, sparams2, eparams1),
                TrialDescriptor(msd, sparams2, eparams2),
            ]

            exp = Experiment(experiment_dir, data_design, "dataset",
                             trial_descriptors)
            trials = exp.generate_trials()
            self.assertLen(trials, 4)
Example 11
    @classmethod
    def setUpClass(cls):
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                             "pdf1")
        pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
        data_set = DataSet([pdf1, pdf2], "test")

        cls.params = SystemParameters([0.4, 0.5], LiquidLegionsParameters(),
                                      np.random.default_rng(1))
        cls.privacy_tracker = PrivacyTracker()
        cls.halo = HaloSimulator(data_set, cls.params, cls.privacy_tracker)
Example 12
    def test_single_publisher_strategy(self, mock_gamma_poisson_model):
        mock_gamma_poisson_model.return_value = (30000, 10000, 1.0, 2.0)
        halo = FakeHalo()
        params = SystemParameters([100.0], LiquidLegionsParameters(),
                                  np.random.default_rng(seed=1))
        budget = PrivacyBudget(1.0, 1e-5)
        single_publisher_strategy = SinglePublisherStrategy(
            GammaPoissonModel, {}, None, {})
        surface = single_publisher_strategy.fit(halo, params, budget)

        expected0 = surface.by_spend([100.0]).reach(1)
        actual0 = halo.simulated_reach_by_spend([100.0], budget).reach(1)
        self.assertAlmostEqual(expected0, actual0, delta=10)
Example 13
    def test_form_venn_diagram_regions_with_publishers_more_than_limit(self):
        num_publishers = MAX_ACTIVE_PUBLISHERS + 1
        data_set = DataSet(
            [
                PublisherData([(1, 0.01)], f"pdf{i + 1}")
                for i in range(num_publishers)
            ],
            "test",
        )
        params = SystemParameters([0.4] * num_publishers,
                                  LiquidLegionsParameters(),
                                  np.random.default_rng(1))
        privacy_tracker = PrivacyTracker()
        halo = HaloSimulator(data_set, params, privacy_tracker)

        spends = [0.01] * num_publishers
        with self.assertRaises(ValueError):
            halo._form_venn_diagram_regions(spends)
Example 14
    def test_scale_up_reach_in_primitive_regions(
        self,
        mock_geometric_estimate_noiser,
        regions,
        true_cardinality,
        std,
        budget,
        privacy_budget_split,
        fixed_noise,
        expected,
    ):
        mock_geometric_estimate_noiser.return_value = FakeNoiser(fixed_noise)

        params = SystemParameters([0], LiquidLegionsParameters(),
                                  FakeRandomGenerator())
        halo = HaloSimulator(DataSet([], "test"), params, PrivacyTracker())

        scaled_regions = halo._scale_up_reach_in_primitive_regions(
            regions, true_cardinality, std, budget, privacy_budget_split)

        self.assertEqual(scaled_regions, expected)

        self.assertEqual(halo.privacy_tracker.privacy_consumption.epsilon,
                         budget.epsilon * privacy_budget_split)
        self.assertEqual(halo.privacy_tracker.privacy_consumption.delta,
                         budget.delta * privacy_budget_split)
        self.assertEqual(len(halo.privacy_tracker._noising_events), 1)
        self.assertEqual(
            halo.privacy_tracker._noising_events[0].budget.epsilon,
            budget.epsilon * privacy_budget_split,
        )
        self.assertEqual(
            halo.privacy_tracker._noising_events[0].budget.delta,
            budget.delta * privacy_budget_split,
        )
        self.assertEqual(
            halo.privacy_tracker._noising_events[0].mechanism,
            DP_NOISE_MECHANISM_DISCRETE_LAPLACE,
        )
        self.assertEqual(
            halo.privacy_tracker._noising_events[0].params,
            {"privacy_budget_split": privacy_budget_split},
        )
Example 15
    def test_make_independent_vars_dataframe(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design = DataDesign(join(d, "data_design"))
            data_design.add(data_set)

            msd = ModelingStrategyDescriptor("strategy", {},
                                             "single_pub_model", {},
                                             "multi_pub_model", {})
            sparams = SystemParameters(
                [0.03, 0.05],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "test_point_strategy")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial("edir", data_design, "dataset",
                                      trial_descriptor)

            actual = trial._make_independent_vars_dataframe()

            expected_trial_name = "strategy,single_pub_model,multi_pub_model,spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0,epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5,test_point_strategy=test_point_strategy"

            expected = pd.DataFrame({
                "dataset": ["dataset"],
                "trial": [expected_trial_name],
                "replica_id": [3],
                "single_pub_model": ["single_pub_model"],
                "multi_pub_model": ["multi_pub_model"],
                "strategy": ["strategy"],
                "liquid_legions_sketch_size": [1e6],
                "liquid_legions_decay_rate": [13],
                "maximum_reach": [4],
                "ncampaigns": [2],
                "largest_pub_reach": [3],
                "max_frequency": [5],
                "average_spend_fraction": [0.04],
            })
            pd.testing.assert_frame_equal(actual, expected)
Example 16
    def test_form_venn_diagram_regions(self, num_publishers, spends, max_freq,
                                       expected):
        pdfs = [
            PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                          "pdf1"),
            PublisherData([(2, 0.03), (4, 0.06)], "pdf2"),
            PublisherData([(2, 0.01), (3, 0.03), (4, 0.05)], "pdf3"),
        ]
        data_set = DataSet(pdfs[:num_publishers], "test")
        params = SystemParameters(
            [0.4] * num_publishers,
            LiquidLegionsParameters(),
            np.random.default_rng(1),
        )
        privacy_tracker = PrivacyTracker()
        halo = HaloSimulator(data_set, params, privacy_tracker)

        regions = halo._form_venn_diagram_regions(spends, max_freq)
        self.assertEqual(expected, regions)
Example 17
    def test_generate_reach_points_from_venn_diagram(self, num_publishers,
                                                     spends, regions,
                                                     expected):
        pdfs = [
            PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                          "pdf1"),
            PublisherData([(2, 0.03), (4, 0.06)], "pdf2"),
            PublisherData([(2, 0.01), (3, 0.03), (4, 0.05)], "pdf3"),
        ]
        data_set = DataSet(pdfs[:num_publishers], "test")
        params = SystemParameters(
            [0.4] * num_publishers,
            LiquidLegionsParameters(),
            np.random.default_rng(1),
        )
        privacy_tracker = PrivacyTracker()
        halo = HaloSimulator(data_set, params, privacy_tracker)

        # Note that the reach points generated from the Venn diagram only
        # contain 1+ reaches.
        reach_points = halo._generate_reach_points_from_venn_diagram(
            spends, regions)

        self.assertEqual(len(reach_points), len(expected))

        for i, (r_pt, expected_r_pt) in enumerate(zip(reach_points, expected)):
            self.assertEqual(
                r_pt.impressions,
                expected_r_pt.impressions,
                msg=f"The impressions of No.{i + 1} reach point is not correct",
            )
            self.assertEqual(
                r_pt.reach(1),
                expected_r_pt.reach(1),
                msg=f"The reach of No.{i + 1} reach point is not correct",
            )
            self.assertEqual(
                r_pt.spends,
                expected_r_pt.spends,
                msg=f"The spends of No.{i + 1} reach point is not correct",
            )
Example 18
def generate_experimental_design_config(
        seed: int = 1) -> Iterable[TrialDescriptor]:
    """Generates a list of TrialDescriptors for the 1st round eval of M3."""
    for level_combination in itertools.product(*LEVELS.values()):
        design_parameters = dict(zip(LEVELS.keys(), level_combination))
        mstrategy = design_parameters["modeling_strategies"]
        sparams = SystemParameters(
            liquid_legions=design_parameters["liquid_legions_params"],
            generator=np.random.default_rng(seed=seed),
            campaign_spend_fractions_generator=design_parameters[
                "campaign_spend_fractions_generators"],
        )
        test_point_generator, test_point_params = design_parameters[
            "test_point_strategies"]
        eparams = ExperimentParameters(
            design_parameters["privacy_budgets"],
            design_parameters["replica_ids"],
            design_parameters["max_frequencies"],
            test_point_generator,
            test_point_params,
        )
        yield TrialDescriptor(mstrategy, sparams, eparams)
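
Unlike the scalar-valued LEVELS assumed by the earlier generators, this variant expects each LEVELS entry to hold ready-made framework objects. A minimal sketch, assuming the constructors shown elsewhere in these examples (the strategy and model names here are illustrative, not from the source):

# Hypothetical LEVELS for the generator above; the keys match the ones the
# function reads, the values are illustrative.
LEVELS = {
    "modeling_strategies": [
        ModelingStrategyDescriptor("m3_strategy", {}, "gamma_poisson", {},
                                   "restricted_pairwise_union", {}),
    ],
    "liquid_legions_params": [LiquidLegionsParameters(13, 1e6, 1)],
    "campaign_spend_fractions_generators": [
        lambda dataset: [0.2] * dataset.publisher_count,
    ],
    "test_point_strategies": [("grid", {"grid_size": 5})],
    "privacy_budgets": [PrivacyBudget(1.0, 0.01)],
    "replica_ids": [1, 2, 3],
    "max_frequencies": [5],
}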
Example 19
    def test_add_dp_noise_to_primitive_regions(
        self,
        mock_geometric_estimate_noiser,
        regions,
        budget,
        privacy_budget_split,
        fixed_noise,
        expected_regions,
    ):
        mock_geometric_estimate_noiser.return_value = FakeNoiser(fixed_noise)

        halo = HaloSimulator(DataSet([], "test"), SystemParameters(),
                             PrivacyTracker())

        noised_regions = halo._add_dp_noise_to_primitive_regions(
            regions, budget, privacy_budget_split)

        self.assertEqual(noised_regions, expected_regions)
        self.assertEqual(halo.privacy_tracker.privacy_consumption.epsilon,
                         budget.epsilon * privacy_budget_split)
        self.assertEqual(halo.privacy_tracker.privacy_consumption.delta,
                         budget.delta * privacy_budget_split)
        self.assertEqual(len(halo.privacy_tracker._noising_events), 1)
        self.assertEqual(
            halo.privacy_tracker._noising_events[0].budget.epsilon,
            budget.epsilon * privacy_budget_split,
        )
        self.assertEqual(
            halo.privacy_tracker._noising_events[0].budget.delta,
            budget.delta * privacy_budget_split,
        )
        self.assertEqual(
            halo.privacy_tracker._noising_events[0].mechanism,
            DP_NOISE_MECHANISM_DISCRETE_LAPLACE,
        )
        self.assertEqual(
            halo.privacy_tracker._noising_events[0].params,
            {"privacy_budget_split": privacy_budget_split},
        )
Example 20
    def test_evaluate_single_publisher_model(self):
        with TemporaryDirectory() as d:
            data1 = HeterogeneousImpressionGenerator(1000,
                                                     gamma_shape=1.0,
                                                     gamma_scale=3.0)()
            pdf1 = PublisherData(FixedPriceGenerator(0.1)(data1))
            data_set = DataSet([pdf1], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            msd = ModelingStrategyDescriptor("single_publisher", {}, "goerg",
                                             {}, "pairwise_union", {})
            sparams = SystemParameters(
                [0.5],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "grid", {"grid_size": 5})
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial(
                experiment_dir,
                data_design,
                "dataset",
                trial_descriptor,
                analysis_type="single_pub",
            )
            result = trial.evaluate(seed=1)
            # We don't check each column in the resulting dataframe, because these have
            # been checked by the preceding unit tests.  However, we make a few strategic
            # probes.
            self.assertEqual(result.shape[0], 1)
            self.assertAlmostEqual(result["relative_error_at_100"][0],
                                   0.0,
                                   delta=0.01)
            self.assertGreater(result["max_nonzero_frequency_from_halo"][0], 0)
            self.assertEqual(result["max_nonzero_frequency_from_data"][0], 5)
Example 21
    def test_evaluate_when_there_is_a_modeling_exception(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (3, 0.04), (4, 0.05)],
                                 "pdf1")
            data_set = DataSet([pdf1], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            MODELING_STRATEGIES["fake"] = GoergModelingStrategy
            TEST_POINT_STRATEGIES["fake_tps"] = GoergTestPointGenerator

            msd = ModelingStrategyDescriptor("fake", {}, "goerg", {},
                                             "pairwise_union", {})
            sparams = SystemParameters(
                [0.5],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "fake_tps")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial(experiment_dir, data_design, "dataset",
                                      trial_descriptor)
            result = trial.evaluate(seed=1)
            # We don't check each column in the resulting dataframe, because these have
            # been checked by the preceding unit tests.  However, we make a few strategic
            # probes.
            self.assertEqual(result.shape[0], 1)
            self.assertEqual(result["dataset"][0], "dataset")
            self.assertEqual(result["replica_id"][0], 3)
            self.assertEqual(result["privacy_budget_epsilon"][0], 1.0)
            self.assertEqual(result["model_succeeded"][0], 0)
            self.assertEqual(
                result["model_exception"][0],
                "Cannot fit Goerg model when impressions <= reach.",
            )
Example 22
    def test_m3_strategy(self, mock_gamma_poisson_model):
        mock_gamma_poisson_model.return_value = (30000, 10000, 1.0, 2.0)
        halo = FakeHalo()
        params = SystemParameters(
            [100.0, 100.0], LiquidLegionsParameters(), np.random.default_rng(seed=1)
        )
        budget = PrivacyBudget(1.0, 1e-5)
        m3strategy = M3Strategy(
            GammaPoissonModel, {}, RestrictedPairwiseUnionReachSurface, {}
        )
        surface = m3strategy.fit(halo, params, budget)

        expected0 = surface.by_spend([100.0, 0.0]).reach(1)
        actual0 = halo.simulated_reach_by_spend([100.0, 0.0], budget).reach(1)
        self.assertAlmostEqual(expected0, actual0, delta=10)

        expected1 = surface.by_spend([0.0, 100.0]).reach(1)
        actual1 = halo.simulated_reach_by_spend([0.0, 100.0], budget).reach(1)
        self.assertAlmostEqual(expected1, actual1, delta=10)

        expected2 = surface.by_spend([100.0, 100.0]).reach(1)
        actual2 = halo.simulated_reach_by_spend([100.0, 100.0], budget).reach(1)
        self.assertAlmostEqual(expected2, actual2, delta=1000)
Example 23
    def test_evaluate_single_publisher_model_with_exception(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (3, 0.04), (4, 0.05)],
                                 "pdf1")
            data_set = DataSet([pdf1], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            MODELING_STRATEGIES["fake"] = GoergModelingStrategy
            TEST_POINT_STRATEGIES["fake_tps"] = GoergTestPointGenerator

            msd = ModelingStrategyDescriptor("fake", {}, "goerg", {},
                                             "pairwise_union", {})
            sparams = SystemParameters(
                [0.5],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "fake_tps")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial(
                experiment_dir,
                data_design,
                "dataset",
                trial_descriptor,
                analysis_type="single_pub",
            )
            result = trial.evaluate(seed=1)
            # We don't check each column in the resulting dataframe, because these have
            # been checked by the preceding unit tests.  However, we make a few strategic
            # probes.
            self.assertEqual(result.shape[0], 1)
            self.assertTrue(math.isnan(result["relative_error_at_100"][0]))
Example 24
    def test_simulated_venn_diagram_reach_by_spend(
        self,
        mock_geometric_estimate_noiser,
        mock_cardinality_estimate_variance,
        spends,
        budget,
        privacy_budget_split,
        fixed_noise,
        expected_reach_points,
    ):
        mock_geometric_estimate_noiser.return_value = FakeNoiser(fixed_noise)

        pdfs = [
            PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                          "pdf1"),
            PublisherData([(2, 0.03), (4, 0.06)], "pdf2"),
            PublisherData([(2, 0.01), (3, 0.03), (4, 0.05)], "pdf3"),
        ]
        data_set = DataSet(pdfs, "test")
        params = SystemParameters(
            [0.4, 0.5, 0.4],
            LiquidLegionsParameters(),
            FakeRandomGenerator(),
        )
        privacy_tracker = PrivacyTracker()
        halo = HaloSimulator(data_set, params, privacy_tracker)

        reach_points = halo.simulated_venn_diagram_reach_by_spend(
            spends, budget, privacy_budget_split)

        # Examine reach points
        for i, (r_pt, expected_r_pt) in enumerate(
                zip(reach_points, expected_reach_points)):
            self.assertEqual(
                r_pt.impressions,
                expected_r_pt.impressions,
                msg=f"The impressions of No.{i + 1} reach point is not correct",
            )
            self.assertEqual(
                r_pt.reach(1),
                expected_r_pt.reach(1),
                msg=f"The reach of No.{i + 1} reach point is not correct",
            )
            self.assertEqual(
                r_pt.spends,
                expected_r_pt.spends,
                msg=f"The spends of No.{i + 1} reach point is not correct",
            )

        # Examine privacy tracker
        expected_noise_event_primitive_regions = NoisingEvent(
            PrivacyBudget(
                budget.epsilon * privacy_budget_split,
                budget.delta * privacy_budget_split,
            ),
            DP_NOISE_MECHANISM_DISCRETE_LAPLACE,
            {"privacy_budget_split": privacy_budget_split},
        )

        expected_noise_event_cardinality = NoisingEvent(
            PrivacyBudget(
                budget.epsilon * (1 - privacy_budget_split),
                budget.delta * (1 - privacy_budget_split),
            ),
            DP_NOISE_MECHANISM_DISCRETE_LAPLACE,
            {"privacy_budget_split": (1 - privacy_budget_split)},
        )

        expected_noise_events = [
            expected_noise_event_primitive_regions,
            expected_noise_event_cardinality,
        ]

        self.assertEqual(
            halo.privacy_tracker.privacy_consumption.epsilon,
            expected_noise_event_primitive_regions.budget.epsilon +
            expected_noise_event_cardinality.budget.epsilon,
        )
        self.assertEqual(
            halo.privacy_tracker.privacy_consumption.delta,
            expected_noise_event_primitive_regions.budget.delta +
            expected_noise_event_cardinality.budget.delta,
        )
        self.assertEqual(len(halo.privacy_tracker._noising_events), 2)

        for noise_event, expected_noise_event in zip(
                halo.privacy_tracker._noising_events, expected_noise_events):
            self.assertEqual(
                noise_event.budget.epsilon,
                expected_noise_event.budget.epsilon,
            )
            self.assertEqual(
                noise_event.budget.delta,
                expected_noise_event.budget.delta,
            )
            self.assertEqual(
                noise_event.mechanism,
                expected_noise_event.mechanism,
            )
            self.assertEqual(
                noise_event.params,
                expected_noise_event.params,
            )