def test_compute_trial_results_path(self):
    with TemporaryDirectory() as d:
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1")
        pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
        data_set = DataSet([pdf1, pdf2], "dataset")
        data_design = DataDesign(join(d, "data_design"))
        data_design.add(data_set)
        msd = ModelingStrategyDescriptor(
            "strategy", {}, "single_pub_model", {}, "multi_pub_model", {}
        )
        sparams = SystemParameters(
            [0.03, 0.05],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5, "tps")
        trial_descriptor = TrialDescriptor(msd, sparams, eparams)
        trial = ExperimentalTrial("edir", data_design, "dataset", trial_descriptor)
        actual = trial._compute_trial_results_path()
        expected = "{}/{}/{},{},{},{}".format(
            "edir",
            "dataset",
            "strategy,single_pub_model,multi_pub_model",
            "spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0",
            "epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5",
            "test_point_strategy=tps.csv",
        )
        self.assertEqual(actual, expected)
def test_simulated_venn_diagram_reach_by_spend_without_active_pub(self):
    """With zero spends there are no active publishers, so no reach points are produced and no privacy budget is consumed."""
    pdfs = [
        PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1"),
        PublisherData([(2, 0.03), (4, 0.06)], "pdf2"),
        PublisherData([(2, 0.01), (3, 0.03), (4, 0.05)], "pdf3"),
    ]
    data_set = DataSet(pdfs, "test")
    params = SystemParameters(
        [0.4, 0.5, 0.4],
        LiquidLegionsParameters(),
        FakeRandomGenerator(),
    )
    privacy_tracker = PrivacyTracker()
    halo = HaloSimulator(data_set, params, privacy_tracker)
    spends = [0, 0, 0]
    budget = PrivacyBudget(0.2, 0.4)
    privacy_budget_split = 0.5
    max_freq = 1
    reach_points = halo.simulated_venn_diagram_reach_by_spend(
        spends, budget, privacy_budget_split, max_freq
    )
    expected_reach_points = []
    self.assertEqual(expected_reach_points, reach_points)
    self.assertEqual(halo.privacy_tracker.privacy_consumption.epsilon, 0)
    self.assertEqual(halo.privacy_tracker.privacy_consumption.delta, 0)
    self.assertEqual(len(halo.privacy_tracker._noising_events), 0)
def generate_experimental_design_config(
    seed: int = 1,
) -> Iterable[TrialDescriptor]:
    """Yields a TrialDescriptor for each combination of levels in the single publisher design."""
    keys = list(LEVELS.keys())
    levels = [LEVELS[k] for k in keys]
    for sample in itertools.product(*levels):
        design_parameters = dict(zip(keys, sample))
        mstrategy = ModelingStrategyDescriptor(
            "single_publisher",
            {},
            design_parameters["modeling_strategy"][0],
            design_parameters["modeling_strategy"][1],
            "none",
            {},
        )
        sparams = SystemParameters(
            [design_parameters["campaign_spend_fraction"]],
            LiquidLegionsParameters(
                design_parameters["liquid_legions_decay_rate"],
                design_parameters["liquid_legions_sketch_size"],
            ),
            np.random.default_rng(seed=seed),
        )
        eparams = ExperimentParameters(
            PrivacyBudget(
                design_parameters["privacy_budget_epsilon"],
                design_parameters["privacy_budget_delta"],
            ),
            design_parameters["replica_id"],
            design_parameters["max_frequency"],
            "grid",
            {"grid_size": design_parameters["test_points"]},
        )
        yield TrialDescriptor(mstrategy, sparams, eparams)
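# A hypothetical illustration (not part of the original source) of the shape
# that LEVELS is assumed to take.  The key names are inferred from the lookups
# in generate_experimental_design_config above; the level values shown here are
# purely illustrative.
#
# LEVELS = {
#     "modeling_strategy": [("goerg", {}), ("gamma_poisson", {})],
#     "campaign_spend_fraction": [0.1, 0.5],
#     "liquid_legions_decay_rate": [12],
#     "liquid_legions_sketch_size": [1e5],
#     "privacy_budget_epsilon": [1.0, 0.1],
#     "privacy_budget_delta": [1e-7],
#     "replica_id": [1, 2, 3],
#     "max_frequency": [5],
#     "test_points": [10],
# }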
def test_m3_strategy_with_ground_truth(self):
    data1 = HeterogeneousImpressionGenerator(1000, gamma_shape=1.0, gamma_scale=2)()
    publisher1 = PublisherData(FixedPriceGenerator(0.1)(data1))
    data2 = HeterogeneousImpressionGenerator(1000, gamma_shape=1.0, gamma_scale=3)()
    publisher2 = PublisherData(FixedPriceGenerator(0.05)(data2))
    dataset = DataSet([publisher1, publisher2], "dataset")
    params = SystemParameters(
        [100.0, 100.0], LiquidLegionsParameters(), np.random.default_rng(seed=1)
    )
    halo = HaloSimulator(dataset, params, PrivacyTracker())
    budget = PrivacyBudget(1.0, 1e-5)
    m3strategy = M3Strategy(
        GammaPoissonModel,
        {},
        RestrictedPairwiseUnionReachSurface,
        {},
        use_ground_truth_for_reach_curves=True,
    )
    surface = m3strategy.fit(halo, params, budget)
    expected0 = surface.by_spend([10.0, 0.0]).reach(1)
    actual0 = dataset.reach_by_spend([10.0, 0.0]).reach(1)
    self.assertAlmostEqual(expected0, actual0, delta=1)
    expected1 = surface.by_spend([0.0, 10.0]).reach(1)
    actual1 = dataset.reach_by_spend([0.0, 10.0]).reach(1)
    self.assertAlmostEqual(expected1, actual1, delta=1)
    expected2 = surface.by_spend([10.0, 10.0]).reach(1)
    actual2 = dataset.reach_by_spend([10.0, 10.0]).reach(1)
    self.assertAlmostEqual(expected2, actual2, delta=10)
def test_sample_venn_diagram(self, regions, sample_size, expected):
    params = SystemParameters([0], LiquidLegionsParameters(), FakeRandomGenerator())
    halo = HaloSimulator(DataSet([], "test"), params, PrivacyTracker())
    self.assertEqual(
        halo._sample_venn_diagram(regions, sample_size),
        expected,
    )
def setUpClass(cls):
    pdf = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1")
    cls.params = SystemParameters(
        [1.0, 0.5, 3.0], LiquidLegionsParameters(), np.random.default_rng(1)
    )
    cls.privacy_tracker = PrivacyTracker()
    cls.publisher = Publisher(pdf, 1, cls.params, cls.privacy_tracker)
def setUpClass(cls):
    cls.simulator = PlannerSimulator(
        FakeHaloSimulator(),
        FakeModelingStrategy(),
        SystemParameters([], LiquidLegionsParameters(), np.random.default_rng(1)),
        PrivacyTracker(),
    )
def test_evaluate(self):
    with TemporaryDirectory() as d:
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1")
        pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
        data_set = DataSet([pdf1, pdf2], "dataset")
        data_design_dir = join(d, "data_design")
        experiment_dir = join(d, "experiments")
        data_design = DataDesign(data_design_dir)
        data_design.add(data_set)
        MODELING_STRATEGIES["fake"] = FakeModelingStrategy
        TEST_POINT_STRATEGIES["fake_tps"] = (
            lambda ds, rng: FakeTestPointGenerator().test_points()
        )
        msd = ModelingStrategyDescriptor(
            "fake", {"x": 1}, "goerg", {}, "pairwise_union", {}
        )
        sparams1 = SystemParameters(
            [0.03, 0.05],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        sparams2 = SystemParameters(
            [0.05, 0.03],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        eparams1 = ExperimentParameters(PrivacyBudget(1.0, 0.01), 1, 5, "fake_tps")
        eparams2 = ExperimentParameters(PrivacyBudget(0.5, 0.001), 1, 5, "fake_tps")
        trial_descriptors = [
            TrialDescriptor(msd, sparams1, eparams1),
            TrialDescriptor(msd, sparams1, eparams2),
            TrialDescriptor(msd, sparams2, eparams1),
            TrialDescriptor(msd, sparams2, eparams2),
        ]
        exp = Experiment(experiment_dir, data_design, "dataset", trial_descriptors)
        trials = exp.generate_trials()
        self.assertLen(trials, 4)
def setUpClass(cls):
    pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1")
    pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
    data_set = DataSet([pdf1, pdf2], "test")
    cls.params = SystemParameters(
        [0.4, 0.5], LiquidLegionsParameters(), np.random.default_rng(1)
    )
    cls.privacy_tracker = PrivacyTracker()
    cls.halo = HaloSimulator(data_set, cls.params, cls.privacy_tracker)
def test_single_publisher_strategy(self, mock_gamma_poisson_model):
    mock_gamma_poisson_model.return_value = (30000, 10000, 1.0, 2.0)
    halo = FakeHalo()
    params = SystemParameters(
        [100.0], LiquidLegionsParameters(), np.random.default_rng(seed=1)
    )
    budget = PrivacyBudget(1.0, 1e-5)
    single_publisher_strategy = SinglePublisherStrategy(
        GammaPoissonModel, {}, None, {}
    )
    surface = single_publisher_strategy.fit(halo, params, budget)
    expected0 = surface.by_spend([100.0]).reach(1)
    actual0 = halo.simulated_reach_by_spend([100.0], budget).reach(1)
    self.assertAlmostEqual(expected0, actual0, delta=10)
def test_class_setup_with_campaign_spend_fractions_generator(self):
    pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1")
    pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
    data_set = DataSet([pdf1, pdf2], "test")
    params = SystemParameters(
        liquid_legions=LiquidLegionsParameters(),
        generator=np.random.default_rng(1),
        campaign_spend_fractions_generator=lambda dataset: [0.2]
        * dataset.publisher_count,
    )
    params = params.update_from_dataset(data_set)
    privacy_tracker = PrivacyTracker()
    halo = HaloSimulator(data_set, params, privacy_tracker)
    self.assertAlmostEqual(halo._campaign_spends[0], 0.01, 7)
    # Using assertAlmostEqual here because of a rounding error.
    self.assertAlmostEqual(halo._campaign_spends[1], 0.012, 7)
def test_form_venn_diagram_regions_with_publishers_more_than_limit(self):
    """_form_venn_diagram_regions raises ValueError when the number of publishers exceeds MAX_ACTIVE_PUBLISHERS."""
    num_publishers = MAX_ACTIVE_PUBLISHERS + 1
    data_set = DataSet(
        [
            PublisherData([(1, 0.01)], f"pdf{i + 1}")
            for i in range(num_publishers)
        ],
        "test",
    )
    params = SystemParameters(
        [0.4] * num_publishers, LiquidLegionsParameters(), np.random.default_rng(1)
    )
    privacy_tracker = PrivacyTracker()
    halo = HaloSimulator(data_set, params, privacy_tracker)
    spends = [0.01] * num_publishers
    with self.assertRaises(ValueError):
        halo._form_venn_diagram_regions(spends)
def test_make_independent_vars_dataframe(self):
    with TemporaryDirectory() as d:
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1")
        pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
        data_set = DataSet([pdf1, pdf2], "dataset")
        data_design = DataDesign(join(d, "data_design"))
        data_design.add(data_set)
        msd = ModelingStrategyDescriptor(
            "strategy", {}, "single_pub_model", {}, "multi_pub_model", {}
        )
        sparams = SystemParameters(
            [0.03, 0.05],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        eparams = ExperimentParameters(
            PrivacyBudget(1.0, 0.01), 3, 5, "test_point_strategy"
        )
        trial_descriptor = TrialDescriptor(msd, sparams, eparams)
        trial = ExperimentalTrial("edir", data_design, "dataset", trial_descriptor)
        actual = trial._make_independent_vars_dataframe()
        expected_trial_name = (
            "strategy,single_pub_model,multi_pub_model,"
            "spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0,"
            "epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5,"
            "test_point_strategy=test_point_strategy"
        )
        expected = pd.DataFrame(
            {
                "dataset": ["dataset"],
                "trial": [expected_trial_name],
                "replica_id": [3],
                "single_pub_model": ["single_pub_model"],
                "multi_pub_model": ["multi_pub_model"],
                "strategy": ["strategy"],
                "liquid_legions_sketch_size": [1e6],
                "liquid_legions_decay_rate": [13],
                "maximum_reach": [4],
                "ncampaigns": [2],
                "largest_pub_reach": [3],
                "max_frequency": [5],
                "average_spend_fraction": [0.04],
            }
        )
        pd.testing.assert_frame_equal(actual, expected)
def test_form_venn_diagram_regions(self, num_publishers, spends, max_freq, expected):
    pdfs = [
        PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1"),
        PublisherData([(2, 0.03), (4, 0.06)], "pdf2"),
        PublisherData([(2, 0.01), (3, 0.03), (4, 0.05)], "pdf3"),
    ]
    data_set = DataSet(pdfs[:num_publishers], "test")
    params = SystemParameters(
        [0.4] * num_publishers,
        LiquidLegionsParameters(),
        np.random.default_rng(1),
    )
    privacy_tracker = PrivacyTracker()
    halo = HaloSimulator(data_set, params, privacy_tracker)
    regions = halo._form_venn_diagram_regions(spends, max_freq)
    self.assertEqual(expected, regions)
def test_scale_up_reach_in_primitive_regions(
    self,
    mock_geometric_estimate_noiser,
    regions,
    true_cardinality,
    std,
    budget,
    privacy_budget_split,
    fixed_noise,
    expected,
):
    mock_geometric_estimate_noiser.return_value = FakeNoiser(fixed_noise)
    params = SystemParameters([0], LiquidLegionsParameters(), FakeRandomGenerator())
    halo = HaloSimulator(DataSet([], "test"), params, PrivacyTracker())
    scaled_regions = halo._scale_up_reach_in_primitive_regions(
        regions, true_cardinality, std, budget, privacy_budget_split
    )
    self.assertEqual(scaled_regions, expected)
    self.assertEqual(
        halo.privacy_tracker.privacy_consumption.epsilon,
        budget.epsilon * privacy_budget_split,
    )
    self.assertEqual(
        halo.privacy_tracker.privacy_consumption.delta,
        budget.delta * privacy_budget_split,
    )
    self.assertEqual(len(halo.privacy_tracker._noising_events), 1)
    self.assertEqual(
        halo.privacy_tracker._noising_events[0].budget.epsilon,
        budget.epsilon * privacy_budget_split,
    )
    self.assertEqual(
        halo.privacy_tracker._noising_events[0].budget.delta,
        budget.delta * privacy_budget_split,
    )
    self.assertEqual(
        halo.privacy_tracker._noising_events[0].mechanism,
        DP_NOISE_MECHANISM_DISCRETE_LAPLACE,
    )
    self.assertEqual(
        halo.privacy_tracker._noising_events[0].params,
        {"privacy_budget_split": privacy_budget_split},
    )
def test_generate_reach_points_from_venn_diagram(
    self, num_publishers, spends, regions, expected
):
    pdfs = [
        PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1"),
        PublisherData([(2, 0.03), (4, 0.06)], "pdf2"),
        PublisherData([(2, 0.01), (3, 0.03), (4, 0.05)], "pdf3"),
    ]
    data_set = DataSet(pdfs[:num_publishers], "test")
    params = SystemParameters(
        [0.4] * num_publishers,
        LiquidLegionsParameters(),
        np.random.default_rng(1),
    )
    privacy_tracker = PrivacyTracker()
    halo = HaloSimulator(data_set, params, privacy_tracker)
    # Note that the reach points generated from the Venn diagram only
    # contain 1+ reaches.
    reach_points = halo._generate_reach_points_from_venn_diagram(spends, regions)
    self.assertEqual(len(reach_points), len(expected))
    for i, (r_pt, expected_r_pt) in enumerate(zip(reach_points, expected)):
        self.assertEqual(
            r_pt.impressions,
            expected_r_pt.impressions,
            msg=f"The impressions of reach point #{i + 1} are not correct",
        )
        self.assertEqual(
            r_pt.reach(1),
            expected_r_pt.reach(1),
            msg=f"The reach of reach point #{i + 1} is not correct",
        )
        self.assertEqual(
            r_pt.spends,
            expected_r_pt.spends,
            msg=f"The spends of reach point #{i + 1} are not correct",
        )
def test_evaluate_single_publisher_model(self):
    with TemporaryDirectory() as d:
        data1 = HeterogeneousImpressionGenerator(
            1000, gamma_shape=1.0, gamma_scale=3.0
        )()
        pdf1 = PublisherData(FixedPriceGenerator(0.1)(data1))
        data_set = DataSet([pdf1], "dataset")
        data_design_dir = join(d, "data_design")
        experiment_dir = join(d, "experiments")
        data_design = DataDesign(data_design_dir)
        data_design.add(data_set)
        msd = ModelingStrategyDescriptor(
            "single_publisher", {}, "goerg", {}, "pairwise_union", {}
        )
        sparams = SystemParameters(
            [0.5],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        eparams = ExperimentParameters(
            PrivacyBudget(1.0, 0.01), 3, 5, "grid", {"grid_size": 5}
        )
        trial_descriptor = TrialDescriptor(msd, sparams, eparams)
        trial = ExperimentalTrial(
            experiment_dir,
            data_design,
            "dataset",
            trial_descriptor,
            analysis_type="single_pub",
        )
        result = trial.evaluate(seed=1)
        # We don't check each column in the resulting dataframe, because these
        # have been checked by the preceding unit tests. However, we make a few
        # strategic probes.
        self.assertEqual(result.shape[0], 1)
        self.assertAlmostEqual(result["relative_error_at_100"][0], 0.0, delta=0.01)
        self.assertGreater(result["max_nonzero_frequency_from_halo"][0], 0)
        self.assertEqual(result["max_nonzero_frequency_from_data"][0], 5)
def test_evaluate_when_there_is_a_modeling_exception(self):
    with TemporaryDirectory() as d:
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (3, 0.04), (4, 0.05)], "pdf1")
        data_set = DataSet([pdf1], "dataset")
        data_design_dir = join(d, "data_design")
        experiment_dir = join(d, "experiments")
        data_design = DataDesign(data_design_dir)
        data_design.add(data_set)
        MODELING_STRATEGIES["fake"] = GoergModelingStrategy
        TEST_POINT_STRATEGIES["fake_tps"] = GoergTestPointGenerator
        msd = ModelingStrategyDescriptor(
            "fake", {}, "goerg", {}, "pairwise_union", {}
        )
        sparams = SystemParameters(
            [0.5],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5, "fake_tps")
        trial_descriptor = TrialDescriptor(msd, sparams, eparams)
        trial = ExperimentalTrial(
            experiment_dir, data_design, "dataset", trial_descriptor
        )
        result = trial.evaluate(seed=1)
        # We don't check each column in the resulting dataframe, because these
        # have been checked by the preceding unit tests. However, we make a few
        # strategic probes.
        self.assertEqual(result.shape[0], 1)
        self.assertEqual(result["dataset"][0], "dataset")
        self.assertEqual(result["replica_id"][0], 3)
        self.assertEqual(result["privacy_budget_epsilon"][0], 1.0)
        self.assertEqual(result["model_succeeded"][0], 0)
        self.assertEqual(
            result["model_exception"][0],
            "Cannot fit Goerg model when impressions <= reach.",
        )
def test_m3_strategy(self, mock_gamma_poisson_model):
    mock_gamma_poisson_model.return_value = (30000, 10000, 1.0, 2.0)
    halo = FakeHalo()
    params = SystemParameters(
        [100.0, 100.0], LiquidLegionsParameters(), np.random.default_rng(seed=1)
    )
    budget = PrivacyBudget(1.0, 1e-5)
    m3strategy = M3Strategy(
        GammaPoissonModel, {}, RestrictedPairwiseUnionReachSurface, {}
    )
    surface = m3strategy.fit(halo, params, budget)
    expected0 = surface.by_spend([100.0, 0.0]).reach(1)
    actual0 = halo.simulated_reach_by_spend([100.0, 0.0], budget).reach(1)
    self.assertAlmostEqual(expected0, actual0, delta=10)
    expected1 = surface.by_spend([0.0, 100.0]).reach(1)
    actual1 = halo.simulated_reach_by_spend([0.0, 100.0], budget).reach(1)
    self.assertAlmostEqual(expected1, actual1, delta=10)
    expected2 = surface.by_spend([100.0, 100.0]).reach(1)
    actual2 = halo.simulated_reach_by_spend([100.0, 100.0], budget).reach(1)
    self.assertAlmostEqual(expected2, actual2, delta=1000)
def test_evaluate_single_publisher_model_with_exception(self):
    with TemporaryDirectory() as d:
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (3, 0.04), (4, 0.05)], "pdf1")
        data_set = DataSet([pdf1], "dataset")
        data_design_dir = join(d, "data_design")
        experiment_dir = join(d, "experiments")
        data_design = DataDesign(data_design_dir)
        data_design.add(data_set)
        MODELING_STRATEGIES["fake"] = GoergModelingStrategy
        TEST_POINT_STRATEGIES["fake_tps"] = GoergTestPointGenerator
        msd = ModelingStrategyDescriptor(
            "fake", {}, "goerg", {}, "pairwise_union", {}
        )
        sparams = SystemParameters(
            [0.5],
            LiquidLegionsParameters(13, 1e6, 1),
            np.random.default_rng(),
        )
        eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5, "fake_tps")
        trial_descriptor = TrialDescriptor(msd, sparams, eparams)
        trial = ExperimentalTrial(
            experiment_dir,
            data_design,
            "dataset",
            trial_descriptor,
            analysis_type="single_pub",
        )
        result = trial.evaluate(seed=1)
        # We don't check each column in the resulting dataframe, because these
        # have been checked by the preceding unit tests. However, we make a few
        # strategic probes.
        self.assertEqual(result.shape[0], 1)
        self.assertTrue(math.isnan(result["relative_error_at_100"][0]))
        {},
    ),
    ModelingStrategyDescriptor(
        "m3strategy", {}, "goerg", {}, "restricted_pairwise_union", {}
    ),
    ModelingStrategyDescriptor(
        "m3strategy", {}, "gamma_poisson", {}, "restricted_pairwise_union", {}
    ),
]

CAMPAIGN_SPEND_FRACTIONS_GENERATORS = [
    lambda dataset: [0.2] * dataset.publisher_count,
    lambda dataset: list(islice(cycle([0.1, 0.2, 0.3]), dataset.publisher_count)),
]

LIQUID_LEGIONS_PARAMS = [
    LiquidLegionsParameters(12, 1e5),
]

PRIVACY_BUDGETS = [
    PrivacyBudget(1.0, 1e-7),
    PrivacyBudget(1.0, 1e-9),
    PrivacyBudget(0.1, 1e-7),
    PrivacyBudget(0.1, 1e-9),
]

REPLICA_IDS = [1, 2, 3]

MAX_FREQUENCIES = [5, 20]

TEST_POINT_STRATEGIES = [
    ("latin_hypercube", {
    TrialDescriptor,
)

MODELING_STRATEGIES = [
    ModelingStrategyDescriptor(
        "m3strategy", {}, "goerg", {}, "restricted_pairwise_union", {}
    ),
    ModelingStrategyDescriptor(
        "m3strategy", {}, "gamma_poisson", {}, "restricted_pairwise_union", {}
    ),
]

CAMPAIGN_SPEND_FRACTIONS_GENERATORS = [
    lambda dataset: [0.6] * dataset.publisher_count,
    lambda dataset: list(islice(cycle([0.4, 0.8]), dataset.publisher_count)),
]

LIQUID_LEGIONS_PARAMS = [
    LiquidLegionsParameters(10, 8000),
]

PRIVACY_BUDGETS = [
    PrivacyBudget(2, 1e-9),
    PrivacyBudget(0.5, 1e-9),
]

REPLICA_IDS = [1, 2, 3]

MAX_FREQUENCIES = [3, 6]

TEST_POINT_STRATEGIES = [
    ("latin_hypercube", {
        "npublishers": 1,
        "minimum_points_per_publisher": 10
MODELING_STRATEGIES = [
    ModelingStrategyDescriptor(
        "m3strategy", {}, "goerg", {}, "restricted_pairwise_union", {}
    ),
    ModelingStrategyDescriptor(
        "m3strategy", {}, "gamma_poisson", {}, "restricted_pairwise_union", {}
    ),
]

# Here we assume all data sets specify exactly two campaigns.
CAMPAIGN_SPEND_FRACTIONS = list(
    itertools.product(np.arange(1, 10) * 0.1, np.arange(1, 10) * 0.1)
)

LIQUID_LEGIONS_PARAMS = [
    LiquidLegionsParameters(10, 1e5),
    LiquidLegionsParameters(12, 1e5),
    LiquidLegionsParameters(17, 1e5),
    LiquidLegionsParameters(10, 2e5),
    LiquidLegionsParameters(12, 2e5),
    LiquidLegionsParameters(17, 2e5),
]

PRIVACY_BUDGETS = [
    PrivacyBudget(1.0, 1e-7),
    PrivacyBudget(1.0, 1e-8),
    PrivacyBudget(0.1, 1e-7),
    PrivacyBudget(0.1, 1e-8),
]

REPLICA_IDS = [1, 2, 3]
def test_simulated_venn_diagram_reach_by_spend(
    self,
    mock_geometric_estimate_noiser,
    mock_cardinality_estimate_variance,
    spends,
    budget,
    privacy_budget_split,
    fixed_noise,
    expected_reach_points,
):
    mock_geometric_estimate_noiser.return_value = FakeNoiser(fixed_noise)
    pdfs = [
        PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)], "pdf1"),
        PublisherData([(2, 0.03), (4, 0.06)], "pdf2"),
        PublisherData([(2, 0.01), (3, 0.03), (4, 0.05)], "pdf3"),
    ]
    data_set = DataSet(pdfs, "test")
    params = SystemParameters(
        [0.4, 0.5, 0.4],
        LiquidLegionsParameters(),
        FakeRandomGenerator(),
    )
    privacy_tracker = PrivacyTracker()
    halo = HaloSimulator(data_set, params, privacy_tracker)
    reach_points = halo.simulated_venn_diagram_reach_by_spend(
        spends, budget, privacy_budget_split
    )

    # Examine reach points.
    for i, (r_pt, expected_r_pt) in enumerate(
        zip(reach_points, expected_reach_points)
    ):
        self.assertEqual(
            r_pt.impressions,
            expected_r_pt.impressions,
            msg=f"The impressions of reach point #{i + 1} are not correct",
        )
        self.assertEqual(
            r_pt.reach(1),
            expected_r_pt.reach(1),
            msg=f"The reach of reach point #{i + 1} is not correct",
        )
        self.assertEqual(
            r_pt.spends,
            expected_r_pt.spends,
            msg=f"The spends of reach point #{i + 1} are not correct",
        )

    # Examine privacy tracker.
    expected_noise_event_primitive_regions = NoisingEvent(
        PrivacyBudget(
            budget.epsilon * privacy_budget_split,
            budget.delta * privacy_budget_split,
        ),
        DP_NOISE_MECHANISM_DISCRETE_LAPLACE,
        {"privacy_budget_split": privacy_budget_split},
    )
    expected_noise_event_cardinality = NoisingEvent(
        PrivacyBudget(
            budget.epsilon * (1 - privacy_budget_split),
            budget.delta * (1 - privacy_budget_split),
        ),
        DP_NOISE_MECHANISM_DISCRETE_LAPLACE,
        {"privacy_budget_split": (1 - privacy_budget_split)},
    )
    expected_noise_events = [
        expected_noise_event_primitive_regions,
        expected_noise_event_cardinality,
    ]
    self.assertEqual(
        halo.privacy_tracker.privacy_consumption.epsilon,
        expected_noise_event_primitive_regions.budget.epsilon
        + expected_noise_event_cardinality.budget.epsilon,
    )
    self.assertEqual(
        halo.privacy_tracker.privacy_consumption.delta,
        expected_noise_event_primitive_regions.budget.delta
        + expected_noise_event_cardinality.budget.delta,
    )
    self.assertEqual(len(halo.privacy_tracker._noising_events), 2)
    for noise_event, expected_noise_event in zip(
        halo.privacy_tracker._noising_events, expected_noise_events
    ):
        self.assertEqual(noise_event.budget.epsilon, expected_noise_event.budget.epsilon)
        self.assertEqual(noise_event.budget.delta, expected_noise_event.budget.delta)
        self.assertEqual(noise_event.mechanism, expected_noise_event.mechanism)
        self.assertEqual(noise_event.params, expected_noise_event.params)