def generate_experimental_design_config(seed: int = 1) -> Iterable[TrialDescriptor]:
    """Generates a list of TrialDescriptors.

    This example illustrates a Latin hypercube sampling strategy.
    """
    keys = LEVELS.keys()
    levels = [len(LEVELS[k]) for k in keys]
    np.random.seed(seed)
    for i, sample in enumerate(
        lhs(n=len(levels), samples=NUM_TRIALS_PER_DATASET, criterion="maximin")
    ):
        design_parameters = {}
        for key, level in zip(keys, sample):
            design_parameters[key] = LEVELS[key][int(level * len(LEVELS[key]))]
        mstrategy = design_parameters["modeling_strategies"]
        sparams = SystemParameters(
            design_parameters["campaign_spend_fractions"],
            design_parameters["liquid_legions_params"],
            np.random.default_rng(seed=seed),
        )
        test_point_generator, test_point_params = design_parameters[
            "test_point_strategies"
        ]
        eparams = ExperimentParameters(
            design_parameters["privacy_budgets"],
            design_parameters["replica_ids"],
            design_parameters["max_frequencies"],
            test_point_generator,
            test_point_params,
        )
        yield TrialDescriptor(mstrategy, sparams, eparams)
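The generator above depends on two module-level globals, LEVELS and NUM_TRIALS_PER_DATASET, and on an lhs() function whose call signature matches pyDOE's Latin hypercube sampler. Below is a minimal, self-contained sketch of the sampling-to-levels mapping; the LEVELS dict is an illustrative stand-in, not the framework's actual configuration.

# Minimal sketch of the LHS-to-levels mapping (illustrative LEVELS only;
# lhs is assumed to be pyDOE's Latin hypercube sampler).
import numpy as np
from pyDOE import lhs

LEVELS = {
    "replica_ids": [1, 2, 3],
    "max_frequencies": [5, 10],
    "privacy_budgets": [0.1, 1.0, 10.0],
}
NUM_TRIALS_PER_DATASET = 4

np.random.seed(1)
keys = list(LEVELS.keys())
for sample in lhs(n=len(keys), samples=NUM_TRIALS_PER_DATASET, criterion="maximin"):
    # Each coordinate lies in (0, 1); scaling by the number of levels and
    # truncating to an int selects one level per factor.
    chosen = {k: LEVELS[k][int(u * len(LEVELS[k]))] for k, u in zip(keys, sample)}
    print(chosen)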
Example #2
    def test_compute_trial_results_path(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design = DataDesign(join(d, "data_design"))
            data_design.add(data_set)

            msd = ModelingStrategyDescriptor("strategy", {},
                                             "single_pub_model", {},
                                             "multi_pub_model", {})
            sparams = SystemParameters(
                [0.03, 0.05],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "tps")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial("edir", data_design, "dataset",
                                      trial_descriptor)

            actual = trial._compute_trial_results_path()
            expected = "{}/{}/{},{},{},{}".format(
                "edir",
                "dataset",
                "strategy,single_pub_model,multi_pub_model",
                "spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0",
                "epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5",
                "test_point_strategy=tps.csv",
            )
            self.assertEqual(actual, expected)
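Read off the expected string, the trial results path is experiment_dir/dataset_name/<descriptor string>, where the descriptor string comma-joins the modeling strategy, system parameters, and experiment parameters, and carries the ".csv" extension. A sketch of that composition, reusing the literals from the test (an interpretation of the expected value, not the framework's implementation):

descriptor = ",".join([
    "strategy,single_pub_model,multi_pub_model",                # modeling strategy
    "spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0",   # system parameters
    "epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5",      # experiment parameters
    "test_point_strategy=tps.csv",                              # test point strategy + extension
])
expected = "/".join(["edir", "dataset", descriptor])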
Example #3
def generate_experimental_design_config(
        seed: int = 1) -> Iterable[TrialDescriptor]:
    """Generates a list of TrialDescriptors for a single publisher model."""
    keys = list(LEVELS.keys())
    levels = [LEVELS[k] for k in keys]
    for sample in itertools.product(*levels):
        design_parameters = dict(zip(keys, sample))
        mstrategy = ModelingStrategyDescriptor(
            "single_publisher",
            {},
            design_parameters["modeling_strategy"][0],
            design_parameters["modeling_strategy"][1],
            "none",
            {},
        )
        sparams = SystemParameters(
            [design_parameters["campaign_spend_fraction"]],
            LiquidLegionsParameters(
                design_parameters["liquid_legions_decay_rate"],
                design_parameters["liquid_legions_sketch_size"],
            ),
            np.random.default_rng(seed=seed),
        )
        eparams = ExperimentParameters(
            PrivacyBudget(
                design_parameters["privacy_budget_epsilon"],
                design_parameters["privacy_budget_delta"],
            ),
            design_parameters["replica_id"],
            design_parameters["max_frequency"],
            "grid",
            {"grid_size": design_parameters["test_points"]},
        )
        yield TrialDescriptor(mstrategy, sparams, eparams)
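Unlike Example #1, this design enumerates the full factorial of LEVELS via itertools.product, so the number of TrialDescriptors equals the product of the level counts. A small, self-contained illustration with a stand-in LEVELS (the framework's real one also carries modeling_strategy, privacy_budget_epsilon, and so on):

import itertools

LEVELS = {
    "campaign_spend_fraction": [0.1, 0.5],
    "liquid_legions_decay_rate": [10, 13],
    "replica_id": [1, 2, 3],
}
keys = list(LEVELS.keys())
for sample in itertools.product(*LEVELS.values()):
    design_parameters = dict(zip(keys, sample))
    print(design_parameters)
# 2 * 2 * 3 = 12 combinations -> 12 TrialDescriptors for this stand-in.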
Example #4
    def test_evaluate(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            MODELING_STRATEGIES["fake"] = FakeModelingStrategy
            TEST_POINT_STRATEGIES["fake_tps"] = (
                lambda ds, rng: FakeTestPointGenerator().test_points())

            msd = ModelingStrategyDescriptor("fake", {"x": 1}, "goerg", {},
                                             "pairwise_union", {})
            sparams1 = SystemParameters(
                [0.03, 0.05],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            sparams2 = SystemParameters(
                [0.05, 0.03],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams1 = ExperimentParameters(PrivacyBudget(1.0, 0.01), 1, 5,
                                            "fake_tps")
            eparams2 = ExperimentParameters(PrivacyBudget(0.5, 0.001), 1, 5,
                                            "fake_tps")

            trial_descriptors = [
                TrialDescriptor(msd, sparams1, eparams1),
                TrialDescriptor(msd, sparams1, eparams2),
                TrialDescriptor(msd, sparams2, eparams1),
                TrialDescriptor(msd, sparams2, eparams2),
            ]

            exp = Experiment(experiment_dir, data_design, "dataset",
                             trial_descriptors)
            trials = exp.generate_trials()
            self.assertLen(trials, 4)
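The four descriptors above are the cross product of the two system-parameter settings and the two experiment-parameter settings, which is why exactly four trials are generated. An equivalent construction with the same ordering (using the names already defined in the test):

import itertools

trial_descriptors = [
    TrialDescriptor(msd, sparams, eparams)
    for sparams, eparams in itertools.product([sparams1, sparams2],
                                              [eparams1, eparams2])
]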
Example #5
    def test_make_independent_vars_dataframe(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design = DataDesign(join(d, "data_design"))
            data_design.add(data_set)

            msd = ModelingStrategyDescriptor("strategy", {},
                                             "single_pub_model", {},
                                             "multi_pub_model", {})
            sparams = SystemParameters(
                [0.03, 0.05],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "test_point_strategy")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial("edir", data_design, "dataset",
                                      trial_descriptor)

            actual = trial._make_independent_vars_dataframe()

            expected_trial_name = "strategy,single_pub_model,multi_pub_model,spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0,epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5,test_point_strategy=test_point_strategy"

            expected = pd.DataFrame({
                "dataset": ["dataset"],
                "trial": [expected_trial_name],
                "replica_id": [3],
                "single_pub_model": ["single_pub_model"],
                "multi_pub_model": ["multi_pub_model"],
                "strategy": ["strategy"],
                "liquid_legions_sketch_size": [1e6],
                "liquid_legions_decay_rate": [13],
                "maximum_reach": [4],
                "ncampaigns": [2],
                "largest_pub_reach": [3],
                "max_frequency": [5],
                "average_spend_fraction": [0.04],
            })
            pd.testing.assert_frame_equal(actual, expected)
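The derived columns in the expected frame can be read straight off the data above: pdf1 reaches user ids {1, 2, 3} and pdf2 reaches {2, 4}. A sketch of that reading (an interpretation of the expected values, not the framework's code):

pdf1_ids, pdf2_ids = {1, 2, 3}, {2, 4}
maximum_reach = len(pdf1_ids | pdf2_ids)               # 4 distinct ids across both publishers
ncampaigns = 2                                         # two PublisherData objects in the DataSet
largest_pub_reach = max(len(pdf1_ids), len(pdf2_ids))  # 3, from pdf1
average_spend_fraction = (0.03 + 0.05) / 2             # 0.04, mean of the campaign spend fractions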
Example #6
    def test_privacy_tracking_vars_dataframe(self):
        tracker = PrivacyTracker()
        eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 1, 3,
                                       "test_point_strategy")
        trial_descriptor = TrialDescriptor(None, None, eparams)
        trial = ExperimentalTrial("", None, "", trial_descriptor)

        actual0 = trial._make_privacy_tracking_vars_dataframe(tracker)
        expected0 = pd.DataFrame({
            "privacy_budget_epsilon": [1.0],
            "privacy_budget_delta": [0.01],
            "privacy_used_epsilon": [0.0],
            "privacy_used_delta": [0.0],
            "privacy_mechanisms": [""],
        })
        pd.testing.assert_frame_equal(actual0, expected0)

        tracker.append(
            NoisingEvent(PrivacyBudget(0.5, 0.005), DP_NOISE_MECHANISM_LAPLACE,
                         {}))
        actual1 = trial._make_privacy_tracking_vars_dataframe(tracker)
        expected1 = pd.DataFrame({
            "privacy_budget_epsilon": [1.0],
            "privacy_budget_delta": [0.01],
            "privacy_used_epsilon": [0.5],
            "privacy_used_delta": [0.005],
            "privacy_mechanisms": ["Laplace"],
        })
        pd.testing.assert_frame_equal(actual1, expected1)

        tracker.append(
            NoisingEvent(PrivacyBudget(0.2, 0.002),
                         DP_NOISE_MECHANISM_GAUSSIAN, {}))
        actual2 = trial._make_privacy_tracking_vars_dataframe(tracker)
        expected2 = pd.DataFrame({
            "privacy_budget_epsilon": [1.0],
            "privacy_budget_delta": [0.01],
            "privacy_used_epsilon": [0.7],
            "privacy_used_delta": [0.007],
            "privacy_mechanisms": ["Gaussian/Laplace"],
        })
        pd.testing.assert_frame_equal(actual2, expected2)
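The three assertions above imply simple additive accounting: privacy_used_epsilon and privacy_used_delta are sums over the appended NoisingEvents, and privacy_mechanisms reports the distinct mechanism labels joined with "/". A sketch of that bookkeeping (an interpretation of the expected frames, not the PrivacyTracker implementation):

events = [(0.5, 0.005, "Laplace"), (0.2, 0.002, "Gaussian")]
used_epsilon = sum(eps for eps, _, _ in events)                 # 0.7
used_delta = sum(delta for _, delta, _ in events)               # 0.007
mechanisms = "/".join(sorted({name for _, _, name in events}))  # "Gaussian/Laplace"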
Example #7
def generate_experimental_design_config(
        seed: int = 1) -> Iterable[TrialDescriptor]:
    """Generates a list of TrialDescriptors for the 1st round eval of M3."""
    for level_combination in itertools.product(*LEVELS.values()):
        design_parameters = dict(zip(LEVELS.keys(), level_combination))
        mstrategy = design_parameters["modeling_strategies"]
        sparams = SystemParameters(
            liquid_legions=design_parameters["liquid_legions_params"],
            generator=np.random.default_rng(seed=seed),
            campaign_spend_fractions_generator=design_parameters[
                "campaign_spend_fractions_generators"],
        )
        test_point_generator, test_point_params = design_parameters[
            "test_point_strategies"]
        eparams = ExperimentParameters(
            design_parameters["privacy_budgets"],
            design_parameters["replica_ids"],
            design_parameters["max_frequencies"],
            test_point_generator,
            test_point_params,
        )
        yield TrialDescriptor(mstrategy, sparams, eparams)
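In this design the "test_point_strategies" levels must be (name, params) pairs, since the generator unpacks each level into test_point_generator and test_point_params. A hypothetical fragment of such a LEVELS entry (values illustrative; only the "grid"/"grid_size" pairing and the "fake_tps" name appear elsewhere in these examples):

LEVELS_FRAGMENT = {
    "test_point_strategies": [
        ("grid", {"grid_size": 5}),   # strategy name plus its keyword parameters
        ("fake_tps", {}),             # a strategy that takes no extra parameters
    ],
}
test_point_generator, test_point_params = LEVELS_FRAGMENT["test_point_strategies"][0]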
Example #8
    def test_evaluate_single_publisher_model(self):
        with TemporaryDirectory() as d:
            data1 = HeterogeneousImpressionGenerator(1000,
                                                     gamma_shape=1.0,
                                                     gamma_scale=3.0)()
            pdf1 = PublisherData(FixedPriceGenerator(0.1)(data1))
            data_set = DataSet([pdf1], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            msd = ModelingStrategyDescriptor("single_publisher", {}, "goerg",
                                             {}, "pairwise_union", {})
            sparams = SystemParameters(
                [0.5],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "grid", {"grid_size": 5})
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial(
                experiment_dir,
                data_design,
                "dataset",
                trial_descriptor,
                analysis_type="single_pub",
            )
            result = trial.evaluate(seed=1)
            # We don't check each column in the resulting dataframe, because these have
            # been checked by the preceding unit tests.  However, we make a few strategic
            # probes.
            self.assertEqual(result.shape[0], 1)
            self.assertAlmostEqual(result["relative_error_at_100"][0],
                                   0.0,
                                   delta=0.01)
            self.assertGreater(result["max_nonzero_frequency_from_halo"][0], 0)
            self.assertEqual(result["max_nonzero_frequency_from_data"][0], 5)
Example #9
    def test_evaluate_when_there_is_a_modeling_exception(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (3, 0.04), (4, 0.05)],
                                 "pdf1")
            data_set = DataSet([pdf1], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            MODELING_STRATEGIES["fake"] = GoergModelingStrategy
            TEST_POINT_STRATEGIES["fake_tps"] = GoergTestPointGenerator

            msd = ModelingStrategyDescriptor("fake", {}, "goerg", {},
                                             "pairwise_union", {})
            sparams = SystemParameters(
                [0.5],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "fake_tps")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial(experiment_dir, data_design, "dataset",
                                      trial_descriptor)
            result = trial.evaluate(seed=1)
            # We don't check each column in the resulting dataframe, because these have
            # been checked by the preceding unit tests.  However, we make a few strategic
            # probes.
            self.assertEqual(result.shape[0], 1)
            self.assertEqual(result["dataset"][0], "dataset")
            self.assertEqual(result["replica_id"][0], 3)
            self.assertEqual(result["privacy_budget_epsilon"][0], 1.0)
            self.assertEqual(result["model_succeeded"][0], 0)
            self.assertEqual(
                result["model_exception"][0],
                "Cannot fit Goerg model when impressions <= reach.",
            )
Example #10
    def test_evaluate_single_publisher_model_with_exception(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (3, 0.04), (4, 0.05)],
                                 "pdf1")
            data_set = DataSet([pdf1], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            MODELING_STRATEGIES["fake"] = GoergModelingStrategy
            TEST_POINT_STRATEGIES["fake_tps"] = GoergTestPointGenerator

            msd = ModelingStrategyDescriptor("fake", {}, "goerg", {},
                                             "pairwise_union", {})
            sparams = SystemParameters(
                [0.5],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "fake_tps")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial(
                experiment_dir,
                data_design,
                "dataset",
                trial_descriptor,
                analysis_type="single_pub",
            )
            result = trial.evaluate(seed=1)
            # We don't check each column in the resulting dataframe, because these have
            # been checked by the preceding unit tests.  However, we make a few strategic
            # probes.
            self.assertEqual(result.shape[0], 1)
            self.assertTrue(math.isnan(result["relative_error_at_100"][0]))