Example #1
    def test_simulated_venn_diagram_reach_by_spend_without_active_pub(self):
        pdfs = [
            PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                          "pdf1"),
            PublisherData([(2, 0.03), (4, 0.06)], "pdf2"),
            PublisherData([(2, 0.01), (3, 0.03), (4, 0.05)], "pdf3"),
        ]
        data_set = DataSet(pdfs, "test")
        params = SystemParameters(
            [0.4, 0.5, 0.4],
            LiquidLegionsParameters(),
            FakeRandomGenerator(),
        )
        privacy_tracker = PrivacyTracker()
        halo = HaloSimulator(data_set, params, privacy_tracker)

        spends = [0, 0, 0]
        budget = PrivacyBudget(0.2, 0.4)
        privacy_budget_split = 0.5
        max_freq = 1

        reach_points = halo.simulated_venn_diagram_reach_by_spend(
            spends, budget, privacy_budget_split, max_freq)

        expected_reach_points = []

        self.assertEqual(expected_reach_points, reach_points)
        self.assertEqual(halo.privacy_tracker.privacy_consumption.epsilon, 0)
        self.assertEqual(halo.privacy_tracker.privacy_consumption.delta, 0)
        self.assertEqual(len(halo.privacy_tracker._noising_events), 0)
Example #2
    def test_m3_strategy_with_ground_truth(self):
        data1 = HeterogeneousImpressionGenerator(1000, gamma_shape=1.0, gamma_scale=2)()
        publisher1 = PublisherData(FixedPriceGenerator(0.1)(data1))
        data2 = HeterogeneousImpressionGenerator(1000, gamma_shape=1.0, gamma_scale=3)()
        publisher2 = PublisherData(FixedPriceGenerator(0.05)(data2))
        dataset = DataSet([publisher1, publisher2], "dataset")

        params = SystemParameters(
            [100.0, 100.0], LiquidLegionsParameters(), np.random.default_rng(seed=1)
        )
        halo = HaloSimulator(dataset, params, PrivacyTracker())

        budget = PrivacyBudget(1.0, 1e-5)
        m3strategy = M3Strategy(
            GammaPoissonModel,
            {},
            RestrictedPairwiseUnionReachSurface,
            {},
            use_ground_truth_for_reach_curves=True,
        )
        surface = m3strategy.fit(halo, params, budget)

        expected0 = surface.by_spend([10.0, 0.0]).reach(1)
        actual0 = dataset.reach_by_spend([10.0, 0.0]).reach(1)
        self.assertAlmostEqual(expected0, actual0, delta=1)

        expected1 = surface.by_spend([0.0, 10.0]).reach(1)
        actual1 = dataset.reach_by_spend([0.0, 10.0]).reach(1)
        self.assertAlmostEqual(expected1, actual1, delta=1)

        expected2 = surface.by_spend([10.0, 10.0]).reach(1)
        actual2 = dataset.reach_by_spend([10.0, 10.0]).reach(1)
        self.assertAlmostEqual(expected2, actual2, delta=10)
Example #3
    def test_compute_trial_results_path(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design = DataDesign(join(d, "data_design"))
            data_design.add(data_set)

            msd = ModelingStrategyDescriptor("strategy", {},
                                             "single_pub_model", {},
                                             "multi_pub_model", {})
            sparams = SystemParameters(
                [0.03, 0.05],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "tps")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial("edir", data_design, "dataset",
                                      trial_descriptor)

            actual = trial._compute_trial_results_path()
            expected = "{}/{}/{},{},{},{}".format(
                "edir",
                "dataset",
                "strategy,single_pub_model,multi_pub_model",
                "spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0",
                "epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5",
                "test_point_strategy=tps.csv",
            )
            self.assertEqual(actual, expected)
Example #4
    def test_spend_by_impressions(self):
        pdf = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04)], "test")
        self.assertEqual(pdf.spend_by_impressions(0), 0)
        self.assertEqual(pdf.spend_by_impressions(1), 0.01)
        self.assertEqual(pdf.spend_by_impressions(2), 0.02)
        self.assertEqual(pdf.spend_by_impressions(3), 0.04)
        self.assertEqual(pdf.spend_by_impressions(4), 0.04)
Example #5
    def setUpClass(cls):
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                             "pdf1")
        pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
        data_set = DataSet([pdf1, pdf2], "test")
        cls.data_set = data_set
        cls.curve1 = GroundTruthReachCurveModel(data_set, 0)
        cls.curve2 = GroundTruthReachCurveModel(data_set, 1)
Example #6
    def test_two_publishers(self):
        pdf1 = PublisherData([(1, 3.0)], "pdf1")
        pdf2 = PublisherData([(1, 6.0)], "pdf2")
        data_set = DataSet([pdf1, pdf2], "test")
        generator = GridTestPointGenerator(data_set,
                                           np.random.default_rng(1),
                                           grid_size=2)
        values = [(int(x[0]), int(x[1])) for x in generator.test_points()]
        self.assertLen(values, 4)
        self.assertEqual(values, [(1, 2), (1, 4), (2, 2), (2, 4)])
Example #7
    def setUpClass(cls):
        pdf11 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                              "pdf11")
        pdf12 = PublisherData([(2, 0.03), (4, 0.06)], "pdf12")
        cls.data_set1 = DataSet([pdf11, pdf12], "ds1")

        pdf21 = PublisherData([(1, 0.01), (2, 0.02), (2, 0.04), (3, 0.05)],
                              "pdf21")
        pdf22 = PublisherData([(2, 0.03), (3, 0.06)], "pdf22")
        cls.data_set2 = DataSet([pdf21, pdf22], "ds2")
Example #8
    def setUpClass(cls):
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                             "pdf1")
        pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
        data_set = DataSet([pdf1, pdf2], "test")

        cls.params = SystemParameters([0.4, 0.5], LiquidLegionsParameters(),
                                      np.random.default_rng(1))
        cls.privacy_tracker = PrivacyTracker()
        cls.halo = HaloSimulator(data_set, cls.params, cls.privacy_tracker)
Example #9
    def test_npoints_generator(self):
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                             "pdf1")
        pdf2 = PublisherData([(1, 0.02), (2, 0.04), (1, 0.08), (3, 0.10)],
                             "pdf2")
        data_set = DataSet([pdf1, pdf2], "test")
        generator = LatinHypercubeRandomTestPointGenerator(
            data_set,
            np.random.default_rng(1),
            npublishers=2,
            minimum_points_per_publisher=200,
        )
        values = [x for x in generator.test_points()]
        self.assertLen(values, 400)
Example #10
    def test_read_and_write_publisher_data(self):
        pdf = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04)], "test")
        with TemporaryDirectory() as d:
            filename = join(d, "pdf_data")
            with open(filename, "w") as pdf_file:
                pdf.write_publisher_data(pdf_file)

            with open(filename) as new_file:
                new_pdf = PublisherData.read_publisher_data(new_file)
            self.assertEqual(new_pdf.max_impressions, 3)
            self.assertEqual(new_pdf.max_spend, 0.04)
            self.assertEqual(new_pdf.max_reach, 2)
Example #11
    def _generate_data_set(self, params: DataSetParameters) -> DataSet:
        if self._verbose:
            print(params)
        publishers = []
        publisher_size = params.largest_publisher_size
        publisher_size_decay_rate = (1 if params.num_publishers == 1 else
                                     params.largest_to_smallest_publisher_ratio
                                     **(1 / float(params.num_publishers - 1)))
        for publisher in range(params.num_publishers):
            publishers.append(
                PublisherData.generate_publisher_data(
                    params.impression_generator_params.generator(
                        **{
                            "n": publisher_size,
                            "random_generator": self._random_generator,
                            **params.impression_generator_params.params,
                        }),
                    params.pricing_generator_params.generator(
                        **params.pricing_generator_params.params),
                    str(publisher + 1),
                ))
            publisher_size = math.floor(publisher_size *
                                        publisher_size_decay_rate)

        overlap_params = {**params.overlap_generator_params.params}
        if "random_generator" in overlap_params:
            overlap_params["random_generator"] = self._random_generator

        return params.overlap_generator_params.generator(publishers,
                                                         name=str(params),
                                                         **overlap_params)
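A worked instance of the decay formula above, with hypothetical numbers. Reading the loop as written, sizes shrink only when the ratio is the fraction smallest/largest (a value at most 1); after num_publishers - 1 multiplications the smallest publisher is exactly ratio times the largest:

import math

ratio, num_publishers = 0.25, 3                   # hypothetical values
decay = ratio ** (1 / float(num_publishers - 1))  # 0.25 ** 0.5 == 0.5
sizes, size = [], 1000
for _ in range(num_publishers):
    sizes.append(size)
    size = math.floor(size * decay)
assert sizes == [1000, 500, 250]                  # 250 == 0.25 * 1000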
Example #12
    def read_data_set(
        cls, dirpath: str,
        filesystem: FsWrapperBase = FsPathlibWrapper()) -> "DataSet":
        """Reads a DataSet from disk.

        A DataSet is given by a directory containing a collection of files,
        each of which represents a PublisherDataSet.  The name associated with
        the DataSet object is the last component of the dirpath.

        Args:
          dirpath:  Directory containing the PublisherDataSets that comprise
            this DataSet.
          filesystem:  The filesystem object that manages all file operations.
        Returns:
          The DataSet object representing the contents of this directory.
        """

        pdf_list = []
        for filepath in sorted(filesystem.glob(dirpath, "*")):
            if filesystem.is_file(filepath):
                with filesystem.open(filepath) as file:
                    try:
                        pdf = PublisherData.read_publisher_data(file)
                        pdf.name = str(filepath)
                        pdf_list.append(pdf)
                    except (ValueError, RuntimeError) as e:
                        raise RuntimeError(
                            "In publisher file {}".format(filepath)) from e
        return cls(pdf_list, filesystem.name(dirpath))
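A usage sketch for the reader above, assuming the default FsPathlibWrapper resolves ordinary local paths. It builds the directory layout the docstring describes, one publisher file per DataSet, using only calls that appear elsewhere in these examples:

from os.path import join
from tempfile import TemporaryDirectory

with TemporaryDirectory() as d:
    for pdf in [PublisherData([(1, 0.01), (2, 0.02)], "pdf1"),
                PublisherData([(2, 0.03)], "pdf2")]:
        with open(join(d, pdf.name), "w") as f:  # one file per publisher
            pdf.write_publisher_data(f)
    # The DataSet is named after the directory's last path component;
    # note that read_data_set overwrites each pdf.name with its file path.
    data_set = DataSet.read_data_set(d)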
Example #13
    def _label_ids(
        cls,
        labeled_set_ids_iter: Iterable[np.ndarray],
        unlabeled_publisher_data_iter: Iterable[PublisherData],
    ):
        """Label the reached ids to reflect cross-pub overlap.

        Args:
          labeled_set_ids_iter:  a list or generator of per-publisher reached ids.
            These ids are labeled, i.e., they are meaningful with respect to
            cross-publisher overlap.
          unlabeled_publisher_data_iter:  a list or generator of PublisherData.
            The ids here are unlabeled, i.e., carry no cross-publisher meaning.
            For each PublisherData, its i-th distinct id is relabeled as the
            i-th id in the corresponding labeled_set_ids.

        Returns:
          A labeled list of PublisherData.
        """
        new_publisher_data_list = []
        for set_ids, pub_data in zip(labeled_set_ids_iter,
                                     unlabeled_publisher_data_iter):
            assert (len(set_ids) == pub_data.max_reach
                    ), "single-pub reach does not match."
            original_ids = set([oid for oid, _ in pub_data._data])
            id_map = dict(zip(original_ids, set_ids))
            new_impression_log_data = [(id_map[oid], x)
                                       for oid, x in pub_data._data]
            new_publisher_data_list.append(
                PublisherData(new_impression_log_data, pub_data.name))
        return new_publisher_data_list
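A minimal sketch of the positional mapping built above, using the values from test_label_ids below. The method iterates a set of the original ids directly; sorted() is used here to make the ordering assumption explicit (for small ints, CPython's set iteration order happens to coincide):

import numpy as np

raw_log = [(3, 0.04), (1, 0.02), (2, 0.01)]  # unlabeled (id, spend) events
labeled_ids = np.array([4, 6, 8])            # cross-publisher labeled ids
id_map = dict(zip(sorted({oid for oid, _ in raw_log}), labeled_ids))
# id_map == {1: 4, 2: 6, 3: 8}
relabeled = [(id_map[oid], x) for oid, x in raw_log]
assert relabeled == [(8, 0.04), (4, 0.02), (6, 0.01)]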
Example #14
    def setUpClass(cls):
        pdf = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                            "pdf1")
        cls.params = SystemParameters([1.0, 0.5, 3.0],
                                      LiquidLegionsParameters(),
                                      np.random.default_rng(1))
        cls.privacy_tracker = PrivacyTracker()
        cls.publisher = Publisher(pdf, 1, cls.params, cls.privacy_tracker)
Example #15
    def test_class_setup_with_campaign_spend_fractions_generator(self):
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                             "pdf1")
        pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
        data_set = DataSet([pdf1, pdf2], "test")
        params = SystemParameters(
            liquid_legions=LiquidLegionsParameters(),
            generator=np.random.default_rng(1),
            campaign_spend_fractions_generator=(
                lambda dataset: [0.2] * dataset.publisher_count),
        )
        params = params.update_from_dataset(data_set)
        privacy_tracker = PrivacyTracker()
        halo = HaloSimulator(data_set, params, privacy_tracker)
        # Expected values are 0.2 * max spend per publisher: 0.2 * 0.05 = 0.01
        # and 0.2 * 0.06 = 0.012.  assertAlmostEqual absorbs float rounding.
        self.assertAlmostEqual(halo._campaign_spends[0], 0.01, 7)
        self.assertAlmostEqual(halo._campaign_spends[1], 0.012, 7)
Example #16
    def test_label_ids(self):
        set_ids_list = [np.array([3, 4, 5]), np.array([4, 6, 8]), np.array([6, 8, 10])]
        pdf1 = PublisherData([(2, 0.02), (1, 0.01), (1, 0.03), (3, 0.04)], "a")
        pdf2 = PublisherData([(3, 0.04), (1, 0.02), (2, 0.01)], "b")
        pdf3 = PublisherData(
            [(1, 0.01), (2, 0.02), (1, 0.04), (1, 0.01), (3, 0.05)], "c"
        )
        pdf_list = [pdf1, pdf2, pdf3]
        expected_data_list = [
            [(3, 0.01), (3, 0.03), (4, 0.02), (5, 0.04)],
            [(4, 0.02), (6, 0.01), (8, 0.04)],
            [(6, 0.01), (6, 0.01), (6, 0.04), (8, 0.02), (10, 0.05)],
        ]
        expected_name_list = ["a", "b", "c"]
        res = OverlapDataSet._label_ids(set_ids_list, pdf_list)
        self.assert_equal_pub_data_list(res, 3, expected_data_list, expected_name_list)
        res = OverlapDataSet._label_ids(iter(set_ids_list), iter(pdf_list))
        self.assert_equal_pub_data_list(res, 3, expected_data_list, expected_name_list)
Example #17
    def test_one_publisher(self):
        pdf = PublisherData([(1, 100.0)], "pdf")
        data_set = DataSet([pdf], "test")
        generator = GridTestPointGenerator(data_set,
                                           np.random.default_rng(1),
                                           grid_size=4)
        values = [int(x[0]) for x in generator.test_points()]
        self.assertLen(values, 4)
        self.assertEqual(values, [20, 40, 60, 80])
Example #18
    def test_evaluate(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            MODELING_STRATEGIES["fake"] = FakeModelingStrategy
            TEST_POINT_STRATEGIES["fake_tps"] = (
                lambda ds, rng: FakeTestPointGenerator().test_points())

            msd = ModelingStrategyDescriptor("fake", {"x": 1}, "goerg", {},
                                             "pairwise_union", {})
            sparams1 = SystemParameters(
                [0.03, 0.05],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            sparams2 = SystemParameters(
                [0.05, 0.03],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams1 = ExperimentParameters(PrivacyBudget(1.0, 0.01), 1, 5,
                                            "fake_tps")
            eparams2 = ExperimentParameters(PrivacyBudget(0.5, 0.001), 1, 5,
                                            "fake_tps")

            trial_descriptors = [
                TrialDescriptor(msd, sparams1, eparams1),
                TrialDescriptor(msd, sparams1, eparams2),
                TrialDescriptor(msd, sparams2, eparams1),
                TrialDescriptor(msd, sparams2, eparams2),
            ]

            exp = Experiment(experiment_dir, data_design, "dataset",
                             trial_descriptors)
            trials = exp.generate_trials()
            self.assertLen(trials, 4)
Example #19
    def test_impressions_by_spend(self):
        pdf = PublisherData([(1, 0.01), (2, 0.02), (3, 0.02), (1, 0.04)], "test")
        self.assertEqual(pdf.impressions_by_spend(0.005), 0)
        self.assertEqual(pdf.impressions_by_spend(0.01), 1)
        self.assertEqual(pdf.impressions_by_spend(0.015), 1)
        self.assertEqual(pdf.impressions_by_spend(0.02), 3)
        self.assertEqual(pdf.impressions_by_spend(0.04), 4)
        self.assertEqual(pdf.impressions_by_spend(0.05), 4)
Example #20
    def test_form_venn_diagram_regions(self, num_publishers, spends, max_freq,
                                       expected):
        pdfs = [
            PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                          "pdf1"),
            PublisherData([(2, 0.03), (4, 0.06)], "pdf2"),
            PublisherData([(2, 0.01), (3, 0.03), (4, 0.05)], "pdf3"),
        ]
        data_set = DataSet(pdfs[:num_publishers], "test")
        params = SystemParameters(
            [0.4] * num_publishers,
            LiquidLegionsParameters(),
            np.random.default_rng(1),
        )
        privacy_tracker = PrivacyTracker()
        halo = HaloSimulator(data_set, params, privacy_tracker)

        regions = halo._form_venn_diagram_regions(spends, max_freq)
        self.assertEqual(expected, regions)
Example #21
    def test_make_independent_vars_dataframe(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design = DataDesign(join(d, "data_design"))
            data_design.add(data_set)

            msd = ModelingStrategyDescriptor("strategy", {},
                                             "single_pub_model", {},
                                             "multi_pub_model", {})
            sparams = SystemParameters(
                [0.03, 0.05],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "test_point_strategy")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial("edir", data_design, "dataset",
                                      trial_descriptor)

            actual = trial._make_independent_vars_dataframe()

            expected_trial_name = "strategy,single_pub_model,multi_pub_model,spends=(0.03,0.05),decay_rate=13,sketch_size=1000000.0,epsilon=1.0,delta=0.01,replica_id=3,max_frequency=5,test_point_strategy=test_point_strategy"

            expected = pd.DataFrame({
                "dataset": ["dataset"],
                "trial": [expected_trial_name],
                "replica_id": [3],
                "single_pub_model": ["single_pub_model"],
                "multi_pub_model": ["multi_pub_model"],
                "strategy": ["strategy"],
                "liquid_legions_sketch_size": [1e6],
                "liquid_legions_decay_rate": [13],
                "maximum_reach": [4],
                "ncampaigns": [2],
                "largest_pub_reach": [3],
                "max_frequency": [5],
                "average_spend_fraction": [0.04],
            })
            pd.testing.assert_frame_equal(actual, expected)
Example #22
    def test_sequentially_correlated_publisher_data_generator(self):
        pdf1 = PublisherData([(2, 0.02), (1, 0.01), (1, 0.03), (3, 0.04)], "a")
        pdf2 = PublisherData([(3, 0.04), (1, 0.02), (2, 0.01)], "b")
        pdf3 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (1, 0.01),
                              (3, 0.05)], "c")
        res = SequentiallyCorrelatedOverlapDataSet(
            unlabeled_publisher_data_list=[pdf1, pdf2, pdf3],
            order=OrderOptions.original,
            correlated_sets=CorrelatedSetsOptions.one,
            shared_prop=0.5,
            random_generator=np.random.default_rng(seed=1),
        )
        expected_data_list = [
            [(2, 0.02), (4, 0.01), (4, 0.03), (6, 0.04)],
            [(0, 0.01), (6, 0.02), (5, 0.04)],
            [(5, 0.01), (1, 0.02), (3, 0.05), (5, 0.04)],
        ]
        expected_name_list = ["a", "b", "c"]
        self.assert_equal_pub_data_list(res._data, 3, expected_data_list,
                                        expected_name_list)
Example #23
    def test_independent_overlap_data_set(self):
        pdf1 = PublisherData([(2, 0.02), (1, 0.01), (1, 0.03), (3, 0.04)], "a")
        pdf2 = PublisherData([(3, 0.04), (1, 0.02), (2, 0.01)], "b")
        pdf3 = PublisherData(
            [(1, 0.01), (2, 0.02), (1, 0.04), (1, 0.01), (3, 0.05)], "c"
        )
        res = IndependentOverlapDataSet(
            unlabeled_publisher_data_list=[pdf1, pdf2, pdf3],
            universe_size=5,
            random_generator=np.random.default_rng(1),
        )
        expected_data_list = [
            [(0, 0.01), (0, 0.03), (2, 0.02), (3, 0.04)],
            [(0, 0.02), (1, 0.01), (2, 0.04)],
            [(2, 0.02), (1, 0.04), (3, 0.05), (1, 0.01)],
        ]
        expected_name_list = ["a", "b", "c"]
        self.assert_equal_pub_data_list(
            res._data, 3, expected_data_list, expected_name_list
        )
Example #24
    def test_fifteen_publishers(self):
        pdf_list = []
        for i in range(15):
            pdf = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                "pdf{}".format(i))
            pdf_list.append(pdf)
        data_set = DataSet(pdf_list, "test")
        generator = LatinHypercubeRandomTestPointGenerator(
            data_set, np.random.default_rng(1), npoints=225)
        values = [x for x in generator.test_points()]
        self.assertLen(values, 225)
Example #25
    def test_two_publishers(self):
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                             "pdf1")
        pdf2 = PublisherData([(1, 0.02), (2, 0.04), (1, 0.08), (3, 0.10)],
                             "pdf2")
        data_set = DataSet([pdf1, pdf2], "test")
        generator = LatinHypercubeRandomTestPointGenerator(
            data_set, np.random.default_rng(1), npoints=100)
        values = [x for x in generator.test_points()]
        self.assertLen(values, 100)
        for i, v in enumerate(values):
            self.assertLen(v, 2)
            self.assertTrue(v[0] >= 0.0,
                            "Item {} is negative: {}".format(i, v))
            self.assertTrue(v[0] < 0.05,
                            "Item {} is too large: {}".format(i, v))
            self.assertTrue(v[1] >= 0.0,
                            "Item {} is negative: {}".format(i, v))
            self.assertTrue(v[1] < 0.10,
                            "Item {} is too large: {}".format(i, v))
Example #26
    def test_generate_reach_points_from_venn_diagram(self, num_publishers,
                                                     spends, regions,
                                                     expected):
        pdfs = [
            PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                          "pdf1"),
            PublisherData([(2, 0.03), (4, 0.06)], "pdf2"),
            PublisherData([(2, 0.01), (3, 0.03), (4, 0.05)], "pdf3"),
        ]
        data_set = DataSet(pdfs[:num_publishers], "test")
        params = SystemParameters(
            [0.4] * num_publishers,
            LiquidLegionsParameters(),
            np.random.default_rng(1),
        )
        privacy_tracker = PrivacyTracker()
        halo = HaloSimulator(data_set, params, privacy_tracker)

        # Note that the reach points generated from the Venn diagram only
        # contain 1+ reaches.
        reach_points = halo._generate_reach_points_from_venn_diagram(
            spends, regions)

        self.assertEqual(len(reach_points), len(expected))

        for i, (r_pt, expected_r_pt) in enumerate(zip(reach_points, expected)):
            self.assertEqual(
                r_pt.impressions,
                expected_r_pt.impressions,
                msg=f"The impressions of No.{i + 1} reach point is not correct",
            )
            self.assertEqual(
                r_pt.reach(1),
                expected_r_pt.reach(1),
                msg=f"The reach of No.{i + 1} reach point is not correct",
            )
            self.assertEqual(
                r_pt.spends,
                expected_r_pt.spends,
                msg=f"The spends of No.{i + 1} reach point is not correct",
            )
Example #27
    def test_disjoint_overlap_data_set(self):
        pdf1 = PublisherData([(2, 0.02), (1, 0.01), (1, 0.03), (3, 0.04)], "a")
        pdf2 = PublisherData([(3, 0.04), (1, 0.02), (2, 0.01)], "b")
        pdf3 = PublisherData(
            [(1, 0.01), (2, 0.02), (1, 0.04), (1, 0.01), (3, 0.05)], "c"
        )
        res = OverlapDataSet(
            unlabeled_publisher_data_list=[pdf1, pdf2, pdf3],
            overlap_generator=DisjointSetGenerator,
            name="disjoint",
        )
        self.assertEqual(res.name, "disjoint")
        expected_data_list = [
            [(0, 0.01), (0, 0.03), (1, 0.02), (2, 0.04)],
            [(3, 0.02), (4, 0.01), (5, 0.04)],
            [(6, 0.01), (6, 0.01), (6, 0.04), (7, 0.02), (8, 0.05)],
        ]
        expected_name_list = ["a", "b", "c"]
        self.assert_equal_pub_data_list(
            res._data, 3, expected_data_list, expected_name_list
        )
Example #28
    def test_user_counts_by_spend(self):
        pdf = PublisherData([(1, 0.01), (1, 0.04), (2, 0.02)])
        self.assertEqual(pdf.user_counts_by_spend(0), {})
        self.assertEqual(pdf.user_counts_by_spend(0.01), {1: 1})
        self.assertEqual(pdf.user_counts_by_spend(0.015), {1: 1})
        self.assertEqual(pdf.user_counts_by_spend(0.03), {1: 1, 2: 1})
        self.assertEqual(pdf.user_counts_by_spend(0.07), {1: 2, 2: 1})
Example #29
    def test_latin_hypercube_definition(self):
        """Check that the points satisfy the definition of a Latin hypercube.

        Tests that the generated test points project onto equally spaced
        cells along each dimension.
        """
        pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                             "pdf1")
        pdf2 = PublisherData([(1, 0.02), (2, 0.04), (1, 0.08), (3, 0.10)],
                             "pdf2")
        pdf3 = PublisherData([(1, 0.02), (2, 0.04), (1, 0.01), (3, 0.06)],
                             "pdf3")
        data_set = DataSet([pdf1, pdf2, pdf3], "test")
        generator = LatinHypercubeRandomTestPointGenerator(
            data_set, np.random.default_rng(1), npoints=100)
        design = np.stack([x for x in generator.test_points()])
        equally_spaced = set(range(100))
        self.assertEqual(set((design[:, 0] / 0.05 * 100).astype("int32")),
                         equally_spaced)
        self.assertEqual(set((design[:, 1] / 0.10 * 100).astype("int32")),
                         equally_spaced)
        self.assertEqual(set((design[:, 2] / 0.06 * 100).astype("int32")),
                         equally_spaced)
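A back-of-the-envelope sketch of the property checked above, in plain numpy rather than the generator's actual implementation: an n-point Latin hypercube places exactly one point in each of the n equal-width bins along every axis, which is what the set comparisons verify.

import numpy as np

rng = np.random.default_rng(1)
npoints, ndims = 100, 3
# One independent permutation per axis; each bin gets exactly one point,
# jittered uniformly within its bin.
design = np.stack(
    [(rng.permutation(npoints) + rng.random(npoints)) / npoints
     for _ in range(ndims)],
    axis=1)
for axis in range(ndims):
    assert set((design[:, axis] * npoints).astype("int32")) == set(range(npoints))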
Example #30
    def test_evaluate(self):
        with TemporaryDirectory() as d:
            pdf1 = PublisherData([(1, 0.01), (2, 0.02), (1, 0.04), (3, 0.05)],
                                 "pdf1")
            pdf2 = PublisherData([(2, 0.02), (2, 0.03), (4, 0.06)], "pdf2")
            data_set = DataSet([pdf1, pdf2], "dataset")
            data_design_dir = join(d, "data_design")
            experiment_dir = join(d, "experiments")
            data_design = DataDesign(data_design_dir)
            data_design.add(data_set)

            MODELING_STRATEGIES["fake"] = FakeModelingStrategy
            TEST_POINT_STRATEGIES["fake_tps"] = FakeTestPointGenerator

            msd = ModelingStrategyDescriptor("fake", {"x": 1}, "goerg", {},
                                             "pairwise_union", {})
            sparams = SystemParameters(
                [0.9, 0.9],
                LiquidLegionsParameters(13, 1e6, 1),
                np.random.default_rng(),
            )
            eparams = ExperimentParameters(PrivacyBudget(1.0, 0.01), 3, 5,
                                           "fake_tps")
            trial_descriptor = TrialDescriptor(msd, sparams, eparams)
            trial = ExperimentalTrial(experiment_dir, data_design, "dataset",
                                      trial_descriptor)
            result = trial.evaluate(seed=1)
            # We don't check each column in the resulting dataframe, because these have
            # been checked by the preceding unit tests.  However, we make a few strategic
            # probes.
            self.assertEqual(result.shape[0], 1)
            self.assertEqual(result["dataset"][0], "dataset")
            self.assertEqual(result["replica_id"][0], 3)
            self.assertEqual(result["privacy_budget_epsilon"][0], 1.0)
            self.assertEqual(result["npoints"][0], 1)
            self.assertEqual(result["model_succeeded"][0], 1)
            self.assertEqual(result["model_exception"][0], "")