Example #1
import math

import numpy as np

def _compute_reward(self, query):
    ast_transform = self.observe(query)  # value unused below
    # Neighboring query strings (unused below).
    d1_query = query
    d2_query = query.replace("d1.d1", "d2.d2")
    d1_dataset, d2_dataset, d1_metadata, d2_metadata = generate_neighbors(
        self.df, self.metadata)
    d1 = PandasReader(d1_metadata, d1_dataset)
    d2 = PandasReader(d2_metadata, d2_dataset)
    evaluator = DPEvaluator()  # renamed from eval to avoid shadowing the builtin
    pa = DPSingletonQuery()
    key_metrics = evaluator.evaluate([d1_metadata, d1], [d2_metadata, d2],
                                     pa, query, self.pp, self.ev)
    message = None
    singleton = key_metrics["__key__"]
    if singleton.dp_res is None:
        # The evaluator itself errored out: small reward, surface the error.
        dpresult = "DP_BUG"
        self.reward = 1
        message = singleton.error
    elif not singleton.dp_res:
        # The DP predicate was violated: a privacy failure ends the game
        # and earns the maximum reward.
        self._game_ended = True
        dpresult = "DP_FAIL"
        self.reward = 20
        message = "dp_res_False"
    elif singleton.jensen_shannon_divergence == math.inf:
        # dp_res is truthy here; an infinite divergence signals a metric bug.
        self._game_ended = True
        dpresult = "DP_BUG"
        self.reward = 20
        message = "jsdistance_is_inf"
    else:
        # Aggregate per-key results: DP must hold for every key, and the
        # reward is the largest Jensen-Shannon divergence observed.
        res_list = []
        for metrics in key_metrics.values():
            # ws_res = metrics.wasserstein_distance  # alternative metric
            res_list.append([metrics.dp_res,
                             metrics.jensen_shannon_divergence])
        dp_res = np.all(np.array([res[0] for res in res_list]))
        js_res = np.array([res[1] for res in res_list]).max()
        if dp_res:
            dpresult = "DP_PASS"
            self.reward = js_res
        else:
            # Guard: without this branch dpresult would be unbound when a
            # per-key DP check fails; mirror the DP_FAIL case above.
            self._game_ended = True
            dpresult = "DP_FAIL"
            self.reward = 20
            message = "dp_res_False"
    return dpresult, self.reward, message, d1, d2
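
A minimal sketch of how this reward routine might be driven, assuming a
hypothetical environment object `env` whose class defines `_compute_reward`
as above; the query strings and the `env` name are illustrative, not part of
the original code:

    # Hypothetical driver loop over candidate queries.
    queries = [
        "SELECT COUNT(UserId) AS UserCount FROM d1.d1",
        "SELECT SUM(Usage) AS TotalUsage FROM d1.d1",
    ]
    for q in queries:
        dpresult, reward, message, d1, d2 = env._compute_reward(q)
        if dpresult == "DP_FAIL":
            print(f"Potential DP violation for {q!r}: reward={reward}")
        else:
            print(f"{dpresult}: reward={reward}, message={message}")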
Example #2
import numpy as np

def learn(self, querypool, export_as_csv=False):
    output = []
    for query in querypool:
        # Fresh simulated dataset and neighboring pair for every query.
        df, metadata = create_simulated_dataset(self.dd.dataset_size,
                                                "dataset")
        d1_dataset, d2_dataset, d1_metadata, d2_metadata = generate_neighbors(
            df, metadata)
        d1 = PandasReader(d1_metadata, d1_dataset)
        d2 = PandasReader(d2_metadata, d2_dataset)
        evaluator = DPEvaluator()  # renamed from eval to avoid shadowing the builtin
        pa = DPSingletonQuery()
        key_metrics = evaluator.evaluate([d1_metadata, d1], [d2_metadata, d2],
                                         pa, query, self.pp, self.ev)
        if key_metrics["__key__"].dp_res is None:
            # Evaluator errored out; record the error with no divergence.
            output.append({
                "query": query,
                "dpresult": None,
                "jensen_shannon_divergence": None,
                "error": key_metrics["__key__"].error,
            })
        else:
            # DP must hold for every key; report the largest divergence.
            res_list = [[metrics.dp_res, metrics.jensen_shannon_divergence]
                        for metrics in key_metrics.values()]
            dp_res = np.all(np.array([res[0] for res in res_list]))
            js_res = np.array([res[1] for res in res_list]).max()
            output.append({
                "query": query,
                "dpresult": dp_res,
                "jensen_shannon_divergence": js_res,
                "error": None,
            })
    if export_as_csv:
        write_to_csv("Bandit.csv", output, flag="bandit")
    else:
        return output
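
A minimal usage sketch, assuming a hypothetical `bandit` object whose class
defines `learn` as above together with the `__init__` from Example #4:

    # Hypothetical usage; `bandit` is an assumed instance, not original code.
    querypool = [
        "SELECT COUNT(UserId) AS UserCount FROM dataset.dataset",
        "SELECT AVG(Usage) AS AvgUsage FROM dataset.dataset",
    ]
    results = bandit.learn(querypool)  # list of per-query result dicts
    for row in results:
        print(row["query"], row["dpresult"], row["jensen_shannon_divergence"])
    bandit.learn(querypool, export_as_csv=True)  # writes Bandit.csv instead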
Example #3
    def test_interface_count(self):
        # Assumes `import logging` at module level and a module-level logger,
        # e.g. test_logger = logging.getLogger(__name__).
        logging.getLogger().setLevel(logging.DEBUG)
        # Initialize params and the algorithm to benchmark
        pa = DPSingletonQuery()
        pp = PrivacyParams(epsilon=1.0)
        ev = EvaluatorParams(repeat_count=100)
        dd = DatasetParams(dataset_size=500)
        query = "SELECT COUNT(UserId) AS UserCount FROM dataset.dataset"

        # Prepare neighboring datasets. Note: the (dataset, metadata) argument
        # order here differs from Examples #1-#2; PandasReader's expected
        # order depends on the library version installed.
        df, metadata = self.create_simulated_dataset(dd.dataset_size,
                                                     "dataset")
        d1_dataset, d2_dataset, d1_metadata, d2_metadata = self.generate_neighbors(
            df, metadata)
        d1 = PandasReader(d1_dataset, d1_metadata)
        d2 = PandasReader(d2_dataset, d2_metadata)

        # Call evaluate
        evaluator = DPEvaluator()  # renamed from eval to avoid shadowing the builtin
        key_metrics = evaluator.evaluate([d1_metadata, d1], [d2_metadata, d2],
                                         pa, query, pp, ev)
        # After evaluation, DP should hold and every distance metric should
        # be non-zero for each key.
        for metrics in key_metrics.values():
            assert metrics.dp_res
            test_logger.debug("Wasserstein Distance: %s",
                              metrics.wasserstein_distance)
            test_logger.debug("Jensen Shannon Divergence: %s",
                              metrics.jensen_shannon_divergence)
            test_logger.debug("KL Divergence: %s", metrics.kl_divergence)
            test_logger.debug("MSE: %s", metrics.mse)
            test_logger.debug("Standard Deviation: %s", metrics.std)
            test_logger.debug("Mean Signed Deviation: %s", metrics.msd)
            assert metrics.wasserstein_distance > 0.0
            assert metrics.jensen_shannon_divergence > 0.0
            assert metrics.kl_divergence != 0.0
            assert metrics.mse > 0.0
            assert metrics.std != 0.0
            assert metrics.msd != 0.0
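
If this test lives in a pytest suite, it can be selected and run on its own;
the file path below is an assumption:

    # Run just this test with live DEBUG logging:
    #   pytest tests/test_interface.py -k test_interface_count --log-cli-level=DEBUG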
Example #4
def __init__(self):
    # Shared evaluation settings: privacy budget, number of repeated runs
    # per query, simulated dataset size, and the algorithm under test.
    self.pp = PrivacyParams(epsilon=1.0)
    self.ev = EvaluatorParams(repeat_count=100)
    self.dd = DatasetParams(dataset_size=500)
    self.pa = DPSingletonQuery()
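
A sketch of how these shared parameters would feed DPEvaluator.evaluate,
assuming a hypothetical Harness class that uses the __init__ above plus the
helper functions from Example #2; the class name and query string are
assumptions:

    # Hypothetical wiring; Harness and the query string are assumptions.
    h = Harness()
    df, metadata = create_simulated_dataset(h.dd.dataset_size, "dataset")
    d1_dataset, d2_dataset, d1_metadata, d2_metadata = generate_neighbors(
        df, metadata)
    d1 = PandasReader(d1_metadata, d1_dataset)
    d2 = PandasReader(d2_metadata, d2_dataset)
    query = "SELECT COUNT(UserId) AS UserCount FROM d1.d1"
    key_metrics = DPEvaluator().evaluate([d1_metadata, d1], [d2_metadata, d2],
                                         h.pa, query, h.pp, h.ev)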