Esempio n. 1
0
    def _windsorized(self, epsilon, lower_bounds, higher_bounds, output):
        """
        Privatize each dimension of the output in a winsorized manner.

        When the first element of ``output`` is itself iterable, ``output``
        is treated as a collection of dimensions: every dimension is
        processed recursively with an equal share of ``epsilon`` and with
        the matching entry of ``lower_bounds`` / ``higher_bounds``.
        Otherwise ``output`` is handled as one dimension of values.

        Returns a pair ``(estimate, noise)``.  For a single dimension the
        estimate is the mean of the (sanitized) values and the noise is the
        value produced by ``dpalgos.gen_noise``; for nested input both
        members are lists with one entry per dimension.
        """
        if isiterable(output[0]):
            # Split the privacy budget evenly between the dimensions.
            budget_share = epsilon / len(output)
            results = [
                self._windsorized(budget_share, lower_bounds[pos],
                                  higher_bounds[pos], values)
                for pos, values in enumerate(output)
            ]
            estimate = [est for est, _ in results]
            noise = [perturbation for _, perturbation in results]
            return estimate, noise

        samples = list(output)
        radius = len(output) ** (1.0 / 3 + 0.1)

        # Each quartile estimate is given a quarter of the budget.
        quartile_epsilon = epsilon / 4
        low_quartile = dpalgos.estimate_percentile(0.25, samples,
                                                   quartile_epsilon,
                                                   lower_bounds,
                                                   higher_bounds)
        high_quartile = dpalgos.estimate_percentile(0.75, samples,
                                                    quartile_epsilon,
                                                    lower_bounds,
                                                    higher_bounds)

        # Crude centre and spread, used to build a symmetric range
        # [lower, upper] around the centre.
        centre = float(low_quartile + high_quartile) / 2
        half_width = 4 * radius * abs(high_quartile - low_quartile)
        upper = centre + half_width
        lower = centre - half_width

        # NOTE(review): the return value is ignored, so this presumably
        # clamps ``samples`` in place into [lower, upper] -- confirm against
        # _sanitize_multidim.
        clamp_lows = [lower] * len(samples)
        clamp_highs = [upper] * len(samples)
        self._sanitize_multidim(samples, clamp_lows, clamp_highs)

        count = len(samples)
        mean_estimate = float(sum(samples)) / count
        noise_scale = (self.sensitivity_factor * float(abs(upper - lower)) /
                       (2 * epsilon * count))
        return mean_estimate, dpalgos.gen_noise(noise_scale)
Esempio n. 2
0
    def _windsorized(self, epsilon, lower_bounds, higher_bounds, output):
        """
        Privatize each dimension of the output in a winsorized manner.

        If ``output[0]`` is iterable, ``output`` is taken to hold several
        dimensions; each is handled by a recursive call that receives
        ``epsilon / len(output)`` and the bounds stored at the same index.
        Otherwise ``output`` is one dimension of values.

        Returns ``(estimate, noise)``: the mean of the (sanitized) values
        and the result of ``dpalgos.gen_noise`` for a single dimension, or
        two lists with one entry per dimension for nested input.
        """
        if isiterable(output[0]):
            estimates = []
            noises = []
            # Every dimension gets the same fraction of the privacy budget.
            per_dimension_epsilon = epsilon / len(output)
            for position, dimension_values in enumerate(output):
                dim_estimate, dim_noise = self._windsorized(
                    per_dimension_epsilon, lower_bounds[position],
                    higher_bounds[position], dimension_values)
                estimates.append(dim_estimate)
                noises.append(dim_noise)
            return estimates, noises

        values = list(output)
        radius = len(output)**(1.0 / 3 + 0.1)

        # A quarter of the budget is spent on each quartile estimate.
        first_quartile = dpalgos.estimate_percentile(
            0.25, values, epsilon / 4, lower_bounds, higher_bounds)
        third_quartile = dpalgos.estimate_percentile(
            0.75, values, epsilon / 4, lower_bounds, higher_bounds)

        midpoint = float(first_quartile + third_quartile) / 2
        reach = 4 * radius * abs(third_quartile - first_quartile)
        range_high = midpoint + reach
        range_low = midpoint - reach

        # NOTE(review): result of the call is discarded, so ``values`` is
        # presumably clamped in place to [range_low, range_high] -- confirm
        # against _sanitize_multidim.
        self._sanitize_multidim(values, [range_low] * len(values),
                                [range_high] * len(values))

        size = len(values)
        mean_estimate = float(sum(values)) / size
        range_width = float(abs(range_high - range_low))
        noise = dpalgos.gen_noise(self.sensitivity_factor * range_width /
                                  (2 * epsilon * size))
        return mean_estimate, noise
Esempio n. 3
0
    def _get_data_bounds(self, records, epsilon):
        """
        Generate the output bounds for the given data set for a pre
        defined computation.

        The records are transposed into one tuple per column.  A histogram
        of the columns is built with ``dpalgos.histogram`` and handed to the
        compute driver, which answers with a (lower, higher) percentile pair
        per column.  For every column flagged as sensitive both percentiles
        are estimated with ``dpalgos.estimate_percentile``; columns that are
        not sensitive get 0 for both.

        Returns whatever ``compute_driver.get_output_bounds`` produces from
        the lists of lower and higher percentile estimates.
        """
        compute_driver = self.compute_driver_class()
        min_vals = self.data_driver.min_bounds
        max_vals = self.data_driver.max_bounds
        sensitive = self.data_driver.sensitiveness

        # Materialise the transpose as a list.  On Python 3 zip() returns a
        # one-shot iterator: it has no len(), cannot be indexed, and could
        # be consumed by the histogram call before the loop below runs.
        # On Python 2 this is just a shallow copy of the list zip() built.
        records_transpose = list(zip(*records))
        num_columns = len(records_transpose)

        hist = dpalgos.histogram(records_transpose, sensitive, epsilon)
        logger.debug("Ask compute driver what percentile to calculate")
        percentile_values = compute_driver.get_percentiles(hist)

        # Estimate, per sensitive column, the two percentiles requested by
        # the compute driver.
        logger.debug("Estimating percentiles")
        lower_percentiles = []
        higher_percentiles = []
        for index, column in enumerate(records_transpose):
            if not sensitive[index]:
                # Placeholder bounds for columns that are not sensitive.
                lower_percentiles.append(0)
                higher_percentiles.append(0)
                continue

            # Computed inside the loop so an empty data set never divides
            # by zero.
            column_epsilon = epsilon / (3 * num_columns)
            lp = dpalgos.estimate_percentile(percentile_values[index][0],
                                             column,
                                             column_epsilon,
                                             min_vals[index],
                                             max_vals[index])
            hp = dpalgos.estimate_percentile(percentile_values[index][1],
                                             column,
                                             column_epsilon,
                                             min_vals[index],
                                             max_vals[index])
            lower_percentiles.append(lp)
            higher_percentiles.append(hp)

        logger.debug("Finished percentile estimation")
        logger.debug("Output bound estimation in progress")
        # Use the ComputeDriver's bound generator to generate the
        # output bounds
        return compute_driver.get_output_bounds(lower_percentiles,
                                                higher_percentiles)
Esempio n. 4
0
    def _get_data_bounds(self, records, epsilon):
        """
        Generate the output bounds for the given data set for a pre
        defined computation.

        ``records`` is transposed so that each entry holds the values of one
        column.  ``dpalgos.histogram`` is run over the columns and its
        result is passed to the compute driver, which returns a
        (lower, higher) percentile pair for each column.  Sensitive columns
        have both percentiles estimated through
        ``dpalgos.estimate_percentile``; the remaining columns contribute 0
        for both.

        Returns the result of ``compute_driver.get_output_bounds`` applied
        to the lower and higher percentile lists.
        """
        compute_driver = self.compute_driver_class()
        min_vals = self.data_driver.min_bounds
        max_vals = self.data_driver.max_bounds
        sensitive = self.data_driver.sensitiveness

        # zip() is an iterator on Python 3, which supports neither len() nor
        # indexing and can only be traversed once; build a real list so the
        # histogram call and the loop below both see every column.  On
        # Python 2 this only copies the list that zip() already returned.
        records_transpose = list(zip(*records))
        column_count = len(records_transpose)

        hist = dpalgos.histogram(records_transpose, sensitive, epsilon)
        logger.debug("Ask compute driver what percentile to calculate")
        percentile_values = compute_driver.get_percentiles(hist)

        # Estimate the driver-selected percentiles for each sensitive column.
        logger.debug("Estimating percentiles")
        lower_percentiles = []
        higher_percentiles = []
        for index, column in enumerate(records_transpose):
            if not sensitive[index]:
                # Non-sensitive columns get placeholder bounds.
                lower_percentiles.append(0)
                higher_percentiles.append(0)
                continue

            # Kept inside the loop so that an empty data set never triggers
            # a division by zero.
            share = epsilon / (3 * column_count)
            lp = dpalgos.estimate_percentile(
                percentile_values[index][0], column, share,
                min_vals[index], max_vals[index])
            hp = dpalgos.estimate_percentile(
                percentile_values[index][1], column, share,
                min_vals[index], max_vals[index])
            lower_percentiles.append(lp)
            higher_percentiles.append(hp)

        logger.debug("Finished percentile estimation")
        logger.debug("Output bound estimation in progress")
        # Use the ComputeDriver's bound generator to generate the
        # output bounds
        return compute_driver.get_output_bounds(lower_percentiles,
                                                higher_percentiles)