예제 #1
0
파일: gupt.py 프로젝트: appcoreopc/GUPT
    def _get_data_bounds(self, records, epsilon):
        """
        Generate the output bounds for the given data set for a pre
        defined computation.

        The records are transposed into per-dimension columns, a histogram
        of those columns is handed to the compute driver so that it can
        pick a (lower, upper) percentile pair per dimension, and the
        estimated percentiles are finally converted into output bounds by
        the compute driver.

        :param records: iterable of records; each record is unpacked by
            ``zip(*records)``, so it presumably holds one value per
            dimension -- confirm against the data driver
        :param epsilon: budget parameter (presumably the differential
            privacy epsilon); passed unchanged to ``dpalgos.histogram``
            and as ``epsilon / (3 * number_of_dimensions)`` to every
            percentile estimate
        :return: whatever ``compute_driver.get_output_bounds`` returns for
            the lists of lower and higher percentiles
        """
        compute_driver = self.compute_driver_class()
        min_vals = self.data_driver.min_bounds
        max_vals = self.data_driver.max_bounds
        sensitive = self.data_driver.sensitiveness

        # Materialise the transpose. On Python 3 zip() returns an iterator
        # that has no len(), cannot be indexed and can only be consumed
        # once, all of which this method relies on.
        records_transpose = list(zip(*records))
        num_dims = len(records_transpose)

        hist = dpalgos.histogram(records_transpose, sensitive, epsilon)
        logger.debug("Ask compute driver what percentile to calculate")
        percentile_values = compute_driver.get_percentiles(hist)

        logger.debug("Estimating percentiles")
        lower_percentiles = []
        higher_percentiles = []
        for index, column in enumerate(records_transpose):
            if not sensitive[index]:
                # Non-sensitive dimensions are not estimated; both lists
                # receive 0 so that positions stay aligned with dimensions.
                lower_percentiles.append(0)
                higher_percentiles.append(0)
                continue

            # Every estimate gets the same share of epsilon. Computed
            # inside the loop so empty input never divides by zero.
            epsilon_share = epsilon / (3 * num_dims)
            lp = dpalgos.estimate_percentile(percentile_values[index][0],
                                             column,
                                             epsilon_share,
                                             min_vals[index],
                                             max_vals[index])
            hp = dpalgos.estimate_percentile(percentile_values[index][1],
                                             column,
                                             epsilon_share,
                                             min_vals[index],
                                             max_vals[index])
            lower_percentiles.append(lp)
            higher_percentiles.append(hp)

        logger.debug("Finished percentile estimation")
        logger.debug("Output bound estimation in progress")
        # Use the ComputeDriver's bound generator to generate the
        # output bounds
        return compute_driver.get_output_bounds(lower_percentiles,
                                                higher_percentiles)
예제 #2
0
파일: gupt.py 프로젝트: tashfernandes/gupt
    def _get_data_bounds(self, records, epsilon):
        """
        Generate the output bounds for the given data set for a pre
        defined computation.

        Steps, in order:

        1. transpose ``records`` into one column per dimension;
        2. build a histogram of the columns with ``dpalgos.histogram``;
        3. ask the compute driver which (lower, upper) percentile pair to
           estimate for every dimension;
        4. estimate both percentiles for each sensitive dimension;
        5. let the compute driver turn the percentiles into output bounds.

        :param records: iterable of records; unpacked with
            ``zip(*records)``, so each record is assumed to carry one
            value per dimension -- TODO confirm with the data driver
        :param epsilon: budget parameter handed in full to the histogram
            and split as ``epsilon / (3 * number_of_dimensions)`` for each
            percentile estimate
        :return: the value returned by
            ``compute_driver.get_output_bounds``
        """
        compute_driver = self.compute_driver_class()
        data_driver = self.data_driver
        min_vals, max_vals = data_driver.min_bounds, data_driver.max_bounds
        sensitive = data_driver.sensitiveness

        # zip() must be turned into a list: under Python 3 it is a lazy,
        # single-use iterator, so the len() and index lookups below would
        # raise TypeError.
        records_transpose = list(zip(*records))
        num_dims = len(records_transpose)

        hist = dpalgos.histogram(records_transpose, sensitive, epsilon)
        logger.debug("Ask compute driver what percentile to calculate")
        percentile_values = compute_driver.get_percentiles(hist)

        logger.debug("Estimating percentiles")
        # The per-estimate share is identical for every call, so compute
        # it once. The guard keeps empty input from dividing by zero (the
        # loop below does not run in that case, so the value is unused).
        epsilon_share = epsilon / (3 * num_dims) if num_dims else 0

        # One (lower, higher) pair per dimension, in dimension order.
        estimates = []
        for index, column in enumerate(records_transpose):
            if not sensitive[index]:
                # Placeholder pair for dimensions that are not estimated.
                estimates.append((0, 0))
                continue

            lower = dpalgos.estimate_percentile(
                percentile_values[index][0], column, epsilon_share,
                min_vals[index], max_vals[index])
            higher = dpalgos.estimate_percentile(
                percentile_values[index][1], column, epsilon_share,
                min_vals[index], max_vals[index])
            estimates.append((lower, higher))

        lower_percentiles = [pair[0] for pair in estimates]
        higher_percentiles = [pair[1] for pair in estimates]

        logger.debug("Finished percentile estimation")
        logger.debug("Output bound estimation in progress")
        # Use the ComputeDriver's bound generator to generate the
        # output bounds
        return compute_driver.get_output_bounds(lower_percentiles,
                                                higher_percentiles)
예제 #3
0
파일: gupt.py 프로젝트: appcoreopc/GUPT
    def _get_data_bounds_parallel(self, records, epsilon):
        """
        Generate the output bounds for the given data set for a pre
        defined computation, estimating the percentiles in parallel.

        The records are transposed into per-dimension columns and a
        histogram of them is given to the compute driver, which returns a
        (lower, upper) percentile pair per dimension. For every sensitive
        dimension two worker processes are started, one per percentile;
        each runs ``spawn(dpalgos.estimate_percentile)`` and reports its
        result over a pipe. Non-sensitive dimensions keep the value 0.

        :param records: iterable of records; unpacked with
            ``zip(*records)``, so each record presumably holds one value
            per dimension -- confirm against the data driver
        :param epsilon: budget parameter passed in full to the histogram
            and as ``epsilon / (3 * number_of_dimensions)`` to every
            percentile estimate
        :return: whatever ``compute_driver.get_output_bounds`` returns for
            the lists of lower and higher percentiles
        :raises EOFError: if a worker exits without sending a result
            (assumes the ``spawn`` wrapper sends exactly one value on the
            connection it is given -- TODO confirm)
        """
        compute_driver = self.compute_driver_class()
        min_vals = self.data_driver.min_bounds
        max_vals = self.data_driver.max_bounds
        sensitive = self.data_driver.sensitiveness

        # Materialise the transpose. On Python 3 zip() returns an iterator
        # that has no len(), cannot be indexed and can only be consumed
        # once, all of which this method relies on.
        records_transpose = list(zip(*records))
        num_dims = len(records_transpose)
        hist = dpalgos.histogram(records_transpose, sensitive, epsilon)
        logger.debug("Ask compute driver what percentile to calculate")
        percentile_values = compute_driver.get_percentiles(hist)

        logger.debug("Estimating percentiles in parallel")
        lower_percentiles = [0] * num_dims
        higher_percentiles = [0] * num_dims

        # One entry per started worker:
        # (result list to fill, dimension index, process, parent pipe end)
        workers = []
        try:
            for index in range(num_dims):
                if not sensitive[index]:
                    continue
                # Slot 0 is the lower percentile, slot 1 the higher one.
                targets = (lower_percentiles, higher_percentiles)
                for slot, results in enumerate(targets):
                    parent_conn, child_conn = Pipe()
                    proc = Process(
                        target=spawn(dpalgos.estimate_percentile),
                        args=(child_conn,
                              percentile_values[index][slot],
                              records_transpose[index],
                              epsilon / (3 * num_dims),
                              min_vals[index],
                              max_vals[index]))
                    proc.start()
                    # Drop the parent's copy of the child end so that
                    # recv() raises EOFError instead of blocking forever
                    # if the worker dies before sending.
                    child_conn.close()
                    workers.append((results, index, proc, parent_conn))

            # Read each result before joining its process: a worker that
            # is blocked on send() cannot exit, so joining first can hang.
            for results, index, proc, parent_conn in workers:
                results[index] = parent_conn.recv()
        finally:
            # Always release the pipe ends and reap the workers, even
            # when launching or receiving failed part-way through.
            for _, _, proc, parent_conn in workers:
                parent_conn.close()
                proc.join()

        logger.debug("Finished parallel percentile estimation")
        logger.debug("Output bound estimation in progress")
        # Use the ComputeDriver's bound generator to generate the
        # output bounds
        return compute_driver.get_output_bounds(lower_percentiles,
                                                higher_percentiles)
예제 #4
0
파일: gupt.py 프로젝트: tashfernandes/gupt
    def _get_data_bounds_parallel(self, records, epsilon):
        """
        Generate the output bounds for the given data set for a pre
        defined computation, running the percentile estimates in
        separate processes.

        For each dimension flagged in ``self.data_driver.sensitiveness``
        two processes are launched (lower percentile first, then the
        higher one). Each process runs
        ``spawn(dpalgos.estimate_percentile)`` and hands its result back
        through a ``multiprocessing.Pipe``. Dimensions that are not
        flagged keep 0 in both result lists.

        :param records: iterable of records; unpacked with
            ``zip(*records)``, so each record is assumed to carry one
            value per dimension -- TODO confirm with the data driver
        :param epsilon: budget parameter handed in full to the histogram
            and split as ``epsilon / (3 * number_of_dimensions)`` for each
            percentile estimate
        :return: the value returned by
            ``compute_driver.get_output_bounds``
        :raises EOFError: when a worker terminates without sending
            anything (presumes the ``spawn`` wrapper sends one value on
            the connection passed as first argument -- verify)
        """
        compute_driver = self.compute_driver_class()
        data_driver = self.data_driver
        min_vals, max_vals = data_driver.min_bounds, data_driver.max_bounds
        sensitive = data_driver.sensitiveness

        # zip() must be turned into a list: under Python 3 it is a lazy,
        # single-use iterator, so the len() and index lookups below would
        # raise TypeError.
        records_transpose = list(zip(*records))
        num_dims = len(records_transpose)
        hist = dpalgos.histogram(records_transpose, sensitive, epsilon)
        logger.debug("Ask compute driver what percentile to calculate")
        percentile_values = compute_driver.get_percentiles(hist)

        logger.debug("Estimating percentiles in parallel")
        lower_percentiles = [0] * num_dims
        higher_percentiles = [0] * num_dims

        # Started workers as (process, parent pipe end, result list,
        # dimension index), in launch order.
        started = []

        def launch(index, percentile, results):
            # Start one estimation process for a single percentile of a
            # single dimension and remember where its result belongs.
            receiver, sender = Pipe()
            worker = Process(target=spawn(dpalgos.estimate_percentile),
                             args=(sender, percentile,
                                   records_transpose[index],
                                   epsilon / (3 * num_dims),
                                   min_vals[index], max_vals[index]))
            worker.start()
            # The parent does not need the sending end. Closing it here
            # lets recv() fail with EOFError, rather than block forever,
            # should the worker exit without sending.
            sender.close()
            started.append((worker, receiver, results, index))

        try:
            for index in range(num_dims):
                if sensitive[index]:
                    launch(index, percentile_values[index][0],
                           lower_percentiles)
                    launch(index, percentile_values[index][1],
                           higher_percentiles)

            # Receive before joining: a worker still blocked in send()
            # cannot terminate, so join()-then-recv() risks a hang.
            for worker, receiver, results, index in started:
                results[index] = receiver.recv()
        finally:
            # Close every pipe end and reap every started process, also
            # on the error path, so no descriptors or children are left.
            for worker, receiver, _, _ in started:
                receiver.close()
                worker.join()

        logger.debug("Finished parallel percentile estimation")
        logger.debug("Output bound estimation in progress")
        # Use the ComputeDriver's bound generator to generate the
        # output bounds
        return compute_driver.get_output_bounds(lower_percentiles,
                                                higher_percentiles)