def _get_data_bounds(self, records, epsilon):
    """Generate differentially private output bounds for the data set.

    Builds a DP histogram of the (transposed) records, asks the compute
    driver which percentiles to estimate per attribute, estimates them
    under DP, and lets the compute driver turn the estimates into output
    bounds.

    :param records: iterable of record tuples (one value per attribute).
    :param epsilon: total privacy budget for this bound computation.
    :return: whatever ``compute_driver.get_output_bounds`` produces for
        the estimated lower/higher percentile lists.
    """
    compute_driver = self.compute_driver_class()
    min_vals, max_vals = self.data_driver.min_bounds, self.data_driver.max_bounds
    sensitive = self.data_driver.sensitiveness
    # Materialize the transpose: zip() is a one-shot iterator in
    # Python 3, but we need len() and repeated indexing below.
    records_transpose = list(zip(*records))
    num_attrs = len(records_transpose)
    # Find the first and third quartile of the distribution in a
    # differentially private manner
    hist = dpalgos.histogram(records_transpose, sensitive, epsilon)
    logger.debug("Ask compute driver what percentile to calculate")
    percentile_values = compute_driver.get_percentiles(hist)
    logger.debug("Estimating percentiles")
    # Each percentile estimate gets an equal share of the budget; hoisted
    # out of the loop (guarded so an empty record set does not divide by 0).
    share = epsilon / (3 * num_attrs) if num_attrs else 0.0
    lower_percentiles = []
    higher_percentiles = []
    for index in range(num_attrs):
        if not sensitive[index]:
            # Non-sensitive attributes need no DP estimation.
            lower_percentiles.append(0)
            higher_percentiles.append(0)
        else:
            lp = dpalgos.estimate_percentile(
                percentile_values[index][0], records_transpose[index],
                share, min_vals[index], max_vals[index])
            hp = dpalgos.estimate_percentile(
                percentile_values[index][1], records_transpose[index],
                share, min_vals[index], max_vals[index])
            lower_percentiles.append(lp)
            higher_percentiles.append(hp)
    logger.debug("Finished percentile estimation")
    logger.debug("Output bound estimation in progress")
    # Use the ComputeDriver's bound generator to generate the
    # output bounds
    return compute_driver.get_output_bounds(lower_percentiles,
                                            higher_percentiles)
def _get_data_bounds(self, records, epsilon):
    """Generate the output bounds for the given data set for a
    pre-defined computation, in a differentially private manner.

    :param records: iterable of record tuples (one value per attribute).
    :param epsilon: total privacy budget for this bound computation.
    :return: result of ``compute_driver.get_output_bounds`` over the
        per-attribute lower/higher percentile estimates.
    """
    compute_driver = self.compute_driver_class()
    min_vals, max_vals = self.data_driver.min_bounds, self.data_driver.max_bounds
    sensitive = self.data_driver.sensitiveness
    # zip() yields a single-use iterator on Python 3; convert to a list
    # since we take len() and index into it several times.
    records_transpose = list(zip(*records))
    num_attrs = len(records_transpose)
    # Find the first and third quartile of the distribution in a
    # differentially private manner
    hist = dpalgos.histogram(records_transpose, sensitive, epsilon)
    logger.debug("Ask compute driver what percentile to calculate")
    percentile_values = compute_driver.get_percentiles(hist)
    logger.debug("Estimating percentiles")
    # Loop-invariant budget share per estimate (guard empty input).
    share = epsilon / (3 * num_attrs) if num_attrs else 0.0
    lower_percentiles = []
    higher_percentiles = []
    for index in range(num_attrs):
        if not sensitive[index]:
            # Attribute is not sensitive: no DP estimation required.
            lower_percentiles.append(0)
            higher_percentiles.append(0)
        else:
            lp = dpalgos.estimate_percentile(
                percentile_values[index][0], records_transpose[index],
                share, min_vals[index], max_vals[index])
            hp = dpalgos.estimate_percentile(
                percentile_values[index][1], records_transpose[index],
                share, min_vals[index], max_vals[index])
            lower_percentiles.append(lp)
            higher_percentiles.append(hp)
    logger.debug("Finished percentile estimation")
    logger.debug("Output bound estimation in progress")
    # Use the ComputeDriver's bound generator to generate the
    # output bounds
    return compute_driver.get_output_bounds(lower_percentiles,
                                            higher_percentiles)
def _get_data_bounds_parallel(self, records, epsilon):
    """Generate differentially private output bounds for the data set,
    estimating the per-attribute percentiles in parallel worker processes.

    One child process is spawned per percentile estimate (two per
    sensitive attribute); results are returned over ``Pipe`` objects.

    :param records: iterable of record tuples (one value per attribute).
    :param epsilon: total privacy budget for this bound computation.
    :return: result of ``compute_driver.get_output_bounds`` over the
        estimated lower/higher percentile lists.
    """
    compute_driver = self.compute_driver_class()
    min_vals, max_vals = self.data_driver.min_bounds, self.data_driver.max_bounds
    sensitive = self.data_driver.sensitiveness
    # Materialize the transpose: zip() is a one-shot iterator in
    # Python 3, but we need len(), repeated indexing, and to hand
    # columns to child processes.
    records_transpose = list(zip(*records))
    num_attrs = len(records_transpose)
    # Find the first and third quartile of the distribution in a
    # differentially private manner
    hist = dpalgos.histogram(records_transpose, sensitive, epsilon)
    logger.debug("Ask compute driver what percentile to calculate")
    percentile_values = compute_driver.get_percentiles(hist)
    logger.debug("Estimating percentiles in parallel")
    lower_percentiles = [0] * num_attrs
    higher_percentiles = [0] * num_attrs
    # Loop-invariant budget share per estimate (guard empty input).
    share = epsilon / (3 * num_attrs) if num_attrs else 0.0
    pipes = []
    procs = []
    for index in range(num_attrs):
        if sensitive[index]:
            # Spawn one worker per percentile (lower, then higher);
            # slots 2*index and 2*index+1 hold this attribute's pair.
            for which in (0, 1):
                p, c = Pipe()
                proc = Process(target=spawn(dpalgos.estimate_percentile),
                               args=(c, percentile_values[index][which],
                                     records_transpose[index],
                                     share,
                                     min_vals[index],
                                     max_vals[index]))
                pipes.append((p, c,))
                procs.append(proc)
                proc.start()
        else:
            # Keep slot positions aligned for non-sensitive attributes.
            procs.append(None)
            procs.append(None)
            pipes.append(None)
            pipes.append(None)
    for index in range(num_attrs):
        if sensitive[index]:
            # recv() BEFORE join(): joining a child that is still
            # blocked sending a large result into a full pipe buffer
            # would deadlock (see multiprocessing programming
            # guidelines).
            lower_percentiles[index] = pipes[2 * index][0].recv()
            procs[2 * index].join()
            higher_percentiles[index] = pipes[2 * index + 1][0].recv()
            procs[2 * index + 1].join()
    logger.debug("Finished parallel percentile estimation")
    logger.debug("Output bound estimation in progress")
    # Use the ComputeDriver's bound generator to generate the
    # output bounds
    return compute_driver.get_output_bounds(lower_percentiles,
                                            higher_percentiles)
def _get_data_bounds_parallel(self, records, epsilon):
    """Generate the output bounds for the given data set for a
    pre-defined computation, running the per-attribute percentile
    estimations in parallel child processes.

    Two processes are started per sensitive attribute (one for each of
    the two requested percentiles); each reports its estimate back
    through a ``Pipe``.

    :param records: iterable of record tuples (one value per attribute).
    :param epsilon: total privacy budget for this bound computation.
    :return: result of ``compute_driver.get_output_bounds`` over the
        per-attribute lower/higher percentile estimates.
    """
    compute_driver = self.compute_driver_class()
    min_vals, max_vals = self.data_driver.min_bounds, self.data_driver.max_bounds
    sensitive = self.data_driver.sensitiveness
    # zip() yields a single-use iterator on Python 3; convert to a list
    # because the transpose is measured, indexed, and shipped to workers.
    records_transpose = list(zip(*records))
    num_attrs = len(records_transpose)
    # Find the first and third quartile of the distribution in a
    # differentially private manner
    hist = dpalgos.histogram(records_transpose, sensitive, epsilon)
    logger.debug("Ask compute driver what percentile to calculate")
    percentile_values = compute_driver.get_percentiles(hist)
    logger.debug("Estimating percentiles in parallel")
    lower_percentiles = [0] * num_attrs
    higher_percentiles = [0] * num_attrs
    # Per-estimate privacy budget, hoisted out of the loop (guarded so
    # an empty record set does not divide by zero).
    share = epsilon / (3 * num_attrs) if num_attrs else 0.0
    pipes = []
    procs = []
    for index in range(num_attrs):
        if not sensitive[index]:
            # Pad both bookkeeping lists so slot 2*index / 2*index+1
            # stays aligned with the attribute index.
            procs.extend((None, None))
            pipes.extend((None, None))
            continue
        for which in (0, 1):  # 0 = lower percentile, 1 = higher
            parent_end, child_end = Pipe()
            worker = Process(target=spawn(dpalgos.estimate_percentile),
                             args=(child_end,
                                   percentile_values[index][which],
                                   records_transpose[index],
                                   share,
                                   min_vals[index],
                                   max_vals[index]))
            pipes.append((parent_end, child_end,))
            procs.append(worker)
            worker.start()
    for index in range(num_attrs):
        if sensitive[index]:
            # Drain the pipe before join(): a child blocked on a full
            # pipe buffer would never exit, deadlocking join() (per the
            # multiprocessing programming guidelines).
            lower_percentiles[index] = pipes[2 * index][0].recv()
            procs[2 * index].join()
            higher_percentiles[index] = pipes[2 * index + 1][0].recv()
            procs[2 * index + 1].join()
    logger.debug("Finished parallel percentile estimation")
    logger.debug("Output bound estimation in progress")
    # Use the ComputeDriver's bound generator to generate the
    # output bounds
    return compute_driver.get_output_bounds(lower_percentiles,
                                            higher_percentiles)