def detect_counterexample(algorithm, test_epsilon, default_kwargs, event_search_space=None, databases=None,
                          event_iterations=100000, detect_iterations=500000, cores=0, loglevel=logging.INFO):
    """
    Run event selection followed by a hypothesis test for every epsilon in `test_epsilon`.

    :param algorithm: The algorithm to test for.
    :param test_epsilon: The privacy budget to test for, can either be a number or an iterable of numbers.
    :param default_kwargs: The default arguments the algorithm needs except the first Queries argument,
        'epsilon' must be provided.
    :param event_search_space: The search space for event selector to reduce search time, optional.
    :param databases: The databases to run for detection, optional. When given it is unpacked as (d1, d2);
        otherwise databases are generated with generate_databases.
    :param event_iterations: The iterations for event selector to run, default is 100000.
    :param detect_iterations: The iterations for detector to run, default is 500000.
    :param cores: The cores to utilize, 0 means auto-detection (mp.cpu_count()), 1 means no process pool
        is created and None is handed to the helpers as process_pool.
    :param loglevel: The loglevel for logging package.
    :return: [(epsilon, p, d1, d2, kwargs, event)] The epsilon-p pairs along with databases/arguments/selected event.
    """
    logging.basicConfig(level=loglevel)
    logger.info('Starting to find counter example on algorithm {} with test epsilon {}\n'
                .format(algorithm.__name__, test_epsilon))
    logger.info('\nExtra arguments:\n'
                'default_kwargs: {}\n'
                'event_search_space: {}\n'
                'databases: {}\n'
                'cores:{}\n'.format(default_kwargs, event_search_space, databases, cores))

    if databases is not None:
        d1, d2 = databases
        kwargs = generate_arguments(algorithm, d1, d2, default_kwargs=default_kwargs)
        input_list = ((d1, d2, kwargs),)
    else:
        input_list = generate_databases(algorithm, 5, default_kwargs=default_kwargs)

    result = []

    # a bare number becomes a 1-tuple; an iterable without a length (e.g. a generator) is
    # materialized because the progress percentage below needs len(test_epsilon)
    if isinstance(test_epsilon, (int, float)):
        test_epsilon = (test_epsilon,)
    elif not hasattr(test_epsilon, '__len__'):
        test_epsilon = tuple(test_epsilon)

    if cores == 0:
        pool = mp.Pool(mp.cpu_count())
    elif cores != 1:
        pool = mp.Pool(cores)
    else:
        pool = None

    try:
        for i, epsilon in enumerate(test_epsilon):
            d1, d2, kwargs, event = select_event(algorithm, input_list, epsilon, event_iterations,
                                                 search_space=event_search_space, process_pool=pool)

            # fix the database and arguments if selected for performance
            input_list = ((d1, d2, kwargs),) if len(input_list) > 1 else input_list

            # hypothesis_test yields a pair here; only the first p-value is reported
            p1, _ = hypothesis_test(algorithm, d1, d2, kwargs, event, epsilon, detect_iterations,
                                    process_pool=pool)
            result.append((epsilon, p1, d1, d2, kwargs, event))
            print('Epsilon: {} | p-value: {:5.3f} | Event: {} | {:5.1f}%'
                  .format(epsilon, p1, event, float(i + 1) / len(test_epsilon) * 100))
            logger.debug('D1: {} | D2: {} | kwargs: {}'.format(d1, d2, kwargs))
    finally:
        if pool is not None:
            # close() only stops new submissions; join() waits for the workers to exit
            pool.close()
            pool.join()

    return result
def detect_counterexample(algorithm, test_epsilon, default_kwargs=None, databases=None, num_input=(5, 10),
                          event_iterations=100000, detect_iterations=500000, cores=0, sensitivity=ALL_DIFFER,
                          quiet=False, loglevel=logging.INFO):
    """
    Run event selection followed by a hypothesis test for every epsilon in `test_epsilon`.

    :param algorithm: The algorithm to test for.
    :param test_epsilon: The privacy budget to test for, can either be a number or an iterable of numbers.
    :param default_kwargs: The default arguments the algorithm needs except the first Queries argument.
    :param databases: The databases to run for detection, optional. When given it is unpacked as (d1, d2).
    :param num_input: The length of input to generate, not used if database param is specified.
        A single number or an iterable of numbers.
    :param event_iterations: The iterations for event selector to run, default is 100000.
    :param detect_iterations: The iterations for detector to run, default is 500000.
    :param cores: The cores to utilize, 0 means auto-detection (mp.cpu_count()), 1 means no process pool
        is created and None is handed to the helpers as process_pool.
    :param sensitivity: The sensitivity setting, all queries can differ by one or just one query can differ by one.
    :param quiet: Do not print progress bar or messages, logs are not affected, default is False.
    :param loglevel: The loglevel for logging package.
    :return: [(epsilon, p, d1, d2, kwargs, event)] The epsilon-p pairs along with databases/arguments/selected event.
    """
    # initialize an empty default kwargs if None is given
    default_kwargs = default_kwargs if default_kwargs else {}

    logging.basicConfig(level=loglevel)
    logger.info(
        'Start detection for counterexample on algorithm {} with test epsilon {}'
        .format(algorithm.__name__, test_epsilon))
    logger.info(
        'Options -> default_kwargs: {} | databases: {} | cores:{}'.format(
            default_kwargs, databases, cores))

    # log warnings about gsl installation
    if use_gsl:
        logger.info(
            'Found GSL installation, using GSL implementation of hypergeom.cdf for better performance.'
        )
    else:
        logger.warning(
            'Did not find Gnu Scientific Library (GSL) installation, falling back to scipy implementation of '
            'hypergeom.cdf. Note that GSL provides much faster implementation than scipy which can '
            'significantly increase detection performance.')

    input_list = []
    if databases is not None:
        d1, d2 = databases
        kwargs = generate_arguments(algorithm, d1, d2, default_kwargs=default_kwargs)
        input_list = ((d1, d2, kwargs), )
    else:
        num_input = (int(num_input), ) if isinstance(num_input, (int, float)) else num_input
        for num in num_input:
            input_list.extend(
                generate_databases(algorithm, num, default_kwargs=default_kwargs, sensitivity=sensitivity))

    result = []

    # convert int/float or unsized iterable into tuple (so that it has length information);
    # sized containers such as list/tuple are left untouched
    if isinstance(test_epsilon, (int, float)):
        test_epsilon = (test_epsilon, )
    elif not hasattr(test_epsilon, '__len__'):
        test_epsilon = tuple(test_epsilon)

    if cores == 0:
        pool = mp.Pool(mp.cpu_count())
    elif cores != 1:
        pool = mp.Pool(cores)
    else:
        pool = None

    try:
        for epsilon in tqdm.tqdm(test_epsilon, total=len(test_epsilon), unit='test', desc='Detection',
                                 disable=quiet):
            d1, d2, kwargs, event = select_event(algorithm, input_list, epsilon, event_iterations,
                                                 quiet=quiet, process_pool=pool)
            p = hypothesis_test(algorithm, d1, d2, kwargs, event, epsilon, detect_iterations,
                                report_p2=False, process_pool=pool)
            result.append((epsilon, float(p), d1, d2, kwargs, event))
            if not quiet:
                # tqdm.write keeps the message from breaking the progress bar
                tqdm.tqdm.write(
                    'Epsilon: {} | p-value: {:5.3f} | Event: {}'.format(
                        epsilon, p, event))
            logger.debug('D1: {} | D2: {} | kwargs: {}'.format(d1, d2, kwargs))
    finally:
        if pool:
            pool.close()
            pool.join()

    return result
def detect_counterexample(algorithm, test_epsilon, default_kwargs=None, databases=None, num_input=(5, 10),
                          event_iterations=100000, detect_iterations=500000, cores=None, sensitivity=ALL_DIFFER,
                          quiet=False, loglevel=logging.INFO):
    """
    Run event selection followed by a hypothesis test for every epsilon in `test_epsilon`.

    Every (d1, d2, kwargs) candidate gets a copy of its kwargs extended with the key '_d1' holding d1
    before it is handed to the event selector.

    :param algorithm: The algorithm to test for.
    :param test_epsilon: The privacy budget to test for, can either be a number or an iterable of numbers.
    :param default_kwargs: The default arguments the algorithm needs except the first Queries argument.
    :param databases: The databases to run for detection, optional. When given it is unpacked as (d1, d2).
    :param num_input: The length of input to generate, not used if database param is specified.
        A single number or an iterable of numbers.
    :param event_iterations: The iterations for event selector to run.
    :param detect_iterations: The iterations for detector to run.
    :param cores: The number of max processes to set for multiprocessing.Pool(), os.cpu_count() is used if None.
    :param sensitivity: The sensitivity setting, all queries can differ by one or just one query can differ by one.
    :param quiet: Do not print progress bar or messages, logs are not affected.
    :param loglevel: The loglevel for logging package.
    :return: [(epsilon, p, d1, d2, kwargs, event)] The epsilon-p pairs along with databases/arguments/selected event.
    """
    # initialize an empty default kwargs if None is given
    default_kwargs = default_kwargs if default_kwargs else {}

    logging.basicConfig(level=loglevel)
    logger.info(
        f'Start detection for counterexample on {algorithm.__name__} with test epsilon {test_epsilon}'
    )
    logger.info(
        f'Options -> default_kwargs: {default_kwargs} | databases: {databases} | cores:{cores}'
    )

    input_list = []
    if databases is not None:
        d1, d2 = databases
        kwargs = generate_arguments(algorithm, d1, d2, default_kwargs=default_kwargs)
        input_list = ((d1, d2, kwargs), )
    else:
        num_input = (int(num_input), ) if isinstance(num_input, (int, float)) else num_input
        for num in num_input:
            input_list.extend(
                generate_databases(algorithm, num, default_kwargs=default_kwargs, sensitivity=sensitivity))

    # remember the first input (for HammingDistance postprocessing); kwargs is copied so the
    # generated arguments are not mutated, and a None kwargs is treated as "no extra arguments"
    # instead of failing on None.copy()
    input_list = [
        (first, second, {**(extra.copy() if extra is not None else {}), '_d1': first})
        for first, second, extra in input_list
    ]

    result = []

    # convert int/float or unsized iterable into tuple (so that it has length information);
    # sized containers such as list/tuple are left untouched
    if isinstance(test_epsilon, (int, float)):
        test_epsilon = (test_epsilon, )
    elif not hasattr(test_epsilon, '__len__'):
        test_epsilon = tuple(test_epsilon)

    with mp.Pool(cores) as pool:
        for epsilon in tqdm.tqdm(test_epsilon, total=len(test_epsilon), unit='test', desc='Detection',
                                 disable=quiet):
            d1, d2, kwargs, event = select_event(algorithm, input_list, epsilon, event_iterations,
                                                 quiet=quiet, process_pool=pool)
            p = hypothesis_test(algorithm, d1, d2, kwargs, event, epsilon, detect_iterations,
                                report_p2=False, process_pool=pool)
            result.append((epsilon, float(p), d1, d2, kwargs, event))
            if not quiet:
                # tqdm.write keeps the message from breaking the progress bar
                tqdm.tqdm.write(
                    f'Epsilon: {epsilon} | p-value: {p:5.3f} | Event: {event}')
            logger.debug(f'D1: {d1} | D2: {d2} | kwargs: {kwargs}')

    return result
def test_generate_arguments():
    """generate_arguments yields None for noisy_max_v1a when no default kwargs are supplied."""
    all_ones = (1,) * 5
    all_twos = (2,) * 5
    generated = generate_arguments(noisy_max_v1a, all_ones, all_twos, {})
    assert generated is None
def detect_counterexample(algorithm, test_epsilon, default_kwargs=None, databases=None, num_input=(5, 10),
                          event_iterations=100000, detect_iterations=500000, cores=0,
                          quiet=False, loglevel=logging.INFO):
    """
    Run event selection followed by a hypothesis test for every epsilon in `test_epsilon`.

    :param algorithm: The algorithm to test for.
    :param test_epsilon: The privacy budget to test for, can either be a number or an iterable of numbers.
    :param default_kwargs: The default arguments the algorithm needs except the first Queries argument.
    :param databases: The databases to run for detection, optional. When given it is unpacked as (d1, d2).
    :param num_input: The length of input to generate, not used if database param is specified.
        A single number or an iterable of numbers.
    :param event_iterations: The iterations for event selector to run, default is 100000.
    :param detect_iterations: The iterations for detector to run, default is 500000.
    :param cores: The cores to utilize, 0 means auto-detection (mp.cpu_count()), 1 means no process pool
        is created and None is handed to the helpers as process_pool.
    :param quiet: Do not print progress bar or messages, logs are not affected, default is False.
    :param loglevel: The loglevel for logging package.
    :return: [(epsilon, p, d1, d2, kwargs, event)] The epsilon-p pairs along with databases/arguments/selected event.
    """
    # initialize an empty default kwargs if None is given
    default_kwargs = default_kwargs if default_kwargs else {}

    logging.basicConfig(level=loglevel)
    logger.info('Starting to find counterexample on algorithm {} with test epsilon {}'
                .format(algorithm.__name__, test_epsilon))
    logger.info('Options -> default_kwargs: {} | databases: {} | cores:{}'.format(default_kwargs, databases, cores))

    input_list = []
    if databases is not None:
        d1, d2 = databases
        kwargs = generate_arguments(algorithm, d1, d2, default_kwargs=default_kwargs)
        input_list = ((d1, d2, kwargs),)
    else:
        num_input = (int(num_input), ) if isinstance(num_input, (int, float)) else num_input
        for num in num_input:
            input_list.extend(generate_databases(algorithm, num, default_kwargs=default_kwargs))

    result = []

    # convert int/float or iterable into tuple (so that it has length information)
    if isinstance(test_epsilon, (int, float)):
        test_epsilon = (test_epsilon, )
    elif not isinstance(test_epsilon, (tuple, list)):
        test_epsilon = tuple(test_epsilon)

    if cores == 0:
        pool = mp.Pool(mp.cpu_count())
    elif cores != 1:
        pool = mp.Pool(cores)
    else:
        pool = None

    try:
        # quiet silences both the progress bar and the per-epsilon message, as documented;
        # the logger output below is intentionally unaffected
        for epsilon in tqdm.tqdm(test_epsilon, total=len(test_epsilon), unit='test', desc='Detection',
                                 disable=quiet):
            d1, d2, kwargs, event = select_event(algorithm, input_list, epsilon, event_iterations,
                                                 quiet=quiet, process_pool=pool)
            p = hypothesis_test(algorithm, d1, d2, kwargs, event, epsilon, detect_iterations,
                                report_p2=False, process_pool=pool)
            result.append((epsilon, p, d1, d2, kwargs, event))
            if not quiet:
                # tqdm.write keeps the message from breaking the progress bar
                tqdm.tqdm.write('Epsilon: {} | p-value: {:5.3f} | Event: {}'
                                .format(epsilon, p, event))
            logger.debug('D1: {} | D2: {} | kwargs: {}'.format(d1, d2, kwargs))
    finally:
        if pool:
            pool.close()
            pool.join()

    return result