def local_generation(num_attribs, l_num, g_id, protected_attribs, constraint, model, s_l, epsilon):
    """Run the local generation phase of ADF around discriminatory seeds.

    For every seed in ``g_id`` the search makes ``l_num`` attempts. Each
    attempt shifts one attribute of the current instance by ``+s_l`` or
    ``-s_l``, clips the result with ``constraint`` and tests it with
    ``generation_utilities.is_discriminatory``. The attribute index comes from
    ``random_pick`` over the weights returned by ``normalization``; the
    direction comes from ``random_pick([0.5, 0.5])``. A discriminatory result
    is kept as the starting point of the next attempt; otherwise the search
    falls back to the seed.

    Args:
        num_attribs: Number of attributes (columns) of an instance.
        l_num: Number of attempts made per seed.
        g_id: Iterable of seed instances; each must provide ``.copy()``.
            The seeds are never modified.
        protected_attribs: Passed through to ``similar_set`` and
            ``normalization``.
        constraint: Passed through to ``similar_set`` and ``clip``.
        model: Model handed to ``find_pair``, ``compute_grad`` and
            ``is_discriminatory``.
        s_l: Step size of a single perturbation.
        epsilon: Passed through to ``normalization``.

    Returns:
        A tuple ``(l_id, all_gen_l, try_times)``:
        ``l_id`` holds the distinct discriminatory instances in first-seen
        order, with shape ``(k, num_attribs)`` (``k`` may be 0);
        ``all_gen_l`` holds every generated instance, duplicates included;
        ``try_times`` is the total number of attempts.
    """
    direction = [-1, 1]
    found = []
    generated = []
    try_times = 0

    for seed in g_id:
        # Iterating a 2-D array yields row views. Work on copies so that the
        # in-place update of x1 below cannot write through into g_id.
        x0 = seed.copy()
        x1 = seed.copy()
        for _ in range(l_num):
            try_times += 1
            similar_x1 = generation_utilities.similar_set(
                x1, num_attribs, protected_attribs, constraint)
            x2 = generation_utilities.find_pair(x1, similar_x1, model)
            grad1 = compute_grad(x1, model)
            grad2 = compute_grad(x2, model)
            p = generation_utilities.normalization(grad1, grad2, protected_attribs, epsilon)
            a = generation_utilities.random_pick(p)
            s = generation_utilities.random_pick([0.5, 0.5])
            x1[a] = x1[a] + direction[s] * s_l
            x1 = generation_utilities.clip(x1, constraint)

            # x1 may be updated in place on the next attempt, so store a
            # snapshot rather than a reference.
            snapshot = np.array(x1)
            generated.append(snapshot)

            similar_x1 = generation_utilities.similar_set(
                x1, num_attribs, protected_attribs, constraint)
            if generation_utilities.is_discriminatory(x1, similar_x1, model):
                found.append(snapshot)
            else:
                x1 = x0.copy()

    # Build each result with a single concatenation instead of one
    # np.append per attempt; appending to the float "empty" array keeps the
    # same dtype promotion as repeated appends would.
    all_gen_l = np.empty(shape=(0, num_attribs))
    if generated:
        all_gen_l = np.append(all_gen_l, generated, axis=0)

    l_id = np.empty(shape=(0, num_attribs))
    if found:
        stacked = np.append(l_id, found, axis=0)
        distinct_rows = dict.fromkeys(tuple(row) for row in stacked)
        l_id = np.array(list(distinct_rows))

    return l_id, all_gen_l, try_times
def seedwise_generation(X, seeds, protected_attribs, constraint, model, l_num, max_iter=10, s_g=1.0, s_l=1.0, epsilon=1e-6):
    """Run global then local generation on each seed and track the totals.

    For every seed the global phase runs for at most ``max_iter`` iterations
    and stops as soon as the current instance is reported discriminatory by
    ``generation_utilities.is_discriminatory``. Only if that happens, the
    local phase makes ``l_num`` single-attribute perturbation attempts
    starting from that instance.

    Args:
        X: Dataset; ``X[0]`` defines the number of attributes and the
            shape/dtype of the global direction vector.
        seeds: Sequence of seed instances; each must provide ``.copy()``.
        protected_attribs: Attribute indices excluded from the global
            direction; also passed to ``similar_set`` and ``normalization``.
        constraint: Passed through to ``similar_set`` and ``clip``.
        model: Model handed to the helper functions and ``compute_grad``.
        l_num: Number of local attempts per seed that reached the local phase.
        max_iter: Maximum number of global iterations per seed.
        s_g: Step size of the global phase.
        s_l: Step size of the local phase.
        epsilon: Passed through to ``normalization``.

    Returns:
        A tuple ``(num_gen, num_ids)`` of integer arrays with one entry per
        seed. Entry ``k`` is the cumulative number of distinct generated
        instances (``num_gen``) and distinct discriminatory instances
        (``num_ids``) after seeds ``0..k`` have been processed.
    """
    num_seeds = len(seeds)
    num_gen = np.array([0] * num_seeds)
    num_ids = np.array([0] * num_seeds)
    num_attribs = len(X[0])
    direction_l = [-1, 1]

    # Running sets of instances seen so far. Only the counts are reported,
    # so the rows themselves never need to be stacked into arrays or
    # re-deduplicated for every seed.
    seen_ids = set()
    seen_gen = set()

    def _as_key(instance):
        # Hashable float representation of an instance.
        return tuple(np.asarray(instance, dtype=float))

    for index, instance in enumerate(seeds):
        x1 = instance.copy()
        reached_id = False

        # Global phase.
        for _ in range(max_iter):
            similar_x1 = generation_utilities.similar_set(
                x1, num_attribs, protected_attribs, constraint)
            if generation_utilities.is_discriminatory(x1, similar_x1, model):
                seen_ids.add(_as_key(x1))
                reached_id = True
                break
            x2 = generation_utilities.max_diff(x1, similar_x1, model)
            grad1 = compute_grad(x1, model)
            grad2 = compute_grad(x2, model)
            direction_g = np.zeros_like(X[0])
            sign_grad1 = np.sign(grad1)
            sign_grad2 = np.sign(grad2)
            # Move only along non-protected attributes whose gradient signs
            # agree for both instances.
            for attrib in range(num_attribs):
                if attrib not in protected_attribs and sign_grad1[attrib] == sign_grad2[attrib]:
                    direction_g[attrib] = sign_grad1[attrib]
            x1 = x1 + s_g * direction_g
            x1 = generation_utilities.clip(x1, constraint)
            seen_gen.add(_as_key(x1))

        # Local phase, only when the global phase ended on a discriminatory
        # instance.
        if reached_id:
            x0 = x1.copy()
            for _ in range(l_num):
                similar_x1 = generation_utilities.similar_set(
                    x1, num_attribs, protected_attribs, constraint)
                x2 = generation_utilities.find_pair(x1, similar_x1, model)
                grad1 = compute_grad(x1, model)
                grad2 = compute_grad(x2, model)
                p = generation_utilities.normalization(grad1, grad2, protected_attribs, epsilon)
                a = generation_utilities.random_pick(p)
                s = generation_utilities.random_pick([0.5, 0.5])
                x1[a] = x1[a] + direction_l[s] * s_l
                x1 = generation_utilities.clip(x1, constraint)
                seen_gen.add(_as_key(x1))
                similar_x1 = generation_utilities.similar_set(
                    x1, num_attribs, protected_attribs, constraint)
                if generation_utilities.is_discriminatory(x1, similar_x1, model):
                    seen_ids.add(_as_key(x1))
                else:
                    # Fall back to the instance the local phase started from.
                    x1 = x0.copy()

        num_gen[index] = len(seen_gen)
        num_ids[index] = len(seen_ids)

    return num_gen, num_ids
def global_generation(X, seeds, num_attribs, g_num, protected_attribs, constraint, model, max_iter, s_g):
    """Run the global generation phase of ADF.

    Each of the first ``g_num`` seeds is moved for at most ``max_iter``
    iterations. An iteration first checks the current instance with
    ``generation_utilities.is_discriminatory``; on success the instance is
    recorded and the seed is finished. Otherwise the instance is shifted by
    ``s_g`` along every non-protected attribute on which the gradient signs
    of the instance and of its ``max_diff`` counterpart agree, then clipped
    with ``constraint``.

    Args:
        X: Dataset; ``X[0]`` defines the shape/dtype of the direction vector.
        seeds: Indexable collection of seed instances providing ``.copy()``.
        num_attribs: Number of attributes (columns) of an instance.
        g_num: Number of seeds to process. Values larger than ``len(seeds)``
            are treated as ``len(seeds)``.
        protected_attribs: Attribute indices excluded from the direction;
            also passed to ``similar_set``.
        constraint: Passed through to ``similar_set`` and ``clip``.
        model: Model handed to the helper functions and ``compute_grad``.
        max_iter: Maximum number of iterations per seed.
        s_g: Step size of one iteration.

    Returns:
        A tuple ``(g_id, all_gen_g, try_times)``:
        ``g_id`` holds the distinct discriminatory instances in first-seen
        order, with shape ``(k, num_attribs)`` (``k`` may be 0);
        ``all_gen_g`` holds every generated instance, duplicates included;
        ``try_times`` is the total number of iterations started.
    """
    found = []
    generated = []
    try_times = 0

    # Clamp so that an overstated g_num cannot index past the end of seeds.
    for i in range(min(g_num, len(seeds))):
        x1 = seeds[i].copy()
        for _ in range(max_iter):
            try_times += 1
            similar_x1 = generation_utilities.similar_set(
                x1, num_attribs, protected_attribs, constraint)
            if generation_utilities.is_discriminatory(x1, similar_x1, model):
                found.append(x1)
                break
            x2 = generation_utilities.max_diff(x1, similar_x1, model)
            grad1 = compute_grad(x1, model)
            grad2 = compute_grad(x2, model)
            direction = np.zeros_like(X[0])
            sign_grad1 = np.sign(grad1)
            sign_grad2 = np.sign(grad2)
            for attrib in range(num_attribs):
                if attrib not in protected_attribs and sign_grad1[attrib] == sign_grad2[attrib]:
                    direction[attrib] = sign_grad1[attrib]
            # Both statements rebind x1 to a new value, so earlier entries of
            # `found` / `generated` are not affected by later iterations.
            x1 = x1 + s_g * direction
            x1 = generation_utilities.clip(x1, constraint)
            generated.append(x1)

    # One concatenation per result instead of one np.append per iteration;
    # appending to the float "empty" array keeps the same dtype promotion.
    all_gen_g = np.empty(shape=(0, num_attribs))
    if generated:
        all_gen_g = np.append(all_gen_g, generated, axis=0)

    g_id = np.empty(shape=(0, num_attribs))
    if found:
        stacked = np.append(g_id, found, axis=0)
        distinct_rows = dict.fromkeys(tuple(row) for row in stacked)
        g_id = np.array(list(distinct_rows))

    return g_id, all_gen_g, try_times
def time_record(X, seeds, protected_attribs, constraint, model, g_num, l_num, record_step, record_frequency, max_iter=10, s_g=1.0, s_l=1.0, epsilon=1e-6):
    """Record how long it takes to reach successive counts of distinct IDs.

    Seeds are processed one by one with a global phase followed, when the
    global phase ends on a discriminatory instance, by a local phase of
    ``l_num`` attempts. Whenever the number of distinct discriminatory
    instances reaches ``record_step``, ``2 * record_step``, ... the elapsed
    time is stored; processing stops once ``record_frequency * record_step``
    instances have been found or the seeds are exhausted.

    The stored time is the timestamp of the most recent generation step
    (taken right after ``clip``) minus the start time, so the discrimination
    check that confirms an instance is not included.

    Args:
        X: Dataset; ``X[0]`` defines the number of attributes and the
            shape/dtype of the global direction vector.
        seeds: Iterable of seed instances providing ``.copy()``.
        protected_attribs: Attribute indices excluded from the global
            direction; also passed to ``similar_set`` and ``normalization``.
        constraint: Passed through to ``similar_set`` and ``clip``.
        model: Model handed to the helper functions and ``compute_grad``.
        g_num: Not used by this function; kept for interface compatibility.
        l_num: Number of local attempts per seed that reached the local phase.
        record_step: Number of newly found instances between two records.
        record_frequency: Number of records to take (length of the result).
        max_iter: Maximum number of global moves per seed.
        s_g: Step size of the global phase.
        s_l: Step size of the local phase.
        epsilon: Passed through to ``normalization``.

    Returns:
        Float array of length ``record_frequency``; entry ``k`` is the
        elapsed time in seconds at which ``(k + 1) * record_step`` distinct
        instances were reached, or 0.0 if that count was never reached.
    """
    t1 = time.time()
    # Timestamp of the latest generation step. Starting at t1 keeps it
    # defined when a seed is discriminatory before any step has been taken.
    t2 = t1
    num_attribs = len(X[0])
    t = np.array([0.0] * record_frequency)
    direction_l = [-1, 1]
    target = record_frequency * record_step
    threshold = record_step
    index = 0
    found = set()
    num_ids = 0

    def _register(instance, stamp):
        """Add an instance; return True once the final milestone is recorded."""
        nonlocal num_ids, threshold, index
        found.add(tuple(np.asarray(instance, dtype=float)))
        if len(found) == num_ids:
            return False  # duplicate, nothing new to count
        num_ids = len(found)
        if num_ids != threshold:
            return False
        t[index] = stamp - t1
        threshold += record_step
        index += 1
        return num_ids >= target

    for instance in seeds:
        if num_ids >= target:
            break
        x1 = instance.copy()
        reached_id = False

        # Global phase: max_iter moves, with a final check after the last one.
        for i in range(max_iter + 1):
            similar_x1 = generation_utilities.similar_set(
                x1, num_attribs, protected_attribs, constraint)
            if generation_utilities.is_discriminatory(x1, similar_x1, model):
                reached_id = True
                break
            if i == max_iter:
                break
            x2 = generation_utilities.max_diff(x1, similar_x1, model)
            grad1 = compute_grad(x1, model)
            grad2 = compute_grad(x2, model)
            direction_g = np.zeros_like(X[0])
            sign_grad1 = np.sign(grad1)
            sign_grad2 = np.sign(grad2)
            for attrib in range(num_attribs):
                if attrib not in protected_attribs and sign_grad1[attrib] == sign_grad2[attrib]:
                    direction_g[attrib] = sign_grad1[attrib]
            x1 = x1 + s_g * direction_g
            x1 = generation_utilities.clip(x1, constraint)
            t2 = time.time()

        if not reached_id:
            continue
        if _register(x1, t2):
            break

        # Local phase around the discriminatory instance just found.
        x0 = x1.copy()
        for _ in range(l_num):
            similar_x1 = generation_utilities.similar_set(
                x1, num_attribs, protected_attribs, constraint)
            x2 = generation_utilities.find_pair(x1, similar_x1, model)
            grad1 = compute_grad(x1, model)
            grad2 = compute_grad(x2, model)
            p = generation_utilities.normalization(grad1, grad2, protected_attribs, epsilon)
            a = generation_utilities.random_pick(p)
            s = generation_utilities.random_pick([0.5, 0.5])
            x1[a] = x1[a] + direction_l[s] * s_l
            x1 = generation_utilities.clip(x1, constraint)
            t2 = time.time()
            similar_x1 = generation_utilities.similar_set(
                x1, num_attribs, protected_attribs, constraint)
            if generation_utilities.is_discriminatory(x1, similar_x1, model):
                if _register(x1, t2):
                    # The check at the top of the seed loop ends the search.
                    break
            else:
                x1 = x0.copy()

    return t
def local_comparison(num_experiment_round, benchmark, X, protected_attribs, constraint, model, update_interval_list, num_seeds=100, l_num=1000, c_num=4, s_l=1.0, epsilon=1e-6):
    """Compare the local phases of ADF and EIDIG on identical seeds.

    In every round, seeds are drawn with ``generation_utilities.get_seed``
    (``fashion='RoundRobin'``) until ``num_seeds`` of them are reported
    discriminatory. ``ADF.local_generation`` is then run once and
    ``EIDIG.local_generation`` once per entry of ``update_interval_list``,
    each on its own copy of the seed set. Per-run and averaged results are
    printed.

    Args:
        num_experiment_round: Number of rounds to run.
        benchmark: Label used in the printed summary.
        X: Dataset the seeds are drawn from.
        protected_attribs: Passed through to the helpers and generators.
        constraint: Passed through to the helpers and generators.
        model: Model under test.
        update_interval_list: EIDIG update intervals to evaluate.
        num_seeds: Number of discriminatory seeds collected per round.
        l_num: Number of local attempts per seed.
        c_num: Number of clusters used for seed selection.
        s_l: Step size of the local phase.
        epsilon: Passed through to the generators.

    Returns:
        A tuple ``(num_ids, time_cost)``. Index 0 refers to ADF and index
        ``k + 1`` to EIDIG with ``update_interval_list[k]``. ``num_ids`` is
        the total count of discriminatory instances over all rounds
        (integers); ``time_cost`` is the total wall-clock time in seconds
        (floats).
    """
    num_configs = len(update_interval_list) + 1
    num_ids = np.array([0] * num_configs)
    # Must be a float array: storing fractional seconds into an integer
    # array would silently truncate them.
    time_cost = np.zeros(num_configs, dtype=float)

    for round_index in range(num_experiment_round):
        round_now = round_index + 1
        print('--- ROUND', round_now, '---')
        num_attribs = len(X[0])
        clustered_data = generation_utilities.clustering(X, c_num)

        # Collect discriminatory seeds; the large bound only guards against
        # an endless search.
        seed_rows = []
        for draw in range(100000000):
            x_seed = generation_utilities.get_seed(
                clustered_data, len(X), c_num, draw % c_num, fashion='RoundRobin')
            similar_x_seed = generation_utilities.similar_set(
                x_seed, num_attribs, protected_attribs, constraint)
            if generation_utilities.is_discriminatory(x_seed, similar_x_seed, model):
                seed_rows.append(x_seed)
                if len(seed_rows) >= num_seeds:
                    break
        id_seeds = np.empty(shape=(0, num_attribs))
        if seed_rows:
            id_seeds = np.append(id_seeds, seed_rows, axis=0)

        t1 = time.time()
        ids_ADF, _, total_iter_ADF = ADF.local_generation(
            num_attribs, l_num, id_seeds.copy(), protected_attribs, constraint, model, s_l, epsilon)
        t2 = time.time()
        elapsed = t2 - t1
        num_ids_ADF = len(ids_ADF)
        print(
            'ADF:', 'In', total_iter_ADF, 'search iterations,', num_ids_ADF,
            'non-duplicate individual discriminatory instances are generated. Time cost:',
            elapsed, 's.')
        num_ids[0] += num_ids_ADF
        time_cost[0] += elapsed

        for index, update_interval in enumerate(update_interval_list):
            print('Update interval set to {}:'.format(update_interval))
            t1 = time.time()
            ids_EIDIG, _, total_iter_EIDIG = EIDIG.local_generation(
                num_attribs, l_num, id_seeds.copy(), protected_attribs, constraint, model,
                update_interval, s_l, epsilon)
            t2 = time.time()
            elapsed = t2 - t1
            num_ids_EIDIG = len(ids_EIDIG)
            print(
                'EIDIG:', 'In', total_iter_EIDIG, 'search iterations,', num_ids_EIDIG,
                'non-duplicate individual discriminatory instances are generated. Time cost:',
                elapsed, 's.')
            num_ids[index + 1] += num_ids_EIDIG
            time_cost[index + 1] += elapsed
        print('\n')

    avg_num_ids = num_ids / num_experiment_round
    avg_speed = num_ids / time_cost
    print(
        'Results of local phase comparsion on', benchmark,
        'with l_num set to {} given {} discriminatory seeds'.format(l_num, num_seeds),
        ',averaged on', num_experiment_round, 'rounds:')
    print('ADF:', avg_num_ids[0],
          'individual discriminatory instances are generated at a speed of',
          avg_speed[0], 'per second.')
    for index, update_interval in enumerate(update_interval_list):
        print('Update interval set to {}:'.format(update_interval))
        print(
            'EIDIG:', avg_num_ids[index + 1],
            'individual discriminatory instances are generated at a speed of',
            avg_speed[index + 1], 'per second.')

    return num_ids, time_cost
def global_comparison(num_experiment_round, benchmark, X, protected_attribs, constraint, model, decay_list, num_seeds=1000, c_num=4, max_iter=10, s_g=1.0):
    """Compare the global phases of ADF and EIDIG on identical seeds.

    In every round a seed set is built: the whole of ``X`` when ``num_seeds``
    is at least ``len(X)``, otherwise ``num_seeds`` instances drawn with
    ``generation_utilities.get_seed`` (``fashion='Distribution'``).
    ``ADF.global_generation`` is run once and ``EIDIG.global_generation``
    once per entry of ``decay_list`` on that seed set. Per-run and averaged
    results are printed.

    Args:
        num_experiment_round: Number of rounds to run.
        benchmark: Label used in the printed summary.
        X: Dataset the seeds are drawn from.
        protected_attribs: Passed through to the helpers and generators.
        constraint: Passed through to the helpers and generators.
        model: Model under test.
        decay_list: EIDIG decay factors to evaluate.
        num_seeds: Requested number of seeds; capped at ``len(X)``.
        c_num: Number of clusters used for seed selection.
        max_iter: Maximum number of global iterations per seed.
        s_g: Step size of the global phase.

    Returns:
        A tuple ``(num_ids, num_iter, time_cost)``. Index 0 refers to ADF and
        index ``k + 1`` to EIDIG with ``decay_list[k]``. ``num_ids`` and
        ``num_iter`` are totals over all rounds (integers); ``time_cost`` is
        the total wall-clock time in seconds (floats).
    """
    num_configs = len(decay_list) + 1
    num_ids = np.array([0] * num_configs)
    num_iter = np.array([0] * num_configs)
    # Must be a float array: storing fractional seconds into an integer
    # array would silently truncate them.
    time_cost = np.zeros(num_configs, dtype=float)

    # Number of seeds actually available in each round. Using this instead
    # of the requested num_seeds keeps the generators from indexing past the
    # end of the seed set and keeps the per-seed averages honest.
    seed_count = min(num_seeds, len(X))

    for round_index in range(num_experiment_round):
        round_now = round_index + 1
        print('--- ROUND', round_now, '---')
        num_attribs = len(X[0])

        if num_seeds >= len(X):
            seeds = X
        else:
            clustered_data = generation_utilities.clustering(X, c_num)
            seed_rows = [
                generation_utilities.get_seed(
                    clustered_data, len(X), c_num, draw % c_num, fashion='Distribution')
                for draw in range(num_seeds)
            ]
            seeds = np.empty(shape=(0, num_attribs))
            if seed_rows:
                seeds = np.append(seeds, seed_rows, axis=0)

        num_dis = 0
        for seed in seeds:
            similar_seed = generation_utilities.similar_set(
                seed, num_attribs, protected_attribs, constraint)
            if generation_utilities.is_discriminatory(seed, similar_seed, model):
                num_dis += 1
        print('Given', seed_count, '(no more than 600 for german credit) seeds,', num_dis,
              'of which are individual discriminatory instances.')

        t1 = time.time()
        ids_ADF, _, total_iter_ADF = ADF.global_generation(
            X, seeds, num_attribs, seed_count, protected_attribs, constraint, model, max_iter, s_g)
        t2 = time.time()
        elapsed = t2 - t1
        num_ids_ADF = len(ids_ADF)
        print(
            'ADF:', 'In', total_iter_ADF, 'search iterations,', num_ids_ADF,
            'non-duplicate individual discriminatory instances are generated. Time cost:',
            elapsed, 's.')
        num_ids[0] += num_ids_ADF
        num_iter[0] += total_iter_ADF
        time_cost[0] += elapsed

        for index, decay in enumerate(decay_list):
            print('Decay factor set to {}:'.format(decay))
            t1 = time.time()
            ids_EIDIG, _, total_iter_EIDIG = EIDIG.global_generation(
                X, seeds, num_attribs, seed_count, protected_attribs, constraint, model,
                decay, max_iter, s_g)
            t2 = time.time()
            elapsed = t2 - t1
            num_ids_EIDIG = len(ids_EIDIG)
            print(
                'EIDIG:', 'In', total_iter_EIDIG, 'search iterations,', num_ids_EIDIG,
                'non-duplicate individual discriminatory instances are generated. Time cost:',
                elapsed, 's.')
            num_ids[index + 1] += num_ids_EIDIG
            num_iter[index + 1] += total_iter_EIDIG
            time_cost[index + 1] += elapsed
        print('\n')

    avg_num_ids = num_ids / num_experiment_round
    avg_speed = num_ids / time_cost
    avg_iter = num_iter / num_experiment_round / seed_count
    print('Results of global phase comparsion on', benchmark,
          'given {} seeds'.format(seed_count),
          ',averaged on', num_experiment_round, 'rounds:')
    print('ADF:', avg_num_ids[0],
          'individual discriminatory instances are generated at a speed of', avg_speed[0],
          'per second, and the number of iterations on a singe seed is', avg_iter[0], '.')
    for index, decay in enumerate(decay_list):
        print('Decay factor set to {}:'.format(decay))
        print(
            'EIDIG:', avg_num_ids[index + 1],
            'individual discriminatory instances are generated at a speed of', avg_speed[index + 1],
            'per second, and the number of iterations on a singe seed is', avg_iter[index + 1], '.')

    return num_ids, num_iter, time_cost