def training(dataset, model_path):
    """
    Train the model
    :param dataset: the name of the dataset
    :param model_path: the path to save trained model
    """
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}

    # prepare the data and model
    X, Y, input_shape, nb_classes = data[dataset]()
    print("-->x, y, input_shape, nb_classes", X, Y, input_shape, nb_classes)
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    model = dnn(input_shape, nb_classes)
    preds = model(x)
    print("-->preds:", preds)

    # training parameters
    train_params = {
        'nb_epochs': 1000,
        'batch_size': 128,
        'learning_rate': 0.01,
        'train_dir': model_path,
        'filename': 'test.model'
    }

    # training procedure
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2019, 7, 15])
    model_train(sess, x, y, preds, X, Y, args=train_params, rng=rng, save=False)

    # evaluate the accuracy of trained model
    eval_params = {'batch_size': 128}
    accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
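
# Usage sketch (illustrative, not from the original sources): assuming the data loaders
# (census_data, credit_data, bank_data) and the model_train/model_eval helpers used above
# are importable, training on the census dataset might look like the call below; the
# output directory is a hypothetical example.
#
#     training("census", "../models/census/")
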
def gf(dataset, sens_param, ration=0.1, threshold=0.9, batch_size=256, epoch=9):
    tf.reset_default_graph()
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}

    # data preprocessing
    X, Y, input_shape, nb_classes = data[dataset](sens_param)
    X_original = np.array(X)
    Y_original = np.array(Y)

    # model structure
    model = dnn(input_shape, nb_classes)

    # tf operation
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    preds = model(x)
    saver = tf.train.Saver()
    saver.restore(sess, '../models/' + dataset + '/999/test.model')
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for original model: {0}'.format(accuracy))

    # count the total number of mutable weights
    num_weights = 0
    for layer in model.layers:
        if "Conv2D" in layer.__class__.__name__:
            shape = layer.kernels.shape
            num_weights += int(shape[0] * shape[1] * shape[2] * shape[3])
        elif "BN" in layer.__class__.__name__:
            shape = layer.gamma.shape
            num_weights += int(shape[0])
        elif "Linear" in layer.__class__.__name__:
            shape = layer.W.shape
            num_weights += int(shape[0] * shape[1])

    # randomly select the weights to mutate
    indices = np.random.choice(num_weights, int(num_weights * ration), replace=False)

    weights_count = 0
    for i in range(len(model.layers)):
        layer = model.layers[i]
        if "Conv2D" in layer.__class__.__name__:
            shape = layer.kernels.shape
            num_weights_layer = int(shape[0] * shape[1] * shape[2] * shape[3])
            mutated_indices = set(indices) & set(
                np.arange(weights_count, weights_count + num_weights_layer))
            if mutated_indices:
                mutated_indices = np.array(list(mutated_indices)) - weights_count
                current_weights = sess.run(layer.kernels).reshape(-1)
                avg_weights = np.mean(current_weights)
                std_weights = np.std(current_weights)
                mutated_weights = np.random.normal(avg_weights, std_weights,
                                                   mutated_indices.size)
                current_weights[mutated_indices] = mutated_weights
                update_weights = tf.assign(layer.kernels,
                                           current_weights.reshape(shape))
                sess.run(update_weights)
            weights_count += num_weights_layer
        elif "BN" in layer.__class__.__name__:
            shape = layer.gamma.shape
            num_weights_layer = int(shape[0])
            mutated_indices = set(indices) & set(
                np.arange(weights_count, weights_count + num_weights_layer))
            if mutated_indices:
                mutated_indices = np.array(list(mutated_indices)) - weights_count
                current_weights = sess.run(layer.gamma).reshape(-1)
                avg_weights = np.mean(current_weights)
                std_weights = np.std(current_weights)
                mutated_weights = np.random.normal(avg_weights, std_weights,
                                                   mutated_indices.size)
                current_weights[mutated_indices] = mutated_weights
                update_weights = tf.assign(layer.gamma,
                                           current_weights.reshape(shape))
                sess.run(update_weights)
            weights_count += num_weights_layer
        elif "Linear" in layer.__class__.__name__:
            shape = layer.W.shape
            num_weights_layer = int(shape[0] * shape[1])
            mutated_indices = set(indices) & set(
                np.arange(weights_count, weights_count + num_weights_layer))
            if mutated_indices:
                mutated_indices = np.array(list(mutated_indices)) - weights_count
                current_weights = sess.run(layer.W).reshape(-1)
                avg_weights = np.mean(current_weights)
                std_weights = np.std(current_weights)
                mutated_weights = np.random.normal(avg_weights, std_weights,
                                                   mutated_indices.size)
                current_weights[mutated_indices] = mutated_weights
                update_weights = tf.assign(layer.W, current_weights.reshape(shape))
                sess.run(update_weights)
            weights_count += num_weights_layer

    mutated_accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for mutated model: {0}'.format(mutated_accuracy))

    # if mutated_accuracy >= threshold * accuracy:
    #     train_dir = os.path.join(path.mu_model_path, 'gf', datasets + '_' + model_name, '0')
    #     if not os.path.exists(train_dir):
    #         os.makedirs(train_dir)
    #     save_path = os.path.join(train_dir, datasets + '_' + model_name + '.model')
    #     saver = tf.train.Saver()
    #     saver.save(sess, save_path)

    sess.close()
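
# Usage sketch (illustrative only): gf mutates a fraction `ration` of the trained weights
# with Gaussian noise drawn from each tensor's own mean/std and reports the mutated model's
# accuracy. The companion operators defined below (ns: neuron switch, ws: weight shuffle,
# nai: neuron activation inverse) share the same signature, so a hypothetical driver could be:
#
#     for mutate in (gf, ns, ws, nai):
#         mutate("census", 9, ration=0.1)   # the sensitive-feature index 9 is illustrative
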
def aequitas(dataset, sensitive_param, model_path, max_global, max_local, step_size):
    """
    The implementation of AEQUITAS_Fully_Connected
    :param dataset: the name of testing dataset
    :param sensitive_param: the index of the sensitive feature
    :param model_path: the path of testing model
    :param max_global: the maximum number of samples for global search
    :param max_local: the maximum number of samples for local search
    :param step_size: the step size of perturbation
    :return:
    """
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}
    params = data_config[dataset].params

    # hyper-parameters for initial probabilities of directions
    init_prob = 0.5
    direction_probability = [init_prob] * params
    direction_probability_change_size = 0.001

    # hyper-parameters for features
    param_probability = [1.0 / params] * params
    param_probability_change_size = 0.001

    # prepare the testing data and model
    X, Y, input_shape, nb_classes = data[dataset]()
    model = dnn(input_shape, nb_classes)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    preds = model(x)
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    # store the result of fairness testing
    global_disc_inputs = set()
    global_disc_inputs_list = []
    local_disc_inputs = set()
    local_disc_inputs_list = []
    tot_inputs = set()

    # initial input
    if dataset == "census":
        initial_input = [7, 4, 26, 1, 4, 4, 0, 0, 0, 1, 5, 73, 1]
    elif dataset == "credit":
        initial_input = [2, 24, 2, 2, 37, 0, 1, 2, 1, 0, 4, 2, 2, 2, 1, 1, 2, 1, 0, 0]
    elif dataset == "bank":
        initial_input = [3, 11, 2, 0, 0, 5, 1, 0, 0, 5, 4, 40, 1, 1, 0, 0]
    minimizer = {"method": "L-BFGS-B"}

    def evaluate_local(inp):
        """
        Evaluate whether the test input after local perturbation is an individual discriminatory instance
        :param inp: test input
        :return: whether it is an individual discriminatory instance
        """
        result = check_for_error_condition(data_config[dataset], sess, x, preds,
                                           inp, sensitive_param)
        temp = copy.deepcopy(inp.astype('int').tolist())
        temp = temp[:sensitive_param - 1] + temp[sensitive_param:]
        tot_inputs.add(tuple(temp))
        if result != int(inp[sensitive_param - 1]) and (
                tuple(temp) not in global_disc_inputs) and (
                tuple(temp) not in local_disc_inputs):
            local_disc_inputs.add(tuple(temp))
            local_disc_inputs_list.append(temp)
        return not result

    global_discovery = Global_Discovery(data_config[dataset])
    local_perturbation = Local_Perturbation(
        sess, preds, x, data_config[dataset], sensitive_param,
        param_probability, param_probability_change_size,
        direction_probability, direction_probability_change_size, step_size)

    length = min(max_global, len(X))
    value_list = []
    for i in range(length):
        # global generation
        inp = global_discovery.__call__(initial_input)
        temp = copy.deepcopy(inp)
        temp = temp[:sensitive_param - 1] + temp[sensitive_param:]
        tot_inputs.add(tuple(temp))
        result = check_for_error_condition(data_config[dataset], sess, x, preds,
                                           inp, sensitive_param)

        # if get an individual discriminatory instance
        if result != inp[sensitive_param - 1] and (
                tuple(temp) not in global_disc_inputs) and (
                tuple(temp) not in local_disc_inputs):
            global_disc_inputs_list.append(temp)
            global_disc_inputs.add(tuple(temp))
            value_list.append([inp[sensitive_param - 1], result])

            # local generation
            basinhopping(evaluate_local, inp, stepsize=1.0,
                         take_step=local_perturbation,
                         minimizer_kwargs=minimizer, niter=max_local)

    # create the folder for storing the fairness testing result
    if not os.path.exists('../results/'):
        os.makedirs('../results/')
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    if not os.path.exists('../results/' + dataset + '/' + str(sensitive_param) + '/'):
        os.makedirs('../results/' + dataset + '/' + str(sensitive_param) + '/')

    # storing the fairness testing result
    np.save('../results/' + dataset + '/' + str(sensitive_param) +
            '/global_samples_aequitas.npy', np.array(global_disc_inputs_list))
    np.save('../results/' + dataset + '/' + str(sensitive_param) +
            '/disc_value_aequitas.npy', np.array(value_list))
    np.save('../results/' + dataset + '/' + str(sensitive_param) +
            '/local_samples_aequitas.npy', np.array(local_disc_inputs_list))

    # print the overview information of result
    print("Total Inputs are " + str(len(tot_inputs)))
    print("Total discriminatory inputs of global search- " + str(len(global_disc_inputs)))
    print("Total discriminatory inputs of local search- " + str(len(local_disc_inputs)))
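
# Usage sketch (illustrative values only): aequitas expects the dataset name, the 1-based
# index of the sensitive feature, the checkpoint of the model under test, and the search
# budgets; the checkpoint path below follows the '../models/<dataset>/999/test.model'
# pattern used elsewhere in this file but is still an assumption.
#
#     aequitas("census", 9, "../models/census/999/test.model",
#              max_global=1000, max_local=1000, step_size=1.0)
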
def ns(dataset, sens_param, ration=0.1, threshold=0.9, batch_size=256, epoch=9):
    tf.reset_default_graph()
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}

    # data preprocessing
    X, Y, input_shape, nb_classes = data[dataset](sens_param)
    X_original = np.array(X)
    Y_original = np.array(Y)

    # model structure
    model = dnn(input_shape, nb_classes)

    # tf operation
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    preds = model(x)
    saver = tf.train.Saver()
    saver.restore(sess, '../models/' + dataset + '/999/test.model')
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for original model: {0}'.format(accuracy))

    for i in range(len(model.layers)):
        layer = model.layers[i]
        if "Conv2D" in layer.__class__.__name__:
            unique_neurons_layer = layer.output_channels
            shuffle_num = unique_neurons_layer * ration
            if shuffle_num > 1.0:
                shuffle_num = math.floor(shuffle_num) if shuffle_num > 2.0 else math.ceil(shuffle_num)
                mutated_neurons = np.random.choice(unique_neurons_layer, int(shuffle_num),
                                                   replace=False)
                current_weights = sess.run(layer.kernels).transpose([3, 0, 1, 2])
                current_bias = sess.run(layer.b)
                shuffle_neurons = copy.copy(mutated_neurons)
                np.random.shuffle(shuffle_neurons)
                current_weights[mutated_neurons] = current_weights[shuffle_neurons]
                current_bias[mutated_neurons] = current_bias[shuffle_neurons]
                update_weights = tf.assign(layer.kernels,
                                           current_weights.transpose([1, 2, 3, 0]))
                update_bias = tf.assign(layer.b, current_bias)
                sess.run(update_weights)
                sess.run(update_bias)
                if "BN" in model.layers[i + 1].__class__.__name__:
                    layer = model.layers[i + 1]
                    current_gamma = sess.run(layer.gamma)
                    current_beta = sess.run(layer.beta)
                    current_moving_mean = sess.run(layer.moving_mean)
                    current_moving_variance = sess.run(layer.moving_variance)
                    current_gamma[mutated_neurons] = current_gamma[shuffle_neurons]
                    current_beta[mutated_neurons] = current_beta[shuffle_neurons]
                    current_moving_mean[mutated_neurons] = current_moving_mean[shuffle_neurons]
                    current_moving_variance[mutated_neurons] = current_moving_variance[shuffle_neurons]
                    update_gamma = tf.assign(layer.gamma, current_gamma)
                    update_beta = tf.assign(layer.beta, current_beta)
                    update_moving_mean = tf.assign(layer.moving_mean, current_moving_mean)
                    update_moving_variance = tf.assign(layer.moving_variance,
                                                       current_moving_variance)
                    sess.run(update_gamma)
                    sess.run(update_beta)
                    sess.run(update_moving_mean)
                    sess.run(update_moving_variance)
        elif "Linear" in layer.__class__.__name__:
            unique_neurons_layer = layer.num_hid
            shuffle_num = unique_neurons_layer * ration
            if shuffle_num > 1.0:
                shuffle_num = math.floor(shuffle_num) if shuffle_num > 2.0 else math.ceil(shuffle_num)
                mutated_neurons = np.random.choice(unique_neurons_layer, int(shuffle_num),
                                                   replace=False)
                current_weights = sess.run(layer.W).transpose([1, 0])
                current_bias = sess.run(layer.b)
                shuffle_neurons = copy.copy(mutated_neurons)
                np.random.shuffle(shuffle_neurons)
                current_weights[mutated_neurons] = current_weights[shuffle_neurons]
                current_bias[mutated_neurons] = current_bias[shuffle_neurons]
                update_weights = tf.assign(layer.W, current_weights.transpose([1, 0]))
                update_bias = tf.assign(layer.b, current_bias)
                sess.run(update_weights)
                sess.run(update_bias)

    mutated_accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for mutated model: {0}'.format(mutated_accuracy))

    # if mutated_accuracy >= threshold * accuracy:
    #     train_dir = os.path.join(path.mu_model_path, 'ns', dataset + '_' + model_name, '0')
    #     if not os.path.exists(train_dir):
    #         os.makedirs(train_dir)
    #     save_path = os.path.join(train_dir, datasets + '_' + model_name + '.model')
    #     saver = tf.train.Saver()
    #     saver.save(sess, save_path)

    sess.close()
def ws(dataset, sens_param, ration=0.1, threshold=0.9, batch_size=256, epoch=9):
    tf.reset_default_graph()
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}

    # data preprocessing
    X, Y, input_shape, nb_classes = data[dataset](sens_param)
    X_original = np.array(X)
    Y_original = np.array(Y)

    # model structure
    model = dnn(input_shape, nb_classes)

    # tf operation
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    preds = model(x)
    saver = tf.train.Saver()
    saver.restore(sess, '../models/' + dataset + '/999/test.model')
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for original model: {0}'.format(accuracy))

    unique_neurons = 0
    for layer in model.layers:
        if "Conv2D" in layer.__class__.__name__:
            unique_neurons += layer.output_channels
        elif "Linear" in layer.__class__.__name__:
            unique_neurons += layer.num_hid
    # every BN neuron is only connected to a single preceding neuron, so BN layers add no unique neurons
    indices = np.random.choice(unique_neurons, int(unique_neurons * ration), replace=False)

    neurons_count = 0
    for i in range(len(model.layers)):
        layer = model.layers[i]
        if "Conv2D" in layer.__class__.__name__:
            unique_neurons_layer = layer.output_channels
            mutated_neurons = set(indices) & set(
                np.arange(neurons_count, neurons_count + unique_neurons_layer))
            if mutated_neurons:
                mutated_neurons = np.array(list(mutated_neurons)) - neurons_count
                current_weights = sess.run(layer.kernels).transpose([3, 0, 1, 2])
                for neuron in mutated_neurons:
                    old_data = current_weights[neuron].reshape(-1)
                    shuffle_index = np.arange(len(old_data))
                    np.random.shuffle(shuffle_index)
                    new_data = old_data[shuffle_index].reshape(layer.kernels.shape[0],
                                                               layer.kernels.shape[1],
                                                               layer.kernels.shape[2])
                    current_weights[neuron] = new_data
                update_weights = tf.assign(layer.kernels,
                                           current_weights.transpose([1, 2, 3, 0]))
                sess.run(update_weights)
            neurons_count += unique_neurons_layer
        elif "Linear" in layer.__class__.__name__:
            unique_neurons_layer = layer.num_hid
            mutated_neurons = set(indices) & set(
                np.arange(neurons_count, neurons_count + unique_neurons_layer))
            if mutated_neurons:
                mutated_neurons = np.array(list(mutated_neurons)) - neurons_count
                current_weights = sess.run(layer.W).transpose([1, 0])
                for neuron in mutated_neurons:
                    old_data = current_weights[neuron]
                    shuffle_index = np.arange(len(old_data))
                    np.random.shuffle(shuffle_index)
                    new_data = old_data[shuffle_index]
                    current_weights[neuron] = new_data
                update_weights = tf.assign(layer.W, current_weights.transpose([1, 0]))
                sess.run(update_weights)
            neurons_count += unique_neurons_layer

    mutated_accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for mutated model: {0}'.format(mutated_accuracy))

    # if mutated_accuracy >= threshold * accuracy:
    #     train_dir = os.path.join(path.mu_model_path, 'ws', dataset + '_' + model_name, '0')
    #     if not os.path.exists(train_dir):
    #         os.makedirs(train_dir)
    #     save_path = os.path.join(train_dir, datasets + '_' + model_name + '.model')
    #     saver = tf.train.Saver()
    #     saver.save(sess, save_path)

    sess.close()
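
# Note on the bookkeeping shared by gf/ws/nai: a single global index space is laid out over
# all mutable weights/neurons, and a running offset (`weights_count`/`neurons_count`) maps a
# sampled global index back to a position inside the current layer. A minimal self-contained
# sketch of the same scheme with hypothetical layer sizes:
#
#     layer_sizes = [64, 32, 2]
#     total = sum(layer_sizes)
#     picked = np.random.choice(total, int(total * 0.1), replace=False)
#     offset = 0
#     for size in layer_sizes:
#         in_layer = set(picked) & set(range(offset, offset + size))
#         local = np.array(sorted(in_layer)) - offset   # indices local to this layer
#         offset += size
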
def interpretability(filename, dataset, directorys, k_values, thresholds):
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}
    params = data_config[dataset].params
    X, Y, input_shape, nb_classes = data[dataset]()

    config = tf.ConfigProto()
    conf = data_config[dataset]
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    model = dnn(input_shape, nb_classes)
    preds = model(x)
    # print("-->preds ", preds)
    saver = tf.train.Saver()
    # model_file = "../retrained_models/" + dataset + "_df/999/test.model"
    model_file = "../models/" + dataset + "/test.model"
    saver.restore(sess, model_file)

    grad_0 = gradient_graph(x, preds)
    tfops = tf.sign(grad_0)

    # process dataset
    dataset_list = load_csv(filename)
    del dataset_list[0]
    for i in range(len(dataset_list[0])):
        str_column_to_float(dataset_list, i)
    print("-->dataset:", np.array(dataset_list))
    print(np.array(dataset_list).shape)

    model_dataset = []
    row_data = []
    for d in dataset_list:
        del (d[-1])
        row_data.append(d)
        probs = model_prediction(sess, x, preds, np.array([d]))[0]  # n_probs: prediction vector
        label = np.argmax(probs)  # GET index of max value in n_probs
        d.append(label)
        model_dataset.append(d)
    print("-->dataset:", np.array(model_dataset))
    print(np.array(model_dataset).shape)

    original_dataset = model_dataset

    ######
    # use DT with highest accuracy
    def get_dt(directory, k_value):
        tree_file = directory + "DT_trees"
        all_DT_trees = []
        # print("-->tree_file", tree_file)
        with open(tree_file, 'r') as f:
            for line in f:
                all_DT_trees.append(line.split("\n")[0])

        accuracy_file = directory + "accuracy"
        accuracy = []
        max_accuracy_feature = 0
        max_accuracy = 0
        with open(accuracy_file, 'r') as f:
            lines = f.readlines()
            i = 0
            for line in lines:
                value = [float(s) for s in line.split()]
                accuracy.append(value[0])
                if value[0] > max_accuracy:
                    max_accuracy = value[0]
                    max_accuracy_feature = i
                i += 1

        all_feature_set = list(range(1, params + 1))
        feature_sets = list(itertools.combinations(all_feature_set, k_value))
        print("-->selected feature_set:", feature_sets[max_accuracy_feature],
              max_accuracy_feature, max_accuracy)
        # if k_value == 3:
        #     max_accuracy_feature = 170
        #     print(feature_sets[max_accuracy_feature])
        # if k_value == 2:
        #     max_accuracy_feature = 50  # 72
        #     print(feature_sets[max_accuracy_feature])
        feature_sets = [feature_sets[max_accuracy_feature]]
        return feature_sets, all_DT_trees[max_accuracy_feature]

    def get_labels1(tree):
        original_labels = [i[-1] for i in original_dataset]
        predict_labels = []
        for d in row_data:
            d = map(int, d)
            label = predict(tree, d)
            predict_labels.append(label)
        return original_labels, predict_labels

    def get_labels(tree, test_datas):
        original_labels = []
        predict_labels = []
        for d in test_datas:
            probs = model_prediction(sess, x, preds, np.array([d]))[0]  # n_probs: prediction vector
            model_label = np.argmax(probs)  # GET index of max value in n_probs
            original_labels.append(model_label)
            tree_label = predict(tree, d, feature_sets[0])
            predict_labels.append(tree_label)
        return original_labels, predict_labels

    def one_num(list):
        num = 0
        for l in list:
            if l == 1:
                num += 1
        return num

    def sprt_three_figure(all_prs, accept_pr, deny_pr, threshold, k_values):
        prs1 = all_prs[0]
        prs2 = all_prs[1]
        prs3 = all_prs[2]
        k_value1 = k_values[0]
        k_value2 = k_values[1]
        k_value3 = k_values[2]
        length = max(len(prs1), len(prs2), len(prs3))
        Y = list(range(0, length))
        title_name = "threshold=" + str(threshold)
        plt.title(title_name)
        accept_prs = [accept_pr] * length
        deny_prs = [deny_pr] * length
        plt.plot(Y, accept_prs, color='black', linestyle="--", label="accept_bound")
        plt.plot(Y, deny_prs, color='black', linestyle=":", label="deny_bound")
        plt.plot(list(range(0, len(prs1))), prs1, color='red', label="k=" + str(k_value1))
        plt.plot(list(range(0, len(prs2))), prs2, color='blue', label="k=" + str(k_value2))
        plt.plot(list(range(0, len(prs3))), prs3, color='green', label="k=" + str(k_value3))
        # plt.legend()
        # plt.legend(loc=[0, 1])
        # plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=3, fancybox=True, shadow=True)
        plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True,
                   shadow=True, ncol=5)
        plt.xlabel('number of detected samples')
        plt.ylabel('rate')
        plt.show()

    def sprt_four_figure(all_prs, accept_pr, deny_pr, threshold, k_values):
        prs1 = all_prs[0]
        prs2 = all_prs[1]
        prs3 = all_prs[2]
        prs4 = all_prs[3]
        k_value1 = k_values[0]
        k_value2 = k_values[1]
        k_value3 = k_values[2]
        k_value4 = k_values[3]
        length = max(len(prs1), len(prs2), len(prs3), len(prs4))
        Y = list(range(0, length))
        title_name = "threshold=" + str(threshold)
        plt.title(title_name)
        accept_prs = [accept_pr] * length
        deny_prs = [deny_pr] * length
        plt.plot(Y, accept_prs, color='black', linestyle="--", label="accept_bound")
        plt.plot(Y, deny_prs, color='black', linestyle=":", label="deny_bound")
        plt.plot(list(range(0, len(prs1))), prs1, color='red', label="k=" + str(k_value1))
        plt.plot(list(range(0, len(prs2))), prs2, color='blue', label="k=" + str(k_value2))
        plt.plot(list(range(0, len(prs3))), prs3, color='green', label="k=" + str(k_value3))
        plt.plot(list(range(0, len(prs4))), prs4, color='purple', label="k=" + str(k_value4))
        # plt.legend()
        # plt.legend(loc=[0, 1])
        # plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=3, fancybox=True, shadow=True)
        plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True,
                   shadow=True, ncol=5)
        plt.xlabel('number of detected samples')
        plt.ylabel('rate')
        plt.show()

    def sprt_one_figure(prs, accept_pr, deny_pr, threshold, k_value):
        length = len(prs)
        Y = list(range(0, length))
        title_name = "threshold=" + str(threshold) + " (k=" + str(k_value) + ")"
        plt.title(title_name)
        accept_prs = [accept_pr] * length
        deny_prs = [deny_pr] * length
        plt.plot(Y, accept_prs, color='black', linestyle="--", label="accept_bound")
        plt.plot(Y, deny_prs, color='black', linestyle=":", label="deny_bound")
        plt.plot(Y, prs, label="k=" + str(k_value))
        # plt.plot(sub_axix, test_acys, color='red', label='testing accuracy')
        # plt.plot(x_axix, train_pn_dis, color='skyblue', label='PN distance')
        # plt.plot(x_axix, thresholds, color='blue', label='threshold')
        plt.legend()
        plt.xlabel('number of detected samples')
        plt.ylabel('rate')
        plt.show()

    all_prs = []
    random_test_data = generate_random_data(1000, conf)
    print("-->random_test_data:", random_test_data)
    # assumption: the single-threshold SPRT check below is driven by the first entry of
    # `thresholds`; the full list is still passed to sprt_detect_multiplethre
    threshold = thresholds[0]
    for i in range(0, len(directorys)):
        directory = directorys[i]
        k_value = k_values[i]
        print("-->dir, k", directory, k_value)
        feature_sets, tree = get_dt(directory, k_value)
        print("-->feature_set", feature_sets[0])
        print("-->tree", tree)
        tree = dict(eval(tree))
        original_labels, predict_labels = get_labels(tree, random_test_data)
        same_ratio(original_labels, predict_labels)
        print("-->one num", one_num(original_labels), one_num(predict_labels))
        print("-->original labels", original_labels)
        print("-->predict labels", predict_labels)
        # print("-->sprt result:", sprt_detect(original_labels, predict_labels, threshold, k_value))
        if_accept, same_count, total_count, prs, accept_pr, deny_pr = sprt_detect(
            original_labels, predict_labels, k_value, threshold)
        print("-->sprt result:", if_accept, same_count, total_count)
        all_prs.append(prs)
        sprt_detect_multiplethre(original_labels, predict_labels, k_value, thresholds)

    # test
    # sprt_three_figure(all_prs, accept_pr, deny_pr, threshold, k_values)
    # sprt_four_figure(all_prs, accept_pr, deny_pr, threshold, k_values)
    # sprt_one_figure(prs, accept_pr, deny_pr, k_value, threshold)
    return
def symbolic_generation(dataset, sensitive_param, model_path, cluster_num, limit):
    """
    The implementation of symbolic generation
    :param dataset: the name of dataset
    :param sensitive_param: the index of sensitive feature
    :param model_path: the path of testing model
    :param cluster_num: the number of clusters to form as well as the number of centroids to generate
    :param limit: the maximum number of test case
    """
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}

    # the rank for priority queue, rank1 is for seed inputs, rank2 for local, rank3 for global
    rank1 = 5
    rank2 = 1
    rank3 = 10
    T1 = 0.3

    # prepare the testing data and model
    X, Y, input_shape, nb_classes = data[dataset]()
    arguments = gen_arguments(data_config[dataset])
    model = dnn(input_shape, nb_classes)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    preds = model(x)
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    # store the result of fairness testing
    global_disc_inputs = set()
    global_disc_inputs_list = []
    local_disc_inputs = set()
    local_disc_inputs_list = []
    tot_inputs = set()

    # select the seed input for fairness testing
    inputs = seed_test_input(dataset, cluster_num, limit)

    q = PriorityQueue()  # low push first
    for inp in inputs[::-1]:
        q.put((rank1, X[inp].tolist()))

    visited_path = []
    l_count = 0
    g_count = 0
    while len(tot_inputs) < limit and q.qsize() != 0:
        t = q.get()
        t_rank = t[0]
        t = np.array(t[1])
        found = check_for_error_condition(data_config[dataset], sess, x, preds, t,
                                          sensitive_param)
        p = getPath(X, sess, x, preds, t, data_config[dataset])
        temp = copy.deepcopy(t.tolist())
        temp = temp[:sensitive_param - 1] + temp[sensitive_param:]

        tot_inputs.add(tuple(temp))
        if found:
            if (tuple(temp) not in global_disc_inputs) and (tuple(temp) not in local_disc_inputs):
                if t_rank > 2:
                    global_disc_inputs.add(tuple(temp))
                    global_disc_inputs_list.append(temp)
                else:
                    local_disc_inputs.add(tuple(temp))
                    local_disc_inputs_list.append(temp)
                if len(tot_inputs) == limit:
                    break

        # local search
        for i in range(len(p)):
            path_constraint = copy.deepcopy(p)
            c = path_constraint[i]
            if c[0] == sensitive_param - 1:
                continue
            if c[1] == "<=":
                c[1] = ">"
                c[3] = 1.0 - c[3]
            else:
                c[1] = "<="
                c[3] = 1.0 - c[3]

            if path_constraint not in visited_path:
                visited_path.append(path_constraint)
                input = local_solve(path_constraint, arguments, t, i, data_config[dataset])
                l_count += 1
                if input != None:
                    r = average_confidence(path_constraint)
                    q.put((rank2 + r, input))

        # global search
        prefix_pred = []
        for c in p:
            if c[0] == sensitive_param - 1:
                continue
            if c[3] < T1:
                break

            n_c = copy.deepcopy(c)
            if n_c[1] == "<=":
                n_c[1] = ">"
                n_c[3] = 1.0 - c[3]
            else:
                n_c[1] = "<="
                n_c[3] = 1.0 - c[3]
            path_constraint = prefix_pred + [n_c]

            # filter out the path_constraint already solved before
            if path_constraint not in visited_path:
                visited_path.append(path_constraint)
                input = global_solve(path_constraint, arguments, t, data_config[dataset])
                g_count += 1
                if input != None:
                    r = average_confidence(path_constraint)
                    q.put((rank3 - r, input))
            prefix_pred = prefix_pred + [c]

    # create the folder for storing the fairness testing result
    if not os.path.exists('../results/'):
        os.makedirs('../results/')
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    if not os.path.exists('../results/' + dataset + '/' + str(sensitive_param) + '/'):
        os.makedirs('../results/' + dataset + '/' + str(sensitive_param) + '/')

    # storing the fairness testing result
    np.save('../results/' + dataset + '/' + str(sensitive_param) +
            '/global_samples_symbolic.npy', np.array(global_disc_inputs_list))
    np.save('../results/' + dataset + '/' + str(sensitive_param) +
            '/local_samples_symbolic.npy', np.array(local_disc_inputs_list))

    # print the overview information of result
    print("Total Inputs are " + str(len(tot_inputs)))
    print("Total discriminatory inputs of global search- " +
          str(len(global_disc_inputs)), g_count)
    print("Total discriminatory inputs of local search- " +
          str(len(local_disc_inputs)), l_count)
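
# Usage sketch (illustrative values only): symbolic_generation clusters the data into
# `cluster_num` groups to pick seed inputs and stops once `limit` test inputs have been
# generated; the checkpoint path and parameter values below are assumptions.
#
#     symbolic_generation("census", 9, "../models/census/999/test.model",
#                         cluster_num=4, limit=1000)
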
def nai(dataset, sens_param, ration=0.1, threshold=0.9, batch_size=256, epoch=9):
    tf.reset_default_graph()
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}

    # data preprocessing
    X, Y, input_shape, nb_classes = data[dataset](sens_param)
    X_original = np.array(X)
    Y_original = np.array(Y)

    # model structure
    model = dnn(input_shape, nb_classes)

    # tf operation
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    preds = model(x)
    saver = tf.train.Saver()
    saver.restore(sess, '../models/' + dataset + '/999/test.model')
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for original model: {0}'.format(accuracy))

    unique_neurons = 0
    for layer in model.layers:
        if "Conv2D" in layer.__class__.__name__:
            unique_neurons += layer.output_channels
        elif "Linear" in layer.__class__.__name__:
            unique_neurons += layer.num_hid
    # BN layers are not counted: their parameters are flipped together with the preceding
    # Conv2D layer when its activation is inverted
    indices = np.random.choice(unique_neurons, int(unique_neurons * ration), replace=False)

    neurons_count = 0
    for i in range(len(model.layers)):
        layer = model.layers[i]
        if "Conv2D" in layer.__class__.__name__:
            unique_neurons_layer = layer.output_channels
            mutated_neurons = set(indices) & set(
                np.arange(neurons_count, neurons_count + unique_neurons_layer))
            if mutated_neurons:
                mutated_neurons = np.array(list(mutated_neurons)) - neurons_count
                kernel_shape = layer.kernel_shape
                mutated_metrix = np.asarray([1.0] * unique_neurons_layer)
                mutated_metrix[mutated_neurons] = -1.0
                mutated_kernel = np.asarray([[[list(mutated_metrix)]] * kernel_shape[1]] * kernel_shape[0])
                update_kernel = tf.assign(layer.kernels, mutated_kernel * sess.run(layer.kernels))
                update_bias = tf.assign(layer.b, mutated_metrix * sess.run(layer.b))
                sess.run(update_kernel)
                sess.run(update_bias)
                if "BN" in model.layers[i + 1].__class__.__name__:
                    layer = model.layers[i + 1]
                    update_beta = tf.assign(layer.beta, mutated_metrix * sess.run(layer.beta))
                    update_moving_mean = tf.assign(layer.moving_mean,
                                                   mutated_metrix * sess.run(layer.moving_mean))
                    sess.run(update_beta)
                    sess.run(update_moving_mean)
            neurons_count += unique_neurons_layer
        elif "Linear" in layer.__class__.__name__:
            unique_neurons_layer = layer.num_hid
            mutated_neurons = set(indices) & set(
                np.arange(neurons_count, neurons_count + unique_neurons_layer))
            if mutated_neurons:
                mutated_neurons = np.array(list(mutated_neurons)) - neurons_count
                input_shape = layer.input_shape[1]
                mutated_metrix = np.asarray([1.0] * unique_neurons_layer)
                mutated_metrix[mutated_neurons] = -1.0
                mutated_weight = np.asarray([list(mutated_metrix)] * input_shape)
                weight = sess.run(layer.W)
                update_weight = tf.assign(layer.W, mutated_weight * weight)
                update_bias = tf.assign(layer.b, mutated_metrix * sess.run(layer.b))
                sess.run(update_weight)
                sess.run(update_bias)
            neurons_count += unique_neurons_layer

    mutated_accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for mutated model: {0}'.format(mutated_accuracy))

    # if mutated_accuracy >= threshold * accuracy:
    #     train_dir = os.path.join(path.mu_model_path, 'nai', dataset + '_' + model_name, '0')
    #     if not os.path.exists(train_dir):
    #         os.makedirs(train_dir)
    #     save_path = os.path.join(train_dir, datasets + '_' + model_name + '.model')
    #     saver = tf.train.Saver()
    #     saver.save(sess, save_path)

    sess.close()
def interpretability(filename, dataset, max_iter, k, n_folds, f_gini, f_accuracy,
                     f_time, f_ci, f_iteration, f_trees):
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}
    X, Y, input_shape, nb_classes = data[dataset]()

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    model = dnn(input_shape, nb_classes)
    preds = model(x)
    # print("-->preds ", preds)
    saver = tf.train.Saver()
    saver.restore(sess, "../models/bank/test.model")

    grad_0 = gradient_graph(x, preds)
    tfops = tf.sign(grad_0)

    dataset_list = load_csv(filename)
    del dataset_list[0]
    for i in range(len(dataset_list[0])):
        str_column_to_float(dataset_list, i)
    # print("-->dataset:", np.array(dataset_list))
    # print(np.array(dataset_list).shape)

    new_dataset = []
    for d in dataset_list:
        del (d[-1])
        # d_plus = clip(np.array([d]), data_config[dataset_name]).astype("int")
        # d = d_plus[0]  # clip d in dataset_list
        # d = clip(d, data_config[dataset])
        # d = list(np.array(d).astype("int"))
        # print(d, type(d), type(d[0]))
        # d = np.array([d])
        probs = model_prediction(sess, x, preds, np.array([d]))[0]  # n_probs: prediction vector
        label = np.argmax(probs)  # GET index of max value in n_probs
        prob = probs[label]
        # d = np.array(d, label)
        d.append(label)
        # print(d)
        new_dataset.append(d)
    # print("-->dataset:", np.array(new_dataset))
    # print(np.array(new_dataset).shape)

    original_dataset = new_dataset

    def decision_tree_accuracy(feature_set):
        seed(1)
        original_data = get_DT_cluster(original_dataset, cluster_num, feature_set, params)
        print(len(original_data))
        scores, dif_scores, trees = evaluate_algorithm(original_data, decision_tree,
                                                       n_folds, max_depth, min_size)
        # print("-->scores, dif_scores:", scores, dif_scores)
        all_scores = []
        all_scores.append(scores)
        all_scores.append(sum([s[1] for s in dif_scores]) / float(len(dif_scores)))
        all_scores.append(sum([s[2] for s in dif_scores]) / float(len(dif_scores)))
        print('Scores: %s' % scores)
        print('Mean Accuracy: %.3f%%' % (sum(scores) / float(len(scores))))
        # print("-->dif_scores:", dif_scores)
        print('0 Mean Accuracy: %.3f%%' % (sum([s[1] for s in dif_scores]) / float(len(dif_scores))))
        print('1 Mean Accuracy: %.3f%%' % (sum([s[2] for s in dif_scores]) / float(len(dif_scores))))
        f_accuracy.write(str(sum(scores) / float(len(scores))) + " ")
        f_accuracy.write(str(sum([s[1] for s in dif_scores]) / float(len(dif_scores))) + " ")
        f_accuracy.write(str(sum([s[2] for s in dif_scores]) / float(len(dif_scores))) + "\n")
        max_index = scores.index(max(scores))
        return all_scores, trees[max_index]

    def perturbation(sess, preds, x, feature_set, condition, clusters, limit, original_dataset):
        # grad_0 = gradient_graph(x, preds)
        # print("-->feature_set1:", feature_set)
        # inputs = get_cluster(sess, x, preds, dataset, cluster_num, feature_set, condition)
        basic_label = condition[-1][0]
        inputs = seed_test_input(clusters, limit, basic_label, feature_set, condition,
                                 original_dataset)
        # print("-->inputs:", inputs)
        length = len(inputs)
        print("-->length1", length)
        seed_num = 0
        ci_num = 0
        r = False
        itr_num = 0
        get_CI = False
        final_itr_num = 0
        zero_gradient_itr = 0
        # print("-->inputs", inputs)
        for num in range(len(inputs)):
            # print("-->seed iteration: ", num)
            seed_num += 1
            index = inputs[num]
            sample = original_dataset[index][:-1]
            sample = np.array([sample])
            # sample = X[index:index + 1]
            # sample = X[index]
            # print("-->sample:", sample)
            # probs = model_prediction(sess, x, preds, sample)[0]
            # label = np.argmax(probs)  # index of maximum probability in prediction
            # label1 = original_dataset[index][-1]
            # if label != label1:
            #     print("label != label1")
            # if label != basic_label:
            #     print("label != basic_label")
            # print("-->basic_label:", label)
            for iter in range(max_iter + 1):  # 10
                # print("--> global iteration:", iter)
                itr_num += 1
                # print("--> sample:", sample)
                s_grad = sess.run(tfops, feed_dict={x: sample})
                g_diff = s_grad[0]
                # print("-->g_diff", g_diff)
                # features in feature_set unchange
                # print("-->index in feature set:", feature_set)
                for index in feature_set:
                    g_diff[index - 1] = 0
                # print("-->g_diff", g_diff)
                if np.zeros(input_shape[1]).tolist() == g_diff.tolist():
                    # print("-->0 gradient")
                    # zero_gradient_itr += 1
                    # index = np.random.randint(len(g_diff) - 1)
                    # g_diff[index] = 1.0
                    break
                # sample[0] = clip(sample[0] + perturbation_size * g_diff, data_config[dataset]).astype("int")
                # n_sample = sample.copy()
                # print("1-->n_sample:", n_sample)
                n_sample = []
                new_sample = clip(sample[0] + perturbation_size * g_diff, data_config[dataset])
                n_sample.append(new_sample)
                n_sample = np.array(n_sample)
                # print("2-->n_sample:", n_sample)
                n_probs = model_prediction(sess, x, preds, n_sample)[0]  # n_probs: prediction vector
                n_label = np.argmax(n_probs)  # GET index of max value in n_probs
                # print("-", n_label)
                if n_label != basic_label:
                    # print("-->label != n_label")
                    # print("final label:", label, n_label)
                    # print("-->n_sample:", n_sample)
                    ci_num += 1
                    if get_CI == False:
                        final_itr_num = itr_num
                        get_CI = True
                    r = True
                    break
                    # return True
        # return False
        print(r, ci_num, seed_num, final_itr_num)
        return r, ci_num, seed_num, final_itr_num

    all_feature_set = list(range(1, data_config[dataset].params + 1))
    cluster_num = 4
    params = data_config[dataset].params
    max_depth = 2
    min_size = 10
    feature_sets = list(itertools.combinations(all_feature_set, k))
    print(feature_sets)
    DT_file_index = 0
    scores = []

    feature_sets = [(12, 16)]
    for feature_set in feature_sets:
        print("-->feature_set", feature_set)

        # decision tree
        # tree = all_DT_trees[DT_file_index]
        # tree = dict(eval(tree))
        DT_file_index += 1
        start1 = time.clock()
        score, tree = decision_tree_accuracy(feature_set)
        end1 = time.clock()
        f_trees.write(str(tree) + "\n")
        f_time.write(str(end1 - start1) + " ")

        # perturbation
        print("-->tree:", tree)
        tree_conditions = []
        get_conditions(tree, result=tree_conditions, dir=-1, tmp=[])
        print("-->tree_condition:", tree_conditions)
        # print(tree_conditions[15])
        all_result = []
        all_general_result = []
        results = []
        number = 1
        feature_set = list(feature_set)
        all_ci_num = 0
        all_seed_num = 0
        all_itr_num = 0
        limit = 1000
        clusters = get_cluster(dataset, cluster_num, feature_set)
        tree_brench = len(tree_conditions)
        # set tree conditions
        # tree_conditions = [tree_conditions[6]]
        start2 = time.clock()
        for condition in tree_conditions:
            print("sequence:", number, condition)
            result, ci_num, seed_num, itr_num = perturbation(sess, preds, x, feature_set,
                                                             condition, clusters, limit,
                                                             original_dataset)
            all_ci_num += ci_num
            all_seed_num += seed_num
            results.append(result)
            print("-->result:", result)
            if result == True:
                all_itr_num += itr_num
            number += 1
        all_result.append(results)
        true_num = results.count(True)
        print("-->results:", results)
        print("-->counter instance:", all_ci_num, all_seed_num, all_ci_num / float(all_seed_num))
        print("-->iteration num:", all_itr_num / float(true_num))
        # file 2: counter instance
        f_ci.write(str(all_ci_num) + " " + str(all_seed_num) + " " +
                   str(all_ci_num / float(all_seed_num)) + "\n")
        # file 3: iteration num
        f_iteration.write(str(all_itr_num / float(true_num)) + " " +
                          str(true_num / float(tree_brench)) + "\n")
        if len(results) == len(tree_conditions):
            if not any(results):
                print("-->used features:", feature_set)
                print("-->all_results:", all_result)
                print("-->interpretable!")
                break
        end2 = time.clock()
        f_time.write(str(end2 - start2) + "\n")
    return