def training(dataset, model_path):
    """
    Train the model on the given dataset
    :param dataset: the name of the dataset used for training
    :param model_path: the path for saving the trained model
    """
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}

    # prepare the data and model
    X, Y, input_shape, nb_classes = data[dataset]()
    print("-->x, y, input_shape, nb_classes", X, Y, input_shape, nb_classes)
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    model = dnn(input_shape, nb_classes)
    preds = model(x)
    print("-->preds:", preds)

    # training parameters
    train_params = {
        'nb_epochs': 1000,
        'batch_size': 128,
        'learning_rate': 0.01,
        'train_dir': model_path,
        'filename': 'test.model'
    }

    # training procedure
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2019, 7, 15])
    model_train(sess,
                x,
                y,
                preds,
                X,
                Y,
                args=train_params,
                rng=rng,
                save=False)

    # evaluate the accuracy of trained model
    eval_params = {'batch_size': 128}
    accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
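
# Usage sketch (hypothetical call; note that model_train above is invoked with
# save=False, so the 'train_dir'/'filename' entries in train_params only take
# effect once save is flipped to True):
#     training(dataset="census", model_path="../models/census/")
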
# Example #2
def gf(dataset,
       sens_param,
       ration=0.1,
       threshold=0.9,
       batch_size=256,
       epoch=9):
    tf.reset_default_graph()
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}
    # data preprocessing
    X, Y, input_shape, nb_classes = data[dataset](sens_param)
    X_original = np.array(X)
    Y_original = np.array(Y)

    # model structure
    model = dnn(input_shape, nb_classes)

    # tf operation
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)

    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    preds = model(x)

    saver = tf.train.Saver()
    saver.restore(sess, '../models/' + dataset + '/999/test.model')

    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for original model: {0}'.
          format(accuracy))

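    # Gaussian fuzzing (GF) mutation: count every trainable weight, draw a
    # random fraction `ration` of the global weight indices, then overwrite
    # each selected weight with a sample from a normal distribution fitted to
    # its own layer's weights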
    num_weights = 0
    for layer in model.layers:
        if "Conv2D" in layer.__class__.__name__:
            shape = layer.kernels.shape
            num_weights += int(shape[0] * shape[1] * shape[2] * shape[3])
        elif "BN" in layer.__class__.__name__:
            shape = layer.gamma.shape
            num_weights += int(shape[0])
        elif "Linear" in layer.__class__.__name__:
            shape = layer.W.shape
            num_weights += int(shape[0] * shape[1])
    indices = np.random.choice(num_weights,
                               int(num_weights * ration),
                               replace=False)

    weights_count = 0
    for i in range(len(model.layers)):
        layer = model.layers[i]
        if "Conv2D" in layer.__class__.__name__:
            shape = layer.kernels.shape
            num_weights_layer = int(shape[0] * shape[1] * shape[2] * shape[3])
            mutated_indices = set(indices) & set(
                np.arange(weights_count, weights_count + num_weights_layer))
            if mutated_indices:
                mutated_indices = np.array(
                    list(mutated_indices)) - weights_count
                current_weights = sess.run(layer.kernels).reshape(-1)
                avg_weights = np.mean(current_weights)
                std_weights = np.std(current_weights)
                mutated_weights = np.random.normal(avg_weights, std_weights,
                                                   mutated_indices.size)
                current_weights[mutated_indices] = mutated_weights
                update_weights = tf.assign(layer.kernels,
                                           current_weights.reshape(shape))
                sess.run(update_weights)
            weights_count += num_weights_layer
        elif "BN" in layer.__class__.__name__:
            shape = layer.gamma.shape
            num_weights_layer = int(shape[0])
            mutated_indices = set(indices) & set(
                np.arange(weights_count, weights_count + num_weights_layer))
            if mutated_indices:
                mutated_indices = np.array(
                    list(mutated_indices)) - weights_count
                current_weights = sess.run(layer.gamma).reshape(-1)
                avg_weights = np.mean(current_weights)
                std_weights = np.std(current_weights)
                mutated_weights = np.random.normal(avg_weights, std_weights,
                                                   mutated_indices.size)
                current_weights[mutated_indices] = mutated_weights
                update_weights = tf.assign(layer.gamma,
                                           current_weights.reshape(shape))
                sess.run(update_weights)
            weights_count += num_weights_layer
        elif "Linear" in layer.__class__.__name__:
            shape = layer.W.shape
            num_weights_layer = int(shape[0] * shape[1])
            mutated_indices = set(indices) & set(
                np.arange(weights_count, weights_count + num_weights_layer))
            if mutated_indices:
                mutated_indices = np.array(
                    list(mutated_indices)) - weights_count
                current_weights = sess.run(layer.W).reshape(-1)
                avg_weights = np.mean(current_weights)
                std_weights = np.std(current_weights)
                mutated_weights = np.random.normal(avg_weights, std_weights,
                                                   mutated_indices.size)
                current_weights[mutated_indices] = mutated_weights
                update_weights = tf.assign(layer.W,
                                           current_weights.reshape(shape))
                sess.run(update_weights)
            weights_count += num_weights_layer

    mutated_accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for mutated model: {0}'.
          format(mutated_accuracy))

    # if mutated_accuracy >= threshold * accuracy:
    #     train_dir = os.path.join(path.mu_model_path, 'gf', datasets + '_' + model_name, '0')
    #     if not os.path.exists(train_dir):
    #         os.makedirs(train_dir)
    #     save_path = os.path.join(train_dir, datasets + '_' + model_name + '.model')
    #     saver = tf.train.Saver()
    #     saver.save(sess, save_path)

    sess.close()
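
# Usage sketch (hypothetical arguments; sens_param is the 1-based index of the
# sensitive feature and ration is the fraction of weights to fuzz):
#     gf("census", sens_param=9, ration=0.1, threshold=0.9)
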
# Example #3
def aequitas(dataset, sensitive_param, model_path, max_global, max_local,
             step_size):
    """
    The implementation of AEQUITAS_Fully_Connected
    :param dataset: the name of the dataset under test
    :param sensitive_param: the index of the sensitive feature
    :param model_path: the path of the model under test
    :param max_global: the maximum number of samples for global search
    :param max_local: the maximum number of samples for local search
    :param step_size: the step size of perturbation
    :return:
    """
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}
    params = data_config[dataset].params

    # hyper-parameters for initial probabilities of directions
    init_prob = 0.5
    direction_probability = [init_prob] * params
    direction_probability_change_size = 0.001

    # hyper-parameters for features
    param_probability = [1.0 / params] * params
    param_probability_change_size = 0.001

    # prepare the testing data and model
    X, Y, input_shape, nb_classes = data[dataset]()
    model = dnn(input_shape, nb_classes)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    preds = model(x)
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    # store the result of fairness testing
    global_disc_inputs = set()
    global_disc_inputs_list = []
    local_disc_inputs = set()
    local_disc_inputs_list = []
    tot_inputs = set()

    # initial input
    if dataset == "census":
        initial_input = [7, 4, 26, 1, 4, 4, 0, 0, 0, 1, 5, 73, 1]
    elif dataset == "credit":
        initial_input = [
            2, 24, 2, 2, 37, 0, 1, 2, 1, 0, 4, 2, 2, 2, 1, 1, 2, 1, 0, 0
        ]
    elif dataset == "bank":
        initial_input = [3, 11, 2, 0, 0, 5, 1, 0, 0, 5, 4, 40, 1, 1, 0, 0]
    minimizer = {"method": "L-BFGS-B"}

    def evaluate_local(inp):
        """
        Evaluate whether the test input after local perturbation is an individual discriminatory instance
        :param inp: test input
        :return: whether it is an individual discriminatory instance
        """
        result = check_for_error_condition(data_config[dataset], sess, x,
                                           preds, inp, sensitive_param)
        temp = copy.deepcopy(inp.astype('int').tolist())
        temp = temp[:sensitive_param - 1] + temp[sensitive_param:]
        tot_inputs.add(tuple(temp))
        if result != int(inp[sensitive_param - 1]) and (
                tuple(temp)
                not in global_disc_inputs) and (tuple(temp)
                                                not in local_disc_inputs):
            local_disc_inputs.add(tuple(temp))
            local_disc_inputs_list.append(temp)
        return not result

    global_discovery = Global_Discovery(data_config[dataset])
    local_perturbation = Local_Perturbation(
        sess, preds, x, data_config[dataset], sensitive_param,
        param_probability, param_probability_change_size,
        direction_probability, direction_probability_change_size, step_size)

    length = min(max_global, len(X))
    value_list = []
    for i in range(length):
        # global generation
        inp = global_discovery(initial_input)
        temp = copy.deepcopy(inp)
        temp = temp[:sensitive_param - 1] + temp[sensitive_param:]
        tot_inputs.add(tuple(temp))

        result = check_for_error_condition(data_config[dataset], sess, x,
                                           preds, inp, sensitive_param)

        # if get an individual discriminatory instance
        if result != inp[sensitive_param - 1] and (
                tuple(temp)
                not in global_disc_inputs) and (tuple(temp)
                                                not in local_disc_inputs):
            global_disc_inputs_list.append(temp)
            global_disc_inputs.add(tuple(temp))
            value_list.append([inp[sensitive_param - 1], result])

            # local generation
            basinhopping(evaluate_local,
                         inp,
                         stepsize=1.0,
                         take_step=local_perturbation,
                         minimizer_kwargs=minimizer,
                         niter=max_local)

    # create the folder for storing the fairness testing result
    if not os.path.exists('../results/'):
        os.makedirs('../results/')
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    if not os.path.exists('../results/' + dataset + '/' +
                          str(sensitive_param) + '/'):
        os.makedirs('../results/' + dataset + '/' + str(sensitive_param) + '/')

    # storing the fairness testing result
    np.save(
        '../results/' + dataset + '/' + str(sensitive_param) +
        '/global_samples_aequitas.npy', np.array(global_disc_inputs_list))
    np.save(
        '../results/' + dataset + '/' + str(sensitive_param) +
        '/disc_value_aequitas.npy', np.array(value_list))
    np.save(
        '../results/' + dataset + '/' + str(sensitive_param) +
        '/local_samples_aequitas.npy', np.array(local_disc_inputs_list))

    # print the overview information of result
    print("Total Inputs are " + str(len(tot_inputs)))
    print("Total discriminatory inputs of global search- " +
          str(len(global_disc_inputs)))
    print("Total discriminatory inputs of local search- " +
          str(len(local_disc_inputs)))
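
# Usage sketch (hypothetical arguments; the checkpoint path follows the
# '../models/<dataset>/999/test.model' layout used by the mutation examples):
#     aequitas("census", sensitive_param=9,
#              model_path="../models/census/999/test.model",
#              max_global=1000, max_local=1000, step_size=1.0)
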
# Example #4
def ns(dataset,
       sens_param,
       ration=0.1,
       threshold=0.9,
       batch_size=256,
       epoch=9):
    tf.reset_default_graph()
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}
    # data preprocessing
    X, Y, input_shape, nb_classes = data[dataset](sens_param)
    X_original = np.array(X)
    Y_original = np.array(Y)

    # model structure
    model = dnn(input_shape, nb_classes)

    # tf operation
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)

    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    preds = model(x)

    saver = tf.train.Saver()
    saver.restore(sess, '../models/' + dataset + '/999/test.model')

    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for original model: {0}'.
          format(accuracy))

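    # Neuron switch (NS) mutation: in each layer, pick a random fraction
    # `ration` of neurons and permute their incoming weights and biases among
    # themselves; a directly following BN layer has its per-neuron statistics
    # permuted the same way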
    for i in range(len(model.layers)):
        layer = model.layers[i]
        if "Conv2D" in layer.__class__.__name__:
            unique_neurons_layer = layer.output_channels
            shuffle_num = unique_neurons_layer * ration
            if shuffle_num > 1.0:
                shuffle_num = math.floor(
                    shuffle_num) if shuffle_num > 2.0 else math.ceil(
                        shuffle_num)
                mutated_neurons = np.random.choice(unique_neurons_layer,
                                                   int(shuffle_num),
                                                   replace=False)
                current_weights = sess.run(layer.kernels).transpose(
                    [3, 0, 1, 2])
                current_bias = sess.run(layer.b)
                shuffle_neurons = copy.copy(mutated_neurons)
                np.random.shuffle(shuffle_neurons)
                current_weights[mutated_neurons] = current_weights[
                    shuffle_neurons]
                current_bias[mutated_neurons] = current_bias[shuffle_neurons]
                update_weights = tf.assign(
                    layer.kernels, current_weights.transpose([1, 2, 3, 0]))
                update_bias = tf.assign(layer.b, current_bias)
                sess.run(update_weights)
                sess.run(update_bias)
                if "BN" in model.layers[i + 1].__class__.__name__:
                    layer = model.layers[i + 1]
                    current_gamma = sess.run(layer.gamma)
                    current_beta = sess.run(layer.beta)
                    current_moving_mean = sess.run(layer.moving_mean)
                    current_moving_variance = sess.run(layer.moving_variance)
                    current_gamma[mutated_neurons] = current_gamma[
                        shuffle_neurons]
                    current_beta[mutated_neurons] = current_beta[
                        shuffle_neurons]
                    current_moving_mean[mutated_neurons] = current_moving_mean[
                        shuffle_neurons]
                    current_moving_variance[
                        mutated_neurons] = current_moving_variance[
                            shuffle_neurons]
                    update_gamma = tf.assign(layer.gamma, current_gamma)
                    update_beta = tf.assign(layer.beta, current_beta)
                    update_moving_mean = tf.assign(layer.moving_mean,
                                                   current_moving_mean)
                    update_moving_variance = tf.assign(
                        layer.moving_variance, current_moving_variance)
                    sess.run(update_gamma)
                    sess.run(update_beta)
                    sess.run(update_moving_mean)
                    sess.run(update_moving_variance)
        elif "Linear" in layer.__class__.__name__:
            unique_neurons_layer = layer.num_hid
            shuffle_num = unique_neurons_layer * ration
            if shuffle_num > 1.0:
                shuffle_num = math.floor(
                    shuffle_num) if shuffle_num > 2.0 else math.ceil(
                        shuffle_num)
                mutated_neurons = np.random.choice(unique_neurons_layer,
                                                   int(shuffle_num),
                                                   replace=False)
                current_weights = sess.run(layer.W).transpose([1, 0])
                current_bias = sess.run(layer.b)
                shuffle_neurons = copy.copy(mutated_neurons)
                np.random.shuffle(shuffle_neurons)
                current_weights[mutated_neurons] = current_weights[
                    shuffle_neurons]
                current_bias[mutated_neurons] = current_bias[shuffle_neurons]
                update_weights = tf.assign(layer.W,
                                           current_weights.transpose([1, 0]))
                update_bias = tf.assign(layer.b, current_bias)
                sess.run(update_weights)
                sess.run(update_bias)

    mutated_accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for mutated model: {0}'.
          format(mutated_accuracy))

    # if mutated_accuracy >= threshold * accuracy:
    #     train_dir = os.path.join(path.mu_model_path, 'ns', dataset + '_' + model_name, '0')
    #     if not os.path.exists(train_dir):
    #         os.makedirs(train_dir)
    #     save_path = os.path.join(train_dir, datasets + '_' + model_name + '.model')
    #     saver = tf.train.Saver()
    #     saver.save(sess, save_path)

    sess.close()
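
# Usage sketch (hypothetical arguments, mirroring gf above):
#     ns("census", sens_param=9, ration=0.1)
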
# Example #5
def ws(dataset,
       sens_param,
       ration=0.1,
       threshold=0.9,
       batch_size=256,
       epoch=9):
    tf.reset_default_graph()
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}
    # data preprocessing
    X, Y, input_shape, nb_classes = data[dataset](sens_param)
    X_original = np.array(X)
    Y_original = np.array(Y)

    # model structure
    model = dnn(input_shape, nb_classes)

    # tf operation
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)

    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    preds = model(x)

    saver = tf.train.Saver()
    saver.restore(sess, '../models/' + dataset + '/999/test.model')

    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for original model: {0}'.
          format(accuracy))

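    # Weight shuffling (WS) mutation: pick a random fraction `ration` of
    # neurons across all layers and shuffle the incoming weights of each
    # selected neuron internally, so the neuron keeps the same multiset of
    # weights in a new arrangement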
    unique_neurons = 0
    for layer in model.layers:
        if "Conv2D" in layer.__class__.__name__:
            unique_neurons += layer.output_channels
        elif "Linear" in layer.__class__.__name__:
            unique_neurons += layer.num_hid
            # each BN neuron pairs with exactly one preceding neuron, so BN
            # layers contribute no additional unique neurons
    indices = np.random.choice(unique_neurons,
                               int(unique_neurons * ration),
                               replace=False)

    neurons_count = 0
    for i in range(len(model.layers)):
        layer = model.layers[i]
        if "Conv2D" in layer.__class__.__name__:
            unique_neurons_layer = layer.output_channels
            mutated_neurons = set(indices) & set(
                np.arange(neurons_count, neurons_count + unique_neurons_layer))
            if mutated_neurons:
                mutated_neurons = np.array(
                    list(mutated_neurons)) - neurons_count
                current_weights = sess.run(layer.kernels).transpose(
                    [3, 0, 1, 2])
                for neuron in mutated_neurons:
                    old_data = current_weights[neuron].reshape(-1)
                    shuffle_index = np.arange(len(old_data))
                    np.random.shuffle(shuffle_index)
                    new_data = old_data[shuffle_index].reshape(
                        layer.kernels.shape[0], layer.kernels.shape[1],
                        layer.kernels.shape[2])
                    current_weights[neuron] = new_data
                update_weights = tf.assign(
                    layer.kernels, current_weights.transpose([1, 2, 3, 0]))
                sess.run(update_weights)
            neurons_count += unique_neurons_layer
        elif "Linear" in layer.__class__.__name__:
            unique_neurons_layer = layer.num_hid
            mutated_neurons = set(indices) & set(
                np.arange(neurons_count, neurons_count + unique_neurons_layer))
            if mutated_neurons:
                mutated_neurons = np.array(
                    list(mutated_neurons)) - neurons_count
                current_weights = sess.run(layer.W).transpose([1, 0])
                for neuron in mutated_neurons:
                    old_data = current_weights[neuron]
                    shuffle_index = np.arange(len(old_data))
                    np.random.shuffle(shuffle_index)
                    new_data = old_data[shuffle_index]
                    current_weights[neuron] = new_data
                update_weights = tf.assign(layer.W,
                                           current_weights.transpose([1, 0]))
                sess.run(update_weights)
            neurons_count += unique_neurons_layer

    mutated_accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for mutated model: {0}'.
          format(mutated_accuracy))

    # if mutated_accuracy >= threshold * accuracy:
    #     train_dir = os.path.join(path.mu_model_path, 'ws', dataset + '_' + model_name, '0')
    #     if not os.path.exists(train_dir):
    #         os.makedirs(train_dir)
    #     save_path = os.path.join(train_dir, datasets + '_' + model_name + '.model')
    #     saver = tf.train.Saver()
    #     saver.save(sess, save_path)

    sess.close()
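
# Usage sketch (hypothetical arguments, mirroring gf/ns above):
#     ws("census", sens_param=9, ration=0.1)
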
def interpretability(filename, dataset, directorys, k_values, thresholds):
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}

    params = data_config[dataset].params
    X, Y, input_shape, nb_classes = data[dataset]()
    config = tf.ConfigProto()
    conf = data_config[dataset]
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    model = dnn(input_shape, nb_classes)
    preds = model(x)
    # print("-->preds ", preds)
    saver = tf.train.Saver()
    # model_file = "../retrained_models/" + dataset + "_df/999/test.model"
    model_file = "../models/" + dataset + "/test.model"
    saver.restore(sess, model_file)
    grad_0 = gradient_graph(x, preds)
    tfops = tf.sign(grad_0)

    # process dataset
    dataset_list = load_csv(filename)
    del dataset_list[0]
    for i in range(len(dataset_list[0])):
        str_column_to_float(dataset_list, i)
    print("-->dataset:", np.array(dataset_list))
    print(np.array(dataset_list).shape)
    model_dataset = []
    row_data = []
    for d in dataset_list:
        del d[-1]
        row_data.append(d)
        probs = model_prediction(sess, x, preds,
                                 np.array([d]))[0]  # n_probs: prediction vector
        label = np.argmax(probs)  # GET index of max value in n_probs
        d.append(label)
        model_dataset.append(d)
    print("-->dataset:", np.array(model_dataset))
    print(np.array(model_dataset).shape)
    original_dataset = model_dataset

    ######
    # use DT with highest accuracy
    def get_dt(directory, k_value):
        tree_file = directory + "DT_trees"
        all_DT_trees = []
        # print("-->tree_file", tree_file)
        with open(tree_file, 'r') as f:
            for line in f:
                all_DT_trees.append(line.split("\n")[0])
        accuracy_file = directory + "accuracy"
        accuracy = []
        max_accuracy_feature = 0
        max_accuracy = 0
        with open(accuracy_file, 'r') as f:
            lines = f.readlines()
            for i, line in enumerate(lines):
                value = [float(s) for s in line.split()]
                accuracy.append(value[0])
                if value[0] > max_accuracy:
                    max_accuracy = value[0]
                    max_accuracy_feature = i
        all_feature_set = list(range(1, params + 1))
        feature_sets = list(itertools.combinations(all_feature_set, k_value))
        print("-->selected feature_set:", feature_sets[max_accuracy_feature],
              max_accuracy_feature, max_accuracy)
        # if k_value == 3:
        #     max_accuracy_feature = 170
        #     print(feature_sets[max_accuracy_feature])
        # if k_value == 2:
        #     max_accuracy_feature = 50  # 72
        #     print(feature_sets[max_accuracy_feature])
        feature_sets = [feature_sets[max_accuracy_feature]]
        return feature_sets, all_DT_trees[max_accuracy_feature]

    def get_labels1(tree):
        original_labels = [i[-1] for i in original_dataset]
        predict_labels = []
        for d in row_data:
            d = list(map(int, d))
            label = predict(tree, d)
            predict_labels.append(label)
        return original_labels, predict_labels

    def get_labels(tree, test_datas):
        original_labels = []
        predict_labels = []
        for d in test_datas:
            probs = model_prediction(sess, x, preds, np.array(
                [d]))[0]  # n_probs: prediction vector
            model_label = np.argmax(probs)  # GET index of max value in n_probs
            original_labels.append(model_label)
            tree_label = predict(tree, d, feature_sets[0])
            predict_labels.append(tree_label)
        return original_labels, predict_labels

    def one_num(labels):
        # count how many labels equal 1
        num = 0
        for label in labels:
            if label == 1:
                num += 1
        return num

    def sprt_three_figure(all_prs, accept_pr, deny_pr, threshold, k_values):
        prs1 = all_prs[0]
        prs2 = all_prs[1]
        prs3 = all_prs[2]
        k_value1 = k_values[0]
        k_value2 = k_values[1]
        k_value3 = k_values[2]

        length = max(len(prs1), len(prs2), len(prs3))
        Y = list(range(0, length))
        title_name = "threshold=" + str(threshold)
        plt.title(title_name)
        accept_prs = [accept_pr] * length
        deny_prs = [deny_pr] * length
        plt.plot(Y,
                 accept_prs,
                 color='black',
                 linestyle="--",
                 label="accept_bound")
        plt.plot(Y, deny_prs, color='black', linestyle=":", label="deny_bound")

        plt.plot(list(range(0, len(prs1))),
                 prs1,
                 color='red',
                 label="k=" + str(k_value1))
        plt.plot(list(range(0, len(prs2))),
                 prs2,
                 color='blue',
                 label="k=" + str(k_value2))
        plt.plot(list(range(0, len(prs3))),
                 prs3,
                 color='green',
                 label="k=" + str(k_value3))
        # plt.legend()
        # plt.legend(loc=[0, 1])
        # plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=3, fancybox=True, shadow=True)
        plt.legend(loc='upper center',
                   bbox_to_anchor=(0.5, -0.15),
                   fancybox=True,
                   shadow=True,
                   ncol=5)
        plt.xlabel('number of detected samples')
        plt.ylabel('rate')
        plt.show()

    def sprt_four_figure(all_prs, accept_pr, deny_pr, threshold, k_values):
        prs1 = all_prs[0]
        prs2 = all_prs[1]
        prs3 = all_prs[2]
        prs4 = all_prs[3]
        k_value1 = k_values[0]
        k_value2 = k_values[1]
        k_value3 = k_values[2]
        k_value4 = k_values[3]

        length = max(len(prs1), len(prs2), len(prs3), len(prs4))
        Y = list(range(0, length))
        title_name = "threshold=" + str(threshold)
        plt.title(title_name)
        accept_prs = [accept_pr] * length
        deny_prs = [deny_pr] * length
        plt.plot(Y,
                 accept_prs,
                 color='black',
                 linestyle="--",
                 label="accept_bound")
        plt.plot(Y, deny_prs, color='black', linestyle=":", label="deny_bound")

        plt.plot(list(range(0, len(prs1))),
                 prs1,
                 color='red',
                 label="k=" + str(k_value1))
        plt.plot(list(range(0, len(prs2))),
                 prs2,
                 color='blue',
                 label="k=" + str(k_value2))
        plt.plot(list(range(0, len(prs3))),
                 prs3,
                 color='green',
                 label="k=" + str(k_value3))
        plt.plot(list(range(0, len(prs4))),
                 prs4,
                 color='purple',
                 label="k=" + str(k_value4))
        # plt.legend()
        # plt.legend(loc=[0, 1])
        # plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=3, fancybox=True, shadow=True)
        plt.legend(loc='upper center',
                   bbox_to_anchor=(0.5, -0.15),
                   fancybox=True,
                   shadow=True,
                   ncol=5)
        plt.xlabel('number of detected samples')
        plt.ylabel('rate')
        plt.show()

    def sprt_one_figure(prs, accept_pr, deny_pr, threshold, k_value):
        length = len(prs)
        Y = list(range(0, length))
        title_name = "threshold=" + str(threshold) + " (k=" + str(
            k_value) + ")"
        plt.title(title_name)
        accept_prs = [accept_pr] * length
        deny_prs = [deny_pr] * length
        plt.plot(Y,
                 accept_prs,
                 color='black',
                 linestyle="--",
                 label="accept_bound")
        plt.plot(Y, deny_prs, color='black', linestyle=":", label="deny_bound")
        plt.plot(Y, prs, label="k=" + str(k_value))
        # plt.plot(sub_axix, test_acys, color='red', label='testing accuracy')
        # plt.plot(x_axix, train_pn_dis, color='skyblue', label='PN distance')
        # plt.plot(x_axix, thresholds, color='blue', label='threshold')
        plt.legend()
        plt.xlabel('number of detected samples')
        plt.ylabel('rate')
        plt.show()

    all_prs = []
    random_test_data = generate_random_data(1000, conf)
    print("-->random_test_data:", random_test_data)
    for i in range(0, len(directorys)):
        directory = directorys[i]
        k_value = k_values[i]

        print("-->dir, k", directory, k_value)

        feature_sets, tree = get_dt(directory, k_value)
        print("-->feature_set", feature_sets[0])

        print("-->tree", tree)

        tree = dict(eval(tree))
        original_labels, predict_labels = get_labels(tree, random_test_data)
        same_ratio(original_labels, predict_labels)
        print("-->one num", one_num(original_labels), one_num(predict_labels))

        print("-->original labels", original_labels)
        print("-->predict labels", predict_labels)
        # print("-->sprt result:", sprt_detect(original_labels, predict_labels, threshold, k_value))

        # the enclosing function receives a list `thresholds`; use its first
        # entry for the single-threshold SPRT run (the original referenced an
        # undefined name `threshold` here)
        if_accept, same_count, total_count, prs, accept_pr, deny_pr = sprt_detect(
            original_labels, predict_labels, k_value, thresholds[0])
        print("-->sprt result:", if_accept, same_count, total_count)
        all_prs.append(prs)

        sprt_detect_multiplethre(original_labels, predict_labels, k_value,
                                 thresholds)

    # test
    # sprt_three_figure(all_prs, accept_pr, deny_pr, threshold, k_values)
    # sprt_four_figure(all_prs, accept_pr, deny_pr, threshold, k_values)
    # sprt_one_figure(prs, accept_pr, deny_pr, k_value, threshold)

    return
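
# Usage sketch (hypothetical paths; directorys holds one result directory per
# k value, each containing the 'DT_trees' and 'accuracy' files that get_dt
# reads):
#     interpretability("../datasets/census", "census",
#                      directorys=["../DT_results/census/k1/",
#                                  "../DT_results/census/k2/"],
#                      k_values=[1, 2], thresholds=[0.85, 0.90])
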
def symbolic_generation(dataset, sensitive_param, model_path, cluster_num,
                        limit):
    """
    The implementation of symbolic generation
    :param dataset: the name of dataset
    :param sensitive_param: the index of sensitive feature
    :param model_path: the path of testing model
    :param cluster_num: the number of clusters to form as well as the number of
            centroids to generate
    :param limit: the maximum number of test cases
    """
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}

    # ranks for the priority queue: rank1 for seed inputs, rank2 for the
    # local search, rank3 for the global search
    rank1 = 5
    rank2 = 1
    rank3 = 10
    T1 = 0.3

    # prepare the testing data and model
    X, Y, input_shape, nb_classes = data[dataset]()
    arguments = gen_arguments(data_config[dataset])
    model = dnn(input_shape, nb_classes)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    preds = model(x)
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    # store the result of fairness testing
    global_disc_inputs = set()
    global_disc_inputs_list = []
    local_disc_inputs = set()
    local_disc_inputs_list = []
    tot_inputs = set()

    # select the seed input for fairness testing
    inputs = seed_test_input(dataset, cluster_num, limit)
    q = PriorityQueue()  # low push first
    for inp in inputs[::-1]:
        q.put((rank1, X[inp].tolist()))

    visited_path = []
    l_count = 0
    g_count = 0
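    # main loop: pop the lowest-ranked (highest-priority) input, check whether
    # it is discriminatory, then flip branches of its decision path one at a
    # time; single-branch flips feed the local search (rank2) and negated
    # prefixes feed the global search (rank3)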
    while len(tot_inputs) < limit and q.qsize() != 0:
        t = q.get()
        t_rank = t[0]
        t = np.array(t[1])
        found = check_for_error_condition(data_config[dataset], sess, x, preds,
                                          t, sensitive_param)
        p = getPath(X, sess, x, preds, t, data_config[dataset])
        temp = copy.deepcopy(t.tolist())
        temp = temp[:sensitive_param - 1] + temp[sensitive_param:]

        tot_inputs.add(tuple(temp))
        if found:
            if (tuple(temp)
                    not in global_disc_inputs) and (tuple(temp)
                                                    not in local_disc_inputs):
                if t_rank > 2:
                    global_disc_inputs.add(tuple(temp))
                    global_disc_inputs_list.append(temp)
                else:
                    local_disc_inputs.add(tuple(temp))
                    local_disc_inputs_list.append(temp)
                if len(tot_inputs) == limit:
                    break

            # local search
            for i in range(len(p)):
                path_constraint = copy.deepcopy(p)
                c = path_constraint[i]
                if c[0] == sensitive_param - 1:
                    continue

                if c[1] == "<=":
                    c[1] = ">"
                    c[3] = 1.0 - c[3]
                else:
                    c[1] = "<="
                    c[3] = 1.0 - c[3]

                if path_constraint not in visited_path:
                    visited_path.append(path_constraint)
                    new_input = local_solve(path_constraint, arguments, t, i,
                                            data_config[dataset])
                    l_count += 1
                    if new_input is not None:
                        r = average_confidence(path_constraint)
                        q.put((rank2 + r, new_input))

        # global search
        prefix_pred = []
        for c in p:
            if c[0] == sensitive_param - 1:
                continue
            if c[3] < T1:
                break

            n_c = copy.deepcopy(c)
            if n_c[1] == "<=":
                n_c[1] = ">"
                n_c[3] = 1.0 - c[3]
            else:
                n_c[1] = "<="
                n_c[3] = 1.0 - c[3]
            path_constraint = prefix_pred + [n_c]

            # filter out the path_constraint already solved before
            if path_constraint not in visited_path:
                visited_path.append(path_constraint)
                new_input = global_solve(path_constraint, arguments, t,
                                         data_config[dataset])
                g_count += 1
                if new_input is not None:
                    r = average_confidence(path_constraint)
                    q.put((rank3 - r, new_input))

            prefix_pred = prefix_pred + [c]

    # create the folder for storing the fairness testing result
    if not os.path.exists('../results/'):
        os.makedirs('../results/')
    if not os.path.exists('../results/' + dataset + '/'):
        os.makedirs('../results/' + dataset + '/')
    if not os.path.exists('../results/' + dataset + '/' +
                          str(sensitive_param) + '/'):
        os.makedirs('../results/' + dataset + '/' + str(sensitive_param) + '/')

    # storing the fairness testing result
    np.save(
        '../results/' + dataset + '/' + str(sensitive_param) +
        '/global_samples_symbolic.npy', np.array(global_disc_inputs_list))
    np.save(
        '../results/' + dataset + '/' + str(sensitive_param) +
        '/local_samples_symbolic.npy', np.array(local_disc_inputs_list))

    # print the overview information of result
    print("Total Inputs are " + str(len(tot_inputs)))
    print(
        "Total discriminatory inputs of global search- " +
        str(len(global_disc_inputs)), g_count)
    print(
        "Total discriminatory inputs of local search- " +
        str(len(local_disc_inputs)), l_count)
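
# Usage sketch (hypothetical arguments; cluster_num is the number of clusters
# used when selecting seed inputs and limit caps the number of generated tests):
#     symbolic_generation("census", sensitive_param=9,
#                         model_path="../models/census/999/test.model",
#                         cluster_num=4, limit=1000)
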
# Example #8
def nai(dataset, sens_param, ration=0.1, threshold=0.9, batch_size=256, epoch=9):
    tf.reset_default_graph()
    data = {"census":census_data, "credit":credit_data, "bank":bank_data}
    data_config = {"census":census, "credit":credit, "bank":bank}
    # data preprocessing
    X, Y, input_shape, nb_classes = data[dataset](sens_param)
    X_original = np.array(X)
    Y_original = np.array(Y)

    # model structure
    model = dnn(input_shape, nb_classes)

    # tf operation
    tf.set_random_seed(1234)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)

    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    preds = model(x)

    saver = tf.train.Saver()
    saver.restore(sess, '../models/' + dataset + '/999/test.model')

    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for original model: {0}'.format(accuracy))

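    # Neuron activation inversion (NAI) mutation: pick a random fraction
    # `ration` of neurons and negate their incoming weights and bias, flipping
    # the sign of each selected neuron's pre-activation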
    unique_neurons = 0
    for layer in model.layers:
        if "Conv2D" in layer.__class__.__name__:
            unique_neurons += layer.output_channels
        elif "Linear" in layer.__class__.__name__:
            unique_neurons += layer.num_hid
            # BN layers are not counted separately: their parameters are
            # inverted together with the preceding Conv2D layer below
    indices = np.random.choice(unique_neurons, int(unique_neurons * ration), replace=False)

    neurons_count = 0
    for i in range(len(model.layers)):
        layer = model.layers[i]
        if "Conv2D" in layer.__class__.__name__:
            unique_neurons_layer = layer.output_channels
            mutated_neurons = set(indices) & set(np.arange(neurons_count, neurons_count + unique_neurons_layer))
            if mutated_neurons:
                mutated_neurons = np.array(list(mutated_neurons)) - neurons_count
                kernel_shape = layer.kernel_shape
                mutation_mask = np.asarray([1.0] * unique_neurons_layer)
                mutation_mask[mutated_neurons] = -1.0
                mutated_kernel = np.asarray([[[list(mutation_mask)]] * kernel_shape[1]] * kernel_shape[0])
                update_kernel = tf.assign(layer.kernels, mutated_kernel * sess.run(layer.kernels))
                update_bias = tf.assign(layer.b, mutation_mask * sess.run(layer.b))
                sess.run(update_kernel)
                sess.run(update_bias)
                # guard: Conv2D may be the last layer
                if i + 1 < len(model.layers) and "BN" in model.layers[i + 1].__class__.__name__:
                    layer = model.layers[i + 1]
                    update_beta = tf.assign(layer.beta, mutation_mask * sess.run(layer.beta))
                    update_moving_mean = tf.assign(layer.moving_mean, mutation_mask * sess.run(layer.moving_mean))
                    sess.run(update_beta)
                    sess.run(update_moving_mean)
            neurons_count += unique_neurons_layer
        elif "Linear" in layer.__class__.__name__:
            unique_neurons_layer = layer.num_hid
            mutated_neurons = set(indices) & set(np.arange(neurons_count, neurons_count + unique_neurons_layer))
            if mutated_neurons:
                mutated_neurons = np.array(list(mutated_neurons)) - neurons_count
                input_shape = layer.input_shape[1]
                mutation_mask = np.asarray([1.0] * unique_neurons_layer)
                mutation_mask[mutated_neurons] = -1.0
                mutated_weight = np.asarray([list(mutation_mask)] * input_shape)
                weight = sess.run(layer.W)
                update_weight = tf.assign(layer.W, mutated_weight * weight)
                update_bias = tf.assign(layer.b, mutation_mask * sess.run(layer.b))
                sess.run(update_weight)
                sess.run(update_bias)
            neurons_count += unique_neurons_layer

    mutated_accuracy = model_eval(sess, x, y, preds, X, Y, args=eval_params)
    print('Test accuracy on legitimate test examples for mutated model: {0}'.format(mutated_accuracy))

    # if mutated_accuracy >= threshold * accuracy:
    #     train_dir = os.path.join(path.mu_model_path, 'nai', dataset + '_' + model_name, '0')
    #     if not os.path.exists(train_dir):
    #         os.makedirs(train_dir)
    #     save_path = os.path.join(train_dir, datasets + '_' + model_name + '.model')
    #     saver = tf.train.Saver()
    #     saver.save(sess, save_path)

    sess.close()
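
# Usage sketch (hypothetical arguments, mirroring the other mutation examples):
#     nai("census", sens_param=9, ration=0.1)
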
def interpretability(filename, dataset, max_iter, k, n_folds, f_gini, f_accuracy, f_time, f_ci, f_iteration, f_trees):
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}

    X, Y, input_shape, nb_classes = data[dataset]()
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    model = dnn(input_shape, nb_classes)
    preds = model(x)
    # print("-->preds ", preds)
    saver = tf.train.Saver()
    saver.restore(sess, "../models/bank/test.model")
    grad_0 = gradient_graph(x, preds)
    tfops = tf.sign(grad_0)

    dataset_list = load_csv(filename)
    del dataset_list[0]
    for i in range(len(dataset_list[0])):
        str_column_to_float(dataset_list, i)
    # print("-->dataset:", np.array(dataset_list))
    # print(np.array(dataset_list).shape)

    new_dataset = []
    for d in dataset_list:
        del d[-1]
        # d_plus = clip(np.array([d]), data_config[dataset_name]).astype("int")
        # d = d_plus[0]

        #clip d in dataset_list
        # d = clip(d, data_config[dataset])
        # d = list(np.array(d).astype("int"))

        # print(d, type(d), type(d[0]))
        # d = np.array([d])
        probs = model_prediction(sess, x, preds, np.array([d]))[0]  # n_probs: prediction vector
        label = np.argmax(probs)  # GET index of max value in n_probs
        prob = probs[label]
        # d = np.array(d, label)
        d.append(label)
        # print(d)
        new_dataset.append(d)

    # print("-->dataset:", np.array(new_dataset))
    # print(np.array(new_dataset).shape)
    original_dataset = new_dataset

    def decision_tree_accuracy(feature_set):
        seed(1)
        original_data = get_DT_cluster(original_dataset, cluster_num, feature_set, params)
        print(len(original_data))
        scores, dif_scores, trees = evaluate_algorithm(original_data, decision_tree, n_folds, max_depth, min_size)
        # print("-->scores, dif_scores:", scores, dif_scores)
        all_scores = []
        all_scores.append(scores)
        all_scores.append(sum([s[1] for s in dif_scores]) / float(len(dif_scores)))
        all_scores.append(sum([s[2] for s in dif_scores]) / float(len(dif_scores)))
        print('Scores: %s' % scores)
        print('Mean Accuracy: %.3f%%' % (sum(scores) / float(len(scores))))
        # print("-->dif_scores:", dif_scores)
        print('0 Mean Accuracy: %.3f%%' % (sum([s[1] for s in dif_scores]) / float(len(dif_scores))))
        print('1 Mean Accuracy: %.3f%%' % (sum([s[2] for s in dif_scores]) / float(len(dif_scores))))

        f_accuracy.write(str(sum(scores) / float(len(scores))) + " ")
        f_accuracy.write(str(sum([s[1] for s in dif_scores]) / float(len(dif_scores))) + " ")
        f_accuracy.write(str(sum([s[2] for s in dif_scores]) / float(len(dif_scores))) + "\n")

        max_index = scores.index(max(scores))

        return all_scores, trees[max_index]

    def perturbation(sess, preds, x, feature_set, condition, clusters, limit, original_dataset):
        # grad_0 = gradient_graph(x, preds)
        # print("-->feature_set1:", feature_set)

        # inputs = get_cluster(sess, x, preds, dataset, cluster_num, feature_set, condition)
        basic_label = condition[-1][0]
        inputs = seed_test_input(clusters, limit, basic_label, feature_set, condition, original_dataset)
        # print("-->inputs:", inputs)

        length = len(inputs)
        print("-->length1", length)

        seed_num = 0
        ci_num = 0
        r = False
        itr_num = 0
        get_CI = False
        final_itr_num = 0
        zero_gradient_itr = 0

        # print("-->inputs", inputs)

        for num in range(len(inputs)):
            # print("-->seed iteration: ", num)
            seed_num += 1

            index = inputs[num]
            sample = original_dataset[index][:-1]
            sample = np.array([sample])
            # sample = X[index:index + 1]
            # sample = X[index]
            # print("-->sample:", sample)
            # probs = model_prediction(sess, x, preds, sample)[0]
            # label = np.argmax(probs)  # index of maximum probability in prediction
            # label1 = original_dataset[index][-1]
            # if label != label1:
            #     print("label != label1")
            # if label != basic_label:
            #     print("label != basic_label")
            # print("-->basic_label:", label)

            for iter in range(max_iter + 1):
                # print("--> global iteration:", iter)
                itr_num += 1
                # print("--> sample:", sample)

                s_grad = sess.run(tfops, feed_dict={x: sample})
                g_diff = s_grad[0]
                # print("-->g_diff", g_diff)

                # features in feature_set unchange
                # print("-->index in feature set:", feature_set)
                for index in feature_set:
                    g_diff[index - 1] = 0
                # print("-->g_diff", g_diff)

                if np.zeros(input_shape[1]).tolist() == g_diff.tolist():
                    # print("-->0 gradient")
                    # zero_gradient_itr += 1
                    # index = np.random.randint(len(g_diff) - 1)
                    # g_diff[index] = 1.0*
                    break

                # sample[0] = clip(sample[0] + perturbation_size * g_diff, data_config[dataset]).astype("int")
                # n_sample = sample.copy()
                # print("1-->n_sample:", n_sample)

                n_sample = []
                new_sample = clip(sample[0] + perturbation_size * g_diff, data_config[dataset])
                n_sample.append(new_sample)
                n_sample = np.array(n_sample)
                # print("2-->n_sample:", n_sample)

                n_probs = model_prediction(sess, x, preds, n_sample)[0]  # n_probs: prediction vector
                n_label = np.argmax(n_probs)  # GET index of max value in n_probs
                # print("-", n_label)

                if n_label != basic_label:
                    # print("-->label != n_label")
                    # print("final label:", label, n_label)
                    # print("-->n_sample:", n_sample)
                    ci_num += 1
                    if not get_CI:
                        final_itr_num = itr_num
                    get_CI = True
                    r = True
                    break
                    # return True
        # return False
        print(r, ci_num, seed_num, final_itr_num)
        return r, ci_num, seed_num, final_itr_num

    all_feature_set = list(range(1, data_config[dataset].params + 1))
    cluster_num = 4
    params = data_config[dataset].params
    max_depth = 2
    min_size = 10
    feature_sets = list(itertools.combinations(all_feature_set, k))
    print(feature_sets)

    DT_file_index = 0
    scores = []
    # NOTE: overrides the exhaustive combinations above with a single
    # hand-picked feature set
    feature_sets = [(12, 16)]
    for feature_set in feature_sets:
        print("-->feature_set", feature_set)
        # decision tree
        # tree = all_DT_trees[DT_file_index]
        # tree = dict(eval(tree))

        DT_file_index += 1

        start1 = time.perf_counter()  # time.clock() was removed in Python 3.8
        score, tree = decision_tree_accuracy(feature_set)
        end1 = time.perf_counter()
        f_trees.write(str(tree) + "\n")
        f_time.write(str(end1 - start1) + " ")

        # perturbation
        print("-->tree:", tree)
        tree_conditions = []
        get_conditions(tree, result=tree_conditions, dir=-1, tmp=[])
        print("-->tree_condition:", tree_conditions)
        # print(tree_conditions[15])
        all_result = []
        all_general_result = []
        results = []
        number = 1
        feature_set = list(feature_set)
        all_ci_num = 0
        all_seed_num = 0
        all_itr_num = 0

        limit = 1000
        clusters = get_cluster(dataset, cluster_num, feature_set)

        tree_branch = len(tree_conditions)

        # set tree conditions
        # tree_conditions = [tree_conditions[6]]

        start2 = time.perf_counter()
        for condition in tree_conditions:
            print("sequence:", number, condition)
            result, ci_num, seed_num, itr_num = perturbation(sess, preds, x, feature_set, condition, clusters, limit,
                                                             original_dataset)
            # sess, preds, x, feature_set, condition, clusters, limit
            all_ci_num += ci_num
            all_seed_num += seed_num
            results.append(result)
            print("-->result:", result)
            if result:
                all_itr_num += itr_num
            number += 1
        all_result.append(results)
        true_num = results.count(True)
        print("-->results:", results)
        print("-->counter instance:", all_ci_num, all_seed_num, all_ci_num / float(all_seed_num))
        print("-->iteration num:", all_itr_num / float(true_num))

        # file 2 counter instance
        f_ci.write(str(all_ci_num) + " " + str(all_seed_num) + " " + str(all_ci_num / float(all_seed_num)) + "\n")

        # file 3 iteration num
        if true_num:  # avoid division by zero when no condition succeeded
            f_iteration.write(str(all_itr_num / float(true_num)) + " " + str(true_num / float(tree_branch)) + "\n")

        if len(results) == len(tree_conditions):
            if not any(results):
                print("-->used features:", feature_set)
                print("-->all_results:", all_result)
                print("-->interpretable!")
                break

        end2 = time.perf_counter()
        f_time.write(str(end2 - start2) + "\n")

    return
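
# Usage sketch (hypothetical paths; the f_* arguments are open, writable file
# handles that receive per-feature-set metrics; f_gini is accepted but unused
# in the body above):
#     with open("accuracy", "w") as f_accuracy, open("time", "w") as f_time, \
#             open("ci", "w") as f_ci, open("iteration", "w") as f_iteration, \
#             open("trees", "w") as f_trees, open("gini", "w") as f_gini:
#         interpretability("../datasets/census", "census", max_iter=10, k=2,
#                          n_folds=5, f_gini=f_gini, f_accuracy=f_accuracy,
#                          f_time=f_time, f_ci=f_ci, f_iteration=f_iteration,
#                          f_trees=f_trees)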