def get_labels(tree, test_datas):
    """Label every test sample with both the DNN and the decision tree.

    Returns two parallel lists: the model's argmax labels and the tree's
    predictions. Relies on ``sess``/``x``/``preds``/``feature_sets`` and the
    helpers ``model_prediction``/``predict`` from the surrounding module.
    """
    model_labels = []
    tree_labels = []
    for sample in test_datas:
        # prediction vector for a single-row batch
        probs = model_prediction(sess, x, preds, np.array([sample]))[0]
        model_labels.append(np.argmax(probs))
        tree_labels.append(predict(tree, sample, feature_sets[0]))
    return model_labels, tree_labels
    def perturbation(sess, preds, x, feature_set, condition, clusters, limit, original_dataset):
        """Gradient-sign search for counterexamples to one decision-tree branch.

        Each seed row chosen by ``seed_test_input`` is perturbed along the sign
        of the model gradient (features in ``feature_set`` held fixed) until
        the DNN's predicted label differs from the branch label ``basic_label``
        or ``max_iter`` iterations are exhausted.

        Returns:
            (r, ci_num, seed_num, final_itr_num): whether any counterexample
            was found, how many were found, how many seeds were tried, and the
            cumulative iteration count at the first counterexample.

        NOTE(review): ``tfops``, ``max_iter``, ``input_shape``,
        ``perturbation_size``, ``clip``, ``data_config`` and ``dataset`` come
        from an enclosing scope that is not visible in this chunk — confirm
        they are defined wherever this function is used.
        """
        # grad_0 = gradient_graph(x, preds)
        # print("-->feature_set1:", feature_set)

        # inputs = get_cluster(sess, x, preds, dataset, cluster_num, feature_set, condition)
        # Label of the branch's leaf; seeds are rows classified with it.
        basic_label = condition[-1][0]
        inputs = seed_test_input(clusters, limit, basic_label, feature_set, condition, original_dataset)
        # print("-->inputs:", inputs)

        length = len(inputs)
        print("-->length1", length)

        seed_num = 0  # seeds attempted
        ci_num = 0  # counterexamples found
        r = False  # latched True once any counterexample is found
        itr_num = 0  # total inner iterations across all seeds
        get_CI = False  # True after the first counterexample
        final_itr_num = 0  # itr_num at the first counterexample
        zero_gradient_itr = 0

        # print("-->inputs", inputs)

        for num in range(len(inputs)):
            # print("-->seed iteration: ", num)
            seed_num += 1

            index = inputs[num]
            # Drop the trailing label column; wrap as a (1, n_features) batch.
            sample = original_dataset[index][:-1]
            sample = np.array([sample])
            # sample = X[index:index + 1]
            # sample = X[index]
            # print("-->sample:", sample)
            # probs = model_prediction(sess, x, preds, sample)[0]
            # label = np.argmax(probs)  # index of maximum probability in prediction
            # label1 = original_dataset[index][-1]
            # if label != label1:
            #     print("label != label1")
            # if label != basic_label:
            #     print("label != basic_label")
            # print("-->basic_label:", label)

            for iter in range(max_iter + 1):  # 10
                # print("--> global iteration:", iter)
                itr_num += 1
                # print("--> sample:", sample)

                # Sign of the model-output gradient w.r.t. the input sample.
                s_grad = sess.run(tfops, feed_dict={x: sample})
                g_diff = s_grad[0]
                # print("-->g_diff", g_diff)

                # features in feature_set unchange
                # print("-->index in feature set:", feature_set)
                # Zero the gradient for the tree's features (1-based indices).
                for index in feature_set:
                    g_diff[index - 1] = 0
                # print("-->g_diff", g_diff)

                # All-zero gradient: no free direction left for this seed.
                if np.zeros(input_shape[1]).tolist() == g_diff.tolist():
                    # print("-->0 gradient")
                    # zero_gradient_itr += 1
                    # index = np.random.randint(len(g_diff) - 1)
                    # g_diff[index] = 1.0*
                    break

                # sample[0] = clip(sample[0] + perturbation_size * g_diff, data_config[dataset]).astype("int")
                # n_sample = sample.copy()
                # print("1-->n_sample:", n_sample)

                # NOTE(review): ``sample`` is never advanced to ``n_sample``,
                # so every iteration perturbs the original seed by one step —
                # confirm whether the loop was meant to accumulate steps.
                n_sample = []
                new_sample = clip(sample[0] + perturbation_size * g_diff, data_config[dataset])
                n_sample.append(new_sample)
                n_sample = np.array(n_sample)
                # print("2-->n_sample:", n_sample)

                n_probs = model_prediction(sess, x, preds, n_sample)[0]  # n_probs: prediction vector
                n_label = np.argmax(n_probs)  # GET index of max value in n_probs
                # print("-", n_label)

                if n_label != basic_label:
                    # print("-->label != n_label")
                    # print("final label:", label, n_label)
                    # print("-->n_sample:", n_sample)
                    ci_num += 1
                    if get_CI == False:
                        final_itr_num = itr_num
                    get_CI = True
                    r = True
                    break
                    # return True
        # return False
        print(r, ci_num, seed_num, final_itr_num)
        return r, ci_num, seed_num, final_itr_num
def interpretability(filename, dataset, directorys, k_values, thresholds):
    """Compare DNN predictions with stored decision trees via SPRT detection.

    Restores the trained DNN for ``dataset``, relabels the CSV at ``filename``
    with the model's predictions, then (further below) loads the best decision
    tree per directory/k and runs SPRT-based agreement checks.

    NOTE(review): another function named ``interpretability`` is defined later
    in this file with a different signature; at import time the later
    definition shadows this one — confirm which is intended to be callable.
    """
    # Dataset loaders and per-dataset configuration objects (module-level).
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}

    params = data_config[dataset].params  # number of input features
    X, Y, input_shape, nb_classes = data[dataset]()
    config = tf.ConfigProto()
    conf = data_config[dataset]
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    model = dnn(input_shape, nb_classes)
    preds = model(x)
    # print("-->preds ", preds)
    saver = tf.train.Saver()
    # model_file = "../retrained_models/" + dataset + "_df/999/test.model"
    model_file = "../models/" + dataset + "/test.model"
    saver.restore(sess, model_file)
    # Sign of the gradient of the model output w.r.t. the input.
    grad_0 = gradient_graph(x, preds)
    tfops = tf.sign(grad_0)

    # process dataset
    dataset_list = load_csv(filename)
    del dataset_list[0]  # drop the CSV header row
    for i in range(len(dataset_list[0])):
        str_column_to_float(dataset_list, i)
    print("-->dataset:", np.array(dataset_list))
    print(np.array(dataset_list).shape)
    # Replace each row's ground-truth label with the model's prediction.
    model_dataset = []
    row_data = []
    for d in dataset_list:
        del (d[-1])
        # NOTE(review): row_data stores the SAME list objects as
        # model_dataset, so the label appended below also appears in
        # row_data's rows — confirm this aliasing is intended.
        row_data.append(d)
        probs = model_prediction(sess, x, preds,
                                 np.array([d
                                           ]))[0]  # n_probs: prediction vector
        label = np.argmax(probs)  # GET index of max value in n_probs
        d.append(label)
        model_dataset.append(d)
    print("-->dataset:", np.array(model_dataset))
    print(np.array(model_dataset).shape)
    original_dataset = model_dataset

    ######
    # use DT with highest accuracy
    def get_dt(directory, k_value):
        """Return the stored decision tree with the best accuracy.

        Reads ``<directory>DT_trees`` (one serialized tree per line) and
        ``<directory>accuracy`` (whitespace-separated floats per line, first
        value = accuracy), picks the line with the highest accuracy, and
        returns ``([best_feature_set], best_tree_string)`` where the feature
        set comes from the k-combinations of ``range(1, params + 1)``.
        """
        with open(directory + "DT_trees", 'r') as f:
            all_DT_trees = [line.split("\n")[0] for line in f]

        best_index = 0
        best_accuracy = 0
        with open(directory + "accuracy", 'r') as f:
            for row, line in enumerate(f.readlines()):
                value = [float(s) for s in line.split()]
                if value[0] > best_accuracy:
                    best_accuracy = value[0]
                    best_index = row

        all_feature_set = list(range(1, params + 1))
        feature_sets = list(itertools.combinations(all_feature_set, k_value))
        print("-->selected feature_set:", feature_sets[best_index],
              best_index, best_accuracy)
        return [feature_sets[best_index]], all_DT_trees[best_index]

    def get_labels1(tree):
        """Return (model labels, tree labels) for the raw CSV rows.

        The "original" labels are the model predictions appended to
        ``original_dataset`` rows; the predicted labels come from ``tree``.
        """
        original_labels = [i[-1] for i in original_dataset]
        predict_labels = []
        for d in row_data:
            # NOTE(review): on Python 3 ``map`` returns a lazy iterator, not a
            # list — confirm ``predict`` can consume it (it could index a list
            # on Python 2).
            d = map(int, d)
            # NOTE(review): 2-argument call, while the sibling get_labels uses
            # predict(tree, d, feature_set) — confirm predict's signature.
            label = predict(tree, d)
            predict_labels.append(label)
        return original_labels, predict_labels

    def get_labels(tree, test_datas):
        """Label each sample in ``test_datas`` with the DNN and with ``tree``.

        Returns two parallel lists: the model's argmax labels and the decision
        tree's predictions (restricted to the selected feature set).
        """
        model_labels = []
        tree_labels = []
        for sample in test_datas:
            # prediction vector for a single-row batch
            probs = model_prediction(sess, x, preds, np.array([sample]))[0]
            model_labels.append(np.argmax(probs))
            tree_labels.append(predict(tree, sample, feature_sets[0]))
        return model_labels, tree_labels

    def one_num(list):
        """Return how many entries of ``list`` equal 1."""
        return sum(1 for item in list if item == 1)

    def sprt_three_figure(all_prs, accept_pr, deny_pr, threshold, k_values):
        """Plot three SPRT probability-ratio curves against the accept/deny bounds.

        ``all_prs``/``k_values`` supply one curve and its k per entry; the
        accept/deny bounds are drawn as constant dashed/dotted lines.
        """
        curves = [(all_prs[0], k_values[0], 'red'),
                  (all_prs[1], k_values[1], 'blue'),
                  (all_prs[2], k_values[2], 'green')]
        length = max(len(c[0]) for c in curves)
        xs = list(range(0, length))

        plt.title("threshold=" + str(threshold))
        plt.plot(xs, [accept_pr] * length,
                 color='black', linestyle="--", label="accept_bound")
        plt.plot(xs, [deny_pr] * length,
                 color='black', linestyle=":", label="deny_bound")
        for prs, k_value, color in curves:
            plt.plot(list(range(0, len(prs))), prs,
                     color=color, label="k=" + str(k_value))

        plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15),
                   fancybox=True, shadow=True, ncol=5)
        plt.xlabel('number of detected samples')
        plt.ylabel('rate')
        plt.show()

    def sprt_four_figure(all_prs, accept_pr, deny_pr, threshold, k_values):
        """Plot four SPRT probability-ratio curves against the accept/deny bounds.

        Same layout as sprt_three_figure with a fourth (purple) curve.
        """
        curves = [(all_prs[0], k_values[0], 'red'),
                  (all_prs[1], k_values[1], 'blue'),
                  (all_prs[2], k_values[2], 'green'),
                  (all_prs[3], k_values[3], 'purple')]
        length = max(len(c[0]) for c in curves)
        xs = list(range(0, length))

        plt.title("threshold=" + str(threshold))
        plt.plot(xs, [accept_pr] * length,
                 color='black', linestyle="--", label="accept_bound")
        plt.plot(xs, [deny_pr] * length,
                 color='black', linestyle=":", label="deny_bound")
        for prs, k_value, color in curves:
            plt.plot(list(range(0, len(prs))), prs,
                     color=color, label="k=" + str(k_value))

        plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15),
                   fancybox=True, shadow=True, ncol=5)
        plt.xlabel('number of detected samples')
        plt.ylabel('rate')
        plt.show()

    def sprt_one_figure(prs, accept_pr, deny_pr, threshold, k_value):
        """Plot a single SPRT probability-ratio curve with its bounds."""
        length = len(prs)
        xs = list(range(0, length))
        plt.title("threshold=" + str(threshold) + " (k=" + str(k_value) + ")")
        plt.plot(xs, [accept_pr] * length,
                 color='black', linestyle="--", label="accept_bound")
        plt.plot(xs, [deny_pr] * length,
                 color='black', linestyle=":", label="deny_bound")
        plt.plot(xs, prs, label="k=" + str(k_value))
        plt.legend()
        plt.xlabel('number of detected samples')
        plt.ylabel('rate')
        plt.show()

    # For each (directory, k) pair: load the best stored tree, label 1000
    # random samples with both the DNN and the tree, and run SPRT detection
    # on the agreement between the two label sequences.
    all_prs = []
    random_test_data = generate_random_data(1000, conf)
    print("-->random_test_data:", random_test_data)
    for i in range(0, len(directorys)):
        directory = directorys[i]
        k_value = k_values[i]

        print("-->dir, k", directory, k_value)

        feature_sets, tree = get_dt(directory, k_value)
        print("-->feature_set", feature_sets[0])

        print("-->tree", tree)

        # NOTE(review): eval() on file contents — safe only if the DT_trees
        # files are trusted; consider ast.literal_eval.
        tree = dict(eval(tree))
        original_labels, predict_labels = get_labels(tree, random_test_data)
        same_ratio(original_labels, predict_labels)
        print("-->one num", one_num(original_labels), one_num(predict_labels))

        print("-->original labels", original_labels)
        print("-->predict labels", predict_labels)
        # print("-->sprt result:", sprt_detect(original_labels, predict_labels, threshold, k_value))

        # NOTE(review): ``threshold`` (singular) is not defined in this
        # function — the parameter is ``thresholds``. This raises NameError
        # unless a module-level ``threshold`` exists; confirm.
        if_accept, same_count, total_count, prs, accept_pr, deny_pr = sprt_detect(
            original_labels, predict_labels, k_value, threshold)
        print("-->sprt result:", if_accept, same_count, total_count)
        all_prs.append(prs)

        sprt_detect_multiplethre(original_labels, predict_labels, k_value,
                                 thresholds)

    # test
    # sprt_three_figure(all_prs, accept_pr, deny_pr, threshold, k_values)
    # sprt_four_figure(all_prs, accept_pr, deny_pr, threshold, k_values)
    # sprt_one_figure(prs, accept_pr, deny_pr, k_value, threshold)

    return
def interpretability(filename, dataset, max_iter, k, n_folds, f_gini, f_accuracy, f_time, f_ci, f_iteration, f_trees):
    """Train per-feature-set decision trees and probe them for counterexamples.

    Restores the trained DNN, relabels the CSV at ``filename`` with model
    predictions, then (further below) cross-validates decision trees and runs
    a gradient-guided perturbation search against each tree branch, writing
    results to the ``f_*`` file handles.

    NOTE(review): this redefines the ``interpretability`` declared earlier in
    this file with a different signature — the later definition wins at import
    time; confirm the earlier one is obsolete.
    """
    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    data_config = {"census": census, "credit": credit, "bank": bank}

    X, Y, input_shape, nb_classes = data[dataset]()
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    model = dnn(input_shape, nb_classes)
    preds = model(x)
    # print("-->preds ", preds)
    saver = tf.train.Saver()
    # NOTE(review): hardcoded bank checkpoint even when ``dataset`` is census
    # or credit — the sibling function builds the path from ``dataset``.
    saver.restore(sess, "../models/bank/test.model")
    # Sign of the gradient of the model output w.r.t. the input.
    grad_0 = gradient_graph(x, preds)
    tfops = tf.sign(grad_0)

    dataset_list = load_csv(filename)
    del dataset_list[0]  # drop the CSV header row
    for i in range(len(dataset_list[0])):
        str_column_to_float(dataset_list, i)
    # print("-->dataset:", np.array(dataset_list))
    # print(np.array(dataset_list).shape)

    # Replace each row's ground-truth label with the model's prediction.
    new_dataset = []
    for d in dataset_list:
        del (d[-1])
        # d_plus = clip(np.array([d]), data_config[dataset_name]).astype("int")
        # d = d_plus[0]

        #clip d in dataset_list
        # d = clip(d, data_config[dataset])
        # d = list(np.array(d).astype("int"))

        # print(d, type(d), type(d[0]))
        # d = np.array([d])
        probs = model_prediction(sess, x, preds, np.array([d]))[0]  # n_probs: prediction vector
        label = np.argmax(probs)  # GET index of max value in n_probs
        prob = probs[label]  # NOTE(review): unused — confidence of the prediction
        # d = np.array(d, label)
        d.append(label)
        # print(d)
        new_dataset.append(d)

    # print("-->dataset:", np.array(new_dataset))
    # print(np.array(new_dataset).shape)
    original_dataset = new_dataset

    def decision_tree_accuracy(feature_set):
        """Cross-validate a decision tree on ``feature_set`` and keep the best.

        Builds the clustered training data for ``feature_set``, runs
        ``evaluate_algorithm`` with ``n_folds`` cross-validation, prints and
        writes the mean accuracies to ``f_accuracy``, and returns
        ``(all_scores, best_tree)`` where ``all_scores`` is
        ``[fold_scores, mean_class0_accuracy, mean_class1_accuracy]`` and
        ``best_tree`` is the tree from the highest-scoring fold.
        """
        seed(1)  # deterministic fold shuffling
        original_data = get_DT_cluster(original_dataset, cluster_num, feature_set, params)
        print(len(original_data))
        scores, dif_scores, trees = evaluate_algorithm(original_data, decision_tree, n_folds, max_depth, min_size)
        # print("-->scores, dif_scores:", scores, dif_scores)

        # Compute each mean once instead of recomputing it for the list, the
        # prints and the file writes (the original recomputed them 3x each).
        mean_accuracy = sum(scores) / float(len(scores))
        mean_acc_class0 = sum([s[1] for s in dif_scores]) / float(len(dif_scores))
        mean_acc_class1 = sum([s[2] for s in dif_scores]) / float(len(dif_scores))

        all_scores = [scores, mean_acc_class0, mean_acc_class1]
        print('Scores: %s' % scores)
        print('Mean Accuracy: %.3f%%' % mean_accuracy)
        # print("-->dif_scores:", dif_scores)
        print('0 Mean Accuracy: %.3f%%' % mean_acc_class0)
        print('1 Mean Accuracy: %.3f%%' % mean_acc_class1)

        f_accuracy.write(str(mean_accuracy) + " ")
        f_accuracy.write(str(mean_acc_class0) + " ")
        f_accuracy.write(str(mean_acc_class1) + "\n")

        max_index = scores.index(max(scores))

        return all_scores, trees[max_index]

    def perturbation(sess, preds, x, feature_set, condition, clusters, limit, original_dataset):
        """Gradient-sign search for counterexamples to one tree branch.

        Every seed row returned by ``seed_test_input`` is perturbed along the
        sign of the model gradient — with the tree's own features held fixed —
        until the DNN's label leaves the branch label or ``max_iter``
        iterations run out.

        Returns:
            (found, ci_num, seed_num, first_ci_iters): whether any
            counterexample was found, how many, how many seeds were tried,
            and the cumulative iteration count at the first counterexample.

        NOTE(review): the seed is never advanced to its perturbed version, so
        each inner iteration perturbs the same starting point — confirm
        whether the steps were meant to accumulate.
        """
        basic_label = condition[-1][0]  # label at the branch's leaf
        candidates = seed_test_input(clusters, limit, basic_label, feature_set, condition, original_dataset)
        print("-->length1", len(candidates))

        seed_num = 0          # seeds attempted
        ci_num = 0            # counterexamples found
        found = False         # latched once any counterexample is found
        total_iters = 0       # inner iterations across all seeds
        first_ci_iters = 0    # total_iters when the first counterexample hit
        seen_ci = False

        for row_index in candidates:
            seed_num += 1
            # Strip the trailing label column; shape (1, n_features).
            seed = np.array([original_dataset[row_index][:-1]])

            for _ in range(max_iter + 1):
                total_iters += 1

                # Sign of the model-output gradient at the seed.
                grad_sign = sess.run(tfops, feed_dict={x: seed})[0]

                # Hold the tree's features fixed (1-based feature indices).
                for fixed in feature_set:
                    grad_sign[fixed - 1] = 0

                # Nothing left to perturb for this seed.
                if np.zeros(input_shape[1]).tolist() == grad_sign.tolist():
                    break

                perturbed = np.array(
                    [clip(seed[0] + perturbation_size * grad_sign, data_config[dataset])])

                new_label = np.argmax(model_prediction(sess, x, preds, perturbed)[0])

                if new_label != basic_label:
                    ci_num += 1
                    if not seen_ci:
                        first_ci_iters = total_iters
                    seen_ci = True
                    found = True
                    break

        print(found, ci_num, seed_num, first_ci_iters)
        return found, ci_num, seed_num, first_ci_iters

    # Decision-tree hyperparameters and the k-sized feature-set candidates.
    all_feature_set = list(range(1, data_config[dataset].params + 1))
    cluster_num = 4
    params = data_config[dataset].params
    max_depth = 2
    min_size = 10
    feature_sets = list(itertools.combinations(all_feature_set, k))
    print(feature_sets)

    DT_file_index = 0
    scores = []
    # NOTE(review): this hardcoded pair overrides the k-combinations computed
    # above, so only feature set (12, 16) is ever evaluated — confirm this
    # debug override should remain.
    feature_sets = [(12, 16)]
    for feature_set in feature_sets:
        print("-->feature_set", feature_set)
        # decision tree
        # tree = all_DT_trees[DT_file_index]
        # tree = dict(eval(tree))

        DT_file_index += 1

        # NOTE(review): time.clock() was removed in Python 3.8 — use
        # time.perf_counter() if this must run on modern Python.
        start1 = time.clock()
        score, tree = decision_tree_accuracy(feature_set)
        end1 = time.clock()
        f_trees.write(str(tree) + "\n")
        f_time.write(str(end1 - start1) + " ")

        # perturbation: enumerate every root-to-leaf branch of the tree and
        # search each one for counterexamples.
        print("-->tree:", tree)
        tree_conditions = []
        # get_conditions fills tree_conditions in place via ``result``.
        get_conditions(tree, result=tree_conditions, dir=-1, tmp=[])
        print("-->tree_condition:", tree_conditions)
        # print(tree_conditions[15])
        all_result = []
        all_general_result = []
        results = []
        number = 1
        feature_set = list(feature_set)
        all_ci_num = 0    # counterexamples over all branches
        all_seed_num = 0  # seeds tried over all branches
        all_itr_num = 0   # iterations summed over branches with a hit

        limit = 1000
        clusters = get_cluster(dataset, cluster_num, feature_set)

        tree_brench = len(tree_conditions)

        # set tree conditions
        # tree_conditions = [tree_conditions[6]]

        start2 = time.clock()
        for condition in tree_conditions:
            print("sequence:", number, condition)
            result, ci_num, seed_num, itr_num = perturbation(sess, preds, x, feature_set, condition, clusters, limit,
                                                             original_dataset)
            # sess, preds, x, feature_set, condition, clusters, limit
            all_ci_num += ci_num
            all_seed_num += seed_num
            results.append(result)
            print("-->result:", result)
            if result == True:
                all_itr_num += itr_num
            number += 1
        all_result.append(results)
        true_num = results.count(True)
        print("-->results:", results)
        # NOTE(review): ZeroDivisionError if no seeds were tried
        # (all_seed_num == 0) or no branch produced a counterexample
        # (true_num == 0) — confirm these cases cannot occur.
        print("-->counter instance:", all_ci_num, all_seed_num, all_ci_num / float(all_seed_num))
        print("-->iteration num:", all_itr_num / float(true_num))

        # file 2 counter instance
        f_ci.write(str(all_ci_num) + " " + str(all_seed_num) + " " + str(all_ci_num / float(all_seed_num)) + "\n")

        # file 3 iteration num
        f_iteration.write(str(all_itr_num / float(true_num)) + " " + str(true_num / float(tree_brench)) + "\n")

        # No branch has a counterexample: the tree's explanation holds.
        if len(results) == len(tree_conditions):
            if not any(results):
                print("-->used features:", feature_set)
                print("-->all_results:", all_result)
                print("-->interpretable!")
                break

        end2 = time.clock()
        f_time.write(str(end2 - start2) + "\n")

    return