Example no. 1
def run_trepan(X_train, X_test, y_train, y_test, discrete_list, dataset_par,
               model):
    X_train, X_test = X_train.to_numpy(), X_test.to_numpy()
    n_class = dataset_par['classes']

    oracle = Oracle(model, n_class, X_train, discrete_list)
    tree_obj = Tree(oracle)

    # build tree with TREPAN
    root = tree_obj.build_tree()
    tree_obj.assign_levels(root, 0)

    # tree_obj.print_tree_levels(root)
    final_labels = tree_obj.leaf_values(root)
    print(final_labels)
    tree_obj.print_tree_rule(root)
    final_rules = tree_obj.rule_list(root)
    print(final_rules)

    # calculate metrics
    num_test_examples = X_test.shape[0]

    predi_torch = np.argmax(model.predict(X_test), axis=1)
    perturbed_data = perturbator(X_test)

    rule_labels = []
    perturbed_labels = []
    for i in range(num_test_examples):
        instance = X_test[i, :]
        instance_label = tree_obj.predict(instance, root)
        rule_labels.append(instance_label)
        perturbed_instance = perturbed_data[i, :]
        perturbed_labels.append(tree_obj.predict(perturbed_instance, root))

    rule_write('TREPAN_', final_rules, dataset_par)
    correctness = accuracy_score(y_test, rule_labels)
    fidelity = accuracy_score(predi_torch, rule_labels)
    robustness = accuracy_score(rule_labels, perturbed_labels)
    rule_n = len(final_rules)
    avg_length = sum(
        sum(len(d['n']) for d in item) for item in final_rules
    ) / rule_n
    class_fraction = len(set(final_labels)) / n_class
    # Rules read off a decision tree partition the whole input space and are
    # mutually exclusive, so completeness is 1 and overlap is 0 by
    # construction.
    print("Completeness of the ruleset is: " + str(1))
    print("Correctness of the ruleset is: " + str(correctness))
    print("Fidelity of the ruleset is: " + str(fidelity))
    print("Robustness of the ruleset is: " + str(robustness))
    print("Number of rules : " + str(rule_n))
    print("Average rule length: " + str(avg_length))
    print("Fraction overlap: " + str(0))
    print("Fraction of classes: " + str(class_fraction))
    return [
        1, correctness, fidelity, robustness, rule_n, avg_length, 0,
        class_fraction
    ]
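
A minimal sketch of how run_trepan might be invoked. The file paths, the
'class' column name, and the 'dataset' key are illustrative assumptions; only
the 'classes' key is read by the function itself, and rule_write presumably
consumes the rest of dataset_par.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model

# Hypothetical tabular dataset and pre-trained Keras classifier.
df = pd.read_csv('dataset.csv')                   # placeholder path
X, y = df.drop(columns=['class']), df['class'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
model = load_model('model.h5')                    # placeholder path

dataset_par = {'classes': len(np.unique(y)),      # read by run_trepan
               'dataset': 'my_dataset'}           # assumed key for rule_write
discrete_list = []                                # discrete features (format assumed)

metrics = run_trepan(X_train, X_test, y_train, y_test, discrete_list,
                     dataset_par, model)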
Example no. 2
def refne_run(X_train, X_test, y_test, discrete_attributes,
              continuous_attributes, dataset_par, model, save_graph):

    label_col = dataset_par['output_name']
    discrete_attributes = column_translator(X_train, label_col,
                                            discrete_attributes)
    continuous_attributes = column_translator(X_train, label_col,
                                              continuous_attributes)
    all_column_combos = column_combos(categorical_var=discrete_attributes,
                                      continuous_var=continuous_attributes)
    synth_samples = X_train.shape[0]
    xSynth = synthetic_data_generator(X_train, synth_samples)
    # DataFrame.append was removed in pandas 2.x; concat is the replacement.
    xSynth = pd.concat([xSynth, X_train], ignore_index=True)
    ySynth = np.argmax(model.predict(xSynth), axis=1)
    n_class = dataset_par['classes']

    # Discretizing the continuous attributes
    attr_list = xSynth.columns.tolist()
    xSynth[label_col] = ySynth

    interv_dict = {}
    for attr in attr_list:
        if attr in continuous_attributes:
            interv = interval_definer(data=xSynth, attr=attr, label=label_col)
            xSynth[attr] = discretizer(xSynth[attr], interv)
            X_train[attr] = discretizer(X_train[attr], interv)
            interv_dict[attr] = interv
        else:
            unique_values = np.unique(xSynth[attr]).tolist()
            interv_dict[attr] = [
                list(a) for a in zip(unique_values, unique_values)
            ]

    final_rules = rule_maker(xSynth, interv_dict, all_column_combos, label_col,
                             model)

    # Calculation of metrics
    predicted_labels = np.argmax(model.predict(X_test), axis=1)
    metrics = rule_metrics_calculator(X_test, y_test, predicted_labels,
                                      final_rules, n_class)
    rule_write('REFNE_', final_rules, dataset_par)
    if save_graph:
        attack_list, final_rules = attack_definer(X_test,
                                                  final_rules,
                                                  merge_rules=True)
        create_empty_file('REFNE_' + dataset_par['dataset'] + "_attack_list")
        save_list(attack_list,
                  'REFNE_' + dataset_par['dataset'] + "_attack_list")
        create_empty_file('REFNE_' + dataset_par['dataset'] + "_final_rules")
        save_list(final_rules,
                  'REFNE_' + dataset_par['dataset'] + "_final_rules")

    return metrics
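
synthetic_data_generator is a helper from the surrounding project. In REFNE,
synthetic instances are drawn by sampling each attribute independently from
the training data, so a minimal sketch under that assumption (the real helper
may additionally enforce value ranges or deduplicate):

import pandas as pd

def synthetic_data_generator_sketch(x, n_samples):
    # Sample every column independently from its empirical values; the
    # independence deliberately breaks inter-attribute correlations, and the
    # network (model.predict above) then labels the synthetic rows.
    return pd.DataFrame({
        col: x[col].sample(n=n_samples, replace=True).to_numpy()
        for col in x.columns
    })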
Example no. 3
def run_c45_pane(X_train, X_test, y_test, dataset_par, model, labels):
    print(labels)
    x_tot, y_tot, clf = create_tree(X_train, model)  # only clf is used below

    # Optionally export/print the extracted rules:
    # rules = export_text(clf)
    # print_decision_tree(clf)

    predicted_labels = clf.predict(X_test)
    model_test_labels = np.argmax(model.predict(X_test), axis=1)

    perturbed_data = perturbator(X_test)
    perturbed_labels = clf.predict(perturbed_data)

    num_test_examples = X_test.shape[0]
    depths = get_node_depths(clf.tree_)
    rule_write('C45_', clf, dataset_par)

    return metric_calculator(predicted_labels, y_test,
                             model_test_labels, perturbed_labels, depths,
                             len(labels), num_test_examples)
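
create_tree and get_node_depths are project helpers. A minimal sketch of the
pedagogical step they are assumed to perform: fit a scikit-learn decision tree
on the network's own predictions, then read node depths off the fitted tree
(leaf depths approximate rule lengths):

import numpy as np
from sklearn.tree import DecisionTreeClassifier

def create_tree_sketch(x_train, model):
    # Label the data with the network so the tree mimics the model rather
    # than the ground truth (the pedagogical setting).
    y_model = np.argmax(model.predict(x_train), axis=1)
    clf = DecisionTreeClassifier().fit(x_train, y_model)
    return x_train, y_model, clf

def get_node_depths_sketch(tree):
    # Depth of every node, derived from the children arrays of a fitted
    # sklearn Tree object (clf.tree_); a child index of -1 marks a leaf.
    depths = np.zeros(tree.node_count, dtype=int)
    stack = [(0, 0)]
    while stack:
        node, depth = stack.pop()
        depths[node] = depth
        if tree.children_left[node] != -1:
            stack.append((tree.children_left[node], depth + 1))
            stack.append((tree.children_right[node], depth + 1))
    return depths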
Example no. 4
def rxren_run(X_train, X_test, y_train, y_test, dataset_par, model,
              save_graph):
    y = np.concatenate((y_train, y_test), axis=0)
    column_lst = X_train.columns.tolist()
    column_dict = {i: column_lst[i] for i in range(len(column_lst))}

    X_train, X_test = X_train.to_numpy(), X_test.to_numpy()
    n_class = dataset_par['classes']
    # This will be used for calculating the final metrics
    predicted_labels = np.argmax(model.predict(X_test), axis=1)

    # model = load_model(MODEL_NAME)
    # dtype=object: the per-layer weight arrays have different shapes.
    weights = np.array(model.get_weights(), dtype=object)
    results = np.argmax(model.predict(X_train), axis=1)

    correct_mask = [results[i] == y_train[i] for i in range(len(y_train))]
    correctX = X_train[correct_mask]
    print('Number of correctly classified examples', correctX.shape)
    correcty = y_train[correct_mask]
    acc = accuracy_score(results, y_train)
    print("Accuracy of original model on the train dataset: ", acc)
    # predicted_labels already holds the argmax predictions on X_test.
    test_acc = accuracy_score(predicted_labels, y_test)
    print("Accuracy of original model on the test dataset: ", test_acc)

    miss_list, ins_index, new_accuracy, err = network_pruning(
        weights,
        correctX,
        correcty,
        X_test,
        y_test,
        test_acc,
        in_item=dataset_par)

    significant_index = [
        i for i in range(weights[0].shape[0]) if i not in ins_index
    ]
    significant_columns = {
        i: v
        for i, v in column_dict.items() if i in significant_index
    }

    print("Accuracy of pruned network", new_accuracy)
    rule_limits = rule_limits_calculator(correctX,
                                         correcty,
                                         miss_list,
                                         significant_index,
                                         err,
                                         alpha=0.5)
    rule_limits = rule_formatter(rule_combiner(rule_limits))

    rule_limits, rule_acc = rule_pruning(X_test, y_test, rule_limits, n_class)

    y_test_predicted = predicted_labels  # argmax predictions on X_test, from above
    rule_simplifier = True
    while rule_simplifier:
        new_rule_acc, rule_limits = rule_evaluator(X_test, y_test_predicted,
                                                   rule_limits, rule_acc,
                                                   np.unique(y))
        if sum(new_rule_acc.values()) > sum(rule_acc.values()):
            rule_acc = new_rule_acc
        else:
            rule_simplifier = False

    final_rules = rule_sorter(rule_limits, X_test, significant_columns)

    X_test, _ = input_delete(ins_index, X_test)
    X_test = pd.DataFrame(X_test, columns=significant_columns.values())
    metrics = rule_metrics_calculator(X_test, y_test, predicted_labels,
                                      final_rules, n_class)
    rule_write('RxREN_', final_rules, dataset_par)
    if save_graph:
        attack_list, final_rules = attack_definer(X_test, final_rules)
        create_empty_file('RxREN_' + dataset_par['dataset'] + "_attack_list")
        save_list(attack_list,
                  'RxREN_' + dataset_par['dataset'] + "_attack_list")
        create_empty_file('RxREN_' + dataset_par['dataset'] + "_final_rules")
        save_list(final_rules,
                  'RxREN_' + dataset_par['dataset'] + "_final_rules")

    return metrics
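
input_delete is likewise a project helper; judging from its call sites it
drops the columns of the pruned input neurons. A minimal sketch under that
assumption (the second return value is assumed to be an optional companion
array, e.g. a first-layer weight matrix pruned to match):

import numpy as np

def input_delete_sketch(index_list, x, in_weight=None):
    # Remove the insignificant input columns; if a weight matrix is supplied,
    # drop the matching rows as well.
    x_pruned = np.delete(x, index_list, axis=1)
    w_pruned = (np.delete(in_weight, index_list, axis=0)
                if in_weight is not None else None)
    return x_pruned, w_pruned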
Example no. 5
def rxncn_run(X_train, X_test, y_train, y_test, dataset_par, model,
              save_graph):
    # Alpha is meant to equal the fraction of input instances belonging to
    # the least-represented class in the dataset; here it is hardcoded to 0.1.
    alpha = 0.1
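    # A hedged sketch of the computation described above (assumes integer
    # class labels in y_train); the code keeps the fixed 0.1 instead:
    # alpha = np.bincount(y_train).min() / len(y_train)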
    n_class = dataset_par['classes']
    X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.33)
    print(X_train.columns)

    column_lst = X_train.columns.tolist()
    column_dict = {i: column_lst[i] for i in range(len(column_lst))}

    y = np.concatenate((y_train, y_test), axis=0)
    X_train, X_test, X_val = X_train.to_numpy(), X_test.to_numpy(), X_val.to_numpy()

    # dtype=object: the per-layer weight arrays have different shapes.
    weights = np.array(model.get_weights(), dtype=object)
    # predict_classes was removed in modern Keras; use the project's reshape
    # helper on the raw predictions instead, as done below.
    results = prediction_reshape(model.predict(X_train))

    # This will be used for calculating the final metrics
    predicted_labels = prediction_reshape(
        model.predict(np.concatenate([X_train, X_test, X_val], axis=0)))

    correct_mask = [results[i] == y_train[i] for i in range(len(y_train))]
    correctX = X_train[correct_mask]
    print('Number of correctly classified examples', correctX.shape)
    correcty = y_train[correct_mask]
    acc = accuracy_score(results, y_train)
    print("Accuracy of original model on the train dataset: ", acc)
    val_pred = prediction_reshape(model.predict(X_val))
    val_acc = accuracy_score(val_pred, y_val)
    print("Accuracy of original model on the validation dataset: ", val_acc)

    miss_dict, pruned_x, pruned_w, err, sig_cols = network_pruning(
        weights, correctX, correcty, X_val, y_val, val_acc, column_dict,
        in_item=dataset_par)

    correct_dict = correct_examples_finder(pruned_x, correcty, dataset_par,
                                           sig_cols, in_weight=pruned_w)
    final_dict = combine_dict_list(miss_dict, correct_dict)

    rule_limits = rule_limits_calculator(pruned_x, correcty, final_dict, sig_cols, alpha=alpha)
    rule_limits = rule_formatter(rule_limits)

    if len(rule_limits) > 0:
        insignificant_neurons = [
            key for key, value in column_dict.items()
            if value not in sig_cols.values()
        ]
        X_test, _ = input_delete(insignificant_neurons, X_test)
        X_train, _ = input_delete(insignificant_neurons, X_train)
        X_val, _ = input_delete(insignificant_neurons, X_val)
        X_tot = np.concatenate([X_train, X_test, X_val], axis=0)
        y_tot = np.concatenate([y_train, y_test, y_val], axis=0)

        rule_limits, rule_accuracy = rule_pruning(X_val, y_val, rule_limits, n_class)
        final_rules = rule_sorter(rule_limits, X_test, sig_cols)

        y_val_predicted = model_pruned_prediction([], X_val, dataset_par, in_weight=pruned_w)
        X_val = pd.DataFrame(X_val, columns=sig_cols.values())
        rule_simplifier = True
        while rule_simplifier:
            new_rule_acc, final_rules = rule_evaluator(X_val, y_val_predicted,
                                                       final_rules,
                                                       rule_accuracy,
                                                       np.unique(y))
            if sum(new_rule_acc.values()) > sum(rule_accuracy.values()):
                rule_accuracy = new_rule_acc
            else:
                rule_simplifier = False

        X_tot = pd.DataFrame(X_tot, columns=sig_cols.values())
        # print(final_rules)
        metrics = rule_metrics_calculator(X_tot, y_tot, predicted_labels, final_rules, n_class)
        rule_write('RxNCM_', final_rules, dataset_par)
        if save_graph:
            attack_list, final_rules = attack_definer(X_tot, final_rules)
            create_empty_file('RxNCM_' + dataset_par['dataset'] + "_attack_list")
            save_list(attack_list, 'RxNCM_' + dataset_par['dataset'] + "_attack_list")
            create_empty_file('RxNCM_' + dataset_par['dataset'] + "_final_rules")
            save_list(final_rules, 'RxNCM_' + dataset_par['dataset'] + "_final_rules")

        return metrics
    else:
        # No rules were extracted: return zeroed metrics, matching the
        # 8-value layout returned by the other runners.
        return np.zeros(8).tolist()