Exemple #1
0
def main():
    """Explain one Adult test record with LORE and print rule coverage.

    Fits a 20-tree random forest as the black box, asks LORE (genetic
    neighbourhood) to explain the first record of the test split, prints the
    rule and its deltas, then prints which test records the rule covers and,
    for each covered record, 1 when its black-box label differs from the
    rule's outcome and 0 when it matches.
    """
    # Directory holding the CSV files; also forwarded to lore.explain.
    # Other datasets (german_credit.csv, compas-scores-two-years.csv) can be
    # loaded with their own prepare_* helper instead of the Adult one.
    path_data = '/Users/Harry/Desktop/LORE-master/datasets/'
    dataset = prepare_adult_dataset('adult.csv', path_data)

    features, target = dataset['X'], dataset['y']
    X_train, X_test, y_train, _y_test = train_test_split(
        features, target, test_size=0.2, random_state=0)

    blackbox = RandomForestClassifier(n_estimators=20)
    blackbox.fit(X_train, y_train)

    # Black-box predictions on the records to explain, mapped through
    # dataset['possible_outcomes'].
    X2E = X_test
    outcomes = dataset['possible_outcomes']
    y2E = np.asarray([outcomes[code] for code in blackbox.predict(X2E)])

    target_idx = 0

    explanation, infos = lore.explain(
        target_idx,
        X2E,
        dataset,
        blackbox,
        ng_function=genetic_neighborhood,
        discrete_use_probabilities=True,
        continuous_function_estimation=False,
        returns_infos=True,
        path=path_data,
        sep=';',
        log=False)

    records = build_df2explain(blackbox, X2E, dataset).to_dict('records')
    record = records[target_idx]

    # explanation[0] holds (outcome, premise); explanation[1] the deltas.
    rule = explanation[0]

    print('x = %s' % record)
    print('r = %s --> %s' % (rule[1], rule[0]))
    for delta in explanation[1]:
        print('delta', delta)

    covered = lore.get_covered(rule[1], records, dataset)
    print(len(covered))
    print(covered)

    rule_outcome = rule[0][dataset['class_name']]
    print(rule_outcome, '<<<<')

    # One entry per covered record: 0 on agreement with the rule, 1 otherwise.
    precision = [0 if label == rule_outcome else 1 for label in y2E[covered]]
    print(precision)
    print(np.mean(precision), np.std(precision))
Exemple #2
0
def main():
    """Explain one German-credit test record with LORE and print its evaluation.

    Fits a 20-tree random forest as the black box, explains the test record
    at index 1 with LORE (genetic neighbourhood, continuous function
    estimation enabled), prints the record, the rule and the deltas, and
    finally prints the result of ``evaluate_explanation``.
    """
    path_data = './datasets/'
    dataset = prepare_german_dataset('german_credit.csv', path_data)

    features, target = dataset['X'], dataset['y']
    X_train, X_test, y_train, _y_test = train_test_split(
        features, target, test_size=0.2, random_state=0)

    blackbox = RandomForestClassifier(n_estimators=20)
    blackbox.fit(X_train, y_train)

    X2E = X_test
    target_idx = 1

    explanation, infos = lore.explain(
        target_idx,
        X2E,
        dataset,
        blackbox,
        ng_function=genetic_neighborhood,
        discrete_use_probabilities=True,
        continuous_function_estimation=True,
        returns_infos=True)

    # Single-row frame for the explained record, converted to a plain dict.
    single_row = X2E[target_idx].reshape(1, -1)
    x = build_df2explain(blackbox, single_row, dataset).to_dict('records')[0]

    rule = explanation[0]
    deltas = explanation[1]

    print('x = %s' % x)
    print('r = %s --> %s' % (rule[1], rule[0]))
    for delta in deltas:
        print('delta', delta)

    print('Evaluation')
    # Pull the evaluation inputs out of the info dict in a fixed order.
    info_keys = ('bb_outcome', 'cc_outcome', 'y_pred_bb', 'y_pred_cc', 'dfZ',
                 'dt', 'tree_path', 'leaf_nodes', 'diff_outcome')
    (bb_outcome, cc_outcome, y_pred_bb, y_pred_cc, dfZ, dt, tree_path,
     leaf_nodes, diff_outcome) = [infos[key] for key in info_keys]

    report = evaluate_explanation(x, blackbox, dfZ, dt, tree_path, leaf_nodes,
                                  bb_outcome, cc_outcome, y_pred_bb,
                                  y_pred_cc, diff_outcome, dataset, deltas)
    print(report)
def _collect_rules(nbr_run, explain_rule, max_attempts=4, verbose=False):
    """Collect one rule per run, retrying a failed explanation.

    ``explain_rule`` is a zero-argument callable returning a dict-like rule.
    For each of the ``nbr_run`` runs it is called until it succeeds or
    ``max_attempts`` calls have failed; a run whose attempts all fail
    contributes nothing.

    Returns three parallel lists: the feature names of each rule, the rules
    themselves, and the number of features in each rule.
    """
    features, rules, sizes = [], [], []
    for k in range(nbr_run):
        print('%d, ' % k, end='')
        for attempt in range(max_attempts):
            try:
                rule = explain_rule()
                names = list(rule.keys())
            except Exception as exc:
                # Best-effort: a failed explanation is retried, not fatal.
                if verbose:
                    print('attempt %d failed: %r' % (attempt, exc))
                continue
            features.append(names)
            rules.append(rule)
            sizes.append(len(names))
            break  # one explanation per run is enough
    print('')
    return features, rules, sizes


def _pairwise_stability(features, rules, sizes):
    """Compare every pair (i1 <= i2) of collected rules.

    Returns three lists with one entry per pair: Jaccard similarity of the
    feature sets, 1/0 for identical rules, and the absolute difference in
    the number of features. Each rule is also paired with itself, as in the
    original double loop.
    """
    jaccard, same, deviation = [], [], []
    count = len(features)
    for i1 in range(count):
        for i2 in range(i1, count):
            left, right = set(features[i1]), set(features[i2])
            union = left | right
            # Two empty rules are identical; avoid dividing by zero.
            jaccard.append(len(left & right) / len(union) if union else 1.0)
            same.append(1 if rules[i1] == rules[i2] else 0)
            deviation.append(np.abs(sizes[i1] - sizes[i2]))
    return jaccard, same, deviation


def run_experiment(blackbox,
                   X2E,
                   y2E,
                   idx_record2explain,
                   dataset,
                   anchor_explainer,
                   path_data,
                   verbose=False):
    """Measure how stable LORE and Anchor rules are across repeated runs.

    The record ``X2E[idx_record2explain]`` is explained ``nbr_run`` (3) times
    with LORE and with Anchor. Within each method all collected rules are
    compared pairwise.

    Args:
        blackbox: fitted classifier; its ``predict`` is given to Anchor.
        X2E: records to explain (indexable, rows support ``reshape``).
        y2E: unused here; kept for signature compatibility.
        idx_record2explain: index of the record to explain.
        dataset: dataset description forwarded to ``lore.explain``.
        anchor_explainer: object exposing ``explain_instance``.
        path_data: forwarded to ``lore.explain`` as ``path``.
        verbose: forwarded to ``lore.explain`` as ``log``; also prints
            failed attempts.

    Returns:
        A string of 12 comma-separated ``%.6f`` values: mean and std of the
        Jaccard similarity, rule equality and feature-count deviation for
        LORE, followed by the same six values for Anchor. A method with no
        successful explanation yields ``nan`` entries (``np.mean`` of an
        empty list).

    Changes from the previous version: the LORE loop was an unbounded
    ``while True`` with no exit; both methods now stop retrying after the
    first success, so each run contributes at most one rule; the pairwise
    comparison covers all collected rules instead of a hard-coded 10.
    """
    nbr_run = 3

    def explain_with_lore():
        lore_explanation, _lore_info = lore.explain(
            idx_record2explain,
            X2E,
            dataset,
            blackbox,
            ng_function=genetic_neighborhood,
            discrete_use_probabilities=True,
            continuous_function_estimation=False,
            returns_infos=True,
            path=path_data,
            sep=';',
            log=verbose)
        return lore_explanation[0][1]

    def explain_with_anchor():
        anchor_explanation, _anchor_info = anchor_explainer.explain_instance(
            X2E[idx_record2explain].reshape(1, -1),
            blackbox.predict,
            threshold=0.95)
        return anchor2arule(anchor_explanation)

    print(datetime.datetime.now(), '\tLORE')
    lore_rules = _collect_rules(nbr_run, explain_with_lore, verbose=verbose)

    print(datetime.datetime.now(), '\tAnchor')
    anchor_rules = _collect_rules(nbr_run, explain_with_anchor,
                                  verbose=verbose)

    # Six series in output order: LORE (jaccard, same, deviation), then Anchor.
    series = _pairwise_stability(*lore_rules) + _pairwise_stability(
        *anchor_rules)

    values = []
    for measurements in series:
        values.append(np.mean(measurements))
        values.append(np.std(measurements))

    res = ','.join('%.6f' % value for value in values)
    return res
def run_experiment(blackbox,
                   X2E,
                   y2E,
                   idx_record2explain,
                   dataset,
                   anchor_explainer,
                   path_data,
                   verbose=False):
    """Compare a LORE and an Anchor explanation of one record.

    Args:
        blackbox: fitted classifier exposing ``predict``.
        X2E: records to explain (indexable, rows support ``reshape``).
        y2E: black-box outcomes for ``X2E``; indexed with lists of covered
            positions, so it must support that kind of indexing.
        idx_record2explain: index of the record to explain.
        dataset: dataset description; ``dataset['class_name']`` is read.
        anchor_explainer: object exposing ``explain_instance``.
        path_data: forwarded to ``lore.explain`` as ``path``.
        verbose: forwarded to ``lore.explain`` as ``log``; also prints
            failed attempts.

    Returns:
        A comma-separated string with, for LORE and then for Anchor:
        individual hit, fidelity accuracy, fidelity F1, coverage on ``X2E``,
        mean of the "precision" list, and coverage on the explainer's own
        neighbourhood. Values stay at 0 for an explainer whose attempts all
        fail.

    Changes from the previous version: the Anchor neighbourhood coverage is
    divided by the number of records actually checked (at most 1000) rather
    than by the full neighbourhood size; the Anchor loop stops after its
    first successful attempt instead of always running six times; failures
    are printed when ``verbose`` is set instead of being discarded silently.
    """
    max_attempts = 6  # the previous loop ran attempts 0..5

    # Black-box outcome of the record being explained.
    bb_outcome = y2E[idx_record2explain]

    dfX2E = build_df2explain(blackbox, X2E, dataset).to_dict('records')

    # Defaults reported when an explainer never succeeds.
    individual_hit_lore = 0
    fidelity_acc_lore = fidelity_f1_lore = 0
    coverage_lore = coverage_Z_lore = 0
    precision_lore = [0]
    individual_hit_anchor = 0
    fidelity_acc_anchor = fidelity_f1_anchor = 0
    coverage_anchor = coverage_Z_anchor = 0
    precision_anchor = [0]

    def disagreement(labels, outcome):
        # 1 where a label differs from ``outcome``, 0 where it matches.
        # NOTE(review): the "precision" columns are therefore the share of
        # covered records that DISAGREE with the rule outcome; this is the
        # pre-existing behaviour -- confirm it is what consumers expect.
        return [0 if label == outcome else 1 for label in labels]

    print(datetime.datetime.now(), '\tLORE')
    for attempt in range(max_attempts):
        try:
            lore_explanation, lore_info = lore.explain(
                idx_record2explain,
                X2E,
                dataset,
                blackbox,
                ng_function=genetic_neighborhood,
                discrete_use_probabilities=True,
                continuous_function_estimation=False,
                returns_infos=True,
                path=path_data,
                sep=';',
                log=verbose)

            cc_outcome_lore = lore_explanation[0][0][dataset['class_name']]
            individual_hit_lore = hit_outcome(bb_outcome, cc_outcome_lore)

            y_pred_bb_lore = lore_info['y_pred_bb']
            y_pred_cc_lore = lore_info['y_pred_cc']
            fidelity_acc_lore = accuracy_score(y_pred_bb_lore, y_pred_cc_lore)
            fidelity_f1_lore = f1_score(y_pred_bb_lore, y_pred_cc_lore)

            lrule = lore_explanation[0][1]
            covered_lore = lore.get_covered(lrule, dfX2E, dataset)
            coverage_lore = len(covered_lore) / len(dfX2E)
            precision_lore = disagreement(y2E[covered_lore], cc_outcome_lore)

            dfZ_records = lore_info['dfZ'].to_dict('records')
            covered_Z_lore = lore.get_covered(lrule, dfZ_records, dataset)
            coverage_Z_lore = len(covered_Z_lore) / len(lore_info['dfZ'])

            # Keep retrying until the rule covers something in both sets.
            if coverage_lore > 0.0 and coverage_Z_lore > 0.0:
                break

        except Exception as exc:
            # Best-effort: a failed attempt is retried, not fatal.
            if verbose:
                print('LORE attempt %d failed: %r' % (attempt, exc))

    print(datetime.datetime.now(), '\tAnchor')
    for attempt in range(max_attempts):
        try:
            anchor_explanation, anchor_info = anchor_explainer.explain_instance(
                X2E[idx_record2explain].reshape(1, -1),
                blackbox.predict,
                threshold=0.95)

            Zanchor = anchor_info['state']['raw_data']
            # NOTE(review): both prediction vectors come from the black box,
            # so the two Anchor fidelity scores compare it with itself.
            y_pred_bb_anchor = blackbox.predict(Zanchor)
            y_pred_cc_anchor = blackbox.predict(Zanchor)
            fidelity_acc_anchor = accuracy_score(y_pred_bb_anchor,
                                                 y_pred_cc_anchor)
            fidelity_f1_anchor = f1_score(y_pred_bb_anchor, y_pred_cc_anchor)

            arule = anchor2arule(anchor_explanation)

            covered_anchor = lore.get_covered(arule, dfX2E, dataset)
            coverage_anchor = len(covered_anchor) / len(dfX2E)

            # The rule's outcome is taken from the covered records: the mode
            # for string labels, the rounded mean otherwise; with nothing
            # covered it falls back to the black-box outcome.
            if len(covered_anchor) > 0:
                if isinstance(y2E[0], str):
                    cc_outcome_anchor = mode(y2E[covered_anchor])
                else:
                    cc_outcome_anchor = int(
                        np.round(y2E[covered_anchor].mean()))
            else:
                cc_outcome_anchor = bb_outcome

            individual_hit_anchor = hit_outcome(bb_outcome, cc_outcome_anchor)
            precision_anchor = disagreement(y2E[covered_anchor],
                                            cc_outcome_anchor)

            # Only the first 1000 neighbourhood records are checked, so the
            # coverage must be relative to that truncated set.
            dfZanchor = build_df2explain(blackbox, Zanchor,
                                         dataset).to_dict('records')[:1000]
            covered_Z_anchor = lore.get_covered(arule, dfZanchor, dataset)
            coverage_Z_anchor = len(covered_Z_anchor) / len(dfZanchor)

            break  # a complete Anchor evaluation was obtained

        except Exception as exc:
            # Best-effort: a failed attempt is retried, not fatal.
            if verbose:
                print('Anchor attempt %d failed: %r' % (attempt, exc))

    res = '%d,%.6f,%.6f,%.6f,%.6f,%.6f,%d,%.6f,%.6f,%.6f,%.6f,%.6f' % (
        individual_hit_lore,
        fidelity_acc_lore,
        fidelity_f1_lore,
        coverage_lore,
        np.mean(precision_lore),
        coverage_Z_lore,
        individual_hit_anchor,
        fidelity_acc_anchor,
        fidelity_f1_anchor,
        coverage_anchor,
        np.mean(precision_anchor),
        coverage_Z_anchor,
    )
    return res