Beispiel #1
0
 def predict(X):
     """Run ``pyyadt.predict`` on ``X`` with the tree from the outer scope.

     ``dt``, ``class_name``, ``features_type``, ``discrete`` and
     ``continuous`` are not parameters; they are looked up in the scope
     surrounding this function.

     Returns the two values produced by ``pyyadt.predict`` as a tuple
     (elsewhere in this file they are named the predictions and the leaf
     nodes).
     """
     # Unpacking into two names keeps the requirement that exactly two
     # values come back from pyyadt.
     predictions, leaf_nodes = pyyadt.predict(
         dt, X, class_name, features_type, discrete, continuous)
     return predictions, leaf_nodes
Beispiel #2
0
def explain(idx_record2explain,
            X2E,
            dataset,
            blackbox,
            ng_function=genetic_neighborhood,
            discrete_use_probabilities=False,
            continuous_function_estimation=False,
            returns_infos=False):
    """Explain one black-box prediction with a locally fitted decision tree.

    Steps: compute feature values from ``X2E``, build the record to explain,
    generate a neighborhood with ``ng_function``, fit a pyyadt tree on that
    neighborhood, then read the rule covering the record and the
    counterfactuals for the other outcome(s).

    Side effects: seeds the ``random`` module with 0 and stores the computed
    feature values in ``dataset['feature_values']``.

    Args:
        idx_record2explain: index of the record to explain, forwarded to
            ``dataframe2explain``.
        X2E: data the record is taken from.
        dataset: dict providing 'class_name', 'columns', 'discrete',
            'continuous', 'features_type', 'label_encoder',
            'possible_outcomes' and 'name'.
        blackbox: model exposing ``predict``.
        ng_function: callable ``(dfZ, x, blackbox, dataset) -> (dfZ, Z)``
            producing the neighborhood.
        discrete_use_probabilities: forwarded to ``calculate_feature_values``.
        continuous_function_estimation: forwarded to
            ``calculate_feature_values``.
        returns_infos: when true, also return a dict of intermediate results.

    Returns:
        ``(rule, counterfactuals)``, or ``((rule, counterfactuals), infos)``
        when ``returns_infos`` is true.
    """
    random.seed(0)

    class_name = dataset['class_name']
    columns = dataset['columns']
    discrete = dataset['discrete']
    continuous = dataset['continuous']
    features_type = dataset['features_type']
    label_encoder = dataset['label_encoder']
    possible_outcomes = dataset['possible_outcomes']

    # Dataset preprocessing: the result is stored on the caller's dict.
    dataset['feature_values'] = calculate_feature_values(
        X2E, columns, class_name, discrete, continuous,
        discrete_use_probabilities=discrete_use_probabilities,
        continuous_function_estimation=continuous_function_estimation)

    dfZ, x = dataframe2explain(X2E, dataset, idx_record2explain, blackbox)

    # Generate the neighborhood around the record.
    dfZ, Z = ng_function(dfZ, x, blackbox, dataset)

    # Build the decision tree (the dot graph is not used here).
    dt, _dt_dot = pyyadt.fit(
        dfZ, class_name, columns, features_type, discrete, continuous,
        filename=dataset['name'], path='./', sep=';', log=False)

    # Apply the black box and the decision tree to the record to explain.
    x_row = x.reshape(1, -1)
    bb_outcome = blackbox.predict(x_row)[0]
    record = build_df2explain(blackbox, x_row, dataset).to_dict('records')[0]
    cc_outcome, rule, tree_path = pyyadt.predict_rule(
        dt, record, class_name, features_type, discrete, continuous)

    # Apply the black box and the decision tree to the neighborhood.
    y_pred_bb = blackbox.predict(Z)
    y_pred_cc, leaf_nodes = pyyadt.predict(
        dt, dfZ.to_dict('records'), class_name, features_type, discrete,
        continuous)

    # Re-encode the tree's outputs when the class has a label encoder.
    if class_name in label_encoder:
        class_encoder = label_encoder[class_name]
        cc_outcome = class_encoder.transform(np.array([cc_outcome]))[0]
        y_pred_cc = class_encoder.transform(y_pred_cc)

    # Extract counterfactuals.
    diff_outcome = get_diff_outcome(bb_outcome, possible_outcomes)
    counterfactuals = pyyadt.get_counterfactuals(
        dt, tree_path, rule, diff_outcome, class_name, continuous,
        features_type)

    explanation = (rule, counterfactuals)
    if not returns_infos:
        return explanation

    infos = {
        'bb_outcome': bb_outcome,
        'cc_outcome': cc_outcome,
        'y_pred_bb': y_pred_bb,
        'y_pred_cc': y_pred_cc,
        'dfZ': dfZ,
        'Z': Z,
        'dt': dt,
        'tree_path': tree_path,
        'leaf_nodes': leaf_nodes,
        'diff_outcome': diff_outcome,
    }
    return explanation, infos
Beispiel #3
0
def main():
    """Fit a pyyadt decision tree on a two-moons sample and print one rule.

    The demo generates 1000 noisy two-moons points, standardizes them, keeps
    the 80% training split, fits a tree with ``pyyadt.fit``, writes the tree
    image to 'pyyadt_test.png', and prints, for record 11: the record, the
    predicted outcome, the rule, the tree path, and the first ten entries
    returned by ``pyyadt.get_covered_record_index``.

    Uses only constructs that behave the same on Python 2 and Python 3
    (single-argument ``print(...)``, ``zip`` instead of ``map(None, ...)``).
    """
    random.seed(0)

    X, y = make_moons(n_samples=1000, noise=0.3, random_state=0)
    X = StandardScaler().fit_transform(X)

    # Only the training split is used by this demo.
    X_train, _, y_train, _ = train_test_split(X,
                                              y,
                                              test_size=0.2,
                                              random_state=0)

    class_name = 'class'
    columns = ['class', 'X0', 'X1']
    features_type = {'X0': 'double', 'X1': 'double', 'class': 'string'}
    discrete = ['class']
    continuous = ['X0', 'X1']

    # Class label first, so the array's column order matches ``columns``.
    yX = np.concatenate((y_train.reshape(-1, 1), X_train), axis=1)
    df = pd.DataFrame(yX, columns=columns)
    _, label_encoder = label_encode(df, discrete)

    # Cast discrete / integer-typed columns to int, column by column.
    column_data = []
    for i, col in enumerate(columns):
        values = yX[:, i]
        if col in discrete or features_type[col] == 'integer':
            values = values.astype(int)
        column_data.append(values)

    # Transpose columns into rows. All columns have the same length, so
    # ``zip`` yields the same rows as the Python 2-only ``map(None, *cols)``.
    rows = [list(row) for row in zip(*column_data)]
    dfZ = pd.DataFrame(data=rows, columns=columns)
    dfZ = label_decode(dfZ, discrete, label_encoder)

    dt, dt_dot = pyyadt.fit(dfZ,
                            class_name,
                            columns,
                            features_type,
                            discrete,
                            continuous,
                            filename='pyyadt_test',
                            path='./',
                            sep=';',
                            log=False)

    dt_dot.write_png('pyyadt_test.png')

    # Only the leaf nodes are needed below; the predictions are discarded.
    _, leaf_nodes = pyyadt.predict(dt, dfZ.to_dict('records'), class_name,
                                   features_type, discrete, continuous)

    idx_record2explain = 11
    # Fresh records list, so the printed record is untouched by the
    # prediction call above.
    record = dfZ.to_dict('records')[idx_record2explain]
    print(record)

    cc_outcome, rule, tree_path = pyyadt.predict_rule(
        dt, record, class_name, features_type, discrete, continuous)

    print(cc_outcome)
    print(rule)
    print(tree_path)

    print(pyyadt.get_covered_record_index(tree_path, leaf_nodes)[:10])