Example #1
def explain_wrap(index, columns):
    print("DOING {}".format(index))
    global count
    x, y = X_valid[index], Y_valid[index]
    explainer_xgb = explain(xg_predicted, data=X_train, y=Y_train, label="XGBoost model",
        predict_function=lambda X: xgmodel.predict_proba(X.to_numpy())[:, 1], variable_names=column_names)
    explainer_linear = explain(lin_predicted, data=X_train, y=Y_train, label="Logistic model",
        predict_function=lambda X: logmodel.predict_proba(X.to_numpy())[:, 1], variable_names=column_names)
    cp_xgb = individual_variable_profile(explainer_xgb, x, y)
    cp_lin = individual_variable_profile(explainer_linear, x, y)
    plot(cp_xgb, cp_lin, selected_variables=columns, destination="browser", show_observations=False)
    # IFrame(src="./_plot_files/plots{}.html".format(count), width=700, height=600)
    # with open("_plot_files/plots{}.html".format(count), 'r') as myfile:
    #     display(HTML(myfile.read()))
    count += 1
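The wrapper above relies on module-level objects (X_train, Y_train, X_valid, Y_valid, fitted xgmodel and logmodel, column_names, and count). A minimal sketch of how it might be driven; the column names are placeholders, not values from the original:

# Hypothetical driver; assumes the globals listed above are already defined.
count = 0
for i in range(3):
    explain_wrap(i, columns=["age", "bmi"])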
Example #2
 def test_iris_classification_1(self):
     cp_profile = individual_variable_profile(self.explainer_rf,
                                              self.X[1],
                                              y=self.y[1])
     self.assertTrue(isinstance(cp_profile, CeterisParibus))
     self.assertIsNotNone(cp_profile.new_observation_true)
     self.assertEqual(len(cp_profile._predict_function(self.X[:3])), 3)
Example #3
 def test_regression_2(self):
     n = 3
     sample = select_sample(self.X_train, n=n)
     cp2 = individual_variable_profile(self.explainer_rf,
                                       sample,
                                       variables=['TAX', 'CRIM'])
     self.assertEqual(len(cp2.profile), cp2._grid_points * 2 * n)
Example #4
 def test_iris_classification_6(self):
     X_data = pd.DataFrame(self.X[5]).T
     cp_profile = individual_variable_profile(self.explainer_rf, X_data,
                                              self.y[5])
     self.assertEqual(
         len(cp_profile.profile),
         len(self.iris['feature_names']) * cp_profile._grid_points)
Example #5
 def test_keras_3(self):
     cp = individual_variable_profile(
         self.explainer_keras,
         self.x_train[5],
         y=self.y_train[5],
         variables=["CRIM", "ZN", "AGE", "INDUS", "B"])
     self.assertEqual(len(cp.new_observation_true), 1)
     self.assertEqual(len(cp.profile), cp._grid_points * 5)
Example #6
 def test_keras_2(self):
     cp = individual_variable_profile(
         self.explainer_keras,
         pd.DataFrame(self.x_train[:10]),
         y=list(self.y_train[:10]),
         variables=["CRIM", "ZN", "AGE", "INDUS", "B"])
     self.assertEqual(len(cp.new_observation_true), 10)
     self.assertEqual(len(cp.profile), cp._grid_points * 5 * 10)
Example #7
 def test_regression_1(self):
     cp_profile = individual_variable_profile(self.explainer_rf,
                                              self.X_train[0],
                                              y=self.y_train[0],
                                              variables=['TAX', 'CRIM'])
     self.assertIsNotNone(cp_profile.new_observation_true)
     self.assertEqual(len(cp_profile.selected_variables), 2)
     self.assertEqual(len(cp_profile.profile), cp_profile._grid_points * 2)
     self.assertIn("TAX", cp_profile.profile.columns)
Example #8
 def test_iris_classification_2(self):
     grid_points = 3
     feature = self.iris['feature_names'][0]
     cp_profile = individual_variable_profile(self.explainer_rf,
                                              self.X[:10],
                                              variables=[feature],
                                              grid_points=grid_points)
     self.assertIn(feature, cp_profile.profile.columns)
     self.assertEqual(len(cp_profile.profile), 10 * grid_points)
     self.assertIsNone(cp_profile.new_observation_true)
     self.assertEqual(cp_profile.selected_variables, [feature])
Example #9
 def test_iris_classification_5(self):
     feature = self.iris['feature_names'][0]
     X_data = pd.DataFrame(self.X[:20])
     y_data = pd.DataFrame(self.y)
     cp_profile = individual_variable_profile(self.explainer_rf,
                                              X_data,
                                              y_data,
                                              variables=[feature])
     self.assertEqual(len(cp_profile.profile), 20 * cp_profile._grid_points)
     self.assertLessEqual(max(cp_profile.profile['_yhat_']), 1)
     self.assertGreaterEqual(min(cp_profile.profile['_yhat_']), 0)
Example #10
def create_cp(model, label, idx, path='../data/heloc_dataset_v1.csv'):
    data = pd.read_csv(path)
    column_list = prepare_data()
    explainer = explain(
        model,
        data=data[column_list],
        y=data.RiskPerformance,
        label=label,
        predict_function=lambda X: model.predict_proba(X)[:, 1])
    return individual_variable_profile(explainer, data[column_list].loc[idx],
                                       data.loc[idx, 'RiskPerformance'])
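A sketch of how create_cp might be called together with plot; the model variable and the selected column are assumptions for illustration:

# Hypothetical usage: profile observation 5 of the HELOC data and render it.
cp = create_cp(rf_model, label="Random forest", idx=5)
plot(cp, selected_variables=["ExternalRiskEstimate"])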
Example #11
 def test_iris_classification_3(self):
     feature = self.iris['feature_names'][0]
     grid_points = 5  # should be ignored
     num_points = 3
     cp_profile = individual_variable_profile(
         self.explainer_rf,
         self.X[:num_points],
         y=self.y[:num_points],
         variables=[feature],
         grid_points=grid_points,
         variable_splits={feature: [10, 31]})
     self.assertEqual(len(cp_profile.profile), num_points * 2)
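As the test indicates, an explicit variable_splits mapping takes precedence over grid_points, so each observation contributes one profile row per supplied split value. A minimal sketch with hypothetical explainer and data names:

# 3 observations x 2 split values -> 6 profile rows; grid_points is ignored.
cp = individual_variable_profile(explainer, X[:3], y=y[:3],
                                 variables=[feature],
                                 variable_splits={feature: [10, 31]})
assert len(cp.profile) == 3 * 2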
Example #12
    def ceterisParibus_connector(self, feature, *arg):
        from ceteris_paribus.plots.plots import plot

        query_instance = dict(s.split(':') for s in arg)

        #print(feature)

        #prepare data instance (nparray)
        categories = self.getCategoricalFeatures()
        np_instance = []
        for f in self.featureNames:
            if f in categories:
                np_instance.append(query_instance[f])
            else:
                np_instance.append(float(query_instance[f]))
        #print(np_instance)

        prediction_proba = self.model.predict_proba(
            pd.DataFrame([query_instance]))[0]
        prediction = int(np.argmax(prediction_proba))
        #print(prediction)

        explainer = explain(
            self.model,
            variable_names=self.featureNames,
            data=self.X_train,
            y=self.Y_train,
            label='Model',
            predict_function=lambda x: self.model.predict_proba(x)[:, 1])

        i = individual_variable_profile(explainer, np.array(np_instance),
                                        np.array([prediction]))

        p = plot(i,
                 selected_variables=[feature],
                 width=700,
                 height=800,
                 size=4)

        options = {'height': '500', 'width': '600'}

        imgkit.from_file('_plot_files/plots' + str(p) + '.html',
                         'temp/plots' + str(p) + '.jpg',
                         options=options)

        self.certainty = "I am 100 percent sure about the graph."
        return "temp/plots" + str(p) + ".jpg"
Example #13
 def test_regression_3(self):
     variable_names = self.variable_names
     neighbours = select_neighbours(self.X_train,
                                    self.X_train[0],
                                    variable_names=variable_names,
                                    selected_variables=variable_names,
                                    n=15)
     cp3 = individual_variable_profile(self.explainer_rf,
                                       neighbours,
                                       variables=['LSTAT', 'RM'],
                                       variable_splits={
                                           'LSTAT': [10, 20, 30],
                                           'RM': [4, 5, 6, 7]
                                       })
     self.assertEqual(cp3.selected_variables, ['LSTAT', 'RM'])
     # num of different values in splits
     self.assertEqual(len(cp3.profile), 15 * 7)
Example #14
numeric_features = ['age', 'bmi', 'children']
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])

categorical_features = ['sex', 'smoker', 'region']
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features)])

# Append classifier to preprocessing pipeline.
# Now we have a full prediction pipeline.
clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('classifier', RandomForestRegressor())])

clf.fit(x, y)

from ceteris_paribus.explainer import explain

explainer_cat = explain(clf, var_names, x, y, label="categorical_model")

from ceteris_paribus.profiles import individual_variable_profile

cp_cat = individual_variable_profile(explainer_cat, x.iloc[:10], y.iloc[:10])

cp_cat.print_profile()
plot(cp_cat)

plot(cp_cat, color="smoker")
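Because each categorical profile sweeps a column through its observed levels, handle_unknown='ignore' keeps the one-hot step from failing on level combinations that never co-occurred in training. A quick sanity check on the fitted pipeline, using the x from this example:

# The full pipeline should accept raw, untransformed rows directly.
print(clf.predict(x.iloc[:2]))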
Example #15
def supported_vector_machines_model():
    svm_model = svm.SVR(C=0.01, gamma='scale')
    svm_model.fit(x, y)
    return svm_model, x, y, var_names


if __name__ == "__main__":

    (linear_model, data, labels, variable_names) = linear_regression_model()
    (gb_model, _, _, _) = gradient_boosting_model()
    (svm_model, _, _, _) = supported_vector_machines_model()

    explainer_linear = explain(linear_model, variable_names, data, y)
    explainer_gb = explain(gb_model, variable_names, data, y)
    explainer_svm = explain(svm_model, variable_names, data, y)

    # single profile
    cp_1 = individual_variable_profile(explainer_gb, x[0], y[0])
    plot(cp_1,
         destination="notebook",
         selected_variables=["bmi"],
         print_observations=False)

    # local fit
    neighbours_x, neighbours_y = select_neighbours(x, x[10], y=y, n=10)
    cp_2 = individual_variable_profile(explainer_gb, neighbours_x,
                                       neighbours_y)
    plot(cp_2,
         show_residuals=True,
         selected_variables=["age"],
         print_observations=False,
         color_residuals='red',
         plot_title='')
Example #16
def gradient_boosting_model():
    # constructor arguments other than random_state are truncated in the original snippet
    gb_model = ensemble.GradientBoostingRegressor(random_state=42)
    gb_model.fit(x, y)
    return gb_model, x, y, var_names


def supported_vector_machines_model():
    svm_model = svm.SVR(C=0.01, gamma='scale')
    svm_model.fit(x, y)
    return svm_model, x, y, var_names


if __name__ == "__main__":
    (linear_model, data, labels, variable_names) = linear_regression_model()
    (gb_model, _, _, _) = gradient_boosting_model()
    (svm_model, _, _, _) = supported_vector_machines_model()

    explainer_linear = explain(linear_model, variable_names, data, y)
    explainer_gb = explain(gb_model, variable_names, data, y)
    explainer_svm = explain(svm_model, variable_names, data, y)

    cp_profile = individual_variable_profile(explainer_linear, x[0], y[0])
    plot(cp_profile, show_residuals=True)

    sample_x, sample_y = select_sample(x, y, n=10)
    cp2 = individual_variable_profile(explainer_gb, sample_x, y=sample_y)

    cp3 = individual_variable_profile(explainer_gb, x[0], y[0])
    plot(cp3, show_residuals=True)

    plot(cp_profile, cp3, show_residuals=True)
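plot accepts any number of profiles, so the three explainers built above can be overlaid in a single figure; explainer_svm, unused so far, fits the same pattern. A sketch:

# Overlay profiles from the linear, gradient boosting and SVM models.
cp_lin = individual_variable_profile(explainer_linear, x[0], y[0])
cp_svm = individual_variable_profile(explainer_svm, x[0], y[0])
plot(cp_lin, cp3, cp_svm)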
Example #17
X = iris['data']
y = iris['target']

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=42)

print(iris['feature_names'])


def random_forest_classifier():
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

    rf_model.fit(X_train, y_train)

    return rf_model, X_train, y_train, iris['feature_names']


if __name__ == "__main__":
    (model, data, labels, variable_names) = random_forest_classifier()
    predict_function = lambda X: model.predict_proba(X)[:, 0]
    explainer_rf = explain(model,
                           variable_names,
                           data,
                           labels,
                           predict_function=predict_function)
    cp_profile = individual_variable_profile(explainer_rf, X[1], y=y[1])
    plot(cp_profile)
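For multiclass models the predict_function pins the profile to a single class probability; comparing classes means building one explainer per class. A sketch, with the labels being assumed iris class names:

# Profile the same observation against two class probabilities.
explainer_setosa = explain(model, variable_names, data, labels,
                           predict_function=lambda X: model.predict_proba(X)[:, 0],
                           label="setosa")
explainer_versicolor = explain(model, variable_names, data, labels,
                               predict_function=lambda X: model.predict_proba(X)[:, 1],
                               label="versicolor")
plot(individual_variable_profile(explainer_setosa, X[1], y=y[1]),
     individual_variable_profile(explainer_versicolor, X[1], y=y[1]))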
Example #18
def random_forest_regression():
    # body above this constructor is truncated in the original snippet
    rf_model = ensemble.RandomForestRegressor(n_estimators=100,
                                              random_state=42)

    # Train the model using the training set
    rf_model.fit(X_train, y_train)

    # model, data, labels, variable_names
    return rf_model, X_train, y_train, list(boston['feature_names'])


if __name__ == "__main__":
    (model, data, labels, variable_names) = random_forest_regression()
    explainer_rf = explain(model, variable_names, data, labels)

    cp_profile = individual_variable_profile(explainer_rf,
                                             X_train[0],
                                             y=y_train[0],
                                             variables=['TAX', 'CRIM'])
    plot(cp_profile)

    sample = select_sample(X_train, n=3)
    cp2 = individual_variable_profile(explainer_rf,
                                      sample,
                                      variables=['TAX', 'CRIM'])
    plot(cp2)

    neighbours = select_neighbours(X_train,
                                   X_train[0],
                                   variable_names=variable_names,
                                   selected_variables=variable_names,
                                   n=15)
    # trailing arguments of this call are truncated in the original snippet
    cp3 = individual_variable_profile(explainer_rf, neighbours)
    plot(cp3)
Example #19
    data = np.array(trainX)
    yt = np.array(trainY)
    labels = yt.ravel()
    variable_names = data_for_prediction.columns

    predict_function = lambda X: RFModel.predict_proba(X)[:, 1]
    explainer_rf = explain(RFModel,
                           variable_names,
                           data,
                           y=labels,
                           predict_function=predict_function,
                           label="sRNARFTarget")

    #cp_profile = individual_variable_profile(explainer_rf, data_for_prediction, y = 1, grid_points = 100)
    cp_profile = individual_variable_profile(explainer_rf,
                                             data_for_prediction,
                                             grid_points=200,
                                             variables=[sys.argv[3]])
    plot(cp_profile,
         show_profiles=True,
         show_residuals=True,
         show_rugs=True,
         height=700,
         width=750,
         yaxis_title='Prediction probability for class 1',
         plot_title='Ceteris paribus profiles of feature ' + sys.argv[3] +
         ' for ' + sys.argv[1] + '-' + sys.argv[2] + ' pair interaction',
         color='blue',
         size=3,
         alpha=0.5,
         color_residuals='red',
         size_residuals=20)  # any further arguments are truncated in the original snippet
Example #20
 def test_iris_classification_7(self):
     X_data = pd.DataFrame(self.X[5])
     with self.assertRaises(ValueError):
         cp_profile = individual_variable_profile(self.explainer_rf, X_data,
                                                  self.y[5])
Example #21
 def test_categorical_1(self):
     cp = individual_variable_profile(self.explainer_cat, self.x.iloc[:10],
                                      self.y.iloc[:10])
     self.assertEqual(len(cp.new_observation_true), 10)
     self.assertIn('female', list(cp.profile['sex']))
     self.assertIn('sex', list(cp.profile['_vname_']))
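The profile object is a long-format data frame with one row per (observation, variable, split value); columns such as _vname_ and _yhat_ can be inspected directly. A sketch reusing the names from the categorical pipeline in Example #14:

# Peek at the long-format profile produced for categorical data.
cp = individual_variable_profile(explainer_cat, x.iloc[:2], y.iloc[:2])
print(cp.profile[['_vname_', '_yhat_']].head())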
Example #22
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


def keras_model():
    estimators = [('scaler', StandardScaler()),
                  ('mlp',
                   KerasRegressor(build_fn=network_architecture, epochs=200))]
    model = Pipeline(estimators)
    model.fit(x_train, y_train)
    return model, x_train, y_train, boston.feature_names


if __name__ == "__main__":
    model, x_train, y_train, var_names = keras_model()
    explainer_keras = explain(model,
                              var_names,
                              x_train,
                              y_train,
                              label='KerasMLP')
    cp = individual_variable_profile(
        explainer_keras,
        x_train[:10],
        y=y_train[:10],
        variables=["CRIM", "ZN", "AGE", "INDUS", "B"])
    plot(cp,
         show_residuals=True,
         selected_variables=["CRIM", "ZN", "AGE", "B"],
         show_observations=True,
         show_rugs=True)
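KerasRegressor needs the build_fn whose tail (compile and return) opens this example; the layer stack itself is truncated. A minimal stand-in, with the layer sizes being assumptions rather than the original architecture:

# Assumed minimal MLP builder; only compile() and return survive in the snippet.
from keras.models import Sequential
from keras.layers import Dense

def network_architecture():
    model = Sequential()
    model.add(Dense(64, input_dim=x_train.shape[1], activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model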