Example 1

def explain_wrap(index, columns):
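    # Build ceteris-paribus profiles for a single validation observation and
    # plot the XGBoost and logistic models side by side in the browser.
    # Relies on the enclosing script's globals (X_train, Y_train, X_valid,
    # Y_valid, xgmodel, logmodel, column_names, count).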
    print("DOING {}".format(index))
    global count
    x, y = X_valid[index], Y_valid[index]
    explainer_xgb = explain(xg_predicted, data=X_train, y=Y_train, label="XGBoost model",
        predict_function=lambda X: xgmodel.predict_proba(X.to_numpy())[:, 1], variable_names=column_names)
    explainer_linear = explain(lin_predicted, data=X_train, y=Y_train, label="Logistic model",
        predict_function=lambda X: logmodel.predict_proba(X.to_numpy())[:, 1], variable_names=column_names)
    cp_xgb = individual_variable_profile(explainer_xgb, x, y)
    cp_lin = individual_variable_profile(explainer_linear, x, y)
    plot(cp_xgb, cp_lin, selected_variables=columns, destination="browser", show_observations=False)
    # IFrame(src="./_plot_files/plots{}.html".format(count), width=700, height=600)
    # with open("_plot_files/plots{}.html".format(count), 'r') as myfile:
    #     display(HTML(myfile.read()))
    count += 1
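For orientation, here is a minimal, self-contained sketch of the workflow the snippets on this page share, condensed from the iris example further down (Example 15); it uses only calls that appear elsewhere on this page.

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

from ceteris_paribus.explainer import explain
from ceteris_paribus.profiles import individual_variable_profile
from ceteris_paribus.plots.plots import plot

# Train a stand-in model.
iris = load_iris()
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(iris['data'], iris['target'])

# Wrap the model so the library knows how to query it.
explainer = explain(model,
                    variable_names=iris['feature_names'],
                    data=iris['data'],
                    y=iris['target'],
                    predict_function=lambda X: model.predict_proba(X)[:, 0],
                    label="rf_model")

# Profile a single observation and plot how the prediction reacts as each
# variable changes while the others are held fixed.
cp_profile = individual_variable_profile(explainer, iris['data'][1], y=iris['target'][1])
plot(cp_profile)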
Example 2
    def setUp(self):
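        # Fit a preprocessing + random forest pipeline on the insurance data
        # and wrap it in an explainer aware of the categorical features.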
        df = pd.read_csv(os.path.join(DATASETS_DIR, 'insurance.csv'))

        self.x = df.drop(['charges'], inplace=False, axis=1)

        self.y = df['charges']

        var_names = list(self.x)

        # We create the preprocessing pipelines for both numeric and categorical data.
        numeric_features = ['age', 'bmi', 'children']
        numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])

        categorical_features = ['sex', 'smoker', 'region']
        categorical_transformer = Pipeline(
            steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])

        preprocessor = ColumnTransformer(
            transformers=[('num', numeric_transformer, numeric_features),
                          ('cat', categorical_transformer,
                           categorical_features)])

        # Append the estimator to the preprocessing pipeline.
        # Now we have a full prediction pipeline.
        clf = Pipeline(
            steps=[('preprocessor',
                    preprocessor), ('classifier', RandomForestRegressor())])

        clf.fit(self.x, self.y)

        self.explainer_cat = explain(clf,
                                     var_names,
                                     self.x,
                                     self.y,
                                     label="categorical_model")
Example 3
    def setUp(self):
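        # Train a small Keras MLP on the Boston housing data inside a
        # scikit-learn pipeline and wrap it in an explainer.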
        boston = datasets.load_boston()
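        # Note: load_boston was deprecated in scikit-learn 1.0 and removed in
        # 1.2, so this example requires an older scikit-learn release.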
        x = boston.data
        y = boston.target
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.33,
                                                            random_state=42)

        def network_architecture():
            model = Sequential()
            model.add(Dense(640, input_dim=x.shape[1]))
            model.add(Activation('tanh'))
            model.add(Dense(320))
            model.add(Activation('tanh'))
            model.add(Dense(1))
            model.compile(loss='mean_squared_error', optimizer='adam')
            return model

        def keras_model():
            estimators = [('scaler', StandardScaler()),
                          ('mlp',
                           KerasRegressor(build_fn=network_architecture,
                                          epochs=20))]
            model = Pipeline(estimators)
            model.fit(x_train, y_train)
            return model, x_train, y_train, boston.feature_names

        model, self.x_train, self.y_train, self.var_names = keras_model()
        self.explainer_keras = explain(model,
                                       self.var_names,
                                       self.x_train,
                                       self.y_train,
                                       label='KerasMLP')
Example 4
 def test_explainer_16(self):
     # predict function for array
     explainer = explain(self.rf_model,
                         variable_names=self.var_names,
                         data=self.X[:10],
                         y=self.y[:10])
     self.assertEqual(len(explainer.predict_fun(pd.DataFrame(self.X[:10]))),
                      10)
Example 5
 def test_explainer_14(self):
     # data for one observation - 1D array
     explainer = explain(self.rf_model,
                         variable_names=["a", "b"],
                         data=np.array(["cc", "dd"]))
     np.testing.assert_array_equal(
         explainer.data, pd.DataFrame.from_dict({
             "a": ["cc"],
             "b": ["dd"]
         }))
Example 6

def create_cp(model, label, idx, path='../data/heloc_dataset_v1.csv'):
    # Load the HELOC dataset and return a ceteris-paribus profile for the
    # observation at index idx, explaining the probability of class 1.
    data = pd.read_csv(path)
    column_list = prepare_data()
    explainer = explain(
        model,
        data=data[column_list],
        y=data.RiskPerformance,
        label=label,
        predict_function=lambda X: model.predict_proba(X)[:, 1])
    return individual_variable_profile(explainer, data[column_list].loc[idx],
                                       data.loc[idx, 'RiskPerformance'])
Example 7
    def ceterisParibus_connector(self, feature, *arg):
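        # Parse "feature:value" strings into a single instance, predict its
        # class, profile the selected feature, and render the plot to a JPG.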
        from ceteris_paribus.plots.plots import plot

        query_instance = dict(s.split(':') for s in arg)

        #print(feature)

        #prepare data instance (nparray)
        categories = self.getCategoricalFeatures()
        np_instance = []
        for f in self.featureNames:
            if f in categories:
                np_instance.append(query_instance[f])
            else:
                np_instance.append(float(query_instance[f]))
        #print(np_instance)

        prediction_proba = self.model.predict_proba(
            pd.DataFrame([query_instance]))[0]
        prediction = np.argmax(prediction_proba)
        #print(prediction)

        explainer = explain(
            self.model,
            variable_names=self.featureNames,
            data=self.X_train,
            y=self.Y_train,
            label='Model',
            predict_function=lambda x: self.model.predict_proba(x)[:, 1])

        i = individual_variable_profile(explainer, np.array(np_instance),
                                        np.array([prediction]))

        p = plot(i,
                 selected_variables=[feature],
                 width=700,
                 height=800,
                 size=4)

        options = {'height': '500', 'width': '600'}

        # imgkit needs the wkhtmltoimage binary installed to rasterize the plot.
        imgkit.from_file('_plot_files/plots' + str(p) + '.html',
                         'temp/plots' + str(p) + '.jpg',
                         options=options)

        self.certainty = "I am 100 percent sure about the graph."
        return ("temp/plots" + str(p) + ".jpg")
Example 8
    def setUp(self):
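        # Fit a random forest regressor on the Boston housing data and build
        # an explainer from the helper's returned model and metadata.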
        boston = datasets.load_boston()

        X = boston['data']
        y = boston['target']

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=0.33, random_state=42)

        (model, data, labels, self.variable_names) = random_forest_regression(
            self.X_train, self.y_train, list(boston['feature_names']))
        self.explainer_rf = explain(model,
                                    self.variable_names,
                                    data,
                                    labels,
                                    label="rf_model")
Example 9
    def setUp(self):
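        # Fit a random forest classifier on iris and explain the predicted
        # probability of the first class.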
        self.iris = load_iris()

        self.X = self.iris['data']
        self.y = self.iris['target']
        X_train, X_test, y_train, y_test = train_test_split(self.X,
                                                            self.y,
                                                            test_size=0.33,
                                                            random_state=42)
        (model, data, labels, variable_names) = random_forest_classifier(
            X_train, y_train, list(self.iris['feature_names']))
        predict_function = lambda X: model.predict_proba(X)[:, 0]
        self.explainer_rf = explain(model,
                                    variable_names,
                                    data,
                                    labels,
                                    predict_function=predict_function,
                                    label="rf_model")
Example 10
 def test_explainer_5(self):
     # raises warning
     explainer = explain(self.rf_model, [])
     self.assertEqual(explainer.label, "RandomForestRegressor")
Example 11
 def test_explainer_2(self):
     model = MagicMock(predict=id)
     explainer = explain(model, data=pd.DataFrame())
     self.assertEqual(explainer.predict_fun, id)
Example 12

def random_forest_regression():
    # Create a random forest regression object
    rf_model = ensemble.RandomForestRegressor(n_estimators=100,
                                              random_state=42)

    # Train the model using the training set
    rf_model.fit(X_train, y_train)

    # model, data, labels, variable_names
    return rf_model, X_train, y_train, list(boston['feature_names'])


if __name__ == "__main__":
    (model, data, labels, variable_names) = random_forest_regression()
    explainer_rf = explain(model, variable_names, data, labels)

    cp_profile = individual_variable_profile(explainer_rf,
                                             X_train[0],
                                             y=y_train[0],
                                             variables=['TAX', 'CRIM'])
    plot(cp_profile)

    # select_sample draws a random subset of observations to profile together.
    sample = select_sample(X_train, n=3)
    cp2 = individual_variable_profile(explainer_rf,
                                      sample,
                                      variables=['TAX', 'CRIM'])
    plot(cp2)

    neighbours = select_neighbours(X_train,
                                   X_train[0],
                                   n=10)  # the source snippet is truncated here; the closing argument is assumed
Example 13
    data_for_prediction = datarow.iloc[:, 2:]

    trainX = pickle.load(
        open('./PickledModelData/RFData/trainX_sRNARFTarget.pkl', 'rb'))
    trainY = pickle.load(
        open('./PickledModelData/RFData/trainY_sRNARFTarget.pkl', 'rb'))

    # Convert the pickled training data to arrays; the labels are flattened
    # and the prediction row's columns double as variable names.
    data = np.array(trainX)
    yt = np.array(trainY)
    labels = yt.ravel()
    variable_names = data_for_prediction.columns

    predict_function = lambda X: RFModel.predict_proba(X)[:, 1]
    explainer_rf = explain(RFModel,
                           variable_names,
                           data,
                           y=labels,
                           predict_function=predict_function,
                           label="sRNARFTarget")

    #cp_profile = individual_variable_profile(explainer_rf, data_for_prediction, y = 1, grid_points = 100)
    cp_profile = individual_variable_profile(explainer_rf,
                                             data_for_prediction,
                                             grid_points=200,
                                             variables=[sys.argv[3]])
    plot(cp_profile,
         show_profiles=True,
         show_residuals=True,
         show_rugs=True,
         height=700,
         width=750,
         yaxis_title='Prediction probability for class 1')  # the source snippet is truncated here; the closing parenthesis is assumed
Example 14
                                                  random_state=42)
    gb_model.fit(x, y)
    return gb_model, x, y, var_names


def supported_vector_machines_model():
    svm_model = svm.SVR(C=0.01, gamma='scale')
    svm_model.fit(x, y)
    return svm_model, x, y, var_names


if __name__ == "__main__":
    (linear_model, data, labels, variable_names) = linear_regression_model()
    (gb_model, _, _, _) = gradient_boosting_model()
    (svm_model, _, _, _) = supported_vector_machines_model()

    explainer_linear = explain(linear_model, variable_names, data, y)
    explainer_gb = explain(gb_model, variable_names, data, y)
    explainer_svm = explain(svm_model, variable_names, data, y)

    cp_profile = individual_variable_profile(explainer_linear, x[0], y[0])
    plot(cp_profile, show_residuals=True)

    sample_x, sample_y = select_sample(x, y, n=10)
    cp2 = individual_variable_profile(explainer_gb, sample_x, y=sample_y)

    cp3 = individual_variable_profile(explainer_gb, x[0], y[0])
    plot(cp3, show_residuals=True)

    plot(cp_profile, cp3, show_residuals=True)
Example 15
X = iris['data']
y = iris['target']

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=42)

print(iris['feature_names'])


def random_forest_classifier():
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

    rf_model.fit(X_train, y_train)

    return rf_model, X_train, y_train, iris['feature_names']


if __name__ == "__main__":
    (model, data, labels, variable_names) = random_forest_classifier()
    predict_function = lambda X: model.predict_proba(X)[:, 0]
    explainer_rf = explain(model,
                           variable_names,
                           data,
                           labels,
                           predict_function=predict_function)
    cp_profile = individual_variable_profile(explainer_rf, X[1], y=y[1])
    plot(cp_profile)
Example 16
 def test_explainer_17(self):
     # predict function for dataframe
     boston_df = pd.DataFrame(self.X[:10])
     explainer = explain(self.rf_model, data=boston_df)
     self.assertEqual(len(explainer.predict_fun(boston_df)), 10)
Example 17
 def test_explainer_7(self):
     # no labels given
     with self.assertRaises(ValueError) as c:
         explainer = explain(self.rf_model)
Example 18
    return gb_model, x, y, var_names


def supported_vector_machines_model():
    svm_model = svm.SVR(C=0.01, gamma='scale', kernel='poly')
    svm_model.fit(x, y)
    return svm_model, x, y, var_names


if __name__ == "__main__":

    (linear_model, data, labels, variable_names) = linear_regression_model()
    (gb_model, _, _, _) = gradient_boosting_model()
    (svm_model, _, _, _) = supported_vector_machines_model()

    explainer_linear = explain(linear_model, variable_names, data, y)
    explainer_gb = explain(gb_model, variable_names, data, y)
    explainer_svm = explain(svm_model, variable_names, data, y)

    # single profile
    cp_1 = individual_variable_profile(explainer_gb, x[0], y[0])
    plot(cp_1,
         destination="notebook",
         selected_variables=["bmi"],
         print_observations=False)

    # local fit
    neighbours_x, neighbours_y = select_neighbours(x, x[10], y=y, n=10)
    cp_2 = individual_variable_profile(explainer_gb, neighbours_x,
                                       neighbours_y)
    plot(cp_2)  # the source snippet is truncated here; the remaining plot arguments are elided
Example 19
 def test_explainer_15(self):
     # wrong number of variables
     with self.assertRaises(ValueError):
         explainer = explain(self.rf_model,
                             variable_names=["a", "b", "c"],
                             data=self.df.values)
Example 20
 def test_explainer_11(self):
     explainer = explain(self.rf_model,
                         variable_names=["a", "b"],
                         y=pd.DataFrame(np.array([1, 4])))
     np.testing.assert_array_equal(explainer.y, pd.Series([1, 4]))
Example 21
 def test_explainer_9(self):
     explainer = explain(self.rf_model,
                         variable_names=["a", "b", "c"],
                         y=[1, 2, 3])
     np.testing.assert_array_equal(explainer.y, pd.Series([1, 2, 3]))
Example 22
 def test_explainer_8(self):
     # labels imputed from the dataframe
     explainer = explain(self.rf_model, data=self.df)
     self.assertEqual(explainer.var_names, ['a', 'b'])
Example 23
 def test_explainer_1(self):
     model = MagicMock()
     delattr(model, 'predict')
     with self.assertRaises(ValueError) as c:
         explain(model, self.var_names)
Example 24
 def test_explainer_4(self):
     label = "xyz"
     explainer = explain(self.rf_model, [], label=label)
     self.assertEqual(explainer.label, label)
Example 25
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


def keras_model():
    estimators = [('scaler', StandardScaler()),
                  ('mlp',
                   KerasRegressor(build_fn=network_architecture, epochs=200))]
    model = Pipeline(estimators)
    model.fit(x_train, y_train)
    return model, x_train, y_train, boston.feature_names


if __name__ == "__main__":
    model, x_train, y_train, var_names = keras_model()
    explainer_keras = explain(model,
                              var_names,
                              x_train,
                              y_train,
                              label='KerasMLP')
    cp = individual_variable_profile(
        explainer_keras,
        x_train[:10],
        y=y_train[:10],
        variables=["CRIM", "ZN", "AGE", "INDUS", "B"])
    plot(cp,
         show_residuals=True,
         selected_variables=["CRIM", "ZN", "AGE", "B"],
         show_observations=True,
         show_rugs=True)
Example 26
 def test_explainer_6(self):
     model = MagicMock()
     model.__str__.return_value = 'xyz'
     # raises warning
     explainer = explain(model, [])
     self.assertEqual(explainer.label, "unlabeled_model")
Example 27
 def test_explainer_12(self):
     # data from dataframe
     explainer = explain(self.rf_model, data=self.df)
     np.testing.assert_array_equal(explainer.data, self.df)
Example 28
 def test_explainer_3(self):
     explainer = explain(self.rf_model, [], predict_function=sum)
     self.assertEqual(explainer.predict_fun, sum)
Example 29

numeric_features = ['age', 'bmi', 'children']
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])

categorical_features = ['sex', 'smoker', 'region']
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(transformers=[(
    'num', numeric_transformer,
    numeric_features), ('cat', categorical_transformer, categorical_features)])

# Append the estimator to the preprocessing pipeline.
# Now we have a full prediction pipeline.
clf = Pipeline(steps=[('preprocessor',
                       preprocessor), ('classifier', RandomForestRegressor())])

clf.fit(x, y)

from ceteris_paribus.explainer import explain

explainer_cat = explain(clf, var_names, x, y, label="categorical_model")

from ceteris_paribus.profiles import individual_variable_profile

cp_cat = individual_variable_profile(explainer_cat, x.iloc[:10], y.iloc[:10])

cp_cat.print_profile()
plot(cp_cat)

# Passing color groups the plotted profiles by a feature (here: smoker).
plot(cp_cat, color="smoker")
Example 30
 def test_explainer_13(self):
     # data from numpy array
     explainer = explain(self.rf_model,
                         variable_names=["a", "b"],
                         data=self.df.values)
     np.testing.assert_array_equal(explainer.data, self.df)