import pandas as pd
# ExplanationDashboard ships in the raiwidgets package (older releases exposed
# it from interpret_community.widget); globals_file and get_best_pipeline come
# from the surrounding application.
from raiwidgets import ExplanationDashboard


def Explanation_Dashboard(global_explanation,
                          model=None,
                          x_train=pd.DataFrame(),
                          x_test=pd.DataFrame(),
                          y_train=pd.DataFrame(),
                          y_test=pd.DataFrame(),
                          explanation_data=None,
                          design=None):
    # If pipeline files were imported, resolve the model and the data splits
    # from the best exported pipeline.
    if globals_file.imported_model_files:
        model, x_train, x_test, y_train, y_test = get_best_pipeline(design)
    if model is None:
        return "Exported pipeline is missing"
    if x_train.empty:
        return "Training features are missing"
    if x_test.empty:
        return "Testing features are missing"
    if y_train.empty:
        return "Training target is missing"
    if y_test.empty:
        return "Testing target is missing"

    # Launch the dashboard on the requested split.
    if explanation_data == 'Training':
        ExplanationDashboard(global_explanation,
                             model,
                             dataset=x_train,
                             true_y=y_train)
    else:
        ExplanationDashboard(global_explanation,
                             model,
                             dataset=x_test,
                             true_y=y_test)
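
A minimal usage sketch for the wrapper above, assuming the surrounding application's globals_file reports no imported pipeline files; the iris data, LogisticRegression, and MimicExplainer setup below are illustrative stand-ins, not from the source:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from interpret_community.mimic.mimic_explainer import MimicExplainer
from interpret_community.mimic.models.lightgbm_model import LGBMExplainableModel

data = load_iris(as_frame=True)
X_tr, X_te, y_tr, y_te = train_test_split(data.data, data.target,
                                          test_size=0.2, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
explainer = MimicExplainer(clf, X_tr, LGBMExplainableModel)
global_exp = explainer.explain_global(X_te)

# 'Training' selects the train split; any other value falls through to test.
Explanation_Dashboard(global_exp, clf, X_tr, X_te,
                      y_tr.to_frame(), y_te.to_frame(),
                      explanation_data='Testing')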
    def test_explanation_dashboard_many_columns(self):
        # Verify the dashboard can handle a very wide dataset (2000 features).
        X, y = make_classification(n_features=2000)

        # Split data into train and test
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=0)
        classes = np.unique(y_train).tolist()
        feature_names = ["col" + str(i) for i in list(range(X_train.shape[1]))]
        X_train = pd.DataFrame(X_train, columns=feature_names)
        X_test = pd.DataFrame(X_test, columns=feature_names)
        knn = sklearn.neighbors.KNeighborsClassifier()
        knn.fit(X_train, y_train)

        model_task = ModelTask.Classification
        explainer = MimicExplainer(knn,
                                   X_train,
                                   LGBMExplainableModel,
                                   model_task=model_task)
        global_explanation = explainer.explain_global(X_test)

        ExplanationDashboard(explanation=global_explanation,
                             model=knn,
                             dataset=X_test,
                             true_y=y_test,
                             classes=classes)
Example #3
def Explanation_Dashboard(global_explanation,
                          model,
                          x_train,
                          x_test,
                          y_train,
                          y_test,
                          explanation_data=None):
    # Launch the dashboard on the requested split.
    if explanation_data == 'Training':
        ExplanationDashboard(global_explanation,
                             model,
                             dataset=x_train,
                             true_y=y_train)
    else:
        ExplanationDashboard(global_explanation,
                             model,
                             dataset=x_test,
                             true_y=y_test)
    def test_local_explanation(self, mimic_explainer):
        # Validate visualizing ExplanationDashboard with a local explanation
        x_train, x_test, y_train, y_test, feature_names, target_names = create_cancer_data()
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, y_train)
        explainer = mimic_explainer(model, x_train, LGBMExplainableModel,
                                    features=feature_names, classes=target_names)
        explanation = explainer.explain_local(x_test)
        ExplanationDashboard(explanation, model, dataset=x_test, true_y=y_test)
Example #5
    def test_raw_timestamp_explanation(self, mimic_explainer):
        df = retrieve_dataset(
            'insurance_claims.csv',
            na_values='?',
            parse_dates=['policy_bind_date', 'incident_date'])
        label = 'fraud_reported'
        df_y = df[label]
        df_X = df.drop(columns=label)
        x_train, x_test, y_train, y_test = train_test_split(df_X,
                                                            df_y,
                                                            test_size=0.2,
                                                            random_state=7)
        str_cols = df_X.select_dtypes(
            exclude=[np.number, np.datetime64]).columns.tolist()
        dt_cols = df_X.select_dtypes(include=[np.datetime64]).columns.tolist()
        numeric_cols = df_X.select_dtypes(include=[np.number]).columns.tolist()
        transforms_list = []
        # Impute and one-hot encode the string columns.
        for str_col in str_cols:
            transforms_list.append(
                (str_col,
                 Pipeline(steps=[
                     ('imputer', SimpleImputer(strategy='most_frequent')),
                     ('ohe', OneHotEncoder(sparse=False))]),
                 [str_col]))
        # Impute and scale the numeric columns.
        for numeric_col in numeric_cols:
            transforms_list.append(
                (numeric_col,
                 Pipeline(steps=[
                     ('imputer', SimpleImputer(strategy='mean')),
                     ('scaler', StandardScaler())]),
                 [numeric_col]))
        # Scale the timestamp columns.
        for dt_col in dt_cols:
            transforms_list.append(
                (dt_col,
                 Pipeline(steps=[('scaler', StandardScaler())]),
                 [dt_col]))
        transformations = ColumnTransformer(transforms_list)
        x_train_transformed = transformations.fit_transform(x_train)
        model = create_lightgbm_classifier(x_train_transformed, y_train)
        model_task = ModelTask.Classification
        features = df_X.columns.tolist()
        explainer = mimic_explainer(model,
                                    x_train,
                                    LGBMExplainableModel,
                                    transformations=transformations,
                                    features=features,
                                    model_task=model_task)
        explanation = explainer.explain_global(x_train)
        # Hand the dashboard the raw data plus a preprocess+model pipeline so
        # it can score untransformed rows.
        dashboard_pipeline = Pipeline(steps=[('preprocess', transformations),
                                             ('model', model)])
        ExplanationDashboard(explanation,
                             dashboard_pipeline,
                             dataset=x_train,
                             true_y=y_train)
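
Worth noting in the test above: the dashboard receives the raw, untransformed data together with a pipeline that chains the preprocessing and the model, so the UI can display original feature values while still scoring rows. A self-contained sketch of the same idea, using hypothetical toy data and model names rather than anything from the source:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X_raw, y_raw = load_iris(return_X_y=True)
# Chaining preprocessing and model lets raw rows be scored directly,
# which is what the dashboard needs when given untransformed data.
scoring_pipeline = Pipeline(steps=[('preprocess', StandardScaler()),
                                   ('model', LogisticRegression(max_iter=200))])
scoring_pipeline.fit(X_raw, y_raw)
print(scoring_pipeline.predict(X_raw[:3]))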
# This script assumes X, y, feature_names and classes were defined earlier
# (e.g. from a loaded classification dataset); the imports below are added
# for completeness.
from sklearn import svm
from sklearn.model_selection import train_test_split

from interpret_community import TabularExplainer
# Both dashboards ship in the raiwidgets package.
from raiwidgets import ExplanationDashboard, ModelPerformanceDashboard

x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

clf = svm.SVC(gamma=0.001, C=100., probability=True)
model = clf.fit(x_train, y_train)

explainer = TabularExplainer(model,
                             x_train,
                             features=feature_names,
                             classes=classes)

# Global explanation over the test split.
global_explanation = explainer.explain_global(x_test)

# Local explanation for a single test instance.
instance_num = 0
local_explanation = explainer.explain_local(x_test[instance_num, :])

# Rank the local importances for the class the model actually predicted.
prediction_value = clf.predict(x_test)[instance_num]

sorted_local_importance_values = local_explanation.get_ranked_local_values()[
    prediction_value]
sorted_local_importance_names = local_explanation.get_ranked_local_names()[
    prediction_value]
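
# A hedged illustration, not part of the original sample: print the
# top-ranked features for the predicted class. Depending on the explainer
# version and task type, the ranked output may carry one extra level of
# nesting per explained row, so unwrap it defensively.
names, values = sorted_local_importance_names, sorted_local_importance_values
if names and isinstance(names[0], list):
    names, values = names[0], values[0]
for name, value in list(zip(names, values))[:5]:
    print(f"{name}: {value}")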


# Launch the dashboards.
ExplanationDashboard(global_explanation, model, dataset=x_test, true_y=y_test)
ModelPerformanceDashboard(model, dataset=x_test, true_y=y_test)

# Keep the process alive so the locally served dashboards stay reachable.
input("Press Enter to continue...")