Code Example #1
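Verifies that a tabular explainer handles a pandas DataFrame with a categorical string column: a synthetic dataset of numeric features plus city names is generated, a LightGBM classifier is trained on it, and the shapes of the resulting local and global importance values are checked. All of these snippets are test methods that rely on the suite's own fixtures (tabular_explainer, mimic_explainer) and helpers (create_lightgbm_classifier, retrieve_dataset, DatasetConstants, test_logger); they additionally assume module-level imports roughly like this sketch (the interpret-community module paths vary across versions):

import numpy as np
import pandas as pd
import shap
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
# interpret-community imports; exact module paths differ across versions
from interpret_community.common.constants import ModelTask
from interpret_community.mimic.models import LGBMExplainableModel
# the dashboard has shipped from interpret_community.widget and,
# more recently, from raiwidgets
from raiwidgets import ExplanationDashboard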
    def test_explain_model_pandas_string(self, tabular_explainer):
        np.random.seed(777)
        num_rows = 100
        num_ints = 10
        num_cols = 4
        split_ratio = 0.2
        A = np.random.randint(num_ints, size=num_rows)
        B = np.random.random(size=num_rows)
        C = np.random.randn(num_rows)
        cat = np.random.choice(['New York', 'San Francisco', 'Los Angeles',
                                'Atlanta', 'Denver', 'Chicago', 'Miami', 'DC', 'Boston'], num_rows)
        label = np.random.choice([0, 1], num_rows)
        df = pd.DataFrame(data={'A': A, 'B': B, 'C': C, 'cat': cat, 'label': label})
        df['cat'] = df['cat'].astype('category')
        X = df.drop('label', axis=1)
        y = df.label

        x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=split_ratio)

        clf = create_lightgbm_classifier(x_train, y_train)

        explainer = tabular_explainer(clf, initialization_examples=x_train, features=x_train.columns)
        global_explanation = explainer.explain_global(x_test)
        local_shape = global_explanation._local_importance_values.shape
        # local importances have shape (num_classes, num_examples, num_features)
        num_rows_expected = int(split_ratio * num_rows)
        assert local_shape == (2, num_rows_expected, num_cols)
        assert len(global_explanation.global_importance_values) == num_cols
        assert global_explanation.num_features == num_cols
Code Example #2
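Exercises raw-feature explanations for timestamp columns: the insurance claims data is split by dtype, each column gets its own preprocessing pipeline inside a ColumnTransformer, and the mimic explainer is handed the raw data together with those transformations so importances are reported against the raw (untransformed) features. The result is rendered with ExplanationDashboard.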
    def test_raw_timestamp_explanation(self, mimic_explainer):
        # Load the insurance claims dataset, parsing the timestamp columns
        df = retrieve_dataset(
            'insurance_claims.csv',
            na_values='?',
            parse_dates=['policy_bind_date', 'incident_date'])
        label = 'fraud_reported'
        df_y = df[label]
        df_X = df.drop(columns=label)
        x_train, x_test, y_train, y_test = train_test_split(
            df_X, df_y, test_size=0.2, random_state=7)
        # Partition the columns by dtype: string, datetime and numeric
        str_cols = df_X.select_dtypes(
            exclude=[np.number, np.datetime64]).columns.tolist()
        dt_cols = df_X.select_dtypes(include=[np.datetime64]).columns.tolist()
        numeric_cols = df_X.select_dtypes(include=[np.number]).columns.tolist()
        # Build one preprocessing pipeline per column
        transforms_list = []
        for str_col in str_cols:
            transforms_list.append(
                (str_col,
                 Pipeline(steps=[
                     ('imputer', SimpleImputer(strategy='most_frequent')),
                     # scikit-learn >= 1.2 renames sparse to sparse_output
                     ('ohe', OneHotEncoder(sparse=False))]),
                 [str_col]))
        for numeric_col in numeric_cols:
            transforms_list.append(
                (numeric_col,
                 Pipeline(steps=[
                     ('imputer', SimpleImputer(strategy='mean')),
                     ('scaler', StandardScaler())]),
                 [numeric_col]))
        for dt_col in dt_cols:
            transforms_list.append(
                (dt_col,
                 Pipeline(steps=[('scaler', StandardScaler())]),
                 [dt_col]))
        transformations = ColumnTransformer(transforms_list)
        x_train_transformed = transformations.fit_transform(x_train)
        model = create_lightgbm_classifier(x_train_transformed, y_train)
        model_task = ModelTask.Classification
        features = df_X.columns.tolist()
        # Passing the raw data plus the transformations makes the explainer
        # report importances in terms of the raw (untransformed) features
        explainer = mimic_explainer(
            model, x_train, LGBMExplainableModel,
            transformations=transformations,
            features=features, model_task=model_task)
        explanation = explainer.explain_global(x_train)
        dashboard_pipeline = Pipeline(steps=[
            ('preprocess', transformations), ('model', model)])
        ExplanationDashboard(
            explanation, dashboard_pipeline,
            datasetX=x_train, trueY=y_train)
Code Example #3
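Checks a multiclass explanation on the iris dataset: the local importance values should contain one entry per class, each holding one row per test example.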
    def test_explain_model_lightgbm_multiclass(self, tabular_explainer, iris):
        # Fit a lightgbm model
        model = create_lightgbm_classifier(iris[DatasetConstants.X_TRAIN], iris[DatasetConstants.Y_TRAIN])

        # Create tabular explainer
        exp = tabular_explainer(model, iris[DatasetConstants.X_TRAIN], features=iris[DatasetConstants.FEATURES],
                                classes=iris[DatasetConstants.CLASSES])
        test_logger.info('Running explain global for test_explain_model_lightgbm_multiclass')
        explanation = exp.explain_global(iris[DatasetConstants.X_TEST])
        assert len(explanation.local_importance_values[0]) == len(iris[DatasetConstants.X_TEST])
        assert explanation.num_examples == len(iris[DatasetConstants.X_TEST])
        assert len(explanation.local_importance_values) == len(iris[DatasetConstants.CLASSES])
        assert explanation.num_classes == len(iris[DatasetConstants.CLASSES])
Code Example #4
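The binary counterpart on the adult census income dataset from shap.datasets: the local importance values should contain one entry per class label, each with one row per test example.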
    def test_explain_model_lightgbm_binary(self, tabular_explainer):
        # Binary task on the adult census income dataset from shap
        X, y = shap.datasets.adult()
        x_train, x_test, y_train, _ = train_test_split(X,
                                                       y,
                                                       test_size=0.2,
                                                       random_state=7)
        # Fit a tree model
        model = create_lightgbm_classifier(x_train, y_train)

        classes = ["<50k", ">50k"]
        # Create local tabular explainer without run history
        exp = tabular_explainer(model,
                                x_train,
                                features=X.columns.values,
                                classes=classes)
        test_logger.info(
            'Running explain global for test_explain_model_lightgbm_binary')
        explanation = exp.explain_global(x_test)
        assert len(explanation.local_importance_values[0]) == len(x_test)
        assert len(explanation.local_importance_values) == len(classes)
Code Example #5
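A thin factory wrapper so the tests can construct their model through a single entry point.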
def create_model(x_train, y_train):
    # Delegate to the shared LightGBM factory helper
    return create_lightgbm_classifier(x_train, y_train)
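The create_lightgbm_classifier helper used throughout these examples is not part of the listing. A minimal sketch of what such a helper plausibly looks like, assuming LightGBM's scikit-learn wrapper (the hyperparameters are illustrative placeholders, not the test suite's actual values):

from lightgbm import LGBMClassifier

def create_lightgbm_classifier(x_train, y_train):
    # Illustrative sketch only: fit a LightGBM classifier through its
    # scikit-learn-compatible API; hyperparameters are placeholders
    clf = LGBMClassifier(n_estimators=100, random_state=777)
    return clf.fit(x_train, y_train)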