Example #1
0
def explain(skater_exp: Explanation, training_df, test_df, explanation_target,
            prefix_target):
    """Render a tree-surrogate explanation of the job's model as SVG text.

    Fits a skater tree surrogate against the job's persisted model, renders
    it with dtreeviz, and returns the SVG file contents as a string (or an
    error message when the rendered file is too large).

    :param skater_exp: Explanation whose ``job`` holds the predictive model.
    :param training_df: encoded training log with 'trace_id' and 'label' cols.
    :param test_df: unused here; kept for interface compatibility.
    :param explanation_target: unused here; kept for interface compatibility.
    :param prefix_target: unused here; kept for interface compatibility.
    :return: SVG markup string, or 'The file size is too big'.
    """
    job = skater_exp.job
    # The persisted model is stored as a sequence; the estimator is element 0.
    model = joblib.load(job.predictive_model.model_path)[0]

    # Build the feature matrix once: drop identifier and target columns.
    # (Keyword `axis=` — the positional form is removed in pandas 2.0.)
    X_train = training_df.drop(['trace_id', 'label'], axis=1)
    features = list(X_train.columns.values)
    Y_train = training_df['label'].values

    interpreter = Interpretation(training_df, feature_names=features)
    model_inst = InMemoryModel(model.predict,
                               examples=X_train,
                               model_type=model._estimator_type,
                               unique_values=[1, 2],
                               feature_names=features,
                               target_names=['label'])
    surrogate_explainer = interpreter.tree_surrogate(model_inst, seed=5)

    surrogate_explainer.fit(X_train,
                            Y_train,
                            use_oracle=True,
                            prune='post',
                            scorer_type='default')
    # NOTE(review): class_names is set to the *feature* names here, mirroring
    # the sibling examples in this file — confirm this is intentional.
    surrogate_explainer.class_names = features

    viz = dtreeviz(surrogate_explainer.estimator_,
                   X_train,
                   Y_train,
                   target_name='label',
                   feature_names=features,
                   orientation="TD",
                   class_names=list(surrogate_explainer.class_names),
                   fancy=True,
                   X=None,
                   label_fontsize=12,
                   ticks_fontsize=8,
                   fontname="Arial")
    name = create_unique_name("skater_plot.svg")
    viz.save(name)
    try:
        # Guard against pathologically large trees producing huge SVGs.
        if os.path.getsize(name) > 15000000:
            return 'The file size is too big'
        with open(name, "r") as f:
            response = f.read()
    finally:
        # Always clean up the rendered file (previously leaked on the
        # too-big early return) and any extension-less intermediate
        # file dtreeviz may leave behind.
        os.remove(name)
        base = name.split('.svg')[0]
        if os.path.isfile(base):
            os.remove(base)

    return response
Example #2
0
def analyze(model_prediction, X_train, y_train):
    """Fit a post-pruned tree surrogate of the model and plot its decisions.

    :param model_prediction: prediction callable of the model to explain.
    :param X_train: training feature DataFrame.
    :param y_train: training target values.
    :return: IPython Image of the rendered surrogate-tree plot.
    """
    plot_file = 'simple_tree_pre.png'

    # Wrap the raw prediction function so skater can query it as an oracle.
    wrapped_model = InMemoryModel(model_prediction, examples=X_train)

    explainer = Interpretation(
        X_train, feature_names=X_train.columns).tree_surrogate(
            wrapped_model, seed=5)
    explainer.fit(X_train, y_train,
                  use_oracle=True, prune='post', scorer_type='default')

    explainer.plot_global_decisions(
        colors=['coral', 'lightsteelblue', 'darkkhaki'],
        file_name=plot_file)

    return Image(filename=plot_file)
Example #3
0
    def handle(self, *args, **kwargs):
        """Render a dtreeviz plot of a tree surrogate for one fixed job.

        Loads job #71's persisted model, fits a skater tree surrogate on the
        job's encoded training log, and saves the visualization to
        ``skater_plot_train_2_2.svg``.
        """
        # Primary key of the job whose model we want to explain.
        TARGET_MODEL = 71
        job = Job.objects.filter(pk=TARGET_MODEL)[0]
        # The persisted model is stored as a sequence; the estimator is element 0.
        model = joblib.load(job.predictive_model.model_path)[0]
        # load data
        training_df, test_df = get_encoded_logs(job)

        # Build the feature matrix once: drop identifier and target columns.
        # (Keyword `axis=` — the positional form is removed in pandas 2.0.)
        X_train = training_df.drop(['trace_id', 'label'], axis=1)
        features = list(X_train.columns.values)
        Y_train = training_df['label'].values

        interpreter = Interpretation(training_df, feature_names=features)
        model_inst = InMemoryModel(model.predict,
                                   examples=X_train,
                                   model_type='classifier',
                                   unique_values=[1, 2],
                                   feature_names=features,
                                   target_names=['label'])
        surrogate_explainer = interpreter.tree_surrogate(model_inst, seed=5)

        surrogate_explainer.fit(X_train,
                                Y_train,
                                use_oracle=True,
                                prune='post',
                                scorer_type='default')
        # NOTE(review): class_names is set to the *feature* names here,
        # mirroring the sibling examples in this file — confirm intentional.
        surrogate_explainer.class_names = features

        viz = dtreeviz(surrogate_explainer.estimator_,
                       X_train,
                       Y_train,
                       target_name='label',
                       feature_names=features,
                       orientation="TD",
                       class_names=list(surrogate_explainer.class_names),
                       fancy=True,
                       X=None,
                       label_fontsize=12,
                       ticks_fontsize=8,
                       fontname="Arial")
        viz.save("skater_plot_train_2_2.svg")
Example #4
0
            ## To avoid clutter I only produce plots for gradient boosting and one fold only
            # NOTE(review): `fold` and `modelno` come from an enclosing loop that
            # is not visible here; (fold == 2, modelno == 5) presumably selects a
            # single model/fold combination — confirm against surrounding code.
            if (fold == 2 and modelno == 5):
                # Plot PDPs of variable "alm" since it is the most important feature, for 3 of the 4 models
                ## alm not the most important feature for Gaussian Naive bayes tho, explain that
                # for other variables just change the name
                # for other models just change the number
                # interpreter.partial_dependence.plot_partial_dependence(["alm"],
                #                                                        pyint_model, grid_resolution=30,
                #                                                        with_variance=True)

                # # PDP interaction between two variables, for each class
                # interpreter.partial_dependence.plot_partial_dependence([("nuc", "mit")], pyint_model,
                #                                                        grid_resolution=10)

                # Pre-pruned tree surrogate capped at depth 4 so the plot stays readable.
                surrogate_explainer = interpreter.tree_surrogate(
                    oracle=pyint_model, seed=5, max_depth=4)
                # use_oracle=True fits the surrogate against the model's own
                # predictions rather than the raw targets.
                surrogate_explainer.fit(train_data,
                                        train_target,
                                        use_oracle=True,
                                        prune='pre',
                                        scorer_type='default')
                # Persist the global decision plot for this model/fold.
                surrogate_explainer.plot_global_decisions(
                    file_name='mlp_tree_class_md4.png', fig_size=(8, 8))

                #show_in_notebook('simple_tree_pre.png', width=400, height=300)

                # This initialization, although showcased on the docs, does not work
                # surrogate_explainer = interpreter.tree_surrogate(estimator_type_='classifier',
                #                                                 feature_names=featureNames[1:9],
                #                                                 class_names=["CYT", "ME3", "MIT", "NUC"], seed=5)
                # y_hat_train = model.predict(train_data)