def pdp_interact_explain(X, y, feature):
    """Fit a random forest on ``X``/``y`` and draw a contour PDP for ``feature``.

    ``X`` is ordinal-encoded with ``category_encoders`` and imputed with a
    default ``SimpleImputer`` before a 200-tree ``RandomForestClassifier`` is
    fitted on it.  The interaction is then computed with pdpbox and rendered
    as a contour plot.

    Args:
        X: Feature table accepted by ``ce.OrdinalEncoder``; the encoded result
            must expose ``.columns``.
        y: Target values used to fit the classifier.
        feature: Features handed to ``pdp_interact`` and reused as the axis
            names of the plot.

    Returns:
        None.  The plot is produced as a side effect.
    """
    import category_encoders as ce
    from sklearn.impute import SimpleImputer
    from sklearn.ensemble import RandomForestClassifier
    from pdpbox.pdp import pdp_interact, pdp_interact_plot

    # Encode, impute as needed
    X_encoded = ce.OrdinalEncoder().fit_transform(X)
    X_processed = SimpleImputer().fit_transform(X_encoded)

    # Pick a model and fit the data
    pdp_model = RandomForestClassifier(n_estimators=200,
                                       n_jobs=-1,
                                       random_state=6)
    pdp_model.fit(X_processed, y)

    # NOTE(review): the model is fitted on the imputed array but the
    # interaction is computed on the encoded, *non-imputed* frame; confirm
    # that X_encoded never contains missing values at this point.
    interaction = pdp_interact(model=pdp_model,
                               dataset=X_encoded,
                               model_features=X_encoded.columns,
                               features=feature)

    # pdpbox raises a TypeError while labelling the axes.  Only that known
    # error is ignored so that genuine failures (and KeyboardInterrupt) are
    # no longer silently swallowed.
    try:
        pdp_interact_plot(interaction,
                          feature_names=feature,
                          plot_type='contour')
    except TypeError:
        pass
예제 #2
0
    def plot_2d_pdp(self, features_2d_plot):
        """Render a contour interaction PDP for a feature pair and save it.

        The interaction is computed from ``self.model`` over ``self.x`` and
        the figure is written to ``<self.out>/dep_plot_2d.jpg`` at 300 dpi.

        :param features_2d_plot: features passed to ``pdp.pdp_interact`` and
            used as the plot's feature names
        """
        # Appearance settings forwarded to pdpbox
        contour_style = {
            # plot title and subtitle
            'title_fontsize': 15,
            'subtitle_fontsize': 12,
            # color for contour line
            'contour_color': 'white',
            'font_family': 'Arial',
            # matplotlib color map for interact plot
            'cmap': 'viridis',
            # fill alpha for interact plot
            'inter_fill_alpha': 0.8,
            # fontsize for interact plot text
            'inter_fontsize': 9,
        }

        # Compute the interaction data, then draw it
        model_columns = list(self.x.columns)
        interaction = pdp.pdp_interact(model=self.model,
                                       dataset=self.x,
                                       model_features=model_columns,
                                       features=features_2d_plot)
        pdp.pdp_interact_plot(pdp_interact_out=interaction,
                              feature_names=features_2d_plot,
                              plot_type='contour',
                              plot_params=contour_style)

        # Persist the figure and release it
        target_path = self.out + '/dep_plot_2d.jpg'
        plt.tight_layout()
        plt.savefig(target_path, dpi=300)
        plt.close()
 def show_PDP_interact(self, features=()):
     """Show one interaction PDP per entry of ``features``.

     Each entry is passed as ``features`` to ``pdp.pdp_interact`` (computed
     from ``self.model`` over ``self.X_train`` with ``self.feature_names``)
     and reused as the plot's feature names.  The default is an immutable
     empty tuple, so calling without arguments still plots nothing.
     """
     for pair in features:
         interaction = pdp.pdp_interact(self.model, self.X_train,
                                        model_features=self.feature_names,
                                        features=pair)
         pdp.pdp_interact_plot(interaction, feature_names=pair)
         # Vertical tick labels on the current axes
         plt.xticks(rotation=90)
         plt.show()
def part_plot_2D(model, total_features, X_val, y_val, feature1, feature2):
    """Show a grid-type interaction PDP for ``feature1`` versus ``feature2``.

    ``y_val`` is accepted but not used by this function.
    """
    interaction = pdp.pdp_interact(
        model=model,
        dataset=X_val,
        model_features=total_features,
        features=[feature1, feature2],
    )
    pdp.pdp_interact_plot(
        pdp_interact_out=interaction,
        feature_names=[feature1, feature2],
        plot_type='grid',
    )
    plt.show()
예제 #5
0
def two_dim_pdp(f):
    '''Show a grid-type two-dimensional PDP for the features ``f``.

    Relies on the module-level ``rf`` model and ``X_train`` data.
    '''
    interact_args = {
        'model': rf,
        'dataset': X_train,
        'model_features': X_train.columns,
        'features': f,
    }
    interaction = pdp.pdp_interact(**interact_args)
    pdp.pdp_interact_plot(pdp_interact_out=interaction,
                          feature_names=f,
                          plot_type='grid')
    plt.show()
예제 #6
0
 def pdp_plot_bivariate(self, model, X_val, feature_pair):
     '''Show a contour PDP for a pair of features.

     model: fitted model
     X_val: validation dataset; all of its columns are used as model features
     feature_pair: pair of features (list)
     '''
     model_columns = X_val.columns.tolist()
     interaction = pdp.pdp_interact(model=model,
                                    dataset=X_val,
                                    model_features=model_columns,
                                    features=feature_pair)
     pdp.pdp_interact_plot(pdp_interact_out=interaction,
                           feature_names=feature_pair,
                           plot_type='contour')
     plt.show()
예제 #7
0
def interaction(model, X, features, type = 'grid'):
    """
    Plot the partial-dependence interaction between ``features``.

    ``X`` supplies both the dataset and, through ``X.columns``, the model
    features; ``type`` is forwarded as pdpbox's ``plot_type``.
    """
    # Compute the interaction data
    interact_out = pdp_interact(
        model=model,
        dataset=X,
        model_features=X.columns,
        features=features,
    )
    # Draw it
    pdp_interact_plot(interact_out, feature_names=features, plot_type=type)
def plot_2D_partial_dependency(fitted_model, X_test : pd.DataFrame, model_features : list, features : list , plot_type = 'contour'):
	"""Show a two-feature partial dependence plot.

	fitted_model: model handed to ``pdp.pdp_interact``
	X_test: dataset the interaction is computed on
	model_features: feature names the model was trained with
	features: the features to interact; also used as the plot's feature names
	plot_type: forwarded to ``pdp.pdp_interact_plot``

	Original author's note: has an error with matplotlib version 3.0.0.
	"""

	# Create the data that we will plot
	pdp_obj = pdp.pdp_interact(model = fitted_model,
								dataset = X_test,
								model_features = model_features,
								features = features)

	# Plot it.  The labels come from `features`; the previous code referenced
	# an undefined name `feature` here and raised NameError.
	pdp.pdp_interact_plot(pdp_interact_out = pdp_obj,
		feature_names = features, plot_type = plot_type)
	plt.show()
예제 #9
0
 def test_grid(self, pdp_interact_out):
     """Grid plot for age/fare returns a Figure and the two expected axes."""
     plot_kwargs = dict(pdp_interact_out=pdp_interact_out,
                        feature_names=['age', 'fare'],
                        plot_type='grid',
                        x_quantile=True)
     fig, axes = pdp_interact_plot(**plot_kwargs)

     assert type(fig) == matplotlib.figure.Figure
     assert sorted(axes.keys()) == ['pdp_inter_ax', 'title_ax']
     # Every returned entry must be a plain Subplot
     for name in axes.keys():
         entry = axes[name]
         assert type(entry) == matplotlib.axes._subplots.Subplot
예제 #10
0
def pdp_2d(clf,X,Y,features_to_plot,label,plot_type='contour'):
    """Plot a two-feature PDP for one label of a multi-label classifier.

    Args:
        clf: Multi-learn object exposing ``classifiers_``, one per column of ``Y``.
        X: Feature frame; all of its columns are used as model features.
        Y: Label frame; ``label`` is looked up among its columns.
        features_to_plot: Features handed to ``pdp.pdp_interact``.
        label: Column name in ``Y`` selecting which classifier to explain.
        plot_type: ``'grid'`` or ``'contour'``.

    Returns:
        The ``(fig, ax)`` pair returned by ``pdp.pdp_interact_plot``.

    Raises:
        ValueError: If ``plot_type`` is neither ``'grid'`` nor ``'contour'``
            (previously this surfaced as an UnboundLocalError at ``return``).
    """
    # Reject unsupported plot types before doing any work.
    if plot_type not in ('grid', 'contour'):
        raise ValueError(
            "plot_type must be 'grid' or 'contour', got {!r}".format(plot_type))

    from pdpbox import pdp

    # Extract the classifier object from the clf multilearn object
    index = Y.columns.to_list().index(label)
    clf = clf.classifiers_[index]

    # Silence the classifier while plotting; the previous setting is restored
    # afterwards (True when the attribute did not exist), even on failure.
    previous_verbose = getattr(clf, 'verbose', True)
    clf.verbose = False
    try:
        inter = pdp.pdp_interact(model=clf, dataset=X,
                                 model_features=X.columns.to_list(),
                                 features=features_to_plot,
                                 percentile_ranges=[(5, 95), (5, 95)])
        if plot_type == 'grid':
            fig, ax = pdp.pdp_interact_plot(pdp_interact_out=inter,
                                            feature_names=features_to_plot,
                                            plot_type='grid',
                                            x_quantile=True, plot_pdp=True)
        else:
            fig, ax = pdp.pdp_interact_plot(pdp_interact_out=inter,
                                            feature_names=features_to_plot,
                                            plot_type='contour')
    finally:
        clf.verbose = previous_verbose

    return fig, ax
예제 #11
0
def ind_cond_exp(model_line, X_train, y_data):
    """Plot the interaction of the 2nd and 3rd columns of ``X_train``.

    Prints every column name, draws a grid interaction PDP with pdpbox, then
    builds a plotly surface from the pivoted predictions and installs it as
    the layout of ``ee``.  ``y_data`` is accepted but not used.

    NOTE(review): ``go``, ``ee``, ``html`` and ``dcc`` are not defined in this
    function and must come from module scope - confirm they are imported.

    Returns whatever ``ee.index()`` returns.
    """
    # Echo and collect the column names
    column_names = list(X_train.columns)
    for name in column_names:
        print(name)

    from pdpbox.pdp import pdp_interact, pdp_interact_plot

    # Second and third columns form the interacting pair
    pair = column_names[1:3]

    inter_out = pdp_interact(
        model=model_line,
        dataset=X_train,
        model_features=column_names,
        features=pair,
    )
    pdp_interact_plot(inter_out, plot_type='grid', feature_names=pair)

    import seaborn as sns

    # Rows are reversed so the y axis is ascending
    grid = inter_out.pdp.pivot_table(values='preds',
                                     columns=pair[0],
                                     index=pair[1])
    grid = grid[::-1]

    surface = go.Surface(x=grid.columns, y=grid.index, z=grid.values)
    fig = go.Figure(surface)
    fig.show()

    ee.layout = html.Div([dcc.Graph(figure=fig)])
    print("done")

    return ee.index()
예제 #12
0
 def test_grid(self, pdp_interact_out):
     """Grid plot for feat_67/feat_24 yields nine interaction subplots."""
     subplot_type = matplotlib.axes._subplots.Subplot
     fig, axes = pdp_interact_plot(
         pdp_interact_out=pdp_interact_out,
         feature_names=['feat_67', 'feat_24'],
         plot_type='grid',
         x_quantile=True,
     )

     assert type(fig) == matplotlib.figure.Figure
     assert sorted(axes.keys()) == ['pdp_inter_ax', 'title_ax']
     assert len(axes['pdp_inter_ax']) == 9
     assert type(axes['title_ax']) == subplot_type
     # Each of the nine interaction axes is a plain Subplot
     for position in range(9):
         assert type(axes['pdp_inter_ax'][position]) == subplot_type
예제 #13
0
def plot_pdp_interact(model, X_train, feats):
    """
    Plot the dependency of the target on a pair of features.

    :param model: Trained model
    :param X_train: Dataframe passed through ``get_sample(X_train, 1000)``
        before predictions are computed (presumably a 1000-row sample -
        verify against ``get_sample``)
    :param feats: List (size 2) of features to plot target dependency for
    :return: the result of ``pdp.pdp_interact_plot`` drawn with ``plot_pdp=True``
    """
    sampled = get_sample(X_train, 1000)
    interaction = pdp.pdp_interact(model, sampled, sampled.columns, feats)
    plot_result = pdp.pdp_interact_plot(interaction, feats, plot_pdp=True)
    return plot_result
def show_partial_dependence(model, val_X, features):
    '''
        Takes the model and dataframe for validation set (X)
        then plots the partial dependence plot.

        features: a single feature (anything that is not a list) for a
            one-feature plot, or a list of exactly two features for an
            interaction plot.  A list of any other length only prints an
            error message.

        Only the int64 columns of val_X are passed as model features.

        For more on this, check https://www.kaggle.com/dansbecker/partial-plots?utm_medium=email&utm_source=mailchimp&utm_campaign=ml4insights
    '''
    is_feature_list = isinstance(features, list)

    # Reject unusable input before importing the plotting libraries.
    if is_feature_list and len(features) != 2:
        print(
            'Error: `features` must be a single feature or a list of exactly two features.'
        )
        return

    from matplotlib import pyplot as plt
    from pdpbox import pdp

    feature_names = [i for i in val_X.columns if val_X[i].dtype in [np.int64]]

    if not is_feature_list:
        # Create the data that we will plot
        pdp_feature = pdp.pdp_isolate(model=model,
                                      dataset=val_X,
                                      model_features=feature_names,
                                      feature=features)

        # Plot it (previously referenced the undefined name `feature`)
        pdp.pdp_plot(pdp_feature, features)
        plt.show()
    else:
        # Same idea as the single-feature plot, using pdp_interact and
        # pdp_interact_plot for the feature pair
        inter1 = pdp.pdp_interact(model=model,
                                  dataset=val_X,
                                  model_features=feature_names,
                                  features=features)

        # Labels come from `features` (previously the undefined name
        # `features_to_plot`)
        pdp.pdp_interact_plot(pdp_interact_out=inter1,
                              feature_names=features,
                              plot_type='contour')
        plt.show()
예제 #15
0
 def pdp_interact_plot(self, feature, var_name=None, sample = 10000, which_classes = None,
                  num_grid_points=[10, 10], plot_types = None, plot_params = {'cmap': ["#00cc00", "#002266"]}):        
     """Draw interaction PDPs for ``feature`` using ``self.md``.

     The interaction is computed once on ``self.sample(sample)`` with four
     jobs, then drawn once per plot type: both ``'contour'`` and ``'grid'``
     when ``plot_types`` is None, otherwise only the single given type.
     ``var_name`` replaces ``feature`` as the displayed names when truthy.
     ``plt.show()`` is called once, after all plots have been drawn.
     """
     interaction = pdp.pdp_interact(
         model=self.md,
         dataset=self.sample(sample),
         model_features=self.features,
         features=feature,
         num_grid_points=num_grid_points,
         n_jobs=4)

     # A single requested type is wrapped into a one-element list
     if plot_types is None:
         requested_types = ['contour', 'grid']
     else:
         requested_types = [plot_types]

     display_names = var_name or feature
     for current_type in requested_types:
         _fig, _axes = pdp.pdp_interact_plot(
             pdp_interact_out=interaction,
             feature_names=display_names,
             plot_type=current_type,
             plot_pdp=True,
             which_classes=which_classes,
             plot_params=plot_params)
     plt.show()
예제 #16
0
 def test_contour_1(self, pdp_interact_out):
     """Contour plot with plot_pdp for class 1 exposes the three inner axes."""
     inner_keys = ['_pdp_inter_ax', '_pdp_x_ax', '_pdp_y_ax']
     subplot_type = matplotlib.axes._subplots.Subplot

     fig, axes = pdp_interact_plot(
         pdp_interact_out=pdp_interact_out,
         feature_names=['feat_67', 'feat_24'],
         plot_type='contour',
         x_quantile=True,
         plot_pdp=True,
         which_classes=[1],
     )

     assert type(fig) == matplotlib.figure.Figure
     assert sorted(axes.keys()) == ['pdp_inter_ax', 'title_ax']
     assert sorted(axes['pdp_inter_ax'].keys()) == inner_keys
     assert type(axes['title_ax']) == subplot_type
     # Interaction axis plus the two marginal PDP axes
     for key in inner_keys:
         assert type(axes['pdp_inter_ax'][key]) == subplot_type
def plot_2d_pdp(model,
                X,
                y=None,
                X_unscaled=None,
                model_features=None,
                features=None,
                **kwargs):
    '''
    Plots a two-feature interaction pdp, optionally labelling both axes in
    unscaled units.

    y: when given, the model is first fitted with model.fit(X, y).

    X_unscaled: when given, must be indexable by the names in `features`.
    The mean and standard deviation of features[0] and features[1] in
    X_unscaled are used to format the x and y tick labels as
    tick * std + mean, with one decimal.

    Extra keyword arguments are forwarded to pdp.pdp_interact_plot.

    Returns the (fig, ax) pair from pdp.pdp_interact_plot.
    '''

    def _tick_unscaler(mean, std):
        # Build a FuncFormatter callback mapping a scaled tick to raw units
        def _format(value, pos):
            return ('%.1f' % (value * std + mean))

        return _format

    if y is not None:
        model.fit(X, y)

    interaction = pdp.pdp_interact(model=model,
                                   dataset=X,
                                   model_features=model_features,
                                   features=features)

    fig, ax = pdp.pdp_interact_plot(interaction,
                                    feature_names=features,
                                    **kwargs)

    if X_unscaled is None:
        return fig, ax

    x_column = X_unscaled[features[0]]
    format_x = _tick_unscaler(x_column.mean(), x_column.std())

    y_column = X_unscaled[features[1]]
    format_y = _tick_unscaler(y_column.mean(), y_column.std())

    inter_ax = ax['pdp_inter_ax']
    inter_ax.xaxis.set_major_formatter(mticker.FuncFormatter(format_x))
    inter_ax.yaxis.set_major_formatter(mticker.FuncFormatter(format_y))

    return fig, ax
예제 #18
0
def pdp_2d_pdp(term_type, train_data, fea_2d_1, fea_2d_2, fea_nam):
    """Save a contour interaction PDP of two features for the global ``rfr``.

    Each feature gets a custom grid of 10 evenly spaced points between its
    minimum and maximum in ``train_data``.  The figure is saved under
    ``./results/`` with ``term_type`` and both feature names in the file name.

    Returns the matplotlib figure.
    """
    pair = [fea_2d_1, fea_2d_2]

    # One evenly spaced grid per feature, spanning its observed range
    grids = []
    for name in pair:
        observed = train_data[name].values
        grids.append(np.linspace(min(observed), max(observed), 10))

    inter_rf = pdp.pdp_interact(
        model=rfr,
        dataset=train_data,
        model_features=fea_nam,
        features=pair,
        cust_grid_points=grids)

    fig, axes = pdp.pdp_interact_plot(
        inter_rf, [fea_2d_1, fea_2d_2],
        x_quantile=False, plot_type='contour', plot_pdp=False)

    out_path = "./results/{}_{}-{}_2d_pdp.png".format(term_type, fea_2d_1, fea_2d_2)
    fig.savefig(out_path, dpi=300)
    return fig
예제 #19
0
def save_pdp_plot_2d(model, X_train, features, n_jobs, figure_saver=None):
    """Compute and draw a 20x20 interaction PDP, optionally saving it.

    ``model.n_jobs`` is set to ``n_jobs`` and the interaction is computed
    inside a "threading" ``parallel_backend`` with the same job count.  When
    ``figure_saver`` is given, the figure is saved under the ``pdp_2d``
    sub-directory with the feature names joined by ``"__"``.
    """
    model.n_jobs = n_jobs

    interact_kwargs = dict(
        model=model,
        dataset=X_train,
        model_features=X_train.columns,
        features=features,
        num_grid_points=[20, 20],
    )
    with parallel_backend("threading", n_jobs=n_jobs):
        interaction = pdp.pdp_interact(**interact_kwargs)

    fig, axes = pdp.pdp_interact_plot(interaction,
                                      features,
                                      x_quantile=True,
                                      figsize=(7, 8))
    # Slant the x tick labels
    axes["pdp_inter_ax"].xaxis.set_tick_params(rotation=45)

    if figure_saver is None:
        return
    figure_name = "__".join(features)
    figure_saver.save_figure(fig, figure_name, sub_directory="pdp_2d")
예제 #20
0
def plot_modal():
    """Render the modal page showing an interaction PDP for two features.

    Reads from the query string: ``model`` (int, default 0), ``estimator``,
    ``f1``, ``f2`` (the two features) and ``t1`` (the target label).  Reads
    ``filename``, ``target_ft`` and ``features`` from the session.

    The pickled pipeline ``tmp_files/model_<estimator>_<model>.pickle`` is
    loaded, a grid interaction PDP for the class matching ``t1`` is drawn and
    saved under ``static/images/figs/``, and ``modal_plot.html`` is rendered
    with the saved plot's name.

    Returns:
        The rendered template, or a ``(message, 400)`` pair when ``f1``,
        ``f2`` or ``t1`` is missing or ``t1`` does not name a known class.
    """
    path = os.path.join(app.config['UPLOAD_FOLDER'],
                        session.get("filename", "not set"))
    index = request.args.get('model', default=0, type=int)
    estim = request.args.get('estimator', default=None, type=str)
    target_ft = session.get('target_ft', 'not set')
    features = session.get('features', 'not set')
    f1 = request.args.get('f1', default=None, type=str)
    f2 = request.args.get('f2', default=None, type=str)
    t1 = request.args.get('t1', default=None, type=str)

    # Without these the code below failed with AttributeError / TypeError.
    if f1 is None or f2 is None or t1 is None:
        return "Missing required query parameter(s): f1, f2 and t1", 400

    X, y, data = process_data(path, "csv", target_ft)
    # remove nans
    data = data.dropna()

    # Position of the requested label among the sorted unique targets.
    try:
        chosen_class = list(np.unique(y)).index(int(float(t1)))
    except ValueError:
        return "Unknown target class", 400

    # SECURITY NOTE(review): `estim` comes straight from the query string and
    # is interpolated into the file path, and pickle.load executes arbitrary
    # code from the file it reads.  Restrict `estim` to known estimator names
    # and make sure tmp_files/ only ever contains trusted pickles.
    with open("tmp_files/model_{}_{}.pickle".format(estim, str(index)),
              'rb') as filehandler:
        pipe = pickle.load(filehandler)
    mod_path = "modal_" + str(f1.replace('.', '_')) + \
        "_" + str(f2.replace('.', '_'))
    pdp_V1_V2 = pdp.pdp_interact(model=pipe,
                                 dataset=data,
                                 model_features=features,
                                 features=[f1, f2],
                                 num_grid_points=None,
                                 percentile_ranges=[None, None])
    fig, axes = pdp.pdp_interact_plot(pdp_V1_V2, [f1, f2],
                                      plot_type='grid',
                                      x_quantile=True,
                                      ncols=2,
                                      plot_pdp=True,
                                      which_classes=[chosen_class],
                                      plot_params={
                                          "subtitle":
                                          "For Class {}, Label: {}".format(
                                              chosen_class, t1)
                                      })
    fig.savefig("static/images/figs/" + mod_path,
                bbox_inches="tight",
                transparent=True)
    # Release the figure; opening a new one here (as before) left one more
    # figure registered with pyplot on every request.
    plt.close(fig)
    return render_template("modal_plot.html", plot_name=mod_path)
def construct_pdp_interact(model, feature_names,
                          dataset_x = dat_train_x, dataset_y = dat_train_y,
                          num_grid_points = num_grid_points_int, n_jobs = n_jobs,
                          model_features = dat_train_x.columns):
    """Build a labelled contour interaction PDP for two features.

    The interaction is computed on ``dataset_x`` joined with ``dataset_y``
    and drawn with ``plot_params_pdp_int_default``.  Axis labels and the
    title use the long names from ``varnames_long_dict``.

    Returns the matplotlib figure.
    """
    joined = dataset_x.join(dataset_y)
    inter_current = pdp.pdp_interact(
        model=model,
        dataset=joined,
        model_features=model_features,
        features=feature_names,
        num_grid_points=num_grid_points,
        n_jobs=n_jobs)  # needs to be 1 for XGBoost model!

    fig, axes = pdp.pdp_interact_plot(
        inter_current,
        feature_names=feature_names,
        x_quantile=False,
        plot_type='contour',
        plot_pdp=False,
        plot_params=plot_params_pdp_int_default)

    inter_ax = axes["pdp_inter_ax"]
    x_long = varnames_long_dict[feature_names[0]]
    y_long = varnames_long_dict[feature_names[1]]

    inter_ax.set_xlabel(x_long)
    inter_ax.set_ylabel(y_long)
    # TODO (from original author): y-labels
    title = 'Number of bike rides per hour\n(Partial Dependence Plot) for\n{0} and {1}\n'.format(
        x_long, y_long)
    inter_ax.set_title(title, y=1)

    return fig
예제 #22
0
def pdp_interact_plot(model,
                      dataset,
                      model_features,
                      feature1,
                      feature2,
                      plot_type="grid",
                      x_quantile=True,
                      plot_pdp=False):
    """Compute and draw the interaction PDP of two features in one call.

    Runs ``pdp.pdp_interact`` for ``[feature1, feature2]`` and passes the
    result to ``pdp.pdp_interact_plot`` with the given plot options.

    Returns only the figure; the axes are discarded.
    """
    interaction = pdp.pdp_interact(model=model,
                                   dataset=dataset,
                                   model_features=model_features,
                                   features=[feature1, feature2])

    plot_options = {
        "plot_type": plot_type,
        "x_quantile": x_quantile,
        "plot_pdp": plot_pdp,
    }
    figure, _axes = pdp.pdp_interact_plot(pdp_interact_out=interaction,
                                          feature_names=[feature1, feature2],
                                          **plot_options)
    return figure
예제 #23
0
    model=model,
    dataset=X,
    model_features=x_cols,
    features=x_cols[:2],
    num_grid_points=[10, 10],
    percentile_ranges=[None, None],
    n_jobs=1,
)

# %% pdp_interact_plot: grid

# Draw the interaction result as a grid plot (two columns of subplots) for
# classes 1, 2 and 3, with x_quantile and plot_pdp enabled.
# NOTE(review): the preceding call passes features=x_cols[:2], while
# feature_names here receives all of x_cols - confirm this is intended.
fig, axes = pdp.pdp_interact_plot(
    pdp_interacted_tmp,
    feature_names=x_cols,
    plot_type='grid',
    x_quantile=True,
    ncols=2,
    plot_pdp=True,
    which_classes=[1, 2, 3],
)

# %% pdp_interact_plot: contour

try:
    fig, axes = pdp.pdp_interact_plot(
        pdp_interacted_tmp,
        feature_names=x_cols,
        plot_type='contour',
        x_quantile=True,
        # ncols=1,
        plot_pdp=True,
예제 #24
0
        encoded = fig_to_base64(pdp_plot_feature)
        html_pdp = '<img class="img-fluid" src="data:image/png;base64, {}">'.format(
            encoded.decode('utf-8'))
        html_partial_plot += html_pdp

    # -------------------------------------------------------------
    # 2D PARTIAL DEPENDENCE PLOTS
    # Similar to previous PDP plot except we use pdp_interact instead of pdp_isolate
    # and pdp_interact_plot instead of pdp_isolate_plot
    # features_to_plot = ['preg', 'skin']
    inter1 = pdp.pdp_interact(model=loaded_model,
                              dataset=dataframe_test,
                              model_features=feature_names,
                              features=features_to_plot2d)
    partial_plot = pdp.pdp_interact_plot(
        pdp_interact_out=inter1, feature_names=features_to_plot2d
    )  # plot_type='contour'  plot_type='grid'
    encoded = fig_to_base64(partial_plot)
    html_partial_plot2d = '<img class="img-fluid" src="data:image/png;base64, {}">'.format(
        encoded.decode('utf-8'))

    # -------------------------------------------------------------
    # SHAP PLOT
    data_for_prediction = dataframe_test.iloc[shap_row_to_show]
    explainer = shap.KernelExplainer(loaded_model.predict_proba,
                                     dataframe_test.values)
    shap_values = explainer.shap_values(data_for_prediction)
    shap.initjs()
    shap_plot = shap.force_plot(explainer.expected_value[1],
                                shap_values[1],
                                data_for_prediction,
# Horizontal bar chart of the n largest values in importances4
#plt.title(f'Top {n} features pipeline5')
plt.title(f'Top {n} features Gradient Boosting')
importances4.sort_values()[-n:].plot.barh(color='grey');

!pip install pdpbox

# Partial dependence plot for the interaction of two features
from pdpbox.pdp import pdp_interact, pdp_interact_plot
features2 = ['Latitude', 'Longitude Difference to State Capital']
interaction = pdp_interact(
                           model=gb,
                           dataset=X_val,
                           model_features=X_val.columns,
                           features=features2
                           )
pdp_interact_plot(interaction, plot_type='grid', feature_names=features2);

# A two-feature partial dependence plot in 3D: pivot the interaction
# predictions into a grid with features2[0] as columns and features2[1] as rows
pdp = interaction.pdp.pivot_table(
                                  values='preds',
                                  columns=features2[0],
                                  index=features2[1]
                                  )[::-1] # Slice notation to reverse index order so y axis is ascending
import plotly.graph_objs as go

# NOTE(review): `target` is not used in the visible part of this script
target = 'Value of d parameter'

# Surface trace over the pivoted grid
surface = go.Surface(x=pdp.columns,
                     y=pdp.index,
                     z=pdp.values)
# PDP interaction between `is_repeated_guest` and `total_of_special_requests`.
# Original author's note: the number of unique grid points is 2 for total of
# special requests and 4 for repeated guest.
# NOTE(review): those two counts may be swapped - verify against the plot.
pdf_features = ['is_repeated_guest', 'total_of_special_requests']

booking_interaction = pdp_interact(
    model=Rand_forest, 
    dataset=X_val,
    model_features=X_val.columns, 
    features=pdf_features
)

# Grid plot of the interaction between the two features above
# (original author's note: 2:4 grid points).
pdp_interact_plot(booking_interaction, plot_type='grid', 
                  feature_names=pdf_features);

"""### Shapley Values
It is a good technique to show the insight of the model predictor and break down each model individually.
"""

# Explain an individual observation: take the first row of X_test_encoded.
# The double brackets keep it as a one-row dataframe.
first_row=X_test_encoded.iloc[[0]]
first_row

# The actual reservation status for that first row, taken from y_test
# (original author's note: it is "checkout").
y_test.iloc[[0]]
# Best number of iterations: 32

# --------------------Model interpretation----------------
# Plotting feature importances of the booster `bst`
gpb.plot_importance(bst)

# Partial dependence plots
from pdpbox import pdp
# Single variable plots (takes a few seconds to compute)
pdp_dist = pdp.pdp_isolate(model=bst, dataset=X_train, model_features=X_train.columns,
                           feature='variable_2', num_grid_points=50)
pdp.pdp_plot(pdp_dist, 'variable_2', plot_lines=True)
# Two variable interaction plot
inter_rf = pdp.pdp_interact(model=bst, dataset=X_train, model_features=X_train.columns,
                             features=['variable_1','variable_2'])
pdp.pdp_interact_plot(inter_rf, ['variable_1','variable_2'], x_quantile=True,
                      plot_type='contour', plot_pdp=True)# ignore any error message

# SHAP values and dependence plots
# Note: you need shap version>=0.36.0
import shap
shap_values = shap.TreeExplainer(bst).shap_values(X_test)
shap.summary_plot(shap_values, X_test)
shap.dependence_plot("variable_2", shap_values, X_test)


# --------------------Comparison to alternative approaches----------------
# Empty results table: one row per approach, columns for RMSE and run time
results = pd.DataFrame(columns = ["RMSE","Time"],
                       index = ["GPBoost", "Linear_ME","Boosting_Ign","Boosting_Cat","MERF"])
# 1. GPBoost
gp_model = gpb.GPModel(group_data=group_train)
start_time = time.time() # measure time
# Statistics of survivors based on Age and Pclass
fig, axes, summary_df = info_plots.target_plot_interact(
    df=titanic_data, features=['Age', 'Pclass'], feature_names=['Age', 'Pclass'], target=titanic_target
)
display(summary_df.head())
# Predictions of our model: impact of Age and Pclass
fig, axes, summary_df = info_plots.actual_plot_interact(
    model=titanic_model, X=titanic_data[titanic_features], features=['Age', 'Pclass'], feature_names=['Age', 'Pclass']
)
display(summary_df.head())
# PDP for the interaction between Age and Pclass
inter1 = pdp.pdp_interact(
    model=titanic_model, dataset=titanic_data, model_features=titanic_features, features=['Age', 'Pclass']
)
# NOTE(review): the first display name is lowercase 'age' here but 'Age' in
# the plots above - confirm whether the label should match.
fig, axes = pdp.pdp_interact_plot(
    pdp_interact_out=inter1, feature_names=['age', 'Pclass'], plot_type='contour', x_quantile=True, plot_pdp=True
)

# Let's study the link between Fare and Sex
# Statistics of survivors based on Fare and Sex
fig, axes, summary_df = info_plots.target_plot_interact(
    df=titanic_data, features=['Fare', 'Sex'], feature_names=['Fare', 'Sex'], target=titanic_target
)
display(summary_df.head())
# Predictions of our model: impact of Fare and Sex
fig, axes, summary_df = info_plots.actual_plot_interact(
    model=titanic_model, X=titanic_data[titanic_features], features=['Fare', 'Sex'], feature_names=['Fare', 'Sex']
)
display(summary_df.head())
#PDP for the interaction between Age and Pclass
inter1 = pdp.pdp_interact(
예제 #29
0
# %%

##################################
# Interaction Partial Dependency #
##################################
# Interaction of Miles_traveled with Season_offense for the rf_mod model
inter1 = pdp.pdp_interact(model=rf_mod,
                          dataset=train_X,
                          model_features=train_X.columns,
                          features=['Miles_traveled', 'Season_offense'])

# Grid plot with serif fonts and enlarged text
fig, ax = pdp.pdp_interact_plot(
    pdp_interact_out=inter1,
    feature_names=['Miles_traveled', 'Season_offense'],
    plot_type="grid",
    plot_params={
        'font_family': 'serif',
        'title_fontsize': 15,
        'fontsize': 15
    })

# Saved through the pyplot module reached as `pdp.plt`
pdp.plt.savefig('../09_figures/inter_offense.png')

#%%

# Same interaction computation, this time against Season_defense
inter2 = pdp.pdp_interact(model=rf_mod,
                          dataset=train_X,
                          model_features=train_X.columns,
                          features=['Miles_traveled', 'Season_defense'])

fig, ax = pdp.pdp_interact_plot(
예제 #30
0
    X=df[X.columns],
    feature=_,
    feature_name=_,
    predict_kwds={}
    )

# Model predictions across the two interacting features in interactions_2way
fig, axes, summary_df = info_plots.actual_plot_interact(
    model=baseline, X=df[X.columns], features=interactions_2way, feature_names=interactions_2way
)

# PDP plot: grid plot
# NOTE(review): the dataset passed here is the full `df`, whereas the call
# above uses df[X.columns] - confirm the extra columns are harmless.
interactions = pdp.pdp_interact(
    model=baseline, dataset=df, model_features=X.columns, features=interactions_2way
)

fig, axes = pdp.pdp_interact_plot(interactions,interactions_2way , plot_type='grid', x_quantile=True, plot_pdp=False)


# SHAP: tree explainer for the baseline model, summarised over x_train[_feat]
shap.initjs()
explainer=shap.TreeExplainer(baseline)
shap_values=explainer.shap_values(x_train[_feat])
shap.summary_plot(shap_values,x_train[_feat])

def ABS_SHAP(df_shap,df):
    #import matplotlib as plt
    # Make a copy of the input data
    shap_v = pd.DataFrame(df_shap)
    feature_list = df.columns
    shap_v.columns = feature_list
    df_v = df.copy().reset_index().drop('index',axis=1)