def plot_2d_pdp(self, features_2d_plot):
        # creating data to plot
        inter = pdp.pdp_interact(model=self.model,
                                 dataset=self.x,
                                 model_features=list(self.x.columns),
                                 features=features_2d_plot)

        # plot it
        plot_params = {
            # plot title and subtitle
            'title_fontsize': 15,
            'subtitle_fontsize': 12,
            # color for contour line
            'contour_color': 'white',
            'font_family': 'Arial',
            # matplotlib color map for interact plot
            'cmap': 'viridis',
            # fill alpha for interact plot
            'inter_fill_alpha': 0.8,
            # fontsize for interact plot text
            'inter_fontsize': 9,
        }
        pdp.pdp_interact_plot(pdp_interact_out=inter,
                              feature_names=features_2d_plot,
                              plot_type='contour',
                              plot_params=plot_params)

        # saving the plot
        plt.tight_layout()
        plt.savefig(self.out + '/dep_plot_2d.jpg', dpi=300)
        plt.close()
def plotFeatures(model, df, features, cat_features=[], encodings=None):
    '''
    Plot a 2-D PDP interaction heatmap for a pair of features.
    '''
    
    interaction = pdp_interact(model=model,
                              dataset=df,
                              model_features=df.columns,
                              features=features,)
  
    pdp = interaction.pdp.pivot_table(values='preds',
                                        columns=features[0],
                                        index=features[1])[::-1]
    if encodings is not None:
        for item in encodings:
            if item['col'] in features:
                feature_mapping = item['mapping']
                #feature_mapping = feature_mapping[feature_mapping.index.dropna()]
                cat_names = list(feature_mapping.keys())
                cat_codes = list(feature_mapping.values())
                if features.index(item['col']) == 0:         
                    pdp = pdp.rename(columns=dict(zip(cat_codes, cat_names)))
                else:
                    pdp = pdp.rename(index=dict(zip(cat_codes, cat_names)))

    plt.figure(figsize=(7,7))            
    sns.heatmap(pdp, annot=True, fmt='.3f', cmap='viridis')
    plt.title(f'PDP interact for "{features[0]}" and "{features[1]}"')
    plt.show()
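
# A minimal usage sketch (hypothetical names), assuming a fitted `model`, an
# ordinal-encoded frame `X_val`, and an `encodings` list whose entries map
# category names to the integer codes used in the frame (matching the
# item['col'] / item['mapping'] access above).
encodings = [{'col': 'neighborhood',
              'mapping': {'Downtown': 0, 'Suburb': 1, 'Rural': 2}}]
plotFeatures(model, X_val, features=['neighborhood', 'sqft'], encodings=encodings)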
def pdp_interact_explain(X, y, feature):
    import category_encoders as ce
    from sklearn.pipeline import make_pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.ensemble import RandomForestClassifier
    from pdpbox.pdp import pdp_interact, pdp_interact_plot

    # Encode, impute as needed
    X_encoded = ce.OrdinalEncoder().fit_transform(X)
    X_processed = SimpleImputer().fit_transform(X_encoded)

    # Pick a model and fit the data
    pdp_model = RandomForestClassifier(n_estimators=200,
                                       n_jobs=-1,
                                       random_state=6)
    pdp_model.fit(X_processed, y)

    # The actual plotting
    interact_out = pdp_interact(model=pdp_model,
                                dataset=X_encoded,
                                model_features=X_encoded.columns,
                                features=feature)
    # There's a TypeError bug in pdp_interact_plot that prevents the axes from
    # getting labels; catch it and let the plotting continue.
    try:
        pdp_interact_plot(interact_out,
                          feature_names=feature,
                          plot_type='contour')
    except TypeError:
        pass
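
# A minimal usage sketch, assuming a raw feature frame `X`, a binary target `y`,
# and two hypothetical column names; the function encodes, imputes, fits its own
# RandomForestClassifier, and draws the contour interaction plot.
pdp_interact_explain(X, y, feature=['age', 'fare'])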
 def show_PDP_interact(self, features=[]):
     for f in features:
         pdp_interact = pdp.pdp_interact(self.model, self.X_train,
                                         model_features=self.feature_names,
                                         features=f)
         pdp.pdp_interact_plot(pdp_interact, feature_names=f)
         plt.xticks(rotation=90)
         plt.show()
Example #5
def compute_pdp_interact(model, dataset, model_features, features):
    pdp_interact_out = pdp.pdp_interact(
        model=model,
        dataset=dataset,
        model_features=model_features,
        features=features,
    )
    return pdp_interact_out
def part_plot_2D(model, total_features, X_val, y_val, feature1, feature2):
    inter1 = pdp.pdp_interact(model=model,
                              dataset=X_val,
                              model_features=total_features,
                              features=[feature1, feature2])
    pdp.pdp_interact_plot(pdp_interact_out=inter1,
                          feature_names=[feature1, feature2],
                          plot_type='grid')
    plt.show()
Example #7
def two_dim_pdp(f):
    ''' Function for plotting a two-dimensional PDP interaction '''
    inter = pdp.pdp_interact(model=rf,
                             dataset=X_train,
                             model_features=X_train.columns,
                             features=f)
    pdp.pdp_interact_plot(pdp_interact_out=inter,
                          feature_names=f,
                          plot_type='grid')
    plt.show()
Example #8
 def pdp_plot_bivariate(self, model, X_val, feature_pair):
     ''' pdp plot for a feature pair
     model: fitted model
     X_val: validation dataset
     feature_pair: pair of features (list)
     '''
     partial_plot = pdp.pdp_interact(model, X_val,
                                     X_val.columns.tolist(), feature_pair)
     pdp.pdp_interact_plot(partial_plot, feature_pair, plot_type='contour')
     plt.show()
Example #9
def interaction(model, X, features, type = 'grid'):
    """
    plot interaction between features
    """
    # instantiate the interaction variable
    interaction = pdp_interact(model=model,
                               dataset=X,
                               model_features=X.columns,
                               features=features)
    # plot the interaction
    pdp_interact_plot(interaction, plot_type=type, feature_names=features)
Example #10
def plot_pdp_interact(model, X_train, feats):
    """
    Function to plot dependency of target variable on the feature
    :param model: Trained model
    :param X_train: Datafram to get prediction of model from
    :param feats: List (size 2) of feature to plot target dependency for
    :param clusters: Flag to indicate is clusters are needed
    :param feat_name: Feature name to display on plot
    :return: partial dependency plot
    """
    x = get_sample(X_train, 1000)
    p = pdp.pdp_interact(model, x, x.columns, feats)
    return pdp.pdp_interact_plot(p, feats, plot_pdp=True)
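
# A minimal usage sketch, assuming a fitted `model`, a training frame `X_train`,
# and that the `get_sample` helper used above is available; the two feature
# names are hypothetical.
fig, axes = plot_pdp_interact(model, X_train, ['feature_a', 'feature_b'])
plt.show()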
def plot_2D_partial_dependency(fitted_model, X_test: pd.DataFrame, model_features: list, features: list, plot_type='contour'):
    """ There is an error with matplotlib version 3.0.0 """

    # Create the data that we will plot
    pdp_obj = pdp.pdp_interact(model=fitted_model,
                               dataset=X_test,
                               model_features=model_features,
                               features=features)

    # plot it
    pdp.pdp_interact_plot(pdp_interact_out=pdp_obj,
                          feature_names=features, plot_type=plot_type)
    plt.show()
 def test_binary_onehot(self, titanic_model, titanic_data,
                        titanic_features):
     pdp_interact_out = pdp_interact(
         model=titanic_model,
         dataset=titanic_data,
         model_features=titanic_features,
         features=['Sex', ['Embarked_C', 'Embarked_S', 'Embarked_Q']])
     assert pdp_interact_out._type == 'PDPInteract_instance'
     assert pdp_interact_out.n_classes == 2
     assert pdp_interact_out.which_class is None
     assert pdp_interact_out.features == [
         'Sex', ['Embarked_C', 'Embarked_S', 'Embarked_Q']
     ]
     assert pdp_interact_out.feature_types == ['binary', 'onehot']
     assert len(pdp_interact_out.feature_grids) == 2
     assert_array_equal(pdp_interact_out.feature_grids[0], np.array([0, 1]))
     assert_array_equal(
         pdp_interact_out.feature_grids[1],
         np.array(['Embarked_C', 'Embarked_S', 'Embarked_Q']))
     assert len(pdp_interact_out.pdp_isolate_outs) == 2
     expected = pd.DataFrame({
         'Embarked_C': {
             0: 0,
             3: 0,
             5: 1
         },
         'Embarked_Q': {
             0: 1,
             3: 1,
             5: 0
         },
         'Embarked_S': {
             0: 0,
             3: 0,
             5: 0
         },
         'Sex': {
             0: 0,
             3: 1,
             5: 1
         },
         'preds': {
             0: 0.7331125140190125,
             3: 0.21476328372955322,
             5: 0.2710586190223694
         }
     })
     assert_frame_equal(pdp_interact_out.pdp.iloc[[0, 3, 5]],
                        expected,
                        check_like=True,
                        check_dtype=False)
Example #13
 def pdp_interact_plot(self, feature, var_name=None, sample = 10000, which_classes = None,
                  num_grid_points=[10, 10], plot_types = None, plot_params = {'cmap': ["#00cc00", "#002266"]}):        
     ft_plot = pdp.pdp_interact(
             model=self.md, dataset=self.sample(sample), 
             model_features=self.features, features=feature, 
             num_grid_points=num_grid_points, n_jobs=4)
     
     plot_types = ['contour', 'grid'] if plot_types is None else [plot_types]
     for plot_type in plot_types:
         figs, ax = pdp.pdp_interact_plot(
             pdp_interact_out = ft_plot, 
             feature_names = var_name or feature, 
             plot_type= plot_type, plot_pdp=True, 
             which_classes=which_classes, plot_params = plot_params)
     plt.show()
def plot_2d_pdp(model,
                X,
                y=None,
                X_unscaled=None,
                model_features=None,
                features=None,
                **kwargs):
    '''
    Plots a 2d pdp interaction plot with the x- and y-axes unscaled.

    X_unscaled: A pandas dataframe or numpy array.
    Contains the unscaled values of X.

    All other variables are the same as for plot_1d_pdp()
    '''

    if y is not None:
        model.fit(X, y)

    pdp_plt = pdp.pdp_interact(model=model,
                               dataset=X,
                               model_features=model_features,
                               features=features)

    fig, ax = pdp.pdp_interact_plot(pdp_plt, feature_names=features, **kwargs)

    if X_unscaled is not None:
        meanx = X_unscaled[features[0]].mean()
        stdx = X_unscaled[features[0]].std()

        #Unscale x values
        def unscale_xticks(x, pos):
            return ('%.1f' % (x * stdx + meanx))

        meany = X_unscaled[features[1]].mean()
        stdy = X_unscaled[features[1]].std()

        #Unscale y values
        def unscale_yticks(x, pos):
            return ('%.1f' % (x * stdy + meany))

        ax['pdp_inter_ax'].xaxis.set_major_formatter(
            mticker.FuncFormatter(unscale_xticks))
        ax['pdp_inter_ax'].yaxis.set_major_formatter(
            mticker.FuncFormatter(unscale_yticks))

    return fig, ax
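
# A minimal usage sketch, assuming `X_scaled` is a standardized copy of `X_raw`
# (e.g. via sklearn's StandardScaler, column names preserved) and `model` is an
# already-fitted estimator; the two feature names are hypothetical.
fig, ax = plot_2d_pdp(model, X_scaled,
                      X_unscaled=X_raw,
                      model_features=list(X_scaled.columns),
                      features=['age', 'income'])
plt.show()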
Example #15
def pdp_2d_pdp(term_type, train_data, fea_2d_1, fea_2d_2, fea_nam):

    fea_1_min = min(train_data[fea_2d_1].values)
    fea_1_max = max(train_data[fea_2d_1].values)
    fea_2_min = min(train_data[fea_2d_2].values)
    fea_2_max = max(train_data[fea_2d_2].values)

    inter_rf = pdp.pdp_interact(
        model=rfr, dataset=train_data, model_features=fea_nam,
        cust_grid_points=[np.linspace(fea_1_min, fea_1_max, 10), np.linspace(fea_2_min, fea_2_max, 10)],
        features=[fea_2d_1, fea_2d_2])

    fig, axes = pdp.pdp_interact_plot(
        inter_rf, [fea_2d_1, fea_2d_2], x_quantile=False, plot_type='contour', plot_pdp=False)

    fig.savefig("./results/{}_{}-{}_2d_pdp.png".format(term_type, fea_2d_1, fea_2d_2), dpi=300)
    return fig
Example #16
def save_pdp_plot_2d(model, X_train, features, n_jobs, figure_saver=None):
    model.n_jobs = n_jobs
    with parallel_backend("threading", n_jobs=n_jobs):
        pdp_interact_out = pdp.pdp_interact(
            model=model,
            dataset=X_train,
            model_features=X_train.columns,
            features=features,
            num_grid_points=[20, 20],
        )

    fig, axes = pdp.pdp_interact_plot(
        pdp_interact_out, features, x_quantile=True, figsize=(7, 8)
    )
    axes["pdp_inter_ax"].xaxis.set_tick_params(rotation=45)
    if figure_saver is not None:
        figure_saver.save_figure(fig, "__".join(features), sub_directory="pdp_2d")
def pdp_2d(clf,X,Y,features_to_plot,label,plot_type='contour'):
    from pdpbox import pdp

    # Extract the classifier object from the clf multilearn object
    index = Y.columns.to_list().index(label)
    clf = clf.classifiers_[index]
    clf.verbose = False #Turn verbose off after this to tidy prints

    inter = pdp.pdp_interact(model=clf, dataset=X, model_features=X.columns.to_list(),
                             features=features_to_plot, percentile_ranges=[(5, 95), (5, 95)])
    if plot_type == 'grid':
        fig, ax = pdp.pdp_interact_plot(pdp_interact_out=inter, feature_names=features_to_plot, plot_type='grid',
                x_quantile=True,plot_pdp=True)
    elif plot_type == 'contour':
        fig, ax = pdp.pdp_interact_plot(pdp_interact_out=inter, feature_names=features_to_plot, plot_type='contour')

    clf.verbose = True # reset

    return fig, ax
 def test_binary_numeric(self, titanic_model, titanic_data,
                         titanic_features):
     pdp_interact_out = pdp_interact(model=titanic_model,
                                     dataset=titanic_data,
                                     model_features=titanic_features,
                                     features=['Fare', 'Sex'])
     assert pdp_interact_out._type == 'PDPInteract_instance'
     assert pdp_interact_out.n_classes == 2
     assert pdp_interact_out.which_class is None
     assert pdp_interact_out.features == ['Fare', 'Sex']
     assert pdp_interact_out.feature_types == ['numeric', 'binary']
     assert len(pdp_interact_out.feature_grids) == 2
     assert_array_almost_equal(pdp_interact_out.feature_grids[0],
                               np.array([
                                   0., 7.73284444, 7.8958, 8.6625, 13.,
                                   16.7, 26., 35.11111111, 73.5, 512.3292
                               ]),
                               decimal=8)
     assert_array_equal(pdp_interact_out.feature_grids[1], np.array([0, 1]))
     assert len(pdp_interact_out.pdp_isolate_outs) == 2
     expected = pd.DataFrame({
         'Fare': {
             0: 0.0,
             6: 8.6625,
             12: 26.0,
             18: 512.3292
         },
         'Sex': {
             0: 0.0,
             6: 0.0,
             12: 0.0,
             18: 0.0
         },
         'preds': {
             0: 0.6237624883651733,
             6: 0.6005081534385681,
             12: 0.6391391158103943,
             18: 0.7784096002578735
         }
     })
     assert_frame_equal(pdp_interact_out.pdp.iloc[[0, 6, 12, 18]],
                        expected,
                        check_like=True,
                        check_dtype=False)
Example #19
def ind_cond_exp(model_line, X_train, y_data):

    empty_list = []

    for col in X_train.columns:
        print(col)
        empty_list.append(col)

    from pdpbox.pdp import pdp_interact, pdp_interact_plot
    X_features = empty_list

    features = empty_list[1:3]

    interaction = pdp_interact(model=model_line,
                               dataset=X_train,
                               model_features=X_features,
                               features=features)

    #pdp_goals = pdp.pdp_isolate(model=model_line, dataset=X_train, model_features=X_features, feature='sqft_living')

    pdp_interact_plot(interaction, plot_type='grid', feature_names=features)

    import seaborn as sns

    pdp = interaction.pdp.pivot_table(
        values='preds', columns=features[0], index=features[1]
    )[::-1]  # Slice notation to reverse index order so y axis is ascending

    #plt.figure(figsize=(10,8))
    #  sns.heatmap(pdp, annot=True, fmt='.2f', cmap='viridis')
    #  plt.title('Partial Dependence on Interest Rate on Annual Income & Credit Score');

    import plotly.graph_objs as go  # needed for go.Surface below

    surface = go.Surface(x=pdp.columns, y=pdp.index, z=pdp.values)

    fig = go.Figure(surface)
    fig.show()
    ee.layout = html.Div([dcc.Graph(figure=fig)])
    print("done")

    return ee.index()
Example #20
def plot_modal():
    path = os.path.join(app.config['UPLOAD_FOLDER'],
                        session.get("filename", "not set"))
    index = request.args.get('model', default=0, type=int)
    estim = request.args.get('estimator', default=None, type=str)
    target_ft = session.get('target_ft', 'not set')
    features = session.get('features', 'not set')
    f1 = request.args.get('f1', default=None, type=str)
    f2 = request.args.get('f2', default=None, type=str)
    t1 = request.args.get('t1', default=None, type=str)
    X, y, data = process_data(path, "csv", target_ft)
    #remove nans
    data = data.dropna()

    chosen_class = list(np.unique(y)).index(int(float(t1)))
    with open("tmp_files/model_{}_{}.pickle".format(estim, str(index)),
              'rb') as filehandler:
        pipe = pickle.load(filehandler)
    mod_path = "modal_" + str(f1.replace('.', '_')) + \
        "_" + str(f2.replace('.', '_'))
    pdp_V1_V2 = pdp.pdp_interact(model=pipe,
                                 dataset=data,
                                 model_features=features,
                                 features=[f1, f2],
                                 num_grid_points=None,
                                 percentile_ranges=[None, None])
    fig, axes = pdp.pdp_interact_plot(pdp_V1_V2, [f1, f2],
                                      plot_type='grid',
                                      x_quantile=True,
                                      ncols=2,
                                      plot_pdp=True,
                                      which_classes=[chosen_class],
                                      plot_params={
                                          "subtitle":
                                          "For Class {}, Label: {}".format(
                                              chosen_class, t1)
                                      })
    fig.savefig("static/images/figs/" + mod_path,
                bbox_inches="tight",
                transparent=True)
    plt.figure()
    return render_template("modal_plot.html", plot_name=mod_path)
Example #21
def interactive_plot(a, b, model, X_encoded):
    """ Takes any 2 features in a dataset and creates an interactive 
        partial dependency plot.

        Utilizes the feature_mapping function to get the values of the 
        features inputted.

        a = first feature (column name)
        b = second feature (column name)
        model = Machine Learning model (do *not* use a pipeline here)
        X_encoded = the encoded X feature dataframe (validation or test) - ensure you
                    fit_transform your training data and transform your validation/
                    test data before passing

        Returns an interactive pdp plot.
    """
    # Assign inputs as features
    features = [a, b]

    # Build interaction model
    interaction = pdp_interact(model=model,
                               dataset=X_encoded,
                               model_features=X_encoded.columns,
                               features=features)

    # Create pivot table
    pdp = interaction.pdp.pivot_table(values='preds',
                                      columns=features[0],
                                      index=features[1])

    # Get names and codes from encoder.mapping
    _, a_names, a_codes = feature_mapping(features[0])
    _, b_names, b_codes = feature_mapping(features[1])

    # Add column & index names to pivot table
    pdp = pdp.rename(index=dict(zip(b_codes, b_names)),
                     columns=dict(zip(a_codes, a_names)))

    # Set plot's figure size
    plt.figure(figsize=(10, 8))
    return sns.heatmap(pdp, annot=True, fmt='.2f', cmap='YlGnBu')
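
# A minimal usage sketch, assuming a fitted tree-based `model`, an already-encoded
# validation frame `X_val_encoded`, and that the `feature_mapping` helper referenced
# above is defined in this module; the two column names are hypothetical.
ax = interactive_plot('contract_type', 'region', model, X_val_encoded)
plt.show()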
 def test_binary_numeric(self, ross_model, ross_data, ross_features):
     pdp_interact_out = pdp_interact(
         model=ross_model,
         dataset=ross_data,
         model_features=ross_features,
         features=['SchoolHoliday', 'weekofyear'])
     assert pdp_interact_out._type == 'PDPInteract_instance'
     assert pdp_interact_out.n_classes == 0
     assert pdp_interact_out.which_class is None
     assert pdp_interact_out.features == ['SchoolHoliday', 'weekofyear']
     assert pdp_interact_out.feature_types == ['binary', 'numeric']
     assert len(pdp_interact_out.feature_grids) == 2
     assert_array_equal(pdp_interact_out.feature_grids[0], np.array([0, 1]))
     assert_array_equal(
         pdp_interact_out.feature_grids[1],
         np.array([1., 5., 10., 15., 20., 25., 30., 37., 45., 52.]))
     assert len(pdp_interact_out.pdp_isolate_outs) == 2
     expected = pd.DataFrame({
         'SchoolHoliday': {
             0: 0.0,
             6: 0.0,
             12: 1.0,
             18: 1.0
         },
         'preds': {
             0: 6369.878633951306,
             6: 5831.552135812868,
             12: 7311.965564610852,
             18: 7129.481794228513
         },
         'weekofyear': {
             0: 1.0,
             6: 30.0,
             12: 10.0,
             18: 45.0
         }
     })
     assert_frame_equal(pdp_interact_out.pdp.iloc[[0, 6, 12, 18]],
                        expected,
                        check_like=True,
                        check_dtype=False)
def construct_pdp_interact(model, feature_names,
                          dataset_x = dat_train_x, dataset_y = dat_train_y,
                          num_grid_points = num_grid_points_int, n_jobs = n_jobs,
                          model_features = dat_train_x.columns):
    inter_current = pdp.pdp_interact(
        model = model, dataset = dataset_x.join(dataset_y),
        num_grid_points = num_grid_points, n_jobs = n_jobs, ## needs to be 1 for XGBoost model!
        model_features = model_features, features = feature_names)
    fig, axes = pdp.pdp_interact_plot(
        inter_current, feature_names = feature_names, x_quantile = False, 
        plot_type = 'contour', plot_pdp = False, 
        plot_params = plot_params_pdp_int_default)
    axes["pdp_inter_ax"].set_xlabel(varnames_long_dict[feature_names[0]])
    axes["pdp_inter_ax"].set_ylabel(varnames_long_dict[feature_names[1]])
    ## [[here]] y-labels!
    axes["pdp_inter_ax"].set_title('Number of bike rides per hour\n(Partial Dependence Plot) for\n{0} and {1}\n'\
                             .format(varnames_long_dict[feature_names[0]], 
                                    varnames_long_dict[feature_names[1]]), 
                             y = 1)

    return fig
def show_partial_dependence(model, val_X, features):
    '''
        Takes the model and dataframe for validation set (X)
        then plots the partial dependence plot
        For more on this, check https://www.kaggle.com/dansbecker/partial-plots?utm_medium=email&utm_source=mailchimp&utm_campaign=ml4insights 
    '''
    from matplotlib import pyplot as plt
    from pdpbox import pdp

    feature_names = [i for i in val_X.columns if val_X[i].dtype in [np.int64]]

    if (not type(features) == list):
        # Create the data that we will plot
        pdp_feature = pdp.pdp_isolate(model=model,
                                      dataset=val_X,
                                      model_features=feature_names,
                                      feature=features)

        # plot it
        pdp.pdp_plot(pdp_feature, features)
        plt.show()

    elif (type(features) == list) & (len(features) == 2):
        # Similar to previous PDP plot except we use pdp_interact instead of pdp_isolate and pdp_interact_plot instead of pdp_isolate_plot
        inter1 = pdp.pdp_interact(model=model,
                                  dataset=val_X,
                                  model_features=feature_names,
                                  features=features)

        pdp.pdp_interact_plot(pdp_interact_out=inter1,
                              feature_names=features,
                              plot_type='contour')
        plt.show()
    else:
        print('Error: features must be a single feature name or a list of exactly two features.')
Example #25
def pdp_interact_plot(model,
                      dataset,
                      model_features,
                      feature1,
                      feature2,
                      plot_type="grid",
                      x_quantile=True,
                      plot_pdp=False):
    """Wrapper for pdp.pdp_interact_plot. Uses pdp.pdp_interact."""
    pdp_interact_out = pdp.pdp_interact(
        model=model,
        dataset=dataset,
        model_features=model_features,
        features=[feature1, feature2],
    )

    fig, _ = pdp.pdp_interact_plot(
        pdp_interact_out=pdp_interact_out,
        feature_names=[feature1, feature2],
        plot_type=plot_type,
        x_quantile=x_quantile,
        plot_pdp=plot_pdp,
    )
    return fig
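
# A minimal usage sketch for the wrapper above, assuming a fitted `model` and a
# validation frame `X_val`; the two feature names are hypothetical.
fig = pdp_interact_plot(model, X_val, X_val.columns.tolist(),
                        'feature_a', 'feature_b',
                        plot_type='contour', x_quantile=False)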
Example #26
fig, axes, summary_df = info_plots.actual_plot_interact(
    model=model,
    X=X,
    features=x_cols[3:],
    feature_names=x_cols[3:],
    which_classes=[2, 5],
)

# %% pdp_interact: Preset ----------------------------------------------------

pdp_interacted_tmp = pdp.pdp_interact(
    model=model,
    dataset=X,
    model_features=x_cols,
    features=x_cols[:2],
    num_grid_points=[10, 10],
    percentile_ranges=[None, None],
    n_jobs=1,
)

# %% pdp_interact_plot: grid

fig, axes = pdp.pdp_interact_plot(
    pdp_interacted_tmp,
    feature_names=x_cols[:2],
    plot_type='grid',
    x_quantile=True,
    ncols=2,
    plot_pdp=True,
    which_classes=[1, 2, 3],
)
Example #27
        pdp_plot_feature = pdp.pdp_plot(pdp_feature, i)
        graph_name = ''.join(
            random.sample((string.ascii_uppercase + string.digits), 3))
        html_pdp = 'html_pdp_plot' + graph_name + ' + '
        encoded = fig_to_base64(pdp_plot_feature)
        html_pdp = '<img class="img-fluid" src="data:image/png;base64, {}">'.format(
            encoded.decode('utf-8'))
        html_partial_plot += html_pdp

    # -------------------------------------------------------------
    # 2D PARTIAL DEPENDENCE PLOTS
    # Similar to previous PDP plot except we use pdp_interact instead of pdp_isolate
    # and pdp_interact_plot instead of pdp_isolate_plot
    # features_to_plot = ['preg', 'skin']
    inter1 = pdp.pdp_interact(model=loaded_model,
                              dataset=dataframe_test,
                              model_features=feature_names,
                              features=features_to_plot2d)
    partial_plot = pdp.pdp_interact_plot(
        pdp_interact_out=inter1, feature_names=features_to_plot2d
    )  # plot_type='contour'  plot_type='grid'
    encoded = fig_to_base64(partial_plot)
    html_partial_plot2d = '<img class="img-fluid" src="data:image/png;base64, {}">'.format(
        encoded.decode('utf-8'))

    # -------------------------------------------------------------
    # SHAP PLOT
    data_for_prediction = dataframe_test.iloc[shap_row_to_show]
    explainer = shap.KernelExplainer(loaded_model.predict_proba,
                                     dataframe_test.values)
    shap_values = explainer.shap_values(data_for_prediction)
    shap.initjs()
# Plot feature importances
n = 6
plt.figure(figsize=(10,n/2))
#plt.title(f'Top {n} features pipeline5')
plt.title(f'Top {n} features Gradient Boosting')
importances4.sort_values()[-n:].plot.barh(color='grey');

!pip install pdpbox

# Partial Dependence Plots with 2 features
from pdpbox.pdp import pdp_interact, pdp_interact_plot
features2 = ['Latitude', 'Longitude Difference to State Capital']
interaction = pdp_interact(
                           model=gb,
                           dataset=X_val,
                           model_features=X_val.columns,
                           features=features2
                           )
pdp_interact_plot(interaction, plot_type='grid', feature_names=features2);

# A two-feature partial dependence plot in 3D
pdp = interaction.pdp.pivot_table(
                                  values='preds',
                                  columns=features2[0],
                                  index=features2[1]
                                  )[::-1] # Slice notation to reverse index order so y axis is ascending
import plotly.graph_objs as go

target = 'Value of d parameter'

surface = go.Surface(x=pdp.columns, y=pdp.index, z=pdp.values)

pdp_plot(isolated_features, feature_name=pdf_feature);

"""### Interactive Partial Dependence Plots with 2 features"""

from pdpbox.pdp import pdp_interact, pdp_interact_plot

# PDP between total_of_special_requests and is_repeated_guest;
# it shows the number of unique grid points for each feature:
# 2 for total_of_special_requests and 4 for is_repeated_guest
pdf_features = ['is_repeated_guest', 'total_of_special_requests']

booking_interaction = pdp_interact(
    model=Rand_forest, 
    dataset=X_val,
    model_features=X_val.columns, 
    features=pdf_features
)

# grid-style interaction plot for total_of_special_requests and is_repeated_guest
# with 2 and 4 grid points respectively
pdp_interact_plot(booking_interaction, plot_type='grid', 
                  feature_names=pdf_features);

"""### Shapley Values
It is a good technique to show the insight of the model predictor and break down each model individually.
"""

# explain an individual observation:
# take the first row from X_test
# and turn it into a dataframe
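
# A minimal sketch of the step described above, assuming the fitted `Rand_forest`
# model and a test frame `X_test`; .iloc[[0]] keeps the first row as a one-row
# DataFrame rather than a Series.
import shap

row = X_test.iloc[[0]]
explainer = shap.TreeExplainer(Rand_forest)
# for a classifier, shap_values is typically a list with one array per class
shap_values = explainer.shap_values(row)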
best_iter = np.argmin(cvbst['l2-mean'])
print("Best number of iterations: " + str(best_iter))
# Best number of iterations: 32

# --------------------Model interpretation----------------
# Plotting feature importances
gpb.plot_importance(bst)

# Partial dependence plots
from pdpbox import pdp
# Single variable plots (takes a few seconds to compute)
pdp_dist = pdp.pdp_isolate(model=bst, dataset=X_train, model_features=X_train.columns,
                           feature='variable_2', num_grid_points=50)
pdp.pdp_plot(pdp_dist, 'variable_2', plot_lines=True)
# Two variable interaction plot
inter_rf = pdp.pdp_interact(model=bst, dataset=X_train, model_features=X_train.columns,
                             features=['variable_1','variable_2'])
pdp.pdp_interact_plot(inter_rf, ['variable_1','variable_2'], x_quantile=True,
                      plot_type='contour', plot_pdp=True)# ignore any error message

# SHAP values and dependence plots
# Note: you need shap version>=0.36.0
import shap
shap_values = shap.TreeExplainer(bst).shap_values(X_test)
shap.summary_plot(shap_values, X_test)
shap.dependence_plot("variable_2", shap_values, X_test)


# --------------------Comparison to alternative approaches----------------
results = pd.DataFrame(columns = ["RMSE","Time"],
                       index = ["GPBoost", "Linear_ME","Boosting_Ign","Boosting_Cat","MERF"])
# 1. GPBoost