# From the grid search with cross-validation above, a tree depth of 5, 500 iterations, and a learning rate of 0.1 give the lowest RMSE, so these values are used for the final model.

# In[403]:

# Final CatBoost model, refit on the full training set with the tuned hyperparameters
model = ctb.CatBoostRegressor(depth=5,
                              iterations=500,
                              learning_rate=0.1,
                              verbose=False)

model.fit(X_train, y_train)

# In[404]:

# Predict on the held-out test set and get CatBoost's built-in feature importances
y_pred = model.predict(X_test)
cat_coeff_imp = model.get_feature_importance(type="PredictionValuesChange")
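
# As a quick check on the tuned model, the test RMSE can be computed from
# y_pred. A minimal sketch, assuming the usual y_test from the earlier
# train/test split and that scikit-learn is available in this environment.

from sklearn.metrics import mean_squared_error

# Root mean squared error on the held-out test set
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
test_rmse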

# In[431]:

# One importance value per feature
len(cat_coeff_imp)

# Indices of the ten most important features (argsort is ascending, so take the last ten)
top_10_idx_cat = np.argsort(cat_coeff_imp)[-10:]

X_train.columns[top_10_idx_cat]
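
# The top-10 importances can also be visualised as a horizontal bar chart,
# using the feature names and importance values computed above. A minimal
# sketch, assuming matplotlib is available in this environment.

import matplotlib.pyplot as plt

plt.figure(figsize=(8, 5))
plt.barh(X_train.columns[top_10_idx_cat], cat_coeff_imp[top_10_idx_cat])
plt.xlabel("PredictionValuesChange importance")
plt.title("Top 10 CatBoost feature importances")
plt.tight_layout()
plt.show()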

# In[398]:

# SHAP values on the training set to explain the CatBoost model's predictions
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_train)
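
# The SHAP values can also be collapsed into a global ranking (mean absolute
# SHAP value per feature) and compared with the PredictionValuesChange ranking
# above. A minimal sketch using only numpy and the objects already defined.

mean_abs_shap = np.abs(shap_values).mean(axis=0)
top_10_idx_shap = np.argsort(mean_abs_shap)[-10:]
X_train.columns[top_10_idx_shap]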

# In[399]: