def add_cohort_metrics(self, df, var_name="", operator="", value=""):
    """Compute metrics for a cohort of `df` and store the rendered divs.

    Args:
        df: main dataframe holding features, "Model Decision" and "True Values".
        var_name: name of the variable to slice/dice on ("" = whole dataset).
        operator: comparison operator as a string: >, <, =, >=, <=.
        value: value to compare `var_name` against ("" = no filtering).

    Side effects:
        Writes the generated metric divs into self.cohort_set, keyed by the
        condition string returned by filtered_dataframe. Does nothing when the
        cohort is empty.
    """
    if value != "":
        # Filtered cohort: filtered_dataframe returns (df, column, condition).
        _, predicted, condition = self.filtered_dataframe(
            df, "Model Decision", var_name, operator, value)
        _, true_values, _ = self.filtered_dataframe(
            df, "True Values", var_name, operator, value)
    else:
        # Whole-dataset cohort: the unfiltered call returns (column, condition).
        predicted, condition = self.filtered_dataframe(df, "Model Decision")
        true_values, _ = self.filtered_dataframe(df, "True Values")

    # An empty cohort has no metrics to record (was `else: pass` before).
    if len(true_values) == 0:
        return

    if is_classification(self.model):
        accuracy, precision, recall, fpr, fnr = self.classification_cohort_metrics(
            true_values, predicted)
        self.cohort_set[condition] = self.generate_classification_divs(
            accuracy, precision, recall, fpr, fnr)
    else:
        mae, mse, r2 = self.regression_cohort_metrics(true_values, predicted)
        # NOTE: helper is named `generator_...` (not `generate_...`) elsewhere
        # in the project; keeping the existing name.
        self.cohort_set[condition] = self.generator_regression_divs(mae, mse, r2)
def insight_2_local_feature_impact(self, df, y_and_prob):
    """Dispatch the local-feature-impact insight to the classification or
    regression insight generator, depending on the stored model type.

    Args:
        df: dataframe of local feature impacts.
        y_and_prob: predicted label plus (for classifiers) its probability.

    Returns:
        Whatever the underlying insight generator returns.
    """
    # `is True` / `== True` on a boolean is redundant — branch directly.
    if is_classification(self.model):
        return self.classification.insight_2_local_feature_impact(df, y_and_prob)
    return self.regression.insight_2_local_feature_impact(df, y_and_prob)
def make_predictions(self):
    """Run the stored model over the stored input data.

    Uses the ColumnTransformer pipeline (self.data_into_model()) when one was
    supplied, otherwise feeds self.input_data directly.

    Returns:
        Classification models: (prediction, probabilities).
        Regression models: prediction only.
    """
    # Fixes from the original:
    #  * `self.self.model` typo in the final raise;
    #  * `== None` -> `is None`;
    #  * the `if ct is None / elif ct is not None / else raise` chain — the
    #    first two arms are exhaustive, so the raise was unreachable dead code;
    #  * data_into_model() was invoked twice per branch; compute features once.
    if is_classification(self.model):
        features = (self.input_data.to_numpy() if self.ct is None
                    else self.data_into_model())
        prediction = self.model.predict(features)
        probabilities = self.model.predict_proba(features)
        return prediction, probabilities

    # Regression path intentionally passes the raw frame (no .to_numpy()),
    # matching the original behavior.
    features = self.input_data if self.ct is None else self.data_into_model()
    return self.model.predict(features)
def shap_explainer(self):
    """Build a SHAP explainer for the stored model.

    Strategy: try the fast TreeExplainer first (only possible without a
    ColumnTransformer), falling back to the model-agnostic KernelExplainer.

    Returns:
        Classification: (explainer, pred, pred_prob/pred_fcn).
        Regression: (explainer, pred) or (explainer, pred_fcn) with a CT.

    Raises:
        Exception: when no explainer strategy works for this model.
    """
    # Original used bare `except:` clauses, which also swallow SystemExit and
    # KeyboardInterrupt — narrowed to `except Exception`.
    unsupported_msg = "{} not supported. Please create an issue on Github"
    if is_classification(self.model):
        if self.ct is None:
            try:
                return self.tree_explainer()  # (explainer, pred, pred_prob)
            except Exception:
                try:
                    return self.kernel_explainer()
                except Exception:
                    raise Exception(unsupported_msg.format(self.model))
        try:
            return self.kernel_explainer_with_ct()  # (explainer, pred, pred_fcn)
        except Exception:
            raise Exception(unsupported_msg.format(self.model))
    if self.ct is None:
        try:
            return self.tree_explainer()  # (explainer, pred)
        except Exception:
            try:
                return self.kernel_explainer()
            except Exception:
                raise Exception(unsupported_msg.format(self.model))
    try:
        return self.kernel_explainer_with_ct()  # (explainer, pred_fcn)
    except Exception:
        raise Exception(unsupported_msg.format(self.model))
def insight_2_global_feature_impact(self, df, outcome=0):
    """Dispatch the global-feature-impact insight to the classification or
    regression insight generator.

    Args:
        df: dataframe of global feature impacts.
        outcome: class index the insight is generated for (classification only).

    Returns:
        Whatever the underlying insight generator returns.
    """
    if is_classification(self.model):
        return self.classification.insight_2_global_feature_impact(
            df, outcome, self.param["expected_values"], self.param["classes"])
    # Regression uses the single expected value.
    return self.regression.insight_2_global_feature_impact(
        df, self.param["expected_values"][0])
def log_metrics(self):
    """Predict on the stored data and compute the matching metric set.

    Returns:
        Classification metrics (from classification_metrics) or regression
        metrics (from regression_metrics) against self.target_data.
    """
    if is_classification(self.model):
        # make_predictions returns (prediction, probabilities) for classifiers.
        predict, _ = self.make_predictions()
        return self.classification_metrics(self.target_data, predict)
    predict = self.make_predictions()
    return self.regression_metrics(self.target_data, predict)
def data_for_shap(self, input_data):
    """Build a dataframe of SHAP values alongside model decisions (and, for
    classifiers, per-class probabilities).

    Args:
        input_data: feature dataframe to explain.

    Returns:
        Dataframe with SHAP columns, a 'Model Decision' column and, for
        classification models, 'Probability: <label>' columns.
    """
    if is_classification(self.model):
        explainer, pred, pred_fcn = self.shap_explainer()
        if type(explainer) == shap.explainers._tree.Tree:
            shap_vals = explainer.shap_values(input_data)
            data_with_shap = self.append_shap_values_to_df(
                input_sv=shap_vals[0], in_data=input_data.copy(), scope="local")
            prediction = pred([input_data])
            probabilities = pred_fcn([input_data])
        else:
            # BUG FIX: the original called shap.sample(input_data, 100) for
            # each of the SHAP values, predictions and probabilities — each
            # call draws an INDEPENDENT random sample, so the rows were
            # misaligned (and the regression path below even sampled
            # self.input_data for predictions). Sample exactly once.
            sample = shap.sample(input_data, 100)
            shap_vals = explainer.shap_values(sample)
            data_with_shap = self.append_shap_values_to_df(
                input_sv=shap_vals[0], in_data=sample.copy(), scope="local")
            prediction = pred(sample)
            probabilities = pred_fcn(sample)
        data_with_shap['Model Decision'] = prediction[0]
        for i, label in enumerate(np.unique(self.actual_data)):
            data_with_shap['Probability: {}'.format(label)] = probabilities[:, i][0]
        return data_with_shap

    explainer, pred = self.shap_explainer()
    if type(explainer) == shap.explainers._tree.Tree:
        shap_vals = explainer.shap_values(input_data)
        # NOTE(review): in_data uses self.input_data while SHAP values come
        # from the `input_data` argument — kept as-is, but confirm callers
        # always pass self.input_data here.
        data_with_shap = self.append_shap_values_to_df(
            input_sv=shap_vals, in_data=self.input_data.copy(), scope="local")
        data_with_shap['Model Decision'] = pred(input_data)
        return data_with_shap

    sample = shap.sample(input_data, 100)  # single sample keeps rows aligned
    shap_vals = explainer.shap_values(sample)
    data_with_shap = self.append_shap_values_to_df(
        input_sv=shap_vals, in_data=sample.copy(), scope="local")
    data_with_shap['Model Decision'] = pred(sample)
    return data_with_shap
def shap_local(self, row_number):
    """Explain a single row of the stored input data with SHAP.

    Args:
        row_number: positional index of the row to explain.

    Returns:
        (explainer, local_shap_values, row_with_shap) where row_with_shap
        carries the SHAP columns plus 'Model Decision', 'True Values' and,
        for classifiers, 'Probability: <label>' columns.

    Raises:
        IndexError: when row_number is outside the data.
    """
    if is_classification(self.model):
        explainer, pred, pred_prob = self.shap_explainer()
    else:
        explainer, pred = self.shap_explainer()

    # OFF-BY-ONE FIX: valid positional indices are 0 .. len-1; the original
    # used `>` which let row_number == len slip through to iloc and crash.
    if row_number >= len(self.input_data):
        raise IndexError(
            f"index {row_number} is out of bounds for axis 0 with size "
            f"{len(self.input_data)}")

    # The tree/kernel branches were duplicates except for the probability
    # input shape — deduplicated, preserving that difference below.
    row = self.input_data.iloc[row_number, :]
    local_shap_values = explainer.shap_values(row)
    row_with_shap = self.append_shap_values_to_df(
        input_sv=local_shap_values, in_data=row.copy(), scope='local')
    row_frame = pd.DataFrame(self.input_data.iloc[row_number]).T
    row_with_shap['Model Decision'] = pred(row_frame)[0]
    row_with_shap['True Values'] = pd.DataFrame(self.actual_data).iloc[row_number][0]

    if is_classification(self.model):
        if type(explainer) == shap.explainers._tree.Tree:
            probabilities = pred_prob([np.array(self.input_data)[row_number]])
        else:
            probabilities = pred_prob(row_frame)
        for i, label in enumerate(np.unique(self.actual_data)):
            row_with_shap['Probability: {}'.format(label)] = probabilities[:, i][0]
    return explainer, local_shap_values, row_with_shap
def add_shap_row(self, input_data, row_number):
    """Compute SHAP values for one provided row and return it augmented with
    the model decision (plus, for tree explainers, the actual decision, and
    for classifiers, per-class probabilities).
    """
    classify = is_classification(self.model)
    if classify:
        explainer, pred, pred_prob = self.shap_explainer()
    else:
        explainer, pred = self.shap_explainer()

    raw_values = explainer.shap_values(input_data)

    if type(explainer) == shap.explainers._tree.Tree:
        shap_row = self.append_shap_values_to_df(
            input_sv=raw_values[0], in_data=input_data.copy(), scope='local')
        shap_row['Model Decision'] = pred(pd.DataFrame(input_data))[0]
        shap_row['Actual Decision'] = self.actual_data[row_number]
        prob_input = np.array(input_data)
    else:
        shap_row = self.append_shap_values_to_df(
            input_sv=raw_values, in_data=input_data.copy(), scope='local')
        shap_row['Model Decision'] = pred(pd.DataFrame(input_data).T)[0]
        prob_input = [np.array(input_data)]

    if classify:
        probabilities = pred_prob(prob_input)
        for idx, label in enumerate(np.unique(self.actual_data)):
            shap_row['Probability: {}'.format(label)] = probabilities[:, idx][0]
    return shap_row
def create_prediction_columns(self):
    """Populate self.predicted_columns with model outputs.

    Always adds 'Model Decision' (predictions) and 'True Values' (targets);
    for classification models, also adds one 'Probability: <label>' column
    per predicted class.
    """
    if is_classification(self.model):
        prediction, probabilities = self.make_predictions()
        self.predicted_columns['Model Decision'] = prediction
        self.predicted_columns['True Values'] = self.target_data
        # enumerate over np.unique once instead of re-computing it per index.
        for i, label in enumerate(np.unique(prediction)):
            self.predicted_columns['Probability: {}'.format(label)] = probabilities[:, i]
    else:
        prediction = self.make_predictions()
        self.predicted_columns['Model Decision'] = prediction
        self.predicted_columns['True Values'] = self.target_data
def update_impact_graph(*values):
    """Dash callback: rebuild the local feature-impact graph and its insight
    message for the row described by the inputs.

    Args:
        *values: feature values (all but the last two entries) followed by
            callback extras; values[-1] is the row number.

    Returns:
        (figure, message[0], message[1], message[2], message[3]).
    """
    # The original read dash.callback_context.triggered into an unused
    # `changed_id` local — dead code, removed.
    df = pd.DataFrame([values[:-2]])
    df.columns = x_test.columns
    array = ShapleyValues.add_shap_row(df, values[-1])
    g = plotly_graphs()
    figure, dat = g.local_feature_impact_graph(array)

    decision = int(array["Model Decision"])
    if is_classification(ShapleyValues.model):
        probability = round(float(array["Probability: " + str(decision)]), 2)
        message = insight_classification.insight_2_local_feature_impact(
            dat, [decision, probability])
    else:
        message = insight_regression.insight_2_local_feature_impact(
            dat, [decision])
    return figure, message[0], message[1], message[2], message[3]
def insight_3(self, df):
    """Dispatch insight 3 to the classification or regression generator.

    Args:
        df: dataframe the insight is generated from.

    Returns:
        Whatever the underlying insight generator returns.
    """
    if is_classification(self.model):
        return self.classification.insight_3(df)
    return self.regression.insight_3(df)
def signal(is_classification):
    # Return the trailing columns of the global x_test: the last 4 for
    # classification models, the last 2 otherwise.
    #
    # NOTE(review): the parameter name `is_classification` shadows the
    # module-level `is_classification` helper, yet it is *called* as a
    # function on the global `model` below. Presumably callers pass the
    # helper itself; if a boolean is ever passed instead, this raises
    # TypeError — confirm against the callers before renaming either.
    if is_classification(model) == True:
        return x_test.columns[-4:]
    else:
        return x_test.columns[-2:]