Example #1
    def add_cohort_metrics(self, df, var_name="", operator="", value=""):
        """
        data = main_data 
        name = cohort_name 
        var_name = name of the variable to slice/dice 
        operator: >, <, =, >=, <= 
        value = value of the variable  

        """
        if value != "":
            #Extract filtered predicted values
            _, predicted, condition_predict = self.filtered_dataframe(
                df, "Model Decision", var_name, operator, value)
            #Extract filtered true labels
            _, true_values, condition_true = self.filtered_dataframe(
                df, "True Values", var_name, operator, value)
            #calculate metrics
            if is_classification(self.model):
                if len(true_values) != 0:
                    accuracy, precision, recall, fpr, fnr = self.classification_cohort_metrics(
                        true_values, predicted)
                    self.cohort_set[
                        condition_predict] = self.generate_classification_divs(
                            accuracy, precision, recall, fpr, fnr)
                else:
                    pass
            else:
                if len(true_values) != 0:
                    mae, mse, r2 = self.regression_cohort_metrics(
                        true_values, predicted)
                    # Save these metrics for this cohort
                    self.cohort_set[
                        condition_predict] = self.generator_regression_divs(
                            mae, mse, r2)
                else:
                    pass
        else:
            main_dataset, condition = self.filtered_dataframe(
                df, "Model Decision")
            true_data, _ = self.filtered_dataframe(df, "True Values")
            if is_classification(self.model):
                if len(true_data) != 0:
                    accuracy, precision, recall, fpr, fnr = self.classification_cohort_metrics(
                        true_data, main_dataset)
                    self.cohort_set[
                        condition] = self.generate_classification_divs(
                            accuracy, precision, recall, fpr, fnr)
                else:
                    pass
            else:
                if len(true_data) != 0:
                    mae, mse, r2 = self.regression_cohort_metrics(
                        true_data, main_dataset)
                    # Save these metrics for this cohort
                    self.cohort_set[
                        condition] = self.generator_regression_divs(
                            mae, mse, r2)
                else:
                    pass
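
A hedged usage sketch for the method above, assuming an instance called cohorts whose dataframe df already carries the "Model Decision" and "True Values" columns (the instance and column names are illustrative, not from the source):

# Illustrative only: one filtered cohort, then the unfiltered baseline.
cohorts.add_cohort_metrics(df, var_name="Age", operator=">=", value=40)
cohorts.add_cohort_metrics(df)  # empty value -> metrics over the whole dataframe
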
Example #2
 def insight_2_local_feature_impact(self, df, y_and_prob):
     if is_classification(self.model):
         return self.classification.insight_2_local_feature_impact(
             df, y_and_prob)
     else:
         return self.regression.insight_2_local_feature_impact(
             df, y_and_prob)
Example #3
 def make_predictions(self):
     """ [Initiate the prediction function]
     
     Args:
         Model
         input_data
         target_data
     
     Return:
         prediction column
         probabilities [if is_classifier]
     
     """
     if is_classification(self.model):
         if self.ct is None:
             prediction = self.model.predict(self.input_data.to_numpy())
             probabilities = self.model.predict_proba(self.input_data.to_numpy())
         else:
             # A ColumnTransformer is set: feed the model the transformed features.
             prediction = self.model.predict(self.data_into_model())
             probabilities = self.model.predict_proba(self.data_into_model())
         return prediction, probabilities
     else:
         if self.ct is None:
             prediction = self.model.predict(self.input_data)
         else:
             prediction = self.model.predict(self.data_into_model())
         return prediction
Example #4
 def shap_explainer(self):
     # Prefer the fast tree explainer and fall back to the model-agnostic
     # kernel explainer when the model type is not tree-based.
     if is_classification(self.model):
         if self.ct is None:
             try:
                 explainer, pred, pred_prob = self.tree_explainer()
                 return explainer, pred, pred_prob
             except Exception:
                 try:
                     explainer, pred, pred_prob = self.kernel_explainer()
                     return explainer, pred, pred_prob
                 except Exception:
                     raise Exception("{} not supported. Please create an issue on Github".format(self.model))
         else:
             try:
                 explainer, pred, pred_fcn = self.kernel_explainer_with_ct()
                 return explainer, pred, pred_fcn
             except Exception:
                 raise Exception("{} not supported. Please create an issue on Github".format(self.model))
     else:
         if self.ct is None:
             try:
                 explainer, pred = self.tree_explainer()
                 return explainer, pred
             except Exception:
                 try:
                     explainer, pred = self.kernel_explainer()
                     return explainer, pred
                 except Exception:
                     raise Exception("{} not supported. Please create an issue on Github".format(self.model))
         else:
             try:
                 explainer, pred_fcn = self.kernel_explainer_with_ct()
                 return explainer, pred_fcn
             except Exception:
                 raise Exception("{} not supported. Please create an issue on Github".format(self.model))
Example #5
 def insight_2_global_feature_impact(self, df, outcome=0):
     if is_classification(self.model):
         return self.classification.insight_2_global_feature_impact(
             df, outcome, self.param["expected_values"],
             self.param["classes"])
     else:
         return self.regression.insight_2_global_feature_impact(
             df, self.param["expected_values"][0])
Example #6
 def log_metrics(self):
     if is_classification(self.model):
         predict, _ = self.make_predictions()
         metrics = self.classification_metrics(self.target_data, predict)
         return metrics
     else:
         predict = self.make_predictions()
         metrics = self.regression_metrics(self.target_data, predict)
         return metrics
Example #7
    def data_for_shap(self, input_data):
        if is_classification(self.model):
            explainer, pred, pred_fcn = self.shap_explainer()
            if isinstance(explainer, shap.explainers._tree.Tree):
                # Tree explainers are cheap enough to run on the full dataset.
                global_shap_values = explainer.shap_values(input_data)
                data_with_shap = self.append_shap_values_to_df(input_sv=global_shap_values[0],
                                                               in_data=input_data.copy(),
                                                               scope="local")
                prediction = pred(input_data)
                probabilities = pred_fcn(input_data)
            else:
                # Kernel explainers are expensive: draw one fixed random subsample
                # so the SHAP values, predictions and probabilities all describe
                # the same rows.
                sample = shap.sample(input_data, 100)
                global_shap_values = explainer.shap_values(sample)
                data_with_shap = self.append_shap_values_to_df(input_sv=global_shap_values[0],
                                                               in_data=sample.copy(),
                                                               scope="local")
                prediction = pred(sample)
                probabilities = pred_fcn(sample)

            data_with_shap['Model Decision'] = prediction
            #data_with_shap['True Values'] = self.actual_data
            for i, label in enumerate(np.unique(self.actual_data)):
                data_with_shap['Probability: {}'.format(label)] = probabilities[:, i]
            return data_with_shap
        else:
            explainer, pred = self.shap_explainer()
            if isinstance(explainer, shap.explainers._tree.Tree):
                global_shap_values = explainer.shap_values(input_data)
                data_with_shap = self.append_shap_values_to_df(input_sv=global_shap_values,
                                                               in_data=input_data.copy(),
                                                               scope="local")
                data_with_shap['Model Decision'] = pred(input_data)
            else:
                sample = shap.sample(input_data, 100)
                global_shap_values = explainer.shap_values(sample)
                data_with_shap = self.append_shap_values_to_df(input_sv=global_shap_values,
                                                               in_data=sample.copy(),
                                                               scope="local")
                data_with_shap['Model Decision'] = pred(sample)
            #data_with_shap['True Values'] = self.actual_data
            return data_with_shap
Example #8
    def shap_local(self, row_number):
        if is_classification(self.model):
            explainer, pred, pred_prob = self.shap_explainer()
            
        else:
            explainer, pred = self.shap_explainer()
            
        if row_number >= len(self.input_data):
            raise IndexError(f"index {row_number} is out of bounds for axis 0 with size {len(self.input_data)}")
        else:
            if isinstance(explainer, shap.explainers._tree.Tree):
                local_shap_values = explainer.shap_values(self.input_data.iloc[row_number, :])
                row_with_shap = self.append_shap_values_to_df(input_sv=local_shap_values,
                                                              in_data=self.input_data.iloc[row_number, :].copy(),
                                                              scope='local')

                row_with_shap['Model Decision'] = pred(pd.DataFrame(self.input_data.iloc[row_number]).T)[0]
                row_with_shap['True Values'] = pd.DataFrame(self.actual_data).iloc[row_number][0]
                
                if is_classification(self.model):
                    probabilities = pred_prob([np.array(self.input_data)[row_number]])
                    for i in range(len(np.unique(self.actual_data))):
                        row_with_shap['Probability: {}'.format(np.unique(self.actual_data)[i])] = probabilities[:,i][0]
                
                return explainer, local_shap_values, row_with_shap
            else:
                local_shap_values = explainer.shap_values(self.input_data.iloc[row_number, :])
                row_with_shap = self.append_shap_values_to_df(input_sv=local_shap_values,
                                                              in_data=self.input_data.iloc[row_number, :].copy(),
                                                              scope='local')
                
                row_with_shap['Model Decision'] = pred(pd.DataFrame(self.input_data.iloc[row_number]).T)[0]
                row_with_shap['True Values'] = pd.DataFrame(self.actual_data).iloc[row_number][0]

                if is_classification(self.model):
                    probabilities = pred_prob(pd.DataFrame(self.input_data.iloc[row_number]).T)
                    for i in range(len(np.unique(self.actual_data))):
                        row_with_shap['Probability: {}'.format(np.unique(self.actual_data)[i])] = probabilities[:,i][0]

                return explainer, local_shap_values, row_with_shap
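
A hedged usage sketch for shap_local, assuming a classification model for which the tree explainer was selected and an instance called shapley_values (the names and the class index are illustrative):

# Illustrative only: explain row 5 and draw shap's force plot for class 1.
explainer, local_sv, row_df = shapley_values.shap_local(5)
shap.force_plot(explainer.expected_value[1], local_sv[1],
                shapley_values.input_data.iloc[5, :])
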
Example #9
    def add_shap_row(self, input_data, row_number):
        if is_classification(self.model):
            explainer, pred, pred_prob = self.shap_explainer()
        else:
            explainer, pred = self.shap_explainer()

        if isinstance(explainer, shap.explainers._tree.Tree):
            shap_values = explainer.shap_values(input_data)
            shap_row = self.append_shap_values_to_df(input_sv=shap_values[0],
                                                     in_data=input_data.copy(),
                                                     scope='local')
            shap_row['Model Decision'] = pred(pd.DataFrame(input_data))[0]
            shap_row['Actual Decision'] = self.actual_data[row_number]
             
            if is_classification(self.model):
                probabilities = pred_prob(np.array(input_data))
                for i in range(len(np.unique(self.actual_data))):
                    shap_row['Probability: {}'.format(np.unique(self.actual_data)[i])] = probabilities[:,i][0]

            return shap_row
        else:
            shap_values = explainer.shap_values(input_data)
            shap_row = self.append_shap_values_to_df(input_sv=shap_values,
                                                     in_data=input_data.copy(),
                                                     scope='local')

            shap_row['Model Decision'] = pred(pd.DataFrame(input_data).T)[0]
            if is_classification(self.model):
                probabilities = pred_prob([np.array(input_data)])
                for i in range(len(np.unique(self.actual_data))):
                    shap_row['Probability: {}'.format(np.unique(self.actual_data)[i])] = probabilities[:,i][0]
            return shap_row
Example #10
 def create_prediction_columns(self):
     """ [Create prediction columns and add them to the self.predicted_columns dictionary]
     Args:
         model
         x_test: 
         y_test
         ColumnTransformer
     """
     if is_classification(self.model):
         prediction, probabilities = self.make_predictions()
         self.predicted_columns['Model Decision'] = prediction
         self.predicted_columns['True Values'] = self.target_data
         for i in range(len(np.unique(prediction))):
             self.predicted_columns['Probability: {}'.format(np.unique(prediction)[i])] = probabilities[:,i]
         
     else:
         prediction = self.make_predictions()
         self.predicted_columns['Model Decision'] = prediction
         self.predicted_columns['True Values'] = self.target_data
Example #11
    def update_impact_graph(*values):
        changed_id = [p['prop_id'] for p in dash.callback_context.triggered][0]
        
        
        df = pd.DataFrame([values[:-2]])
        df.columns = x_test.columns

        array = ShapleyValues.add_shap_row(df, values[-1])
        g = plotly_graphs()
        figure, dat = g.local_feature_impact_graph(array)
        
        if is_classification(ShapleyValues.model):
            y_and_prob = []
            y_and_prob.append(int(array["Model Decision"]))
            y_and_prob.append(round(float(array["Probability: "+str(int(array["Model Decision"])) ]),2))
            
            message = insight_classification.insight_2_local_feature_impact(dat, y_and_prob)
        else:
            y_and_prob = []
            y_and_prob.append(int(array["Model Decision"]))
            message = insight_regression.insight_2_local_feature_impact(dat, y_and_prob)

        
        return figure, message[0], message[1], message[2], message[3]
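
update_impact_graph reads like the body of a Dash callback; its decorator is not part of the listing. A sketch of how such a callback is typically registered, with every component id assumed (the inputs must end with the two values the function slices off as values[-2] and values[-1]):

# Hypothetical registration; component ids are assumptions.
from dash.dependencies import Input, Output

app.callback(
    [Output("impact-graph", "figure"),
     Output("insight-1", "children"), Output("insight-2", "children"),
     Output("insight-3", "children"), Output("insight-4", "children")],
    [Input(col, "value") for col in x_test.columns]
    + [Input("submit-button", "n_clicks"), Input("row-number", "value")],
)(update_impact_graph)
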
Example #12
 def insight_3(self, df):
     if is_classification(self.model):
         return self.classification.insight_3(df)
     else:
         return self.regression.insight_3(df)
Example #13
 def signal(is_classification):
     # `is_classification` is passed in as a callable and applied to the
     # module-level `model`; the slice width depends on the task type.
     if is_classification(model):
         return x_test.columns[-4:]
     else:
         return x_test.columns[-2:]
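
Every example above branches on is_classification, which the listing never defines. A plausible minimal implementation, assuming the wrapped models are scikit-learn estimators (an assumption, not the source's confirmed definition):

from sklearn.base import is_classifier

def is_classification(model):
    # Assumed helper: defer to scikit-learn's estimator-type check.
    return is_classifier(model)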