コード例 #1
0
ファイル: explain.py プロジェクト: vedraiyani/explainx
    def ai(self, df, y, model, model_name="xgboost", mode=None):
        """Assemble the explanation frame for ``model`` and hand it to the dashboard.

        The frame returned by ``calculate_shap().find`` is stored on
        ``self.df_final`` and extended with a ``y_prediction`` column (model
        output) and a ``y_actual`` column (the supplied ``y``).

        Args:
            df: Feature frame; must support ``to_numpy()`` and be accepted by
                ``xgboost.DMatrix`` when ``model_name`` is ``"xgboost"``.
            y: Ground-truth values stored in the ``y_actual`` column.
            model: Fitted model exposing ``predict``.
            model_name: Backend identifier; only ``"xgboost"`` is treated
                specially.
            mode: Forwarded unchanged to ``dashboard().find``.

        Returns:
            Always ``True``.
        """
        actual_col = "y_actual"
        predicted_col = "y_prediction"

        # The SHAP frame is built first; the prediction columns are added to it.
        self.df_final = calculate_shap().find(model, df, model_name=model_name)

        # xgboost is fed a DMatrix; every other backend (catboost included)
        # receives the plain numpy values.
        if model_name == "xgboost":
            model_input = xgboost.DMatrix(df)
        else:
            model_input = df.to_numpy()
        self.df_final[predicted_col] = model.predict(model_input)

        self.df_final[actual_col] = y

        dashboard().find(self.df_final, actual_col, predicted_col, mode)
        return True
コード例 #2
0
    def calculate_prediction_shap(self, df):
        """Predict on ``df`` and return the SHAP frame with the prediction column added.

        Reads ``model``, ``model_name``, ``is_classification`` and
        ``y_variable_predict`` from ``self.param``.

        Args:
            df: Feature frame to score.

        Returns:
            The frame produced by ``calculate_shap().find`` with the
            predictions stored under ``self.param["y_variable_predict"]``, or
            ``False`` when the installed xgboost version is one of the
            unsupported 1.1.x releases.
        """
        params = self.param

        if params["model_name"] != "xgboost":
            # catboost and every other backend take the raw numpy values.
            predictions = params["model"].predict(df.to_numpy())
        else:
            import xgboost
            unsupported_versions = ['1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1']
            if xgboost.__version__ in unsupported_versions:
                print(
                    "Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'")
                return False
            predictions = params["model"].predict(xgboost.DMatrix(df))

        classification_flag = params["is_classification"]

        # The explainer returned alongside the frame is not needed here.
        shap_calculator = calculate_shap()
        shap_frame, _explainer = shap_calculator.find(
            params["model"],
            df,
            predictions,
            classification_flag,
            model_name=params["model_name"],
        )

        shap_frame[params["y_variable_predict"]] = predictions
        return shap_frame
コード例 #3
0
    def calculate_prediction_shap(self, df):
        """Predict on ``df``, compute SHAP values and append class probabilities.

        Reads ``model``, ``model_name`` and ``is_classification`` from
        ``self.param``. For the ``"h2o"`` backend ``df`` is converted to an
        ``h2o.H2OFrame`` before prediction, and that converted frame is what
        the SHAP step receives.

        Args:
            df: Feature frame to score.

        Returns:
            The frame produced by ``calculate_shap().find``; for classification
            it additionally carries one ``Probability_<column>`` column per
            probability column and is post-processed by
            ``formatting_h2o_prediction_prob``. Returns ``False`` when the
            installed xgboost version is one of the unsupported 1.1.x releases.
        """
        model = self.param["model"]
        model_name = self.param["model_name"]

        if model_name == "xgboost":
            import xgboost
            if xgboost.__version__ in ('1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1'):
                print(
                    "Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'")
                return False
            prediction_col = model.predict(xgboost.DMatrix(df))
        elif model_name == 'h2o':
            df = h2o.H2OFrame(df)
            prediction_col = model.predict(df)
        else:
            # catboost and every other backend take the raw numpy values.
            prediction_col = model.predict(df.to_numpy())

        is_classification = self.param["is_classification"]

        shap_calculator = calculate_shap()
        df_final, _explainer = shap_calculator.find(
            model, df, prediction_col, is_classification, model_name=model_name)

        # NOTE(review): this variant does not write a "y_prediction" column
        # itself; presumably the formatting helpers below take care of it.

        if is_classification is True:
            try:
                df_final = self.formatting_y_pred_for_h2o_classification(df_final, prediction_col)
                # Find the class probabilities to add to the dataset.
                prediction_col_prob = model.predict_proba(df.to_numpy())
            except Exception:
                # Fallback path (e.g. an H2OFrame has no usable ``to_numpy``):
                # take the probabilities from ``predict``. ``Exception`` rather
                # than a bare ``except`` so Ctrl-C / SystemExit still propagate.
                prediction_col_prob = model.predict(df)
                prediction_col_prob = prediction_col_prob.as_data_frame()
            pd_prediction_col_prob = pd.DataFrame(prediction_col_prob)

            for prob_column in pd_prediction_col_prob.columns:
                df_final["Probability_" + str(prob_column)] = list(pd_prediction_col_prob[prob_column])

            df_final = self.formatting_h2o_prediction_prob(df_final, pd_prediction_col_prob)
        return df_final
コード例 #4
0
ファイル: dashboard.py プロジェクト: Sohaib90/explainx
    def calculate_prediction_shap(self, df):
        """Score ``df``, compute its SHAP frame and attach prediction columns.

        Reads ``model``, ``model_name`` and ``is_classification`` from
        ``self.param``.

        Args:
            df: Feature frame to score.

        Returns:
            The frame produced by ``calculate_shap().find`` with a
            ``y_prediction`` column and, for classification, one
            ``Probability_<i>`` column per ``predict_proba`` output column.
            Returns ``False`` when the installed xgboost version is one of
            the unsupported 1.1.x releases.
        """
        params = self.param

        if params["model_name"] != "xgboost":
            # catboost and every other backend take the raw numpy values.
            predictions = params["model"].predict(df.to_numpy())
        else:
            import xgboost
            unsupported_versions = ['1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1']
            if xgboost.__version__ in unsupported_versions:
                print(
                    "Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'"
                )
                return False
            predictions = params["model"].predict(xgboost.DMatrix(df))

        classification_flag = params["is_classification"]

        # The explainer returned alongside the frame is not needed here.
        shap_calculator = calculate_shap()
        shap_frame, _explainer = shap_calculator.find(
            params["model"],
            df,
            predictions,
            classification_flag,
            model_name=params["model_name"],
        )

        shap_frame["y_prediction"] = predictions

        if classification_flag == True:
            # One probability column per class index.
            proba_frame = pd.DataFrame(
                params["model"].predict_proba(df.to_numpy()))
            for class_idx in proba_frame.columns:
                column_name = f"Probability_{class_idx!s}"
                shap_frame[column_name] = list(proba_frame[class_idx])

        return shap_frame
コード例 #5
0
    def ai(self, df, y, model, model_name="xgboost", mode=None):
        """Score ``df``, compute SHAP values, collect metadata and open the dashboard.

        Populates ``self.df_final`` (SHAP frame plus ``y_prediction``,
        ``y_actual`` and, for classification, per-class probability columns),
        ``self.explainer`` and several ``self.param`` entries (``classes``,
        ``expected_values``, ``is_classification``, ``model_name``).

        Args:
            df: Feature frame; must support ``to_numpy()``.
            y: Ground-truth values stored in the ``y_actual`` column.
            model: Fitted model exposing ``predict`` (and ``predict_proba``
                when the task is detected as classification).
            model_name: Backend identifier; only ``"xgboost"`` is treated
                specially.
            mode: Forwarded unchanged to ``dashboard().find``.

        Returns:
            ``True`` after the dashboard call, or ``False`` when the installed
            xgboost version is one of the unsupported 1.1.x releases.
        """
        y_variable = "y_actual"
        y_variable_predict = "y_prediction"

        # Predictions are computed first because the SHAP step takes them as input.
        if model_name == "xgboost":
            import xgboost
            if xgboost.__version__ in ('1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1'):
                print("Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'")
                return False
            prediction_col = model.predict(xgboost.DMatrix(df))
        else:
            # catboost and every other backend take the raw numpy values.
            prediction_col = model.predict(df.to_numpy())

        # Classification vs. regression is inferred from the predictions.
        is_classification = self.is_classification_given_y_array(prediction_col)

        shap_calculator = calculate_shap()
        self.df_final, self.explainer = shap_calculator.find(
            model, df, prediction_col, is_classification, model_name=model_name)

        self.df_final[y_variable_predict] = prediction_col
        self.df_final[y_variable] = y

        # Explicit comparison kept on purpose: only a value equal to True
        # selects the classification path.
        if is_classification == True:  # noqa: E712
            # Find and add the class probabilities to the dataset.
            pd_prediction_col_prob = pd.DataFrame(model.predict_proba(df.to_numpy()))

            for class_label in pd_prediction_col_prob.columns:
                self.df_final["probability_of_predicting_class_" + str(class_label)] = list(
                    pd_prediction_col_prob[class_label])

            classes = [str(class_label) for class_label in pd_prediction_col_prob.columns]
            self.param["classes"] = classes

            try:
                expected_values = self.explainer.expected_value
            except Exception:
                # No expected value available from the explainer: fall back to
                # a uniform value per class. ``Exception`` (not a bare
                # ``except``) lets Ctrl-C / SystemExit propagate.
                expected_values = [1 / len(classes) for _ in classes]
        else:
            try:
                expected_values = [self.explainer.expected_value]
            except Exception:
                # Fall back to the rounded mean of the actual targets.
                expected_values = [round(np.array(y).mean(), 2)]

        self.param["expected_values"] = expected_values
        self.param["is_classification"] = is_classification
        self.param["model_name"] = model_name

        d = dashboard()
        d.find(self.df_final, y_variable, y_variable_predict, mode, self.param)

        return True
コード例 #6
0
ファイル: explain.py プロジェクト: Sohaib90/explainx
    def ai(self, df, y, model, model_name="xgboost", mode=None):
        """Record usage analytics, score ``df``, compute SHAP values and open the dashboard.

        Populates ``self.df_final`` (SHAP frame plus ``y_prediction``,
        ``y_actual`` and, for classification, per-class probability columns),
        ``self.explainer`` and the ``self.param`` entries consumed by the
        dashboard.

        Args:
            df: Feature frame; must support ``to_numpy()``, ``columns`` and
                ``len()``.
            y: Ground-truth values stored in the ``y_actual`` column.
            model: Fitted model exposing ``predict`` (and ``predict_proba``
                when the task is detected as classification).
            model_name: Backend identifier; only ``"xgboost"`` is treated
                specially.
            mode: Forwarded unchanged to ``dashboard().find``.

        Returns:
            ``True`` after the dashboard call, or ``False`` when the installed
            xgboost version is one of the unsupported 1.1.x releases (the
            analytics record has already been inserted at that point).
        """
        y_variable = "y_actual"
        y_variable_predict = "y_prediction"

        # Analytics record written before any model work is done.
        instance_id = self.random_string_generator()
        analytics = Analytics()
        analytics['ip'] = analytics.finding_ip()
        analytics['mac'] = analytics.finding_address()
        analytics['instance_id'] = instance_id
        analytics['time'] = str(datetime.datetime.now())
        analytics['total_columns'] = len(df.columns)
        analytics['total_rows'] = len(df)
        analytics['os'] = analytics.finding_system()
        analytics['model_name'] = model_name
        analytics["function"] = 'before_dashboard'
        analytics["query"] = "before_dashboard"
        analytics['finish_time'] = ''
        analytics.insert_data()

        # Predictions are computed first because the SHAP step takes them as input.
        if model_name == "xgboost":
            import xgboost
            if xgboost.__version__ in ('1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1'):
                print(
                    "Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'"
                )
                return False
            prediction_col = model.predict(xgboost.DMatrix(df))
        else:
            # catboost and every other backend take the raw numpy values.
            prediction_col = model.predict(df.to_numpy())

        # Classification vs. regression is inferred from the predictions.
        is_classification = self.is_classification_given_y_array(
            prediction_col)

        shap_calculator = calculate_shap()
        self.df_final, self.explainer = shap_calculator.find(
            model,
            df,
            prediction_col,
            is_classification,
            model_name=model_name)

        self.df_final[y_variable_predict] = prediction_col
        self.df_final[y_variable] = y

        # Explicit comparison kept on purpose: only a value equal to True
        # selects the classification path.
        if is_classification == True:  # noqa: E712
            # Find and add the class probabilities to the dataset.
            pd_prediction_col_prob = pd.DataFrame(
                model.predict_proba(df.to_numpy()))

            for class_label in pd_prediction_col_prob.columns:
                self.df_final["probability_of_predicting_class_" +
                              str(class_label)] = list(
                                  pd_prediction_col_prob[class_label])

            classes = [
                str(class_label)
                for class_label in pd_prediction_col_prob.columns
            ]
            self.param["classes"] = classes

            try:
                expected_values = self.explainer.expected_value
            except Exception:
                # No expected value available from the explainer: fall back to
                # a uniform value per class. ``Exception`` (not a bare
                # ``except``) lets Ctrl-C / SystemExit propagate.
                expected_values = [1 / len(classes) for _ in classes]
        else:
            try:
                expected_values = [self.explainer.expected_value]
            except Exception:
                # Fall back to the rounded mean of the actual targets.
                expected_values = [round(np.array(y).mean(), 2)]

        self.param["expected_values"] = expected_values
        self.param["is_classification"] = is_classification
        self.param["model_name"] = model_name
        self.param["model"] = model
        self.param["columns"] = df.columns
        self.param["y_variable"] = y_variable
        self.param["y_variable_predict"] = y_variable_predict
        self.param['instance_id'] = instance_id

        d = dashboard()
        d.find(self.df_final, mode, self.param)

        return True
コード例 #7
0
ファイル: explain.py プロジェクト: Sohaib90/explainx
    def ai_test(self, df, y, model, model_name="xgboost", mode=None):
        """Run the explanation pipeline and smoke-test the graph builders without a dashboard.

        Performs the same prediction / SHAP / ``self.param`` preparation as
        the dashboard entry point, then calls ``feature_importance``,
        ``feature_impact`` and ``summary_plot`` on ``plotly_graphs()`` so that
        any failure in them surfaces as an exception.

        Args:
            df: Feature frame; must support ``to_numpy()`` and ``columns``.
            y: Ground-truth values stored in the ``y_actual`` column.
            model: Fitted model exposing ``predict`` (and ``predict_proba``
                when the task is detected as classification).
            model_name: Backend identifier; only ``"xgboost"`` is treated
                specially.
            mode: Accepted for signature parity; not used by this method.

        Returns:
            ``True`` when every step ran, or ``False`` when the installed
            xgboost version is one of the unsupported 1.1.x releases.
        """
        y_variable = "y_actual"
        y_variable_predict = "y_prediction"

        # Predictions are computed first because the SHAP step takes them as input.
        if model_name == "xgboost":
            import xgboost
            if xgboost.__version__ in ('1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1'):
                print(
                    "Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'"
                )
                return False
            prediction_col = model.predict(xgboost.DMatrix(df))
        else:
            # catboost and every other backend take the raw numpy values.
            prediction_col = model.predict(df.to_numpy())

        # Classification vs. regression is inferred from the predictions.
        is_classification = self.is_classification_given_y_array(
            prediction_col)

        shap_calculator = calculate_shap()
        self.df_final, self.explainer = shap_calculator.find(
            model,
            df,
            prediction_col,
            is_classification,
            model_name=model_name)

        self.df_final[y_variable_predict] = prediction_col
        self.df_final[y_variable] = y

        # Explicit comparison kept on purpose: only a value equal to True
        # selects the classification path.
        if is_classification == True:  # noqa: E712
            # Find and add the class probabilities to the dataset.
            pd_prediction_col_prob = pd.DataFrame(
                model.predict_proba(df.to_numpy()))

            for class_label in pd_prediction_col_prob.columns:
                self.df_final["Probability_" + str(class_label)] = list(
                    pd_prediction_col_prob[class_label])

            classes = [
                str(class_label)
                for class_label in pd_prediction_col_prob.columns
            ]
            self.param["classes"] = classes

            try:
                expected_values = self.explainer.expected_value
            except Exception:
                # No expected value available from the explainer: fall back to
                # a uniform value per class. ``Exception`` (not a bare
                # ``except``) lets Ctrl-C / SystemExit propagate.
                expected_values = [1 / len(classes) for _ in classes]
        else:
            try:
                expected_values = [self.explainer.expected_value]
            except Exception:
                # Fall back to the rounded mean of the actual targets.
                expected_values = [round(np.array(y).mean(), 2)]

        self.param["expected_values"] = expected_values
        self.param["is_classification"] = is_classification
        self.param["model_name"] = model_name
        self.param["model"] = model
        self.param["columns"] = df.columns
        self.param["y_variable"] = y_variable
        self.param["y_variable_predict"] = y_variable_predict

        # Exercise every graph builder; the results are discarded. The
        # two-element unpacking is kept so a changed return shape still fails.
        g = plotly_graphs()
        _, _ = g.feature_importance(self.df_final)
        _, _ = g.feature_impact(self.df_final)
        g.summary_plot(self.df_final)

        return True
コード例 #8
0
ファイル: explain.py プロジェクト: thebeastadi/explainx
    def ai_h2o_automl(self, df, y_column_name, model, model_name="h2o", mode=None):
        """Record usage analytics, explain an H2O model and open the dashboard.

        Populates ``self.df_final`` (SHAP frame plus ``y_prediction``,
        ``y_actual`` and, for classification, probability columns),
        ``self.explainer`` and the ``self.param`` entries consumed by the
        dashboard.

        Args:
            df: Data as a pandas ``DataFrame`` (converted to ``h2o.H2OFrame``
                when ``model_name`` is ``"h2o"``) or an existing H2O frame.
            y_column_name: Name of the target column inside ``df``.
            model: Model exposing ``predict`` and a ``type`` attribute; the
                task is classification when ``type == 'classifier'``.
            model_name: Backend identifier, ``"h2o"`` by default.
            mode: Forwarded unchanged to ``dashboard().find``.

        Returns:
            Always ``True``.
        """
        y_variable = "y_actual"
        y_variable_predict = "y_prediction"

        # Analytics record written before any model work is done.
        instance_id = self.random_string_generator()
        analytics = Analytics()
        analytics['ip'] = analytics.finding_ip()
        analytics['mac'] = analytics.finding_address()
        analytics['instance_id'] = instance_id
        analytics['time'] = str(datetime.datetime.now())
        analytics['total_columns'] = len(df.columns)
        analytics['total_rows'] = len(df)
        analytics['os'] = analytics.finding_system()
        analytics['model_name'] = model_name
        analytics["function"] = 'before_dashboard'
        analytics["query"] = "before_dashboard"
        analytics['finish_time'] = ''
        analytics.insert_data()

        # Predictions are computed first because the SHAP step takes them as input.
        # NOTE(review): for any model_name other than 'h2o' this stays an empty
        # list and the ``as_data_frame()`` call further down will fail.
        prediction_col = []

        if model_name == 'h2o':
            if isinstance(df, pd.DataFrame):
                df = h2o.H2OFrame(df)
            # NOTE(review): predicting on the target column alone looks
            # unusual; left unchanged, confirm against the model wrapper.
            prediction_col = model.predict(df[y_column_name])

        is_classification = bool(model.type == 'classifier')

        shap_calculator = calculate_shap()
        self.df_final, self.explainer = shap_calculator.find(
            model, df, prediction_col, is_classification, model_name=model_name)

        self.df_final[y_variable_predict] = prediction_col.as_data_frame()[y_column_name].tolist()

        actual_values = df.as_data_frame()[y_column_name].tolist()
        self.df_final[y_variable] = actual_values

        if is_classification:
            # Find and add the class probabilities to the dataset.
            try:
                prediction_col_prob = model.predict_proba(df)
            except Exception:
                # Models without a working ``predict_proba`` fall back to
                # ``predict``. ``Exception`` (not a bare ``except``) lets
                # Ctrl-C / SystemExit propagate.
                prediction_col_prob = model.predict(df)
            prediction_col_prob = prediction_col_prob.as_data_frame()

            pd_prediction_col_prob = pd.DataFrame(prediction_col_prob)

            for prob_column in pd_prediction_col_prob.columns:
                self.df_final["probability_of_predicting_class_" + str(prob_column)] = list(
                    pd_prediction_col_prob[prob_column])

            classes = [str(prob_column) for prob_column in pd_prediction_col_prob.columns]
            self.param["classes"] = classes

            try:
                expected_values = self.explainer.expected_value
            except Exception:
                # No expected value available from the explainer: fall back to
                # a uniform value per class.
                expected_values = [1 / len(classes) for _ in classes]
        else:
            try:
                expected_values = [self.explainer.expected_value]
            except Exception:
                # Fall back to the rounded mean of the actual targets. The
                # previous code referenced an undefined name ``y`` here and
                # raised NameError instead of producing the fallback.
                expected_values = [round(np.array(actual_values).mean(), 2)]

        self.param["expected_values"] = expected_values
        self.param["is_classification"] = is_classification
        self.param["model_name"] = model_name
        self.param["model"] = model
        self.param["columns"] = df.columns
        self.param["y_variable"] = y_variable
        self.param["y_variable_predict"] = y_variable_predict
        self.param['instance_id'] = instance_id

        d = dashboard()
        d.find(self.df_final, mode, self.param)

        return True
コード例 #9
0
ファイル: explain.py プロジェクト: thebeastadi/explainx
    def ai(self, df, y, model, model_name="xgboost", mode=None):
        """Record usage analytics, score ``df``, compute SHAP values and open the dashboard.

        Populates ``self.df_final`` (SHAP frame plus ``y_prediction``,
        ``y_actual`` and, for classifiers, one ``Probability: <class>`` column
        per class), ``self.explainer`` and the ``self.param`` entries consumed
        by the dashboard.

        Args:
            df: Feature frame; must support ``columns`` and ``len()``, plus
                ``to_numpy()`` for the catboost backend.
            y: Ground-truth values stored in the ``y_actual`` column.
            model: Fitted model exposing ``predict``; classifiers (as reported
                by ``is_classifier``) must also expose ``predict_proba``.
            model_name: Backend identifier; ``"xgboost"`` and ``"catboost"``
                get backend-specific input conversion.
            mode: Forwarded unchanged to ``dashboard().find``.

        Returns:
            ``True`` after the dashboard call, or ``False`` when the installed
            xgboost version is one of the unsupported 1.1.x releases (the
            analytics record has already been inserted at that point).
        """
        y_variable = "y_actual"
        y_variable_predict = "y_prediction"

        # Analytics record written before any model work is done.
        instance_id = self.random_string_generator()
        analytics = Analytics()
        analytics['ip'] = analytics.finding_ip()
        analytics['mac'] = analytics.finding_address()
        analytics['instance_id'] = instance_id
        analytics['time'] = str(datetime.datetime.now())
        analytics['total_columns'] = len(df.columns)
        analytics['total_rows'] = len(df)
        analytics['os'] = analytics.finding_system()
        analytics['model_name'] = model_name
        analytics["function"] = 'before_dashboard'
        analytics["query"] = "before_dashboard"
        analytics['finish_time'] = ''
        analytics.insert_data()

        # Predictions are computed first because the SHAP step takes them as input.
        if model_name == "xgboost":
            import xgboost
            if xgboost.__version__ in ('1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1'):
                print(
                    "Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'")
                return False
            prediction_col = model.predict(xgboost.DMatrix(df))
        elif model_name == "catboost":
            prediction_col = model.predict(df.to_numpy())
        else:
            # Other backends receive the frame itself, not its numpy values.
            prediction_col = model.predict(df)

        is_classification = bool(is_classifier(model))

        shap_calculator = calculate_shap()
        self.df_final, self.explainer = shap_calculator.find(
            model, df, prediction_col, is_classification, model_name=model_name)

        # Append the model decision and true label columns to the dataset.
        self.df_final[y_variable_predict] = prediction_col
        self.df_final[y_variable] = y

        if is_classification:
            probabilities = model.predict_proba(df)

            # ``predict_proba`` columns follow ``model.classes_``, not the set
            # of labels that happen to be predicted; labelling from the
            # predictions mislabels columns whenever a class is never
            # predicted. Fall back to the unique predictions only when the
            # model does not expose a matching ``classes_``.
            classes = getattr(model, "classes_", None)
            if classes is None or len(classes) != np.shape(probabilities)[1]:
                classes = np.unique(prediction_col)
            classes = np.asarray(classes)

            for column_index, class_label in enumerate(classes):
                self.df_final['Probability: {}'.format(class_label)] = probabilities[:, column_index]

            self.param['classes'] = classes

            try:
                expected_values = self.explainer.expected_value
            except Exception:
                # No expected value available from the explainer: fall back to
                # a uniform value per class. ``Exception`` (not a bare
                # ``except``) lets Ctrl-C / SystemExit propagate.
                expected_values = [1 / len(classes) for _ in classes]
        else:
            try:
                expected_values = [self.explainer.expected_value]
            except Exception:
                # Fall back to the rounded mean of the actual targets.
                expected_values = [round(np.array(y).mean(), 2)]

        self.param["expected_values"] = expected_values
        self.param["is_classification"] = is_classification
        self.param["model_name"] = model_name
        self.param["model"] = model
        self.param["columns"] = df.columns
        self.param["y_variable"] = y_variable
        self.param["y_variable_predict"] = y_variable_predict
        self.param['instance_id'] = instance_id

        d = dashboard()
        d.find(self.df_final, mode, self.param)

        return True