def ai(self, df, y, model, model_name="xgboost", mode=None):
    """Compute SHAP values and predictions for ``df`` and start the dashboard.

    The frame returned by ``calculate_shap().find`` is stored on
    ``self.df_final`` and extended with two columns: ``"y_prediction"``
    (the output of ``model.predict``) and ``"y_actual"`` (``y``). The frame
    is then passed to ``dashboard().find`` together with ``mode``.

    Returns:
        True, always.
    """
    actual_col = "y_actual"
    predicted_col = "y_prediction"

    # SHAP values first; the prediction columns are appended to that frame.
    shap_helper = calculate_shap()
    self.df_final = shap_helper.find(model, df, model_name=model_name)

    # xgboost is given a DMatrix; catboost and every other model receive
    # the result of df.to_numpy().
    if model_name == "xgboost":
        self.df_final[predicted_col] = model.predict(xgboost.DMatrix(df))
    else:
        self.df_final[predicted_col] = model.predict(df.to_numpy())

    self.df_final[actual_col] = y

    board = dashboard()
    board.find(self.df_final, actual_col, predicted_col, mode)
    return True
def calculate_prediction_shap(self, df):
    """Return the SHAP frame for ``df`` with the model's predictions appended.

    The model, its name, the classification flag and the name of the
    prediction column are all read from ``self.param``.

    Returns:
        The frame produced by ``calculate_shap().find`` with one extra
        column named ``self.param["y_variable_predict"]`` holding the
        predictions, or ``False`` (after printing a hint) when the installed
        xgboost is one of the 1.1.x releases rejected below.
    """
    settings = self.param

    if settings["model_name"] == "xgboost":
        import xgboost

        rejected_versions = ['1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1']
        if xgboost.__version__ in rejected_versions:
            print(
                "Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'")
            return False
        predictions = settings["model"].predict(xgboost.DMatrix(df))
    else:
        # catboost and every remaining model are fed df.to_numpy().
        predictions = settings["model"].predict(df.to_numpy())

    classification_flag = settings["is_classification"]

    shap_helper = calculate_shap()
    shap_frame, _explainer = shap_helper.find(
        settings["model"],
        df,
        predictions,
        classification_flag,
        model_name=settings["model_name"],
    )

    shap_frame[settings["y_variable_predict"]] = predictions
    return shap_frame
def calculate_prediction_shap(self, df):
    """Return the SHAP frame for ``df`` enriched with prediction columns.

    The model, its name and the classification flag are read from
    ``self.param``. For an ``"h2o"`` model ``df`` is first converted with
    ``h2o.H2OFrame``; xgboost receives a ``DMatrix``; every other model
    receives ``df.to_numpy()``.

    For classification models the frame is passed through
    ``self.formatting_y_pred_for_h2o_classification``, one
    ``"Probability_<column>"`` column is added per probability column, and
    the result is finally passed through
    ``self.formatting_h2o_prediction_prob``.

    Returns:
        The resulting frame, or ``False`` (after printing a hint) when the
        installed xgboost is one of the 1.1.x releases rejected below.
    """
    params = self.param
    model = params["model"]
    model_name = params["model_name"]

    if model_name == "xgboost":
        import xgboost

        if xgboost.__version__ in ('1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1'):
            print(
                "Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'")
            return False
        prediction_col = model.predict(xgboost.DMatrix(df))
    elif model_name == "h2o":
        df = h2o.H2OFrame(df)
        prediction_col = model.predict(df)
    else:
        # catboost and any other model take the raw numpy values.
        prediction_col = model.predict(df.to_numpy())

    is_classification = params["is_classification"]

    shap_helper = calculate_shap()
    df_final, _explainer = shap_helper.find(
        model, df, prediction_col, is_classification, model_name=model_name)

    # Truthiness (rather than ``is True``) so that numpy booleans and other
    # truthy flags are honoured as well.
    if not is_classification:
        return df_final

    try:
        df_final = self.formatting_y_pred_for_h2o_classification(
            df_final, prediction_col)
        # Find the class probabilities; this path needs a frame that offers
        # to_numpy() and a model that offers predict_proba().
        prediction_col_prob = model.predict_proba(df.to_numpy())
    except Exception:
        # Best-effort fallback: ask predict() for the probabilities and
        # convert its result with as_data_frame(). ``Exception`` keeps the
        # fallback broad without trapping KeyboardInterrupt/SystemExit.
        prediction_col_prob = model.predict(df).as_data_frame()

    probabilities = pd.DataFrame(prediction_col_prob)
    for column in probabilities.columns:
        df_final["Probability_" + str(column)] = list(probabilities[column])

    return self.formatting_h2o_prediction_prob(df_final, probabilities)
def calculate_prediction_shap(self, df):
    """Return the SHAP frame for ``df`` with predictions and probabilities.

    The model, its name and the classification flag are read from
    ``self.param``. The frame produced by ``calculate_shap().find`` gets a
    ``"y_prediction"`` column and, when the classification flag equals
    ``True``, one ``"Probability_<column>"`` column per column of
    ``predict_proba``'s output.

    Returns:
        The resulting frame, or ``False`` (after printing a hint) when the
        installed xgboost is one of the 1.1.x releases rejected below.
    """
    settings = self.param

    if settings["model_name"] == "xgboost":
        import xgboost

        blocked = ['1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1']
        if xgboost.__version__ in blocked:
            print(
                "Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'"
            )
            return False
        predictions = settings["model"].predict(xgboost.DMatrix(df))
    else:
        # Same call for catboost and for every other model name.
        predictions = settings["model"].predict(df.to_numpy())

    classification_flag = settings["is_classification"]

    shap_frame, _explainer = calculate_shap().find(
        settings["model"],
        df,
        predictions,
        classification_flag,
        model_name=settings["model_name"],
    )

    shap_frame["y_prediction"] = predictions

    if classification_flag == True:  # noqa: E712 - equality test kept on purpose
        # Add one probability column per column returned by predict_proba.
        proba_frame = pd.DataFrame(
            settings["model"].predict_proba(df.to_numpy()))
        for label in proba_frame.columns:
            shap_frame["Probability_" + str(label)] = list(proba_frame[label])

    return shap_frame
def ai(self, df, y, model, model_name="xgboost", mode=None):
    """Compute predictions and SHAP values for ``df`` and start the dashboard.

    Steps:
      1. Predict with ``model`` (xgboost gets a ``DMatrix``, everything else
         ``df.to_numpy()``).
      2. Ask ``self.is_classification_given_y_array`` whether the predictions
         belong to a classification task.
      3. Store the frame and explainer returned by ``calculate_shap().find``
         on ``self.df_final`` / ``self.explainer`` and append the
         ``"y_prediction"`` and ``"y_actual"`` columns.
      4. Fill ``self.param`` (``classes``, ``expected_values``,
         ``is_classification``, ``model_name``) and hand everything to
         ``dashboard().find``.

    Returns:
        True on success, or ``False`` (after printing a hint) when the
        installed xgboost is one of the 1.1.x releases rejected below.
    """
    y_variable = "y_actual"
    y_variable_predict = "y_prediction"

    # Predictions are needed first because the SHAP helper takes them as input.
    if model_name == "xgboost":
        import xgboost

        if xgboost.__version__ in ('1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1'):
            print("Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'")
            return False
        prediction_col = model.predict(xgboost.DMatrix(df))
    else:
        # catboost and any other model take the raw numpy values.
        prediction_col = model.predict(df.to_numpy())

    is_classification = self.is_classification_given_y_array(prediction_col)

    shap_helper = calculate_shap()
    self.df_final, self.explainer = shap_helper.find(
        model, df, prediction_col, is_classification, model_name=model_name)

    self.df_final[y_variable_predict] = prediction_col
    self.df_final[y_variable] = y

    if is_classification:
        # One probability column per column returned by predict_proba.
        probabilities = pd.DataFrame(model.predict_proba(df.to_numpy()))
        for label in probabilities.columns:
            self.df_final["probability_of_predicting_class_" + str(label)] = list(probabilities[label])

        classes = [str(label) for label in probabilities.columns]
        self.param["classes"] = classes

        try:
            expected_values = self.explainer.expected_value
        except Exception:
            # No usable expected value on the explainer: fall back to a
            # uniform value per class.
            expected_values = [1 / len(classes)] * len(classes)
    else:
        try:
            expected_values = [self.explainer.expected_value]
        except Exception:
            # Fall back to the rounded mean of the actual targets.
            expected_values = [round(np.array(y).mean(), 2)]

    self.param["expected_values"] = expected_values
    self.param["is_classification"] = is_classification
    self.param["model_name"] = model_name

    board = dashboard()
    board.find(self.df_final, y_variable, y_variable_predict, mode, self.param)
    return True
def ai(self, df, y, model, model_name="xgboost", mode=None):
    """Record analytics, compute predictions and SHAP values, start the dashboard.

    Steps:
      1. Fill an ``Analytics`` record (values from ``finding_ip``,
         ``finding_address``, ``finding_system``, the shape of ``df``, the
         model name and a fresh instance id) and call ``insert_data()``.
      2. Predict with ``model`` (xgboost gets a ``DMatrix``, everything else
         ``df.to_numpy()``).
      3. Store the frame and explainer returned by ``calculate_shap().find``
         on ``self.df_final`` / ``self.explainer`` and append the
         ``"y_prediction"`` and ``"y_actual"`` columns.
      4. Fill ``self.param`` and hand it to ``dashboard().find``.

    Returns:
        True on success, or ``False`` (after printing a hint) when the
        installed xgboost is one of the 1.1.x releases rejected below.
    """
    y_variable = "y_actual"
    y_variable_predict = "y_prediction"

    instance_id = self.random_string_generator()
    analytics = Analytics()
    analytics['ip'] = analytics.finding_ip()
    analytics['mac'] = analytics.finding_address()
    analytics['instance_id'] = instance_id
    analytics['time'] = str(datetime.datetime.now())
    analytics['total_columns'] = len(df.columns)
    analytics['total_rows'] = len(df)
    analytics['os'] = analytics.finding_system()
    analytics['model_name'] = model_name
    analytics["function"] = 'before_dashboard'
    analytics["query"] = "before_dashboard"
    analytics['finish_time'] = ''
    analytics.insert_data()

    # Predictions are needed first because the SHAP helper takes them as input.
    if model_name == "xgboost":
        import xgboost

        if xgboost.__version__ in ('1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1'):
            print(
                "Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'"
            )
            return False
        prediction_col = model.predict(xgboost.DMatrix(df))
    else:
        # catboost and any other model take the raw numpy values.
        prediction_col = model.predict(df.to_numpy())

    is_classification = self.is_classification_given_y_array(prediction_col)

    shap_helper = calculate_shap()
    self.df_final, self.explainer = shap_helper.find(
        model, df, prediction_col, is_classification, model_name=model_name)

    self.df_final[y_variable_predict] = prediction_col
    self.df_final[y_variable] = y

    if is_classification:
        # One probability column per column returned by predict_proba.
        probabilities = pd.DataFrame(model.predict_proba(df.to_numpy()))
        for label in probabilities.columns:
            self.df_final["probability_of_predicting_class_" + str(label)] = list(probabilities[label])

        classes = [str(label) for label in probabilities.columns]
        self.param["classes"] = classes

        try:
            expected_values = self.explainer.expected_value
        except Exception:
            # No usable expected value on the explainer: fall back to a
            # uniform value per class.
            expected_values = [1 / len(classes)] * len(classes)
    else:
        try:
            expected_values = [self.explainer.expected_value]
        except Exception:
            # Fall back to the rounded mean of the actual targets.
            expected_values = [round(np.array(y).mean(), 2)]

    self.param["expected_values"] = expected_values
    self.param["is_classification"] = is_classification
    self.param["model_name"] = model_name
    self.param["model"] = model
    self.param["columns"] = df.columns
    self.param["y_variable"] = y_variable
    self.param["y_variable_predict"] = y_variable_predict
    self.param['instance_id'] = instance_id

    board = dashboard()
    board.find(self.df_final, mode, self.param)
    return True
def ai_test(self, df, y, model, model_name="xgboost", mode=None):
    """Run the prediction/SHAP pipeline and exercise the graph builders.

    Performs the prediction and SHAP steps, fills ``self.param``, and then,
    instead of starting a dashboard, calls ``feature_importance``,
    ``feature_impact`` and ``summary_plot`` on a ``plotly_graphs`` instance
    so that a failure in any of them surfaces as an exception. ``mode`` is
    accepted but not used.

    Returns:
        True on success, or ``False`` (after printing a hint) when the
        installed xgboost is one of the 1.1.x releases rejected below.
    """
    y_variable = "y_actual"
    y_variable_predict = "y_prediction"

    # Predictions are needed first because the SHAP helper takes them as input.
    if model_name == "xgboost":
        import xgboost

        if xgboost.__version__ in ('1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1'):
            print(
                "Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'"
            )
            return False
        prediction_col = model.predict(xgboost.DMatrix(df))
    else:
        # catboost and any other model take the raw numpy values.
        prediction_col = model.predict(df.to_numpy())

    is_classification = self.is_classification_given_y_array(prediction_col)

    shap_helper = calculate_shap()
    self.df_final, self.explainer = shap_helper.find(
        model, df, prediction_col, is_classification, model_name=model_name)

    self.df_final[y_variable_predict] = prediction_col
    self.df_final[y_variable] = y

    if is_classification:
        # One probability column per column returned by predict_proba.
        probabilities = pd.DataFrame(model.predict_proba(df.to_numpy()))
        for label in probabilities.columns:
            self.df_final["Probability_" + str(label)] = list(probabilities[label])

        classes = [str(label) for label in probabilities.columns]
        self.param["classes"] = classes

        try:
            expected_values = self.explainer.expected_value
        except Exception:
            # No usable expected value on the explainer: fall back to a
            # uniform value per class.
            expected_values = [1 / len(classes)] * len(classes)
    else:
        try:
            expected_values = [self.explainer.expected_value]
        except Exception:
            # Fall back to the rounded mean of the actual targets.
            expected_values = [round(np.array(y).mean(), 2)]

    self.param["expected_values"] = expected_values
    self.param["is_classification"] = is_classification
    self.param["model_name"] = model_name
    self.param["model"] = model
    self.param["columns"] = df.columns
    self.param["y_variable"] = y_variable
    self.param["y_variable_predict"] = y_variable_predict

    # Manually exercise all the graphs to see whether they work. The results
    # are discarded; the two-value unpacking is kept so an unexpected return
    # shape still raises.
    graphs = plotly_graphs()
    _, _ = graphs.feature_importance(self.df_final)
    _, _ = graphs.feature_impact(self.df_final)
    graphs.summary_plot(self.df_final)

    return True
def ai_h2o_automl(self, df, y_column_name, model, model_name="h2o", mode=None):
    """Record analytics, run the SHAP pipeline for an h2o model, start the dashboard.

    Steps:
      1. Fill an ``Analytics`` record and call ``insert_data()``.
      2. When ``model_name`` is ``'h2o'``, convert a pandas ``df`` with
         ``h2o.H2OFrame`` and call ``model.predict``.
      3. Treat the task as classification when ``model.type`` equals
         ``'classifier'``.
      4. Store the frame and explainer returned by ``calculate_shap().find``
         on ``self.df_final`` / ``self.explainer`` and append the
         ``"y_prediction"`` and ``"y_actual"`` columns.
      5. Fill ``self.param`` and hand it to ``dashboard().find``.

    Args:
        df: Input data; a pandas DataFrame is converted to an H2OFrame.
        y_column_name: Name of the column in ``df`` holding the actual
            target values.
        model: Model object exposing ``predict`` and ``type``.
        model_name: Model family name; defaults to ``"h2o"``.
        mode: Passed through to ``dashboard().find``.

    Returns:
        True, always.
    """
    y_variable = "y_actual"
    y_variable_predict = "y_prediction"

    instance_id = self.random_string_generator()
    analytics = Analytics()
    analytics['ip'] = analytics.finding_ip()
    analytics['mac'] = analytics.finding_address()
    analytics['instance_id'] = instance_id
    analytics['time'] = str(datetime.datetime.now())
    analytics['total_columns'] = len(df.columns)
    analytics['total_rows'] = len(df)
    analytics['os'] = analytics.finding_system()
    analytics['model_name'] = model_name
    analytics["function"] = 'before_dashboard'
    analytics["query"] = "before_dashboard"
    analytics['finish_time'] = ''
    analytics.insert_data()

    # Predictions are needed first because the SHAP helper takes them as input.
    prediction_col = []
    if model_name == 'h2o':
        if isinstance(df, pd.DataFrame):
            df = h2o.H2OFrame(df)
        # NOTE(review): the prediction is requested on the target column
        # only - confirm this is intended rather than the full frame.
        prediction_col = model.predict(df[y_column_name])

    is_classification = model.type == 'classifier'

    shap_helper = calculate_shap()
    self.df_final, self.explainer = shap_helper.find(
        model, df, prediction_col, is_classification, model_name=model_name)

    self.df_final[y_variable_predict] = prediction_col.as_data_frame()[y_column_name].tolist()
    actual_values = df.as_data_frame()[y_column_name].tolist()
    self.df_final[y_variable] = actual_values

    if is_classification:
        try:
            prediction_col_prob = model.predict_proba(df)
        except Exception:
            # Best-effort fallback when predict_proba is not usable: take
            # the output of predict() converted with as_data_frame().
            prediction_col_prob = model.predict(df).as_data_frame()

        probabilities = pd.DataFrame(prediction_col_prob)
        for label in probabilities.columns:
            self.df_final["probability_of_predicting_class_" + str(label)] = list(probabilities[label])

        classes = [str(label) for label in probabilities.columns]
        self.param["classes"] = classes

        try:
            expected_values = self.explainer.expected_value
        except Exception:
            # No usable expected value on the explainer: fall back to a
            # uniform value per class.
            expected_values = [1 / len(classes)] * len(classes)
    else:
        try:
            expected_values = [self.explainer.expected_value]
        except Exception:
            # Fall back to the rounded mean of the actual target values
            # taken from the y_column_name column.
            expected_values = [round(np.array(actual_values).mean(), 2)]

    self.param["expected_values"] = expected_values
    self.param["is_classification"] = is_classification
    self.param["model_name"] = model_name
    self.param["model"] = model
    self.param["columns"] = df.columns
    self.param["y_variable"] = y_variable
    self.param["y_variable_predict"] = y_variable_predict
    self.param['instance_id'] = instance_id

    board = dashboard()
    board.find(self.df_final, mode, self.param)
    return True
def ai(self, df, y, model, model_name="xgboost", mode=None):
    """Record analytics, compute predictions and SHAP values, start the dashboard.

    Steps:
      1. Fill an ``Analytics`` record and call ``insert_data()``.
      2. Predict with ``model`` (xgboost gets a ``DMatrix``, catboost gets
         ``df.to_numpy()``, everything else gets ``df`` itself).
      3. Decide classification vs. regression with ``is_classifier(model)``.
      4. Store the frame and explainer returned by ``calculate_shap().find``
         on ``self.df_final`` / ``self.explainer`` and append the
         ``"y_prediction"`` and ``"y_actual"`` columns.
      5. For classifiers add one ``"Probability: <class>"`` column per class.
      6. Fill ``self.param`` and hand it to ``dashboard().find``.

    Returns:
        True on success, or ``False`` (after printing a hint) when the
        installed xgboost is one of the 1.1.x releases rejected below.
    """
    y_variable = "y_actual"
    y_variable_predict = "y_prediction"

    # Code for Analytics
    instance_id = self.random_string_generator()
    analytics = Analytics()
    analytics['ip'] = analytics.finding_ip()
    analytics['mac'] = analytics.finding_address()
    analytics['instance_id'] = instance_id
    analytics['time'] = str(datetime.datetime.now())
    analytics['total_columns'] = len(df.columns)
    analytics['total_rows'] = len(df)
    analytics['os'] = analytics.finding_system()
    analytics['model_name'] = model_name
    analytics["function"] = 'before_dashboard'
    analytics["query"] = "before_dashboard"
    analytics['finish_time'] = ''
    analytics.insert_data()

    # Predictions are needed first because the SHAP helper takes them as input.
    if model_name == "xgboost":
        import xgboost

        if xgboost.__version__ in ('1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1'):
            print(
                "Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'")
            return False
        prediction_col = model.predict(xgboost.DMatrix(df))
    elif model_name == "catboost":
        prediction_col = model.predict(df.to_numpy())
    else:
        prediction_col = model.predict(df)

    is_classification = bool(is_classifier(model))

    shap_helper = calculate_shap()
    self.df_final, self.explainer = shap_helper.find(
        model, df, prediction_col, is_classification, model_name=model_name)

    # Append the model decision and true label columns to the dataset.
    self.df_final[y_variable_predict] = prediction_col
    self.df_final[y_variable] = y

    if is_classification:
        probabilities = np.asarray(model.predict_proba(df))

        # Label the probability columns with the model's own class list when
        # it matches the matrix width: the set of *predicted* labels can be
        # smaller than the set of classes, which would attach the wrong
        # label to a column. Otherwise fall back to the predicted labels.
        classes = getattr(model, "classes_", None)
        if classes is None or len(classes) != probabilities.shape[1]:
            classes = np.unique(prediction_col)
        classes = np.asarray(classes)

        for index, label in enumerate(classes):
            self.df_final['Probability: {}'.format(label)] = probabilities[:, index]
        self.param['classes'] = classes

        try:
            expected_values = self.explainer.expected_value
        except Exception:
            # No usable expected value on the explainer: fall back to a
            # uniform value per class.
            expected_values = [1 / len(classes)] * len(classes)
    else:
        try:
            expected_values = [self.explainer.expected_value]
        except Exception:
            # Fall back to the rounded mean of the actual targets.
            expected_values = [round(np.array(y).mean(), 2)]

    self.param["expected_values"] = expected_values
    self.param["is_classification"] = is_classification
    self.param["model_name"] = model_name
    self.param["model"] = model
    self.param["columns"] = df.columns
    self.param["y_variable"] = y_variable
    self.param["y_variable_predict"] = y_variable_predict
    self.param['instance_id'] = instance_id

    board = dashboard()
    board.find(self.df_final, mode, self.param)
    return True