def baseline_model(self, scoring, SEED, result_col_nm):
    """Evaluate the baseline models on the training split and return their scores.

    Args:
        scoring: Scoring identifier forwarded to ``f.get_model_performance``.
        SEED: Seed value forwarded to ``f.get_model_performance``.
        result_col_nm: Column name forwarded to ``f.ScoreDataFrame``.

    Returns:
        Whatever ``f.ScoreDataFrame`` builds from the model names and results
        (presumably a score table labelled ``result_col_nm`` -- confirm
        against the helper module).
    """
    # Run the baseline models on the unbalanced dataset
    candidate_models = f.GetBasedModel()
    model_names, cv_results = f.get_model_performance(
        self.X_train, self.y_train, candidate_models, SEED, scoring
    )

    # Plot the per-model results before summarising them.
    f.PlotBoxR().PlotResult(model_names, cv_results)

    return f.ScoreDataFrame(model_names, cv_results, result_col_nm)
corr_threshold) get_highly_corr_feats = pd.DataFrame(get_highly_corr_feats) print('Highly correlated features description more than pearsonsr', corr_threshold) get_highly_corr_feats ### Create Base Model with default hyperparameters and all features ##### SEED = 7 X = x_df_dum.drop(train_df[['TARGET', 'SK_ID_CURR']], axis=1) Y = x_df_dum[['TARGET']] X_train, X_test, y_train, y_test = train_test_split( X, Y, test_size=0.25, random_state=0, stratify=x_df_dum['TARGET']) # Run the baseline models on the unbalanced dataset models = f.GetBasedModel() names, results = f.get_model_performance(X_train, y_train, models, SEED, 'f1_weighted') f.PlotBoxR().PlotResult(names, results) basedLineF1Score = f.ScoreDataFrame(names, results, 'baseline_f1_Score') models = f.GetBasedModel() names, results = f.get_model_performance(X_train, y_train, models, SEED, 'accuracy') f.PlotBoxR().PlotResult(names, results) basedLineAccuracyScore = f.ScoreDataFrame(names, results, 'baseline_accuracy') # Record Scores ScoreCard = pd.concat([basedLineAccuracyScore, basedLineF1Score], axis=1)