def show_scores(index_train, index_test, X, probs, weights):
    # Bootstrapped AUC, computed on class 1
    auc_test = bootstrap(
        1000, 0.75,
        np.array(probs)[:, 1][index_test],
        expit(X[index_test].dot(np.array(weights))),
        lambda a1, a2: eval_metric(a1, a2, 'AUC'))
    auc_train = bootstrap(
        1000, 0.75,
        np.array(probs)[:, 1][index_train],
        expit(X[index_train].dot(np.array(weights))),
        lambda a1, a2: eval_metric(a1, a2, 'AUC'))

    print("Standard weights\n")
    print(weights)
    # feature_weights_table(features, weights)
    print("AUC test", auc_test)
    print("AUC train", auc_train)
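# The `bootstrap` helper used above is assumed to be defined elsewhere in the
# project; a minimal sketch of a compatible implementation, assuming the
# signature bootstrap(n_iter, sample_frac, y_true, y_pred, metric_fn) and that
# metric_fn returns a one-element list, as catboost.utils.eval_metric does.
def bootstrap(n_iter, sample_frac, y_true, y_pred, metric_fn):
    rng = np.random.default_rng()
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    sample_size = max(1, int(sample_frac * len(y_true)))
    scores = []
    for _ in range(n_iter):
        # Resample a fraction of the data with replacement and rescore.
        idx = rng.choice(len(y_true), size=sample_size, replace=True)
        scores.append(metric_fn(y_true[idx], y_pred[idx])[0])
    return float(np.mean(scores))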
def train(features, iterations=100, train_size=0.7):
    corpus_path = os.path.join(ROOT, 'corpus/calculated_metrics.csv')
    _df = pd.read_csv(corpus_path, index_col=0)
    # Cases with review == 5 despite an empty ground-truth mask and a
    # non-empty predicted mask (not used further here)
    mistakes = _df[(_df['review'] == 5)
                   & (_df['true_mask_pixels'] == 0)
                   & (_df['pred_mask_pixels'] != 0)]

    results = []
    results_rint = []
    models_data = []
    for _ in tqdm(range(iterations)):
        target = 'review'
        df = _df[features + [target]]
        df_train, df_test = train_test_split(df, train_size=train_size)

        train_pool = Pool(df_train[features], label=df_train[target])
        test_pool = Pool(df_test[features], label=df_test[target])

        cb_mae = CatBoostRegressor(loss_function='MAE', silent=True)
        cb_mae.fit(train_pool, eval_set=test_pool)
        models_data.append((cb_mae, df_test))

        cb_mae_pred = cb_mae.predict(test_pool)
        cb_mae_pred_rint = np.rint(cb_mae_pred)
        results.append(
            eval_metric(df_test[target].to_numpy(), cb_mae_pred, 'MAE'))
        results_rint.append(
            eval_metric(df_test[target].to_numpy(), cb_mae_pred_rint, 'MAE'))

    # Pick the model with the lowest MAE on rounded predictions
    best_model_data = models_data[int(np.argmin(results_rint))]
    return best_model_data
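# A short sketch of how the tuple returned by train() can be inspected.
# feature_names_ and get_feature_importance() are standard CatBoost model
# attributes; the function name and the feature subset are illustrative only.
def show_feature_importance(features=('DICE', 'TP', 'FP'), iterations=10):
    best_model, _held_out = train(list(features), iterations=iterations)
    for name, score in zip(best_model.feature_names_,
                           best_model.get_feature_importance()):
        print(f'{name}: {score:.2f}')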
def main(model_name=None, iterations=100, train_size=0.7):
    features = [
        'true_mask_pixels', 'pred_mask_pixels', 'DICE', 'TP', 'FP',
        'REFVOL', 'MUTINF'
    ]
    model, test_data = train(features, iterations, train_size)
    # test_data also contains the target column, so predict on features only
    pred = np.rint(model.predict(test_data[features]))
    metric = eval_metric(test_data['review'].to_numpy(), pred, 'MAE')[0]

    model_name = model_name if model_name else f"cb_mae-{round(metric, 4)}.cbm"
    savepath = os.path.join(ROOT, 'models', 'catboost', model_name)
    model.save_model(savepath, format="cbm")
    print('Model was saved to ' + savepath)
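# A hypothetical command-line entry point for this script; the argparse flags
# are illustrative and not part of the original code.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Train the review-score regressor.')
    parser.add_argument('--model-name', default=None)
    parser.add_argument('--iterations', type=int, default=100)
    parser.add_argument('--train-size', type=float, default=0.7)
    args = parser.parse_args()
    main(args.model_name, args.iterations, args.train_size)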
def test_util_eval_metric_multiclass(metric):
    metric_results = eval_metric([1, 0, 2],
                                 [[0.88, 0.22, 0.3], [0.21, 0.45, 0.1],
                                  [0.12, 0.32, 0.9]], metric)
    np.savetxt(PREDS_PATH, np.array(metric_results))
    return local_canonical_file(PREDS_PATH)
def test_util_eval_metric(metric):
    metric_results = eval_metric([1, 0], [0.88, 0.22], metric)
    np.savetxt(PREDS_PATH, np.array(metric_results))
    return local_canonical_file(PREDS_PATH)
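# Both tests above receive `metric` as an argument; a hypothetical pytest
# parametrization that could drive a similar check (the metric names and the
# test name are illustrative, not necessarily those used in the original suite).
import pytest


@pytest.mark.parametrize('metric', ['Accuracy', 'AUC', 'Logloss'])
def test_util_eval_metric_smoke(metric):
    # eval_metric returns a list with one value per evaluation period.
    assert len(eval_metric([1, 0], [0.88, 0.22], metric)) == 1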
fig, ax = plt.subplots()
sns.distplot(model1.predict_proba(X_test)[y_test['is_converted'] == 1, 1], kde=False)
sns.distplot(y_test_pred_prob[y_test['is_converted'] == 1], kde=False)
plt.show()
# -

# ## Test on test data

# +
test_pool = Pool(X_test, y_test, cat_features=categorical_features_step1)

print('\nTarget on test data', int(np.sum(y_test_pred)), ' positives vs ',
      int(np.sum(y_test)), ' in reality')

# Accuracy Score on test dataset
accuracy_test = eval_metric(np.array(y_test), y_test_pred, 'Accuracy')
print('\naccuracy_score on test dataset : ', accuracy_test)

# Precision Score on test dataset
precision_test = eval_metric(np.array(y_test), y_test_pred, 'Precision')
print('\nPrecision_score on test dataset : ', precision_test)

# Recall Score on test dataset
recall_test = eval_metric(np.array(y_test), y_test_pred, 'Recall')
print('\nRecall_score on test dataset : ', recall_test)

# F1 Score on test dataset
f1_test = eval_metric(np.array(y_test), y_test_pred, 'F1')
print('\nF1_score on test dataset : ', f1_test)
# -
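# Beyond the thresholded scores above, the raw probabilities can be scored
# directly with the same helper; a short sketch, assuming y_test_pred_prob
# holds the positive-class probabilities computed in an earlier cell.

# +
auc_test = eval_metric(np.array(y_test), y_test_pred_prob, 'AUC')
print('\nAUC_score on test dataset : ', auc_test)
# -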