Example 1
import os

import numpy as np
import pandas as pd
from catboost import CatBoostRegressor, Pool
from catboost.utils import eval_metric
from scipy.special import expit
from sklearn.model_selection import train_test_split
from tqdm import tqdm


def show_scores(index_train, index_test, X, probs, weights):
    auc_test = bootstrap(1000, 0.75,
                         np.array(probs)[:, 1][index_test],  # on class 1
                         expit(X[index_test].dot(np.array(weights))),
                         lambda a1, a2: eval_metric(a1, a2, 'AUC'))
    auc_train = bootstrap(1000, 0.75,
                          np.array(probs)[:, 1][index_train],
                          expit(X[index_train].dot(np.array(weights))),
                          lambda a1, a2: eval_metric(a1, a2, 'AUC'))
    print("Standard weights\n")
    print(weights)
    # feature_weights_table(features, weights)
    print("AUC test", auc_test)
    print("AUC train", auc_train)
def train(features, iterations=100, train_size=0.7):
    # ROOT (the project root path) is assumed to be defined elsewhere
    corpus_path = os.path.join(ROOT, 'corpus/calculated_metrics.csv')
    _df = pd.read_csv(corpus_path, index_col=0)

    # reviews scored 5 where the ground-truth mask is empty but a mask was
    # still predicted (kept for inspection; not used further in this function)
    mistakes = _df[(_df['review'] == 5) & (_df['true_mask_pixels'] == 0) &
                   (_df['pred_mask_pixels'] != 0)]

    results = []
    results_rint = []
    models_data = []

    for _ in tqdm(range(iterations)):
        target = 'review'
        df = _df[features + [target]]

        df_train, df_test = train_test_split(df, train_size=train_size)

        train_pool = Pool(df_train[features], label=df_train[target])
        test_pool = Pool(df_test[features], label=df_test[target])

        cb_mae = CatBoostRegressor(loss_function='MAE', silent=True)
        cb_mae.fit(train_pool, eval_set=test_pool)

        models_data.append((cb_mae, df_test))

        cb_mae_pred = cb_mae.predict(test_pool)
        cb_mae_pred_rint = np.rint(cb_mae_pred)

        results.append(
            eval_metric(df_test[target].to_numpy(), cb_mae_pred, 'MAE'))
        results_rint.append(
            eval_metric(df_test[target].to_numpy(), cb_mae_pred_rint, 'MAE'))

    # select the model whose rounded predictions achieved the lowest MAE
    best_model_data = models_data[int(np.argmin(results_rint))]

    return best_model_data


def main(model_name=None, iterations=100, train_size=0.7):
    features = [
        'true_mask_pixels', 'pred_mask_pixels', 'DICE', 'TP', 'FP', 'REFVOL',
        'MUTINF'
    ]
    model, test_data = train(features, iterations, train_size)

    pred = np.rint(model.predict(test_data[features]))  # predict on features only
    metric = eval_metric(test_data['review'].to_numpy(), pred, 'MAE')[0]

    model_name = model_name if model_name else f"cb_mae-{round(metric, 4)}.cbm"

    savepath = os.path.join(ROOT, 'models', 'catboost', model_name)
    model.save_model(savepath, format="cbm")

    print('Model was saved to ' + savepath)
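

# Hedged usage sketch: how the pipeline above might be run end to end;
# the argument values shown are just the defaults declared in main().
if __name__ == '__main__':
    main(iterations=100, train_size=0.7)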
Example 4
def test_util_eval_metric_multiclass(metric):
    # PREDS_PATH and local_canonical_file come from the surrounding test harness
    metric_results = eval_metric([1, 0, 2],
                                 [[0.88, 0.22, 0.3],
                                  [0.21, 0.45, 0.1],
                                  [0.12, 0.32, 0.9]],
                                 metric)
    np.savetxt(PREDS_PATH, np.array(metric_results))
    return local_canonical_file(PREDS_PATH)
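
# Standalone hedged sketch of the call above, assuming catboost.utils.eval_metric:
# for multiclass input the approxes are one row of per-class scores per object,
# and a list with one value per computed metric is returned. With
# metric='Accuracy', only the third row's argmax matches its label, so the
# expected result is [1/3].
from catboost.utils import eval_metric

accuracy = eval_metric([1, 0, 2],
                       [[0.88, 0.22, 0.3],
                        [0.21, 0.45, 0.1],
                        [0.12, 0.32, 0.9]],
                       'Accuracy')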
Example 5
def test_util_eval_metric(metric):
    metric_results = eval_metric([1, 0], [0.88, 0.22], metric)
    np.savetxt(PREDS_PATH, np.array(metric_results))
    return local_canonical_file(PREDS_PATH)
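
# Standalone hedged sketch of the binary case, assuming catboost.utils.eval_metric:
# one raw score per object. Here the positive object outranks the negative one,
# so 'AUC' would evaluate to [1.0].
from catboost.utils import eval_metric

auc = eval_metric([1, 0], [0.88, 0.22], 'AUC')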
Example 6
fig, ax = plt.subplots()
# Overlaid histograms of predicted probabilities for the converted class
sns.histplot(model1.predict_proba(X_test)[y_test['is_converted'] == 1, 1],
             kde=False)
sns.histplot(y_test_pred_prob[y_test['is_converted'] == 1], kde=False)
plt.show()
# -

# ## Test on test data

# +
test_pool = Pool(X_test, y_test, cat_features=categorical_features_step1)
print('\nTarget on test data:', int(np.sum(y_test_pred)), 'predicted positives vs',
      int(np.sum(y_test)), 'actually positive')

# Accuracy on the test dataset
accuracy_test = eval_metric(np.array(y_test), y_test_pred, 'Accuracy')
print('\nAccuracy on test dataset : ', accuracy_test)

# Precision on the test dataset
precision_test = eval_metric(np.array(y_test), y_test_pred, 'Precision')
print('\nPrecision on test dataset : ', precision_test)

# Recall on the test dataset
recall_test = eval_metric(np.array(y_test), y_test_pred, 'Recall')
print('\nRecall on test dataset : ', recall_test)

# F1 on the test dataset
f1_test = eval_metric(np.array(y_test), y_test_pred, 'F1')
print('\nF1 on test dataset : ', f1_test)
# -
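
# +
# Equivalent hedged sketch: eval_metric takes the metric name as a string, so
# the four blocks above collapse into a single loop.
for metric_name in ('Accuracy', 'Precision', 'Recall', 'F1'):
    score = eval_metric(np.array(y_test), y_test_pred, metric_name)
    print('\n%s on test dataset : ' % metric_name, score)
# -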