verbose=False)
    else:
        print("wrong fuzzy option")

    # grid search results data frame:
    gs_results_df = training_utils.get_gs_results(clf_gs)

    # best parameters from grid search:
    best_param_df = pd.DataFrame(clf_gs.best_params_, index=[0])

    # evaluation:
    clf_for_eval.set_params(**clf_gs.best_params_)
    metrics, std = training_utils.evaluate_on_cv(training_data,
                                                 train_X_all,
                                                 clf_for_eval,
                                                 fuzzy_option,
                                                 fuzzy_dist_column,
                                                 fuzzy_err_column,
                                                 xgb_flag=True)
    pr_curve = training_utils.predict_and_pr_curve_on_cv(training_data,
                                                         train_X_all,
                                                         clf_for_eval,
                                                         fuzzy_option,
                                                         fuzzy_dist_column,
                                                         fuzzy_err_column,
                                                         xgb_flag=True)

    # best model from grid search:
    clf_best = clf_gs.best_estimator_

    # generalization:
# Example no. 2
# 0
# Select the `features` columns from each data set and keep only the
# underlying `.values` (assumes both objects are pandas DataFrames -- TODO
# confirm), so the estimator receives positional arrays, not labelled frames.
train_X, general_X = (
    training_data[features].values,
    general_data[features].values,
)

# Repeat the same evaluate-then-fit procedure once per entry of
# `fuzzy_options`. The fitting step further down in this loop body branches
# on the values "normal", "fuzzy_dist" and "fuzzy_err".
for fuzzy_option in fuzzy_options:

    # Progress marker: show which option the following output belongs to.
    print(fuzzy_option)

    # A new classifier object is created on every pass, so nothing fitted for
    # one option carries over to the next. The seed is pinned
    # (random_state=476) and the hyper-parameters are the same for every
    # option; only the way `fuzzy_option` is used downstream differs.
    # NOTE(review): presumably scikit-learn's ExtraTreesClassifier -- confirm
    # against the file's imports.
    clf = ExtraTreesClassifier(n_estimators=500,
                               criterion='gini',
                               class_weight='balanced',
                               bootstrap=True,
                               random_state=476,
                               n_jobs=-1)

    # Both helpers receive the full data frame, the feature matrix, the
    # classifier built above, the current option and both fuzzy column
    # identifiers. The first returns a pair that is unpacked into
    # `metrics` and `std`; the second returns a single object.
    # NOTE(review): judging by the names, these run cross-validation
    # internally and yield summary metrics with their spread, and a
    # precision-recall curve -- confirm in training_utils.
    metrics, std = training_utils.evaluate_on_cv(training_data, train_X, clf,
                                                 fuzzy_option,
                                                 fuzzy_dist_column,
                                                 fuzzy_err_column)
    pr_curve = training_utils.predict_and_pr_curve_on_cv(
        training_data, train_X, clf, fuzzy_option, fuzzy_dist_column,
        fuzzy_err_column)

    # fit to the data:
    if fuzzy_option == "normal":
        clf.fit(X=train_X, y=training_data["Y"])
    elif fuzzy_option == "fuzzy_dist":
        clf.fit(X=train_X,
                y=training_data["Y"],
                sample_weight=training_data[fuzzy_dist_column].values.T[0])
    elif fuzzy_option == "fuzzy_err":
        clf.fit(X=train_X,
                y=training_data["Y"],