def evaluate():
    """Run every classifier experiment on one shared set of CV folds.

    Features and labels are loaded from
    ``outputs/features/function_and_human_readable_features.json`` through
    ``preprocess.get_features_and_labels``. Five stratified folds are built
    once and the same fold indices are handed to ``svm_run``,
    ``decision_tree_run`` and ``sgd_run`` (the latter once per loss name).

    Returns:
        None. Whatever the ``*_run`` helpers return is not collected here.
    """
    input_file = os.path.join("outputs", "features",
                              "function_and_human_readable_features.json")
    X, y = preprocess.get_features_and_labels(input_file)

    # Shuffling is not requested, so the folds are already deterministic.
    # A ``random_state`` without ``shuffle=True`` has no effect, and
    # scikit-learn >= 0.24 rejects that combination with a ValueError, so
    # the seed is omitted; the resulting folds are unchanged.
    skf = StratifiedKFold(n_splits=5)
    # Materialize the generator so each run below can iterate the folds.
    splits = list(skf.split(X, y))

    svm_run(X, y, splits)
    decision_tree_run(X, y, splits)

    # NOTE(review): if sgd_run forwards ``loss`` to SGDClassifier, newer
    # scikit-learn releases spell 'log' as 'log_loss' -- confirm against
    # the installed version.
    for loss in [
            'hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron'
    ]:
        sgd_run(X, y, splits, loss)
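# Example no. 2 (separator left by the snippet listing; originally
# "Esempio n. 2" followed by "0"). The lines below are the tail of a
# separate snippet whose beginning is not present here.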
            -1: 20
        }, 'balanced']
    }

    n_iter_search = 100
    random_search = RandomizedSearchCV(DecisionTreeClassifier(),
                                       param_distributions=param_dist,
                                       n_iter=n_iter_search,
                                       scoring={
                                           'precision': precision_scorer,
                                           'recall': recall_scorer,
                                           'f1': f1_scorer,
                                           'geometric_mean': gmean_scorer
                                       },
                                       cv=cv,
                                       refit='f1',
                                       n_jobs=-1,
                                       random_state=42)

    random_search.fit(X, y)

    return report(random_search.cv_results_)


if __name__ == "__main__":
    # Script entry point: announce the run, load the features and labels
    # from the JSON feature file, then launch the decision-tree search.
    # NOTE(review): the banner says "4-fold"; confirm it matches the cv
    # value the search function actually uses.
    print("-- Random Parameter Search via 4-fold CV")
    input_file = os.path.join(
        "outputs",
        "features",
        "function_and_human_readable_features.json",
    )
    X, y = preprocess.get_features_and_labels(input_file)
    # The search result is not used afterwards; it stays bound to "_".
    _ = decision_tree_randomized_search(X, y)