def test_lightgbm_basic():
    """Run the lightgbm_basic classification search scored by 'neg_log_loss'.

    Builds a Runner from the sample data frame with 'violation' as its third
    argument (presumably the target column -- confirm against Runner), then
    launches a multiclass search that records predicted probabilities.
    """
    experiment = Runner(
        'model/experiment/output/lightgbm_basic',
        load_sample_data_frame(),
        'violation',
        lightgbm_basic,
        hyper_parameters,
    )

    # Keyword options for the search, kept in the original keyword order.
    search_options = dict(
        sample=sample,
        n_iter=iterations,
        multiclass=True,
        record_predict_proba=True,
    )
    experiment.run_classification_search_experiment('neg_log_loss',
                                                    **search_options)
Example #2
0
def test_decision_tree():
    """Run the decision_tree_basic 'roc_auc' search under four sampling setups.

    The same search is executed four times against a freshly loaded sample
    data frame with 'arrest' as the third Runner argument (presumably the
    target column -- confirm against Runner):

    * no ``sampling`` argument at all,
    * ``sampling=RandomUnderSampler()``,
    * ``sampling=SMOTE()``,
    * ``sampling=SMOTEENN()``.

    Each run gets its own path as the first Runner argument.
    """

    def _run_variant(output_path, make_sampler=None):
        # One Runner + one search; ``make_sampler`` is called only after the
        # Runner exists so the sampler is created at the same point as in a
        # hand-written call.
        experiment = Runner(
            output_path,
            load_sample_data_frame(),
            'arrest',
            decision_tree_basic,
            hyper_parameters=hyper_parameters,
        )
        search_options = dict(
            sample=sample,
            n_iter=iterations,
            record_predict_proba=True,
        )
        # The baseline run must not pass ``sampling`` at all, so the keyword
        # is only added when a sampler was requested.
        if make_sampler is not None:
            search_options['sampling'] = make_sampler()
        experiment.run_classification_search_experiment('roc_auc',
                                                        **search_options)

    _run_variant('model/experiment/output/decision_tree_basic_full')
    _run_variant(
        'model/experiment/output/decision_tree_under_sampled_full',
        RandomUnderSampler,
    )
    _run_variant(
        'model/experiment/output/decision_tree_over_sampled_full',
        SMOTE,
    )
    _run_variant(
        'model/experiment/output/decision_tree_combine_sampled_full',
        SMOTEENN,
    )
Example #3
0
def test_gradient_boosting():
    """Run the gradient_boosting_pipeline 'roc_auc' search four times.

    Every run loads the clean sample data frame anew and passes 'arrest' as
    the third Runner argument (presumably the target column -- confirm
    against Runner). The first run passes no ``sampling`` argument; the
    following runs pass ``RandomUnderSampler()``, ``SMOTE()`` and
    ``SMOTEENN()`` respectively. All runs record predicted probabilities.
    """

    def _search(output_path, sampler_cls=None):
        # Build the Runner first; the sampler (if any) is instantiated
        # afterwards, right before the search is started.
        search_runner = Runner(
            output_path,
            load_clean_sample_data_frame(),
            'arrest',
            gradient_boosting_pipeline,
            hyper_parameters=hyper_parameters,
        )
        options = {
            'sample': sample,
            'n_iter': iterations,
            'record_predict_proba': True,
        }
        # Leave ``sampling`` out entirely for the un-resampled baseline.
        if sampler_cls is not None:
            options['sampling'] = sampler_cls()
        search_runner.run_classification_search_experiment('roc_auc',
                                                           **options)

    _search('model/experiment/output/gradient_boosting_basic')
    _search(
        'model/experiment/output/gradient_boosting_under_sampled',
        RandomUnderSampler,
    )
    _search(
        'model/experiment/output/gradient_boosting_over_sampled',
        SMOTE,
    )
    _search(
        'model/experiment/output/gradient_boosting_combine_sampled',
        SMOTEENN,
    )
def build_xgboost_model():
    """Run two xgboost 'roc_auc' searches and dump each trained estimator.

    The first search uses ``xgboost_pipeline`` and the second
    ``xgboost_pipeline_fs``. Both load the clean data frame and pass 'arrest'
    as the third Runner argument (presumably the target column -- confirm
    against Runner). After each search, the runner's ``trained_estimator``
    attribute is written with ``joblib.dump`` to a ``.joblib`` file.
    """

    def _search_and_dump(runner_path, pipeline, estimator_path):
        # Search first, then persist whatever estimator the runner exposes.
        experiment = Runner(
            runner_path,
            load_clean_data_frame(),
            'arrest',
            pipeline,
            hyper_parameters,
        )
        experiment.run_classification_search_experiment(
            'roc_auc',
            sample=sample,
            n_iter=iterations,
            record_predict_proba=True,
        )
        joblib.dump(experiment.trained_estimator, estimator_path)

    _search_and_dump(
        'model/output/xgboost_basic',
        xgboost_pipeline,
        'model/output/xgboost_basic.joblib',
    )
    _search_and_dump(
        'model/output/xgboost_basic_fs',
        xgboost_pipeline_fs,
        'model/output/xgboost_basic_fs.joblib',
    )