def test_lightgbm_basic():
    """Run the basic LightGBM classification search experiment.

    Builds a ``Runner`` that writes under
    ``model/experiment/output/lightgbm_basic`` using the sample data frame,
    then runs a multiclass search scored by ``'neg_log_loss'`` with predicted
    probabilities recorded.

    NOTE(review): ``'violation'`` is passed as the third ``Runner`` argument,
    presumably the target column -- confirm against ``Runner``.
    """
    search_options = {
        'sample': sample,
        'n_iter': iterations,
        'multiclass': True,
        'record_predict_proba': True,
    }
    experiment = Runner(
        'model/experiment/output/lightgbm_basic',
        load_sample_data_frame(),
        'violation',
        lightgbm_basic,
        hyper_parameters,
    )
    experiment.run_classification_search_experiment('neg_log_loss', **search_options)
def test_decision_tree():
    """Run the decision-tree search experiment under four sampling variants.

    Each variant builds a fresh ``Runner`` on a newly loaded sample data frame
    and runs a ``'roc_auc'``-scored search with predicted probabilities
    recorded. The variants differ only in output directory and in the
    ``sampling`` argument: none, ``RandomUnderSampler()``, ``SMOTE()`` and
    ``SMOTEENN()``.

    NOTE(review): ``'arrest'`` is passed as the third ``Runner`` argument,
    presumably the target column -- confirm against ``Runner``.
    """
    # (output directory, sampler factory); None means `sampling` is not passed.
    variants = (
        ('model/experiment/output/decision_tree_basic_full', None),
        ('model/experiment/output/decision_tree_under_sampled_full', RandomUnderSampler),
        ('model/experiment/output/decision_tree_over_sampled_full', SMOTE),
        ('model/experiment/output/decision_tree_combine_sampled_full', SMOTEENN),
    )

    for output_dir, make_sampler in variants:
        experiment = Runner(
            output_dir,
            load_sample_data_frame(),
            'arrest',
            decision_tree_basic,
            hyper_parameters=hyper_parameters,
        )

        search_options = {
            'sample': sample,
            'n_iter': iterations,
            'record_predict_proba': True,
        }
        if make_sampler is not None:
            # Instantiate the sampler only now, right before the search call.
            search_options['sampling'] = make_sampler()

        experiment.run_classification_search_experiment('roc_auc', **search_options)
def test_gradient_boosting():
    """Run the gradient-boosting search experiment under four sampling variants.

    Each variant builds a fresh ``Runner`` on a newly loaded clean sample data
    frame with ``gradient_boosting_pipeline`` and runs a ``'roc_auc'``-scored
    search with predicted probabilities recorded. The variants differ only in
    output directory and in the ``sampling`` argument: none,
    ``RandomUnderSampler()``, ``SMOTE()`` and ``SMOTEENN()``.

    NOTE(review): ``'arrest'`` is passed as the third ``Runner`` argument,
    presumably the target column -- confirm against ``Runner``.
    """
    # (output directory, sampler factory); None means `sampling` is not passed.
    variants = (
        ('model/experiment/output/gradient_boosting_basic', None),
        ('model/experiment/output/gradient_boosting_under_sampled', RandomUnderSampler),
        ('model/experiment/output/gradient_boosting_over_sampled', SMOTE),
        ('model/experiment/output/gradient_boosting_combine_sampled', SMOTEENN),
    )

    for output_dir, make_sampler in variants:
        experiment = Runner(
            output_dir,
            load_clean_sample_data_frame(),
            'arrest',
            gradient_boosting_pipeline,
            hyper_parameters=hyper_parameters,
        )

        search_options = {
            'sample': sample,
            'n_iter': iterations,
            'record_predict_proba': True,
        }
        if make_sampler is not None:
            # Instantiate the sampler only now, right before the search call.
            search_options['sampling'] = make_sampler()

        experiment.run_classification_search_experiment('roc_auc', **search_options)
def build_xgboost_model():
    """Train the two XGBoost variants and dump each trained estimator.

    For ``xgboost_pipeline`` and then ``xgboost_pipeline_fs``, builds a
    ``Runner`` on a newly loaded clean data frame, runs a ``'roc_auc'``-scored
    search with predicted probabilities recorded, and writes
    ``runner.trained_estimator`` with ``joblib.dump`` to
    ``<output path>.joblib``.

    NOTE(review): ``'arrest'`` is passed as the third ``Runner`` argument,
    presumably the target column -- confirm against ``Runner``.
    """
    # (Runner output path, pipeline), processed in this order.
    variants = (
        ('model/output/xgboost_basic', xgboost_pipeline),
        ('model/output/xgboost_basic_fs', xgboost_pipeline_fs),
    )

    for output_path, pipeline in variants:
        experiment = Runner(
            output_path,
            load_clean_data_frame(),
            'arrest',
            pipeline,
            hyper_parameters,
        )
        experiment.run_classification_search_experiment(
            'roc_auc',
            sample=sample,
            n_iter=iterations,
            record_predict_proba=True,
        )
        # The artifact sits next to the Runner output path, with a .joblib suffix.
        joblib.dump(experiment.trained_estimator, f'{output_path}.joblib')