def test_gradient_boosting():
    """Run the gradient boosting search experiment once per sampling variant.

    Four runs are executed in order: one without a ``sampling`` argument,
    then one each with ``RandomUnderSampler``, ``SMOTE`` and ``SMOTEENN``.
    Every run builds a new ``Runner`` from a freshly loaded data frame and
    its own output path, and searches with ``'roc_auc'`` as the first
    positional argument.
    """
    # (output path, sampler class). ``None`` means the ``sampling`` keyword
    # is left out of the call entirely. Classes are instantiated lazily so
    # each sampler is only created after its Runner has been constructed.
    variants = (
        ('model/experiment/output/gradient_boosting_basic', None),
        ('model/experiment/output/gradient_boosting_under_sampled',
         RandomUnderSampler),
        ('model/experiment/output/gradient_boosting_over_sampled', SMOTE),
        ('model/experiment/output/gradient_boosting_combine_sampled',
         SMOTEENN),
    )

    for output_path, sampler_cls in variants:
        experiment = Runner(
            output_path,
            load_clean_sample_data_frame(),
            'arrest',
            gradient_boosting_pipeline,
            hyper_parameters=hyper_parameters,
        )

        search_options = dict(
            sample=sample,
            n_iter=iterations,
            record_predict_proba=True,
        )
        if sampler_cls is not None:
            search_options['sampling'] = sampler_cls()

        experiment.run_classification_search_experiment(
            'roc_auc', **search_options
        )
def test_sgd_log_loss():
    """Run the SGD (log loss) classification experiment per sampling variant.

    Four runs are executed in order: one without a ``sampling`` argument,
    then one each with ``RandomUnderSampler``, ``SMOTE`` and ``SMOTEENN``.
    Every run builds a new ``Runner`` around a new ``SGDClassifier`` and a
    freshly loaded data frame, with its own output path.
    """
    # (output path, sampler class). ``None`` means the ``sampling`` keyword
    # is left out of the call entirely. Classes are instantiated lazily so
    # each sampler is only created after its Runner has been constructed.
    variants = (
        ('model/experiment/output/sgd_log_loss_basic', None),
        ('model/experiment/output/sgd_log_loss_under_sampled',
         RandomUnderSampler),
        ('model/experiment/output/sgd_log_loss_over_sampled', SMOTE),
        ('model/experiment/output/sgd_log_loss_combine_sampled', SMOTEENN),
    )

    for output_path, sampler_cls in variants:
        # NOTE(review): newer scikit-learn releases spell this loss
        # 'log_loss' -- confirm the pinned version still accepts 'log'.
        experiment = Runner(
            output_path,
            load_clean_sample_data_frame(),
            'arrest',
            SGDClassifier(loss='log'),
        )

        fit_options = dict(
            sample=sample,
            record_predict_proba=True,
            transformer=binned_geo_one_hot_data_mapper,
            fit_increment=fit_increment,
            max_iters=max_iters,
            n_jobs=1,
        )
        if sampler_cls is not None:
            fit_options['sampling'] = sampler_cls()

        experiment.run_classification_experiment(**fit_options)
def test_gaussian_naive_bayes():
    """Run the Gaussian naive Bayes experiment once per sampling variant.

    Four runs are executed in order: one without a ``sampling`` argument,
    then one each with ``RandomUnderSampler``, ``SMOTE`` and ``SMOTEENN``.
    Every run builds a new ``Runner`` around a new ``GaussianNB`` and a
    freshly loaded data frame, with its own output path. No ``max_iters``
    is passed for this model.
    """
    # (output path, sampler class). ``None`` means the ``sampling`` keyword
    # is left out of the call entirely. Classes are instantiated lazily so
    # each sampler is only created after its Runner has been constructed.
    variants = (
        ('model/experiment/output/gaussian_naive_bayes_basic', None),
        ('model/experiment/output/gaussian_naive_bayes_under_sampled',
         RandomUnderSampler),
        ('model/experiment/output/gaussian_naive_bayes_over_sampled', SMOTE),
        ('model/experiment/output/gaussian_naive_bayes_combine_sampled',
         SMOTEENN),
    )

    for output_path, sampler_cls in variants:
        experiment = Runner(
            output_path,
            load_clean_sample_data_frame(),
            'arrest',
            GaussianNB(),
        )

        fit_options = dict(
            sample=sample,
            record_predict_proba=True,
            transformer=binned_geo_one_hot_data_mapper,
            fit_increment=fit_increment,
            n_jobs=1,
        )
        if sampler_cls is not None:
            fit_options['sampling'] = sampler_cls()

        experiment.run_classification_experiment(**fit_options)
def test_neural_network():
    """Run the MLP classification experiment once per sampling variant.

    Four runs are executed in order: one without a ``sampling`` argument,
    then one each with ``RandomUnderSampler``, ``SMOTE`` and ``SMOTEENN``.
    Every run builds a new ``Runner`` around a new ``MLPClassifier``
    (hidden layer sizes ``(750, 125)``, ``verbose=True``) and a freshly
    loaded data frame, with its own output path.
    """
    # (output path, sampler class). ``None`` means the ``sampling`` keyword
    # is left out of the call entirely. Classes are instantiated lazily so
    # each sampler is only created after its Runner has been constructed.
    variants = (
        ('model/experiment/output/neural_network_basic', None),
        ('model/experiment/output/neural_network_under_sampled',
         RandomUnderSampler),
        ('model/experiment/output/neural_network_over_sampled', SMOTE),
        ('model/experiment/output/neural_network_combine_sampled', SMOTEENN),
    )

    for output_path, sampler_cls in variants:
        # A new, unfitted network is created for every variant.
        network = MLPClassifier(hidden_layer_sizes=(750, 125,), verbose=True)
        experiment = Runner(
            output_path,
            load_clean_sample_data_frame(),
            'arrest',
            network,
        )

        fit_options = dict(
            sample=sample,
            record_predict_proba=True,
            transformer=binned_geo_one_hot_data_mapper,
            fit_increment=fit_increment,
            max_iters=max_iters,
            n_jobs=1,
        )
        if sampler_cls is not None:
            fit_options['sampling'] = sampler_cls()

        experiment.run_classification_experiment(**fit_options)