Example #1
0
def test_gradient_boosting():
    """Run the gradient-boosting search experiment under four sampling set-ups.

    For each variant a fresh ``Runner`` is built around a newly loaded clean
    sample data frame, ``'arrest'`` (presumably the target column -- confirm
    against ``Runner``), the shared ``gradient_boosting_pipeline`` and the
    module-level ``hyper_parameters``.  The runner then executes
    ``run_classification_search_experiment`` with ``'roc_auc'`` as its first
    argument and ``record_predict_proba=True``.

    Variants, executed in this order:

    * basic           -- the ``sampling`` keyword is not passed at all
    * under_sampled   -- ``sampling=RandomUnderSampler()``
    * over_sampled    -- ``sampling=SMOTE()``
    * combine_sampled -- ``sampling=SMOTEENN()``
    """
    # (output directory, sampler factory).  Factories rather than instances
    # so each sampler is created only when its own run starts; ``None`` means
    # the ``sampling`` keyword is left out of the call entirely.
    variants = (
        ('model/experiment/output/gradient_boosting_basic',
         None),
        ('model/experiment/output/gradient_boosting_under_sampled',
         RandomUnderSampler),
        ('model/experiment/output/gradient_boosting_over_sampled',
         SMOTE),
        ('model/experiment/output/gradient_boosting_combine_sampled',
         SMOTEENN),
    )

    for output_path, make_sampler in variants:
        # The data frame is reloaded for every variant, so no run sees a
        # frame object that a previous run has already handled.
        runner = Runner(
            output_path,
            load_clean_sample_data_frame(),
            'arrest',
            gradient_boosting_pipeline,
            hyper_parameters=hyper_parameters
        )

        search_kwargs = {
            'sample': sample,
            'n_iter': iterations,
            'record_predict_proba': True,
        }
        if make_sampler is not None:
            search_kwargs['sampling'] = make_sampler()

        runner.run_classification_search_experiment('roc_auc', **search_kwargs)
Example #2
0
def test_sgd_log_loss():
    """Run the SGD (log loss) classification experiment under four sampling set-ups.

    For each variant a fresh ``Runner`` is built around a newly loaded clean
    sample data frame, ``'arrest'`` (presumably the target column -- confirm
    against ``Runner``) and a new ``SGDClassifier(loss='log')``.  The runner
    then executes ``run_classification_experiment`` with the module-level
    ``sample``, ``fit_increment`` and ``max_iters`` settings, the
    ``binned_geo_one_hot_data_mapper`` transformer,
    ``record_predict_proba=True`` and ``n_jobs=1``.

    Variants, executed in this order:

    * basic           -- the ``sampling`` keyword is not passed at all
    * under_sampled   -- ``sampling=RandomUnderSampler()``
    * over_sampled    -- ``sampling=SMOTE()``
    * combine_sampled -- ``sampling=SMOTEENN()``
    """
    # (output directory, sampler factory).  Factories rather than instances
    # so each sampler is created only when its own run starts; ``None`` means
    # the ``sampling`` keyword is left out of the call entirely.
    variants = (
        ('model/experiment/output/sgd_log_loss_basic',
         None),
        ('model/experiment/output/sgd_log_loss_under_sampled',
         RandomUnderSampler),
        ('model/experiment/output/sgd_log_loss_over_sampled',
         SMOTE),
        ('model/experiment/output/sgd_log_loss_combine_sampled',
         SMOTEENN),
    )

    for output_path, make_sampler in variants:
        # A new, unfitted estimator per variant.
        # NOTE(review): scikit-learn 1.1 deprecated loss='log' in favour of
        # loss='log_loss' and 1.3 removed it.  The value is kept as-is here
        # because the pinned scikit-learn version is not visible -- confirm
        # and rename when upgrading.
        runner = Runner(output_path,
                        load_clean_sample_data_frame(), 'arrest',
                        SGDClassifier(loss='log'))

        experiment_kwargs = {
            'sample': sample,
            'record_predict_proba': True,
            'transformer': binned_geo_one_hot_data_mapper,
            'fit_increment': fit_increment,
            'max_iters': max_iters,
            'n_jobs': 1,
        }
        if make_sampler is not None:
            experiment_kwargs['sampling'] = make_sampler()

        runner.run_classification_experiment(**experiment_kwargs)
def test_gaussian_naive_bayes():
    """Run the Gaussian naive Bayes experiment under four sampling set-ups.

    For each variant a fresh ``Runner`` is built around a newly loaded clean
    sample data frame, ``'arrest'`` (presumably the target column -- confirm
    against ``Runner``) and a new ``GaussianNB()``.  The runner then executes
    ``run_classification_experiment`` with the module-level ``sample`` and
    ``fit_increment`` settings, the ``binned_geo_one_hot_data_mapper``
    transformer, ``record_predict_proba=True`` and ``n_jobs=1``.  Unlike the
    sibling SGD / neural-network drivers, no ``max_iters`` is passed.

    Variants, executed in this order:

    * basic           -- the ``sampling`` keyword is not passed at all
    * under_sampled   -- ``sampling=RandomUnderSampler()``
    * over_sampled    -- ``sampling=SMOTE()``
    * combine_sampled -- ``sampling=SMOTEENN()``
    """
    # (output directory, sampler factory).  Factories rather than instances
    # so each sampler is created only when its own run starts; ``None`` means
    # the ``sampling`` keyword is left out of the call entirely.
    variants = (
        ('model/experiment/output/gaussian_naive_bayes_basic',
         None),
        ('model/experiment/output/gaussian_naive_bayes_under_sampled',
         RandomUnderSampler),
        ('model/experiment/output/gaussian_naive_bayes_over_sampled',
         SMOTE),
        ('model/experiment/output/gaussian_naive_bayes_combine_sampled',
         SMOTEENN),
    )

    for output_path, make_sampler in variants:
        # A new, unfitted estimator and a freshly loaded frame per variant.
        runner = Runner(
            output_path,
            load_clean_sample_data_frame(), 'arrest', GaussianNB())

        experiment_kwargs = {
            'sample': sample,
            'record_predict_proba': True,
            'transformer': binned_geo_one_hot_data_mapper,
            'fit_increment': fit_increment,
            'n_jobs': 1,
        }
        if make_sampler is not None:
            experiment_kwargs['sampling'] = make_sampler()

        runner.run_classification_experiment(**experiment_kwargs)
Example #4
0
def test_neural_network():
    """Run the MLP classification experiment under four sampling set-ups.

    For each variant a fresh ``Runner`` is built around a newly loaded clean
    sample data frame, ``'arrest'`` (presumably the target column -- confirm
    against ``Runner``) and a new
    ``MLPClassifier(hidden_layer_sizes=(750, 125), verbose=True)``.  The
    runner then executes ``run_classification_experiment`` with the
    module-level ``sample``, ``fit_increment`` and ``max_iters`` settings,
    the ``binned_geo_one_hot_data_mapper`` transformer,
    ``record_predict_proba=True`` and ``n_jobs=1``.

    Variants, executed in this order:

    * basic           -- the ``sampling`` keyword is not passed at all
    * under_sampled   -- ``sampling=RandomUnderSampler()``
    * over_sampled    -- ``sampling=SMOTE()``
    * combine_sampled -- ``sampling=SMOTEENN()``
    """
    # (output directory, sampler factory).  Factories rather than instances
    # so each sampler is created only when its own run starts; ``None`` means
    # the ``sampling`` keyword is left out of the call entirely.
    variants = (
        ('model/experiment/output/neural_network_basic',
         None),
        ('model/experiment/output/neural_network_under_sampled',
         RandomUnderSampler),
        ('model/experiment/output/neural_network_over_sampled',
         SMOTE),
        ('model/experiment/output/neural_network_combine_sampled',
         SMOTEENN),
    )

    for output_path, make_sampler in variants:
        # The network is rebuilt for every variant so that no run starts from
        # an estimator object a previous run has already been given.
        network = MLPClassifier(hidden_layer_sizes=(
            750,
            125,
        ), verbose=True)
        runner = Runner(
            output_path,
            load_clean_sample_data_frame(), 'arrest',
            network)

        experiment_kwargs = {
            'sample': sample,
            'record_predict_proba': True,
            'transformer': binned_geo_one_hot_data_mapper,
            'fit_increment': fit_increment,
            'max_iters': max_iters,
            'n_jobs': 1,
        }
        if make_sampler is not None:
            experiment_kwargs['sampling'] = make_sampler()

        runner.run_classification_experiment(**experiment_kwargs)