コード例 #1
0
from skopt.space import Integer, Real

from sklearn.pipeline import Pipeline

from utility import HyperParameters, Runner
from model import load_sample_data_frame, ordinal_data_mapper

# `sample` is forwarded to the search experiment by test_lightgbm_basic.
# NOTE(review): `iterations` is not referenced in the visible part of this
# file -- presumably the search iteration budget; confirm against the caller.
sample = None
iterations = 24

# Search space for the 'lgb' pipeline step. Keys follow scikit-learn's
# `<step>__<parameter>` convention so the search can set them on the
# pipeline's estimator.
hyper_parameters = HyperParameters(
    search_space={
        'lgb__n_estimators': Integer(50, 500),
        'lgb__learning_rate': Real(0.001, 0.05),
        'lgb__num_leaves': Integer(7, 63),
        'lgb__max_bin': Integer(32, 256),
        'lgb__min_child_samples': Integer(10, 50),
        'lgb__subsample': Real(0.5, 0.9),
        'lgb__colsample_bytree': Real(0.5, 0.9)
    })

# NOTE(review): `lgb` is not imported in the visible part of this file --
# presumably `import lightgbm as lgb`; confirm it is in scope.
#
# subsample_freq=1 switches bagging on. LightGBM only applies `subsample`
# when `subsample_freq` is non-zero, and the estimator's default is 0, so
# without it the `lgb__subsample` dimension above would be searched but
# never have any effect on the fitted model.
lightgbm_basic = Pipeline([('mapper', ordinal_data_mapper),
                           ('lgb', lgb.LGBMClassifier(subsample_freq=1))])


def test_lightgbm_basic():
    runner = Runner('model/experiment/output/lightgbm_basic',
                    load_sample_data_frame(), 'violation', lightgbm_basic,
                    hyper_parameters)
    runner.run_classification_search_experiment('neg_log_loss',
                                                sample=sample,
コード例 #2
0
from skopt.space import Categorical, Integer, Real

from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

from utility import HyperParameters, Runner
from model import load_clean_sample_data_frame, ordinal_data_mapper

# Experiment knobs.
# NOTE(review): neither name is referenced in the visible part of this
# file -- presumably they are passed to the runner's search call; confirm.
sample = None
iterations = 24

# Search space for the 'dt' pipeline step (`<step>__<parameter>` keys).
# Both min_samples_* dimensions are real-valued, which scikit-learn
# interprets as fractions of the training set rather than absolute counts.
hyper_parameters = HyperParameters({
    'dt__criterion': Categorical(['gini', 'entropy']),
    'dt__max_depth': Integer(4, 24),
    'dt__min_samples_leaf': Real(0.000001, 0.001),
    'dt__min_samples_split': Real(0.000002, 0.002),
})

# Two-step pipeline: the project's ordinal_data_mapper followed by the
# decision tree whose parameters are searched above.
decision_tree_basic = Pipeline(steps=[
    ('mapper', ordinal_data_mapper),
    ('dt', DecisionTreeClassifier()),
])


def test_decision_tree():
    """Build the Runner for the decision-tree search experiment.

    The Runner receives the output path string, the result of
    load_clean_sample_data_frame(), the label 'arrest' (presumably the
    target column -- confirm), the decision_tree_basic pipeline and the
    module-level search space.

    NOTE(review): in the visible lines the runner is only constructed; the
    call that actually launches the experiment appears to be cut off.
    """
    runner = Runner('model/experiment/output/decision_tree_basic',
                    load_clean_sample_data_frame(),
                    'arrest',
                    decision_tree_basic,
                    hyper_parameters=hyper_parameters)
コード例 #3
0
from imblearn.under_sampling import RandomUnderSampler

from skopt.space import Categorical, Integer, Real

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline

from utility import HyperParameters, Runner
from model import load_clean_sample_data_frame, ordinal_data_mapper

# Experiment knobs.
# NOTE(review): neither name is referenced in the visible part of this
# file -- presumably they are passed to the runner's search call; confirm.
sample = None
iterations = 12

# Search space for the 'gb' pipeline step (`<step>__<parameter>` keys).
# n_estimators is not searched; it is fixed on the estimator below.
hyper_parameters = HyperParameters({
    'gb__learning_rate': Real(0.01, 0.1),
    'gb__subsample': Real(0.5, 1),
    'gb__max_depth': Integer(3, 7),
    'gb__max_features': Categorical(['sqrt', 'log2']),
})

# Two-step pipeline: the project's ordinal_data_mapper followed by a
# gradient-boosting classifier pinned to 200 estimators.
gradient_boosting_pipeline = Pipeline(
    steps=[('mapper', ordinal_data_mapper),
           ('gb', GradientBoostingClassifier(n_estimators=200))],
)


def test_gradient_boosting():
    runner = Runner(
        'model/experiment/output/gradient_boosting_basic',
        load_clean_sample_data_frame(),
        'arrest',
        gradient_boosting_pipeline,
コード例 #4
0
from skopt.space import Integer, Real

from sklearn.pipeline import Pipeline

from utility import HyperParameters, Runner
from model import load_sample_data_frame, ordinal_data_mapper

# Experiment knobs.
# NOTE(review): neither name is referenced in the visible part of this
# file -- presumably they are passed to the runner's search call; confirm.
sample = None
iterations = 24

# Search space for the 'xgb' pipeline step (`<step>__<parameter>` keys).
# gamma spans six orders of magnitude, so it is sampled log-uniformly;
# every other dimension uses the default prior.
hyper_parameters = HyperParameters(
    search_space={
        'xgb__n_estimators': Integer(100, 500),
        'xgb__learning_rate': Real(0.1, 0.3),
        'xgb__gamma': Real(0.0001, 100.0, prior='log-uniform'),
        'xgb__max_depth': Integer(3, 7),
        'xgb__colsample_bytree': Real(0.4, 0.8),
        'xgb__colsample_bylevel': Real(0.4, 0.8),
        'xgb__colsample_bynode': Real(0.4, 0.8),
    },
)

# NOTE(review): `xgb` is not imported in the visible part of this file --
# presumably `import xgboost as xgb`; confirm it is in scope.
# Two-step pipeline: the project's ordinal_data_mapper followed by an
# XGBoost classifier using the histogram tree method.
xgboost_basic = Pipeline(steps=[
    ('mapper', ordinal_data_mapper),
    ('xgb', xgb.XGBClassifier(tree_method='hist')),
])


def test_xgboost():
    """Build the Runner for the XGBoost search experiment.

    The Runner receives the output path string, the result of
    load_sample_data_frame(), the label 'arrest' (presumably the target
    column -- confirm), the xgboost_basic pipeline and the module-level
    search space.

    NOTE(review): in the visible lines the runner is only constructed; the
    call that actually launches the experiment appears to be cut off.
    """
    runner = Runner('model/experiment/output/xgboost_basic_full',
                    load_sample_data_frame(),
                    'arrest',
                    xgboost_basic,
                    hyper_parameters=hyper_parameters)
コード例 #5
0
from skopt.space import Categorical, Integer

from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

from utility import HyperParameters, Runner
from model import load_sample_data_frame, ordinal_data_mapper

# Experiment knobs.
# NOTE(review): neither name is referenced in the visible part of this
# file -- presumably they are passed to the runner's search call; confirm.
sample = None
iterations = 24

# Search space for the 'rf' pipeline step (`<step>__<parameter>` keys).
# The max_features choices include None, which scikit-learn treats as
# "consider every feature at each split".
hyper_parameters = HyperParameters(
    search_space={
        'rf__n_estimators': Integer(50, 100),
        'rf__criterion': Categorical(['gini', 'entropy']),
        'rf__max_depth': Integer(4, 18),
        'rf__max_features': Categorical(['sqrt', 'log2', None]),
        'rf__bootstrap': Categorical([True, False]),
    },
)

# Two-step pipeline: the project's ordinal_data_mapper followed by the
# random forest whose parameters are searched above.
random_forest_basic = Pipeline(
    steps=[('mapper', ordinal_data_mapper),
           ('rf', RandomForestClassifier())],
)


def test_random_forest_basic():
    runner = Runner(
        'model/experiment/output/random_forest_basic',
        load_sample_data_frame(),
        'violation',
コード例 #6
0
from skopt.space import Categorical, Integer, Real

from sklearn.ensemble import AdaBoostClassifier
from sklearn.pipeline import Pipeline

from utility import HyperParameters, Runner
from model import load_clean_sample_data_frame, ordinal_data_mapper

# `sample` and `iterations` are experiment knobs.
# NOTE(review): how they reach the search call is not visible in this
# excerpt (the test function is cut off) -- confirm against the caller.
sample = None
iterations = 12

# Search space for the 'ada' pipeline step (`<step>__<parameter>` keys).
# NOTE(review): 'SAMME.R' was deprecated in scikit-learn 1.4 and removed in
# 1.6 -- confirm the pinned scikit-learn version still accepts it.
hyper_parameters = HyperParameters({
    'ada__n_estimators': Integer(50, 200),
    'ada__learning_rate': Real(0.5, 1.5),
    'ada__algorithm': Categorical(['SAMME', 'SAMME.R']),
})

# Two-step pipeline: the project's ordinal_data_mapper followed by the
# AdaBoost classifier whose parameters are searched above.
ada_boost_pipeline = Pipeline(steps=[
    ('mapper', ordinal_data_mapper),
    ('ada', AdaBoostClassifier()),
])


def test_ada_boost():
    runner = Runner('model/experiment/output/ada_boost_basic',
                    load_clean_sample_data_frame(),
                    'arrest',
                    ada_boost_pipeline,
                    hyper_parameters=hyper_parameters)
    runner.run_classification_search_experiment('roc_auc',
コード例 #7
0
from skopt.space import Categorical, Integer, Real

from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

from utility import HyperParameters, Runner
from model import load_clean_sample_data_frame, ordinal_data_mapper

# Experiment knobs.
# NOTE(review): neither name is referenced in the visible part of this
# file -- presumably they are passed to the runner's search call; confirm.
sample = None
iterations = 12

# Search space for the 'rf' pipeline step (`<step>__<parameter>` keys).
hyper_parameters = HyperParameters({
    'rf__n_estimators': Integer(50, 150),
    'rf__criterion': Categorical(['gini', 'entropy']),
    'rf__max_depth': Integer(4, 18),
    'rf__max_features': Categorical(['sqrt', 'log2']),
})

# Two-step pipeline: the project's ordinal_data_mapper followed by the
# random forest whose parameters are searched above.
random_forest_pipeline = Pipeline(steps=[
    ('mapper', ordinal_data_mapper),
    ('rf', RandomForestClassifier()),
])


def test_random_forest():
    """Build the Runner for the random-forest search experiment.

    The Runner receives the output path string, the result of
    load_clean_sample_data_frame(), the label 'arrest' (presumably the
    target column -- confirm), the random_forest_pipeline and the
    module-level search space.

    NOTE(review): in the visible lines the runner is only constructed; the
    call that actually launches the experiment appears to be cut off.
    """
    runner = Runner('model/experiment/output/random_forest_basic',
                    load_clean_sample_data_frame(),
                    'arrest',
                    random_forest_pipeline,
                    hyper_parameters=hyper_parameters)