# Hyperparameter search for a LightGBM classifier pipeline predicting the violation label.
import lightgbm as lgb

from skopt.space import Integer, Real
from sklearn.pipeline import Pipeline

from utility import HyperParameters, Runner
from model import load_sample_data_frame, ordinal_data_mapper

sample = None
iterations = 24

hyper_parameters = HyperParameters(search_space={
    'lgb__n_estimators': Integer(50, 500),
    'lgb__learning_rate': Real(0.001, 0.05),
    'lgb__num_leaves': Integer(7, 63),
    'lgb__max_bin': Integer(32, 256),
    'lgb__min_child_samples': Integer(10, 50),
    'lgb__subsample': Real(0.5, 0.9),
    'lgb__colsample_bytree': Real(0.5, 0.9)
})

lightgbm_basic = Pipeline([('mapper', ordinal_data_mapper),
                           ('lgb', lgb.LGBMClassifier())])


def test_lightgbm_basic():
    runner = Runner('model/experiment/output/lightgbm_basic',
                    load_sample_data_frame(),
                    'violation',
                    lightgbm_basic,
                    hyper_parameters)
    runner.run_classification_search_experiment('neg_log_loss', sample=sample)
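The `HyperParameters` and `Runner` helpers come from the project's own `utility` module, so their exact interface is not shown here. As a rough sketch of the pattern they presumably wrap, a `skopt` search space like the one above can be fed directly to scikit-optimize's `BayesSearchCV`; the synthetic data, the trimmed two-parameter space, and the omission of the ordinal mapper below are stand-ins for illustration, not the project's actual setup.

# Minimal, self-contained sketch (an assumption about what Runner wraps):
# the same skopt search space structure is what BayesSearchCV consumes.
import lightgbm as lgb
from skopt import BayesSearchCV
from skopt.space import Integer, Real
from sklearn.datasets import make_classification
from sklearn.pipeline import Pipeline

# Synthetic data standing in for load_sample_data_frame().
X, y = make_classification(n_samples=500, n_features=10, random_state=0)

pipeline = Pipeline([('lgb', lgb.LGBMClassifier())])
search = BayesSearchCV(
    pipeline,
    search_spaces={
        'lgb__n_estimators': Integer(50, 500),
        'lgb__learning_rate': Real(0.001, 0.05),
    },
    n_iter=8,                  # analogous to the iterations constant above
    scoring='neg_log_loss',
    cv=3,
    random_state=0)
search.fit(X, y)
print(search.best_params_, search.best_score_)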
# Hyperparameter search for a decision tree classifier predicting the arrest label.
from skopt.space import Categorical, Integer, Real
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

from utility import HyperParameters, Runner
from model import load_clean_sample_data_frame, ordinal_data_mapper

sample = None
iterations = 24

hyper_parameters = HyperParameters({
    'dt__criterion': Categorical(['gini', 'entropy']),
    'dt__max_depth': Integer(4, 24),
    'dt__min_samples_leaf': Real(0.000001, 0.001),
    'dt__min_samples_split': Real(0.000002, 0.002)
})

decision_tree_basic = Pipeline([('mapper', ordinal_data_mapper),
                                ('dt', DecisionTreeClassifier())])


def test_decision_tree():
    runner = Runner('model/experiment/output/decision_tree_basic',
                    load_clean_sample_data_frame(),
                    'arrest',
                    decision_tree_basic,
                    hyper_parameters=hyper_parameters)
# Hyperparameter search for a gradient boosting classifier predicting the arrest label.
from imblearn.under_sampling import RandomUnderSampler
from skopt.space import Categorical, Integer, Real
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline

from utility import HyperParameters, Runner
from model import load_clean_sample_data_frame, ordinal_data_mapper

sample = None
iterations = 12

hyper_parameters = HyperParameters({
    'gb__learning_rate': Real(0.01, 0.1),
    'gb__subsample': Real(0.5, 1),
    'gb__max_depth': Integer(3, 7),
    'gb__max_features': Categorical(['sqrt', 'log2'])
})

gradient_boosting_pipeline = Pipeline([
    ('mapper', ordinal_data_mapper),
    ('gb', GradientBoostingClassifier(n_estimators=200))
])


def test_gradient_boosting():
    runner = Runner('model/experiment/output/gradient_boosting_basic',
                    load_clean_sample_data_frame(),
                    'arrest',
                    gradient_boosting_pipeline,
                    hyper_parameters=hyper_parameters)
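`RandomUnderSampler` is imported above but never appears in the visible pipeline. If it is meant to rebalance the arrest classes before boosting, it has to live inside an imbalanced-learn pipeline rather than a plain scikit-learn one, since only `imblearn`'s `Pipeline` accepts sampler steps. The sketch below shows that wiring under those assumptions, with imbalanced synthetic data standing in for the project's data frame.

# Hedged sketch: an imblearn pipeline that undersamples the majority class
# before fitting the gradient boosting model. Not the project's actual code.
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.under_sampling import RandomUnderSampler
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

# Imbalanced synthetic data standing in for the real, arrest-labelled frame.
X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=0)

undersampled_gb = ImbPipeline([
    ('under', RandomUnderSampler(random_state=0)),   # sampler step (imblearn-only)
    ('gb', GradientBoostingClassifier(n_estimators=200))
])
undersampled_gb.fit(X, y)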
# Hyperparameter search for an XGBoost classifier predicting the arrest label.
import xgboost as xgb

from skopt.space import Integer, Real
from sklearn.pipeline import Pipeline

from utility import HyperParameters, Runner
from model import load_sample_data_frame, ordinal_data_mapper

sample = None
iterations = 24

hyper_parameters = HyperParameters(search_space={
    'xgb__n_estimators': Integer(100, 500),
    'xgb__learning_rate': Real(0.1, 0.3),
    'xgb__gamma': Real(0.0001, 100.0, prior='log-uniform'),
    'xgb__max_depth': Integer(3, 7),
    'xgb__colsample_bytree': Real(0.4, 0.8),
    'xgb__colsample_bylevel': Real(0.4, 0.8),
    'xgb__colsample_bynode': Real(0.4, 0.8)
})

xgboost_basic = Pipeline([('mapper', ordinal_data_mapper),
                          ('xgb', xgb.XGBClassifier(tree_method='hist'))])


def test_xgboost():
    runner = Runner('model/experiment/output/xgboost_basic_full',
                    load_sample_data_frame(),
                    'arrest',
                    xgboost_basic,
                    hyper_parameters=hyper_parameters)
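The `gamma` dimension spans six orders of magnitude, which is why it is searched on a log-uniform prior: a uniform prior over 0.0001–100 would place almost all of its draws above 1. The snippet below only illustrates that behaviour with skopt's own sampling and is not part of the experiment.

# Illustration only: compare uniform and log-uniform sampling of the gamma range.
from skopt.space import Real

uniform_gamma = Real(0.0001, 100.0)                          # default uniform prior
log_uniform_gamma = Real(0.0001, 100.0, prior='log-uniform')

# Uniform draws cluster near the top of the range; log-uniform draws
# spread evenly across the orders of magnitude.
print(uniform_gamma.rvs(n_samples=5, random_state=0))
print(log_uniform_gamma.rvs(n_samples=5, random_state=0))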
# Hyperparameter search for a random forest classifier predicting the violation label.
from skopt.space import Categorical, Integer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

from utility import HyperParameters, Runner
from model import load_sample_data_frame, ordinal_data_mapper

sample = None
iterations = 24

hyper_parameters = HyperParameters(search_space={
    'rf__n_estimators': Integer(50, 100),
    'rf__criterion': Categorical(['gini', 'entropy']),
    'rf__max_depth': Integer(4, 18),
    'rf__max_features': Categorical(['sqrt', 'log2', None]),
    'rf__bootstrap': Categorical([True, False])
})

random_forest_basic = Pipeline([
    ('mapper', ordinal_data_mapper),
    ('rf', RandomForestClassifier())
])


def test_random_forest_basic():
    runner = Runner('model/experiment/output/random_forest_basic',
                    load_sample_data_frame(),
                    'violation',
                    random_forest_basic,
                    hyper_parameters=hyper_parameters)
# Hyperparameter search for an AdaBoost classifier predicting the arrest label.
from skopt.space import Categorical, Integer, Real
from sklearn.ensemble import AdaBoostClassifier
from sklearn.pipeline import Pipeline

from utility import HyperParameters, Runner
from model import load_clean_sample_data_frame, ordinal_data_mapper

sample = None
iterations = 12

hyper_parameters = HyperParameters({
    'ada__n_estimators': Integer(50, 200),
    'ada__learning_rate': Real(0.5, 1.5),
    'ada__algorithm': Categorical(['SAMME', 'SAMME.R'])
})

ada_boost_pipeline = Pipeline([('mapper', ordinal_data_mapper),
                               ('ada', AdaBoostClassifier())])


def test_ada_boost():
    runner = Runner('model/experiment/output/ada_boost_basic',
                    load_clean_sample_data_frame(),
                    'arrest',
                    ada_boost_pipeline,
                    hyper_parameters=hyper_parameters)
    runner.run_classification_search_experiment('roc_auc', sample=sample)
# Hyperparameter search for a random forest classifier predicting the arrest label.
from skopt.space import Categorical, Integer, Real
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

from utility import HyperParameters, Runner
from model import load_clean_sample_data_frame, ordinal_data_mapper

sample = None
iterations = 12

hyper_parameters = HyperParameters({
    'rf__n_estimators': Integer(50, 150),
    'rf__criterion': Categorical(['gini', 'entropy']),
    'rf__max_depth': Integer(4, 18),
    'rf__max_features': Categorical(['sqrt', 'log2'])
})

random_forest_pipeline = Pipeline([('mapper', ordinal_data_mapper),
                                   ('rf', RandomForestClassifier())])


def test_random_forest():
    runner = Runner('model/experiment/output/random_forest_basic',
                    load_clean_sample_data_frame(),
                    'arrest',
                    random_forest_pipeline,
                    hyper_parameters=hyper_parameters)