def test_errors(self):
    tunables = {
        'a_tunable': {
            'a_parameter': {
                'type': 'int',
                'default': 0,
                'range': [0, 2]
            }
        },
        'another_tunable': {
            'a_parameter': {
                'type': 'int',
                'default': 0,
                'range': [0, 2]
            }
        }
    }

    def scorer(name, proposal):
        if name == 'another_tunable':
            raise Exception()

        return proposal['a_parameter']

    session = BTBSession(tunables, scorer)

    best = session.run(4)

    assert best['name'] == 'a_tunable'
    assert best['config'] == {'a_parameter': 2}
def test_record_score_lt_best(self):
    # setup
    tuner = MagicMock()

    instance = MagicMock(spec_set=BTBSession)
    instance._make_id.return_value = 0
    instance.proposals = [{'test': 'test'}]
    instance._tuners = {'test': tuner}
    instance.best_proposal = None
    instance._best_normalized = 10
    instance._normalize.return_value = 1
    instance._normalized_scores = defaultdict(list)

    # run
    BTBSession.record(instance, 'test', 'config', 1)

    # assert
    expected_normalized_scores = defaultdict(list)
    expected_normalized_scores['test'].append(1)

    assert instance.best_proposal is None
    assert instance._normalized_scores == expected_normalized_scores
    assert instance._best_normalized == 10

    tuner.record.assert_called_once_with('config', 1)
def tune(self, X, y, max_evals=10, scoring=None, verbose=False):
    """Tune the pipeline hyper-parameters and select the optimized model.

    Args:
        X (pandas.DataFrame or ndarray):
            Inputs to the pipeline.
        y (pandas.Series or ndarray):
            Target values.
        max_evals (int):
            Maximum number of hyper-parameter optimization iterations.
        scoring (str):
            The name of the scoring function.
        verbose (bool):
            Whether to log information during processing.
    """
    tunables = {'0': self._pipeline.get_tunable_hyperparameters(flat=True)}

    session = BTBSession(
        tunables,
        lambda _, hyparam: self.k_fold_validation(hyparam, X=X, y=y, scoring=scoring),
        max_errors=max_evals,
        verbose=verbose
    )

    best_proposal = session.run(max_evals)
    self._pipeline.set_hyperparameters(best_proposal['config'])
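# A minimal standalone sketch (not part of the class above) of the BTBSession
# pattern that ``tune`` relies on. The tunable spec and ``scorer`` below are
# hypothetical stand-ins for ``get_tunable_hyperparameters(flat=True)`` and
# ``k_fold_validation``; only the BTBSession calls mirror the method above.
def _sketch_tune_pattern():
    from btb.session import BTBSession

    # hypothetical flat tunable specification under a single pipeline key
    tunables = {
        '0': {
            'a_parameter': {
                'type': 'int',
                'default': 0,
                'range': [0, 10]
            }
        }
    }

    def scorer(name, config):
        # stand-in for self.k_fold_validation(config, X=X, y=y, scoring=scoring)
        return config['a_parameter']

    session = BTBSession(tunables, scorer, max_errors=10, verbose=False)
    best_proposal = session.run(10)

    # best_proposal['config'] is what set_hyperparameters would receive
    return best_proposal['config']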
def test_propose_no_tunables(self):
    # setup
    instance = MagicMock(spec_set=BTBSession)
    instance._tunables = None

    # run
    with self.assertRaises(StopTuning):
        BTBSession.propose(instance)
def test_propose_tuner_is_none(self):
    # setup
    instance = MagicMock(spec_set=BTBSession)
    instance._tuners = {'test_tunable': None}
    instance._tunable_names = ['test_tunable']
    instance._normalized_scores = None
    instance._get_next_tunable_name.return_value = 'test_tunable'

    # run
    with self.assertRaises(ValueError):
        BTBSession.propose(instance)
def test_handle_error_errors_gt_max_errors(self):
    # setup
    instance = MagicMock(spec_set=BTBSession)
    instance.errors = Counter()
    instance._max_errors = 0

    # run
    BTBSession.handle_error(instance, 'test')

    # assert
    instance._remove_tunable.assert_called_once_with('test')
def test_handle_error_errors_gt_max_errors(self):
    # setup
    instance = MagicMock(spec_set=BTBSession)
    instance.errors = Counter()
    instance._max_errors = 0

    # run
    BTBSession.handle_error(instance, 'test')

    # assert
    instance._normalized_scores.pop.assert_called_once_with('test', None)
    instance._tunable_names.remove.assert_called_once_with('test')
def test_record_score_is_none(self):
    # setup
    instance = MagicMock(spec_set=BTBSession)
    instance._make_id.return_value = 0
    instance.proposals = [{'test': 'test'}]
    instance.errors = Counter()
    instance._max_errors = 5

    # run
    BTBSession.record(instance, 'test', 'config', None)

    # assert
    instance.handle_error.assert_called_once_with('test')
def test_normalized_score_becomes_none(self):
    """Tunables that worked at some point but end up removed are not tried again.

    After commit ``6a08dc3cf1b68b35630cae6a87783aec4e2c9f83`` the following
    scenario has been observed:

    - One tunable produces a score at least once and then fails the next trials.
    - All the other tunables never produce any score.
    - Once all the tuners are created, only the one that produced a score is used.
    - After enough errors, this one is discarded, so ``_normalized_scores`` is empty.
    - Since a random.choice is used over the list of tunables, which still contains
      the one that has been discarded, at some point the discarded one is tried again.

    This test certifies that this scenario cannot happen again, by validating that
    the number of errors is always ``max_errors`` at most.
    """
    scores = []

    def scorer(name, proposal):
        """Produce a score for the first trial and then fail forever."""
        if not scores:
            # a list is used because a plain boolean flag could not be
            # reassigned here without ``nonlocal`` or ``global``
            scores.append(1)
            return 1

        raise Exception()

    tunables = {
        'a_tunable': {
            'a_parameter': {
                'type': 'int',
                'default': 0,
                'range': [0, 10]
            }
        },
        'another_tunable': {
            'a_parameter': {
                'type': 'int',
                'default': 0,
                'range': [0, 10]
            }
        }
    }

    session = BTBSession(tunables, scorer, max_errors=3)

    with pytest.raises(StopTuning):
        session.run(8)

    assert session.errors == {'a_tunable': 3, 'another_tunable': 3}
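# A minimal sketch (not one of the original tests) of the ``max_errors`` behaviour
# described in the docstring above, assuming only the BTBSession API already used in
# these tests: once a tunable accumulates ``max_errors`` errors it is removed, and
# when no tunables remain the session raises StopTuning.
def _sketch_max_errors_cap():
    tunables = {
        'a_tunable': {
            'a_parameter': {
                'type': 'int',
                'default': 0,
                'range': [0, 10]
            }
        }
    }

    def scorer(name, proposal):
        # every trial fails, so errors accumulate until the tunable is removed
        raise Exception()

    session = BTBSession(tunables, scorer, max_errors=2)

    with pytest.raises(StopTuning):
        session.run(5)

    # the per-tunable error count never exceeds max_errors
    assert session.errors == {'a_tunable': 2}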
def test_stop(self):
    tunables = {
        'a_tunable': {
            'a_parameter': {
                'type': 'int',
                'default': 0,
                'range': [0, 2]
            }
        }
    }

    session = BTBSession(tunables, self.scorer)

    with pytest.raises(StopTuning):
        session.run()
def test_propose_normalized_scores_gt_tunable_names(self):
    # setup
    tuner = MagicMock()
    tuner.propose.return_value = 'parameters'

    instance = MagicMock(spec_set=BTBSession)
    instance.proposals = {}
    instance._normalized_scores.__len__.return_value = 1
    instance._selector.select.return_value = 'test_tunable'
    instance._tuners = {'test_tunable': tuner}
    instance._tunables = {'test_tunable': 'test_spec'}
    instance._tunable_names = ['test_tunable']
    instance._make_id.return_value = 1

    # run
    res_name, res_config = BTBSession.propose(instance)

    # assert
    assert res_name == 'test_tunable'
    assert res_config == 'parameters'

    expected_proposals = {
        1: {
            'id': 1,
            'name': 'test_tunable',
            'config': 'parameters'
        }
    }
    assert instance.proposals == expected_proposals

    instance._make_id.assert_called_once_with('test_tunable', 'parameters')
    tuner.propose.assert_called_once_with(1)
def test___init__custom(self):
    # setup
    selector = MagicMock()

    # run
    tunable = {'my_test_tuner': {'my_tunable_hp': {}}}
    scorer = 'my_scorer'

    instance = BTBSession(
        tunable,
        scorer,
        tuner_class='my_tuner',
        selector_class=selector,
        maximize=False,
        max_errors=2,
        verbose=True
    )

    # assert
    assert instance._tunables is tunable
    assert instance._scorer is scorer
    assert instance._tuner_class == 'my_tuner'
    assert instance._max_errors == 2
    assert instance._best_normalized == np.inf
    assert instance._normalized_scores == defaultdict(list)
    assert instance._tuners == {}
    assert instance._tunable_names == ['my_test_tuner']
    assert instance._range is trange
    assert instance.best_proposal is None
    assert instance.proposals == {}
    assert instance.iterations == 0
    assert instance.errors == Counter()
def test_allow_duplicates(self):
    tunables = {
        'a_tunable': {
            'a_parameter': {
                'type': 'int',
                'default': 0,
                'range': [0, 2]
            }
        }
    }

    session = BTBSession(tunables, self.scorer, allow_duplicates=True)

    best = session.run(10)

    assert best['name'] == 'a_tunable'
    assert best['config'] == {'a_parameter': 2}
def test__normalize_maximize_true(self):
    # setup
    instance = MagicMock(spec_set=BTBSession)

    # run
    result = BTBSession._normalize(instance, 1)

    # assert
    assert result == 1
def test_minimize(self):
    tunables = {
        'a_tunable': {
            'a_parameter': {
                'type': 'int',
                'default': 0,
                'range': [0, 2]
            }
        }
    }

    session = BTBSession(tunables, self.scorer, maximize=False)

    best = session.run(3)

    assert best == session.best_proposal
    assert best['name'] == 'a_tunable'
    assert best['config'] == {'a_parameter': 0}
def test__normalize_maximize_false(self):
    # setup
    instance = MagicMock(spec_set=BTBSession)
    instance._maximize = False

    # run
    result = BTBSession._normalize(instance, 1)

    # assert
    assert result == -1
def test_propose_raise_error(self):
    # setup
    tuner = MagicMock()
    tuner.propose.side_effect = [StopTuning('test')]

    instance = MagicMock(spec_set=BTBSession)
    instance._normalized_scores.__len__.return_value = 1
    instance._selector.select.return_value = 'test_tunable'
    instance._tuners = {'test_tunable': tuner}
    instance._tunables = {'test_tunable': 'test_spec'}
    instance._tunable_names = ['test_tunable']
    instance._make_id.return_value = 1

    # run
    with self.assertRaises(ValueError):
        BTBSession.propose(instance)
def test_record_score_gt_best_tuner_none(self):
    # setup
    instance = MagicMock(spec_set=BTBSession)
    instance._make_id.return_value = 0
    instance.proposals = [{'test': 'test'}]
    instance._tuners = {'test': None}
    instance.best_proposal = None
    instance._best_normalized = 0
    instance._normalize.return_value = 1
    instance._normalized_scores = defaultdict(list)

    # run
    BTBSession.record(instance, 'test', 'config', 1)

    # assert
    expected_normalized_scores = defaultdict(list)
    expected_normalized_scores['test'].append(1)

    assert instance._normalized_scores == expected_normalized_scores
    assert instance.best_proposal == {'test': 'test', 'score': 1}
    assert instance._best_normalized == 1
def test_allow_errors(self):
    tunables = {
        'a_tunable': {
            'a_parameter': {
                'type': 'int',
                'default': 0,
                'range': [0, 1]
            }
        }
    }

    def scorer(name, proposal):
        if proposal['a_parameter'] == 0:
            raise Exception()

        return 1

    session = BTBSession(tunables, scorer, max_errors=10)

    best = session.run(10)

    assert best['name'] == 'a_tunable'
    assert best['config'] == {'a_parameter': 1}
def test_multiple(self):
    tunables = {
        'a_tunable': {
            'a_parameter': {
                'type': 'int',
                'default': 0,
                'range': [0, 2]
            }
        },
        'another_tunable': {
            'a_parameter': {
                'type': 'int',
                'default': 0,
                'range': [0, 2]
            }
        }
    }

    session = BTBSession(tunables, self.scorer)

    best = session.run(6)

    assert best['name'] == 'another_tunable'
    assert best['config'] == {'a_parameter': 2}
def test_run_score(self):
    # setup
    instance = MagicMock(spec_set=BTBSession)
    instance.propose.return_value = ('test', 'config')
    instance._scorer.return_value = 1
    instance.best_proposal = {'test': 'config'}
    instance._range = range
    instance.iterations = 0

    # run
    result = BTBSession.run(instance, 1)

    # assert
    instance._scorer.assert_called_once_with('test', 'config')
    instance.record.assert_called_once_with('test', 'config', 1)

    assert result == {'test': 'config'}
    assert instance.iterations == 1
def test_run_score_none(self):
    # setup
    instance = MagicMock(spec_set=BTBSession)
    instance.propose.return_value = ('test', {'hp': 'test'})
    instance._scorer.side_effect = Exception()
    instance.best_proposal = {'test': 'config'}
    instance._range = range
    instance.iterations = 0

    # run
    result = BTBSession.run(instance, 1)

    # assert
    instance._scorer.assert_called_once_with('test', {'hp': 'test'})
    instance.record.assert_called_once_with('test', {'hp': 'test'}, None)

    assert result == {'test': 'config'}
    assert instance.iterations == 1
def test__make_dumpable(self):
    # run
    randint = np.random.randint(1, dtype=np.integer)
    to_dump = {
        1: randint,
        'str': 'None',
        'array': np.array([1, 2, 3]),
    }

    result = BTBSession._make_dumpable(MagicMock(), to_dump)

    # assert
    expected_result = {
        '1': int(randint),
        'str': None,
        'array': [1, 2, 3]
    }

    assert result == expected_result
@patch('btb.session.np.random.choice')
def test__get_next_tunable_name_normalized_scores(self, mock_np_random_choice):
    # setup
    mock_np_random_choice.return_value = 'test_name'
    selector = MagicMock()
    selector.select.return_value = 'test_name'

    instance = MagicMock(spec_set=BTBSession)
    instance._normalized_scores = [('test_name', 0.1), ('second_test_name', 0.2)]
    instance._selector = selector

    # run
    tunable_name = BTBSession._get_next_tunable_name(instance)

    # assert
    assert tunable_name == 'test_name'

    mock_np_random_choice.assert_not_called()
    selector.select.assert_called_once_with([
        ('test_name', 0.1),
        ('second_test_name', 0.2),
    ])
@patch('btb.session.np.random.choice')
def test__get_next_tunable_name_normalized_scores_none(self, mock_np_random_choice):
    # setup
    mock_np_random_choice.return_value = 'test_name'

    instance = MagicMock(spec_set=BTBSession)
    instance._normalized_scores = None
    instance._tunables = {
        'test_name': 'test',
        'second_test_name': 'second_test',
    }

    # build the expected argument the same way ``btb.session`` does, to avoid
    # relying on dict ordering (an issue on Python 3.5)
    expected_mock_call = list(instance._tunables.keys())

    # run
    tunable_name = BTBSession._get_next_tunable_name(instance)

    # assert
    assert tunable_name == 'test_name'
    mock_np_random_choice.assert_called_once_with(expected_mock_call)
@patch('btb.session.isinstance')
@patch('btb.session.Tunable')
def test_propose_normalized_scores_lt_tunable_names(self, mock_tunable, mock_isinstance):
    # setup
    mock_tunable.from_dict.return_value.get_defaults.return_value = 'defaults'
    mock_isinstance.return_value = True

    tuner = MagicMock()

    instance = MagicMock(spec_set=BTBSession)
    instance._tuner_class = tuner
    instance.proposals = {}
    instance._normalized_scores.__len__.return_value = 0
    instance._tunables = {'test_tunable': 'test_spec'}
    instance._tunable_names = ['test_tunable']
    instance._make_id.return_value = 1

    # run
    res_name, res_config = BTBSession.propose(instance)

    # assert
    assert res_name == 'test_tunable'
    assert res_config == 'defaults'

    expected_proposals = {
        1: {
            'id': 1,
            'name': 'test_tunable',
            'config': 'defaults'
        }
    }
    assert instance.proposals == expected_proposals

    instance._make_id.assert_called_once_with('test_tunable', 'defaults')
    mock_tunable.from_dict.assert_called_once_with('test_spec')
    tuner.assert_called_once_with(mock_tunable.from_dict.return_value)
    mock_tunable.from_dict.return_value.get_defaults.assert_called_once_with()

    expected_isinstance_calls = [call('test_spec', dict), call('defaults', mock_tunable)]
    mock_isinstance.has_calls(expected_isinstance_calls)
@patch('btb.session.isinstance')
@patch('btb.session.Tunable')
def test_propose_tunable_cardinality_eq_one(self, mock_tunable, mock_isinstance):
    # setup
    mock_tunable.from_dict.return_value.cardinality = 1
    mock_tunable.from_dict.return_value.get_defaults.return_value = 'parameters'
    mock_isinstance.return_value = True

    instance = MagicMock(spec_set=BTBSession)
    instance._tuners = {}
    instance._tunable_names = ['test_tunable']
    instance.proposals = {}
    instance._make_id.return_value = 1

    # run
    tunable_name, config = BTBSession.propose(instance)

    # assert
    instance._make_id.assert_called_once_with('test_tunable', 'parameters')
    instance._tuner_class.assert_not_called()

    assert instance._tuners == {'test_tunable': None}
    assert 'test_tunable' == tunable_name
    assert 'parameters' == config
def test___init__default(self):
    # run
    tunable = {'my_test_tuner': {'my_tunable_hp': {}}}
    scorer = 'my_scorer'

    instance = BTBSession(tunable, scorer)

    # assert
    assert instance._tunables is tunable
    assert instance._scorer is scorer
    assert instance._tuner_class is GPTuner
    assert instance._best_normalized == -np.inf
    assert instance._normalized_scores == defaultdict(list)
    assert instance._tuners == {}
    assert instance._tunable_names == ['my_test_tuner']
    assert instance._range is range
    assert instance._max_errors == 1
    assert instance._maximize
    assert instance.best_score is None
    assert instance.best_proposal is None
    assert instance.proposals == {}
    assert instance.iterations == 0
    assert instance.errors == Counter()
def train_btb(X_train, X_test, y_train, y_test, mtype, common_name_model, problemtype,
              classes, default_featurenames, transform_model, settings, model_session):

    # create file names
    model_name = common_name_model + '.pickle'
    folder = 'btb_session'
    csvname = common_name_model.split('_')[0]
    curdir = os.getcwd()
    files = list()

    # make a temporary folder for the training session
    try:
        os.mkdir(folder)
        os.chdir(folder)
    except:
        shutil.rmtree(folder)
        os.mkdir(folder)
        os.chdir(folder)

    # get training and testing data
    try:
        shutil.copy(curdir + '/' + model_session + '/data/' + csvname + '_train_transformed.csv', os.getcwd() + '/train.csv')
        shutil.copy(curdir + '/' + model_session + '/data/' + csvname + '_test_transformed.csv', os.getcwd() + '/test.csv')
    except:
        shutil.copy(curdir + '/' + model_session + '/data/' + csvname + '_train.csv', os.getcwd() + '/train.csv')
        shutil.copy(curdir + '/' + model_session + '/data/' + csvname + '_test.csv', os.getcwd() + '/test.csv')

    # create required .JSON
    dataset_id, filename = create_json(folder, 'train.csv')

    os.mkdir(dataset_id)
    os.chdir(dataset_id)
    os.mkdir('tables')
    shutil.copy(curdir + '/' + folder + '/train.csv', os.getcwd() + '/tables/train.csv')

    if mtype == 'c':

        def build_model(name, hyperparameters):
            model_class = models[name]
            return model_class(random_state=0, **hyperparameters)

        def score_model(name, hyperparameters):
            model = build_model(name, hyperparameters)
            scores = cross_val_score(model, X_train, y_train)
            return scores.mean()

        rf_hyperparams = {
            'n_estimators': IntHyperParam(min=10, max=500),
            'max_depth': IntHyperParam(min=10, max=500)
        }
        rf_tunable = Tunable(rf_hyperparams)
        print(rf_tunable)

        svc_hyperparams = {
            'C': FloatHyperParam(min=0.01, max=10.0),
            'gamma': FloatHyperParam(0.000000001, 0.0000001)
        }
        svc_tunable = Tunable(svc_hyperparams)
        print(svc_tunable)

        tuners = {'RF': rf_tunable, 'SVC': svc_tunable}
        print(tuners)

        models = {'RF': RandomForestClassifier, 'SVC': SVC}
        selector = UCB1(['RF', 'SVC'])

        session = BTBSession(tuners, score_model, verbose=True)
        best_proposal = session.run(iterations=100)

        best_model = build_model(best_proposal['name'], best_proposal['config'])
        best_model.fit(X_train, y_train)
        accuracy = best_model.score(X_test, y_test)
        # tuner.record(parameters, score)
        print('ACCURACY:')
        print(accuracy)

        # now save the model in .pickle
        os.chdir(curdir)
        f = open(model_name, 'wb')
        pickle.dump(best_model, f)
        f.close()

    elif mtype == 'r':

        tunables = {
            'random_forest': {
                'n_estimators': {'type': 'int', 'default': 2, 'range': [1, 1000]},
                'max_features': {'type': 'str', 'default': 'log2', 'range': [None, 'auto', 'log2', 'sqrt']},
                'min_samples_split': {'type': 'int', 'default': 2, 'range': [2, 20]},
                'min_samples_leaf': {'type': 'int', 'default': 2, 'range': [1, 20]},
            },
            'extra_trees': {
                'n_estimators': {'type': 'int', 'default': 2, 'range': [1, 1000]},
                'max_features': {'type': 'str', 'default': 'log2', 'range': [None, 'auto', 'log2', 'sqrt']},
                'min_samples_split': {'type': 'int', 'default': 2, 'range': [2, 20]},
                'min_samples_leaf': {'type': 'int', 'default': 2, 'range': [1, 20]},
            }
        }

        models = {
            'random_forest': RandomForestRegressor,
            'extra_trees': ExtraTreesRegressor,
        }

        def build_model(name, hyperparameters):
            model_class = models[name]
            return model_class(random_state=0, **hyperparameters)

        def score_model(name, hyperparameters):
            model = build_model(name, hyperparameters)
            r2_scorer = make_scorer(r2_score)
            scores = cross_val_score(model, X_train, y_train, scoring=r2_scorer)
            return scores.mean()

        session = BTBSession(tunables, score_model, verbose=True)
        best_proposal = session.run(iterations=100)

        best_model = build_model(best_proposal['name'], best_proposal['config'])
        best_model.fit(X_train, y_train)
        pred = best_model.predict(X_test)
        r2 = r2_score(y_test, pred)
        print('R2 score!!')
        print(r2)

        # now save the model in .pickle
        os.chdir(curdir)
        f = open(model_name, 'wb')
        pickle.dump(best_model, f)
        f.close()

    files.append(model_name)
    files.append(folder)
    model_dir = os.getcwd()

    return model_name, model_dir, files
def test_session():

    def build_model(name, hyperparameters):
        model_class = models[name]
        return model_class(random_state=0, **hyperparameters)

    def score_model(name, hyperparameters):
        model = build_model(name, hyperparameters)
        r2_scorer = make_scorer(r2_score)
        scores = cross_val_score(model, X_train, y_train, scoring=r2_scorer)
        return scores.mean()

    dataset = load_dataset()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, test_size=0.3, random_state=0)

    tunables = {
        'random_forest': {
            'n_estimators': {
                'type': 'int',
                'default': 2,
                'range': [1, 1000]
            },
            'max_features': {
                'type': 'str',
                'default': 'log2',
                'range': [None, 'auto', 'log2', 'sqrt']
            },
            'min_samples_split': {
                'type': 'int',
                'default': 2,
                'range': [2, 20]
            },
            'min_samples_leaf': {
                'type': 'int',
                'default': 2,
                'range': [1, 20]
            },
        },
        'extra_trees': {
            'n_estimators': {
                'type': 'int',
                'default': 2,
                'range': [1, 1000]
            },
            'max_features': {
                'type': 'str',
                'default': 'log2',
                'range': [None, 'auto', 'log2', 'sqrt']
            },
            'min_samples_split': {
                'type': 'int',
                'default': 2,
                'range': [2, 20]
            },
            'min_samples_leaf': {
                'type': 'int',
                'default': 2,
                'range': [1, 20]
            },
        }
    }

    models = {
        'random_forest': RandomForestRegressor,
        'extra_trees': ExtraTreesRegressor,
    }

    session = BTBSession(tunables, score_model, verbose=True)
    session.run(2)