예제 #1
0
    def test_errors(self):
        """The best proposal comes from the tunable whose scorer does not raise."""

        def scorer(name, proposal):
            # Every evaluation of ``another_tunable`` fails with an exception.
            if name != 'another_tunable':
                return proposal['a_parameter']

            raise Exception()

        # Both tunables expose the same single integer hyperparameter.
        tunables = {
            tunable_name: {
                'a_parameter': {'type': 'int', 'default': 0, 'range': [0, 2]}
            }
            for tunable_name in ('a_tunable', 'another_tunable')
        }

        session = BTBSession(tunables, scorer)
        best = session.run(4)

        assert best['name'] == 'a_tunable'
        assert best['config'] == {'a_parameter': 2}
예제 #2
0
    def test_record_score_lt_best(self):
        """A score that normalizes below the current best leaves the best untouched."""
        # setup
        tuner = MagicMock()

        session = MagicMock(spec_set=BTBSession)
        session.proposals = [{'test': 'test'}]
        session.best_proposal = None
        session._tuners = {'test': tuner}
        session._make_id.return_value = 0
        session._normalize.return_value = 1
        session._normalized_scores = defaultdict(list)
        session._best_normalized = 10

        # run
        BTBSession.record(session, 'test', 'config', 1)

        # assert
        assert session.best_proposal is None
        assert session._normalized_scores == defaultdict(list, {'test': [1]})
        assert session._best_normalized == 10

        # The raw score is still forwarded to the tuner.
        tuner.record.assert_called_once_with('config', 1)
예제 #3
0
파일: modeler.py 프로젝트: srravula1/Cardea
    def tune(self, X, y, max_evals=10, scoring=None, verbose=False):
        """Search the pipeline hyper-parameters and apply the best ones found.

        A ``BTBSession`` is run over the flat tunable hyper-parameters of
        ``self._pipeline``; each proposal is scored with
        ``self.k_fold_validation`` and the configuration of the best proposal
        is set on the pipeline.

        Args:
            X (pandas.DataFrame or ndarray):
                Inputs forwarded to ``k_fold_validation``.
            y (pandas.Series or ndarray):
                Target values forwarded to ``k_fold_validation``.
            max_evals (int):
                Number of session iterations to run. The same value is also
                used as the session ``max_errors``.
            scoring (str):
                Name of the scoring function, forwarded to ``k_fold_validation``.
            verbose (bool):
                Forwarded to the ``BTBSession`` ``verbose`` argument.
        """
        def score_proposal(_, hyperparameters):
            # The tunable name is irrelevant: there is a single tunable ('0').
            return self.k_fold_validation(hyperparameters, X=X, y=y, scoring=scoring)

        tunables = {'0': self._pipeline.get_tunable_hyperparameters(flat=True)}
        session = BTBSession(
            tunables,
            score_proposal,
            max_errors=max_evals,
            verbose=verbose,
        )

        winner = session.run(max_evals)
        self._pipeline.set_hyperparameters(winner['config'])
예제 #4
0
    def test_propose_no_tunables(self):
        """``propose`` raises ``StopTuning`` when ``_tunables`` is ``None``."""
        # setup
        session = MagicMock(spec_set=BTBSession)
        session._tunables = None

        # run / assert
        self.assertRaises(StopTuning, BTBSession.propose, session)
예제 #5
0
    def test_propose_tuner_is_none(self):
        """``propose`` raises ``ValueError`` when the chosen tunable has no tuner."""
        # setup
        tunable_name = 'test_tunable'

        session = MagicMock(spec_set=BTBSession)
        session._get_next_tunable_name.return_value = tunable_name
        session._tunable_names = [tunable_name]
        session._tuners = {tunable_name: None}
        session._normalized_scores = None

        # run / assert
        self.assertRaises(ValueError, BTBSession.propose, session)
예제 #6
0
    def test_handle_error_errors_gt_max_errors(self):
        """With ``_max_errors`` at 0, handling one error removes the tunable."""
        # setup
        session = MagicMock(spec_set=BTBSession)
        session._max_errors = 0
        session.errors = Counter()

        # run
        BTBSession.handle_error(session, 'test')

        # assert
        session._remove_tunable.assert_called_once_with('test')
예제 #7
0
    def test_handle_error_errors_gt_max_errors(self):
        """With ``_max_errors`` at 0, handling one error drops the tunable's state."""
        # setup
        session = MagicMock(spec_set=BTBSession)
        session._max_errors = 0
        session.errors = Counter()

        # run
        BTBSession.handle_error(session, 'test')

        # assert: the name is removed from the scores and from the name list
        session._tunable_names.remove.assert_called_once_with('test')
        session._normalized_scores.pop.assert_called_once_with('test', None)
예제 #8
0
    def test_record_score_is_none(self):
        """Recording a ``None`` score delegates to ``handle_error``."""
        # setup
        session = MagicMock(spec_set=BTBSession)
        session.proposals = [{'test': 'test'}]
        session.errors = Counter()
        session._max_errors = 5
        session._make_id.return_value = 0

        # run
        BTBSession.record(session, 'test', 'config', None)

        # assert
        session.handle_error.assert_called_once_with('test')
예제 #9
0
    def test_normalized_score_becomes_none(self):
        """Tunables that worked at some point but end up removed are not tried again.

        After commit ``6a08dc3cf1b68b35630cae6a87783aec4e2c9f83`` the following
        scenario was observed:

        - One tunable produces a score at least once and then fails on every
          later trial.
        - None of the other tunables ever produces a score.
        - Once all the tuners are created, only the tunable that produced a
          score is used.
        - After enough errors that tunable is discarded, leaving
          ``_normalized_scores`` empty.
        - Because a random choice is then made over the list of tunables, which
          still contains the discarded one, the discarded tunable is eventually
          tried again.

        This test guards against that regression by checking that no tunable
        accumulates more than ``max_errors`` errors.
        """
        already_scored = False

        def scorer(name, proposal):
            """Produce a score for the first trial and then fail forever."""
            nonlocal already_scored
            if already_scored:
                raise Exception()

            already_scored = True
            return 1

        tunables = {
            tunable_name: {
                'a_parameter': {'type': 'int', 'default': 0, 'range': [0, 10]}
            }
            for tunable_name in ('a_tunable', 'another_tunable')
        }

        session = BTBSession(tunables, scorer, max_errors=3)

        with pytest.raises(StopTuning):
            session.run(8)

        assert session.errors == {'a_tunable': 3, 'another_tunable': 3}
예제 #10
0
    def test_stop(self):
        """``run`` called without an iteration count ends by raising ``StopTuning``."""
        parameter_spec = {'type': 'int', 'default': 0, 'range': [0, 2]}
        tunables = {'a_tunable': {'a_parameter': parameter_spec}}

        session = BTBSession(tunables, self.scorer)

        with pytest.raises(StopTuning):
            session.run()
예제 #11
0
    def test_propose_normalized_scores_gt_tunable_names(self):
        """``propose`` returns and stores the config produced by the existing tuner."""
        # setup
        tuner = MagicMock()
        tuner.propose.return_value = 'parameters'

        session = MagicMock(spec_set=BTBSession)
        session.proposals = {}
        session._make_id.return_value = 1
        session._normalized_scores.__len__.return_value = 1
        session._selector.select.return_value = 'test_tunable'

        session._tunable_names = ['test_tunable']
        session._tunables = {'test_tunable': 'test_spec'}
        session._tuners = {'test_tunable': tuner}

        # run
        res_name, res_config = BTBSession.propose(session)

        # assert
        assert res_name == 'test_tunable'
        assert res_config == 'parameters'

        # The proposal is stored under the id returned by ``_make_id``.
        stored_proposal = {'id': 1, 'name': 'test_tunable', 'config': 'parameters'}
        assert session.proposals == {1: stored_proposal}

        session._make_id.assert_called_once_with('test_tunable', 'parameters')
        tuner.propose.assert_called_once_with(1)
예제 #12
0
    def test___init__custom(self):
        """Custom constructor arguments are reflected in the session attributes."""
        # setup
        selector = MagicMock()
        tunable = {'my_test_tuner': {'my_tunable_hp': {}}}
        scorer = 'my_scorer'
        options = {
            'tuner_class': 'my_tuner',
            'selector_class': selector,
            'maximize': False,
            'max_errors': 2,
            'verbose': True,
        }

        # run
        session = BTBSession(tunable, scorer, **options)

        # assert: constructor arguments
        assert session._tunables is tunable
        assert session._scorer is scorer
        assert session._tuner_class == 'my_tuner'
        assert session._max_errors == 2
        assert session._best_normalized == np.inf
        assert session._normalized_scores == defaultdict(list)
        assert session._tuners == {}
        assert session._tunable_names == ['my_test_tuner']
        assert session._range is trange

        # assert: initial public state
        assert session.best_proposal is None
        assert session.proposals == {}
        assert session.iterations == 0
        assert session.errors == Counter()
예제 #13
0
    def test_allow_duplicates(self):
        """Running more iterations than distinct configs works with duplicates allowed.

        The single tunable has an integer parameter in ``[0, 2]`` and the
        session is run for 10 iterations with ``allow_duplicates=True``.
        """
        tunables = {
            'a_tunable': {
                'a_parameter': {
                    'type': 'int',
                    'default': 0,
                    'range': [0, 2]
                }
            }
        }

        session = BTBSession(tunables, self.scorer, allow_duplicates=True)

        best = session.run(10)

        # Only ``a_tunable`` is defined above, so it is the only name the best
        # proposal can carry (the original assertion expected a tunable that
        # does not exist in this test).
        assert best['name'] == 'a_tunable'
        assert best['config'] == {'a_parameter': 2}
예제 #14
0
    def test__normalize_maximize_true(self):
        """``_normalize`` returns the score unchanged for this mocked session."""
        # setup: ``_maximize`` is deliberately left unset on the mock
        session = MagicMock(spec_set=BTBSession)

        # run
        normalized = BTBSession._normalize(session, 1)

        # assert
        assert normalized == 1
예제 #15
0
    def test_minimize(self):
        """With ``maximize=False`` the best proposal is the lowest parameter value."""
        parameter_spec = {'type': 'int', 'default': 0, 'range': [0, 2]}
        tunables = {'a_tunable': {'a_parameter': parameter_spec}}

        session = BTBSession(tunables, self.scorer, maximize=False)
        best = session.run(3)

        # ``run`` returns the same proposal that the session keeps as best.
        assert best == session.best_proposal
        assert best['name'] == 'a_tunable'
        assert best['config'] == {'a_parameter': 0}
예제 #16
0
    def test__normalize_maximize_false(self):
        """``_normalize`` negates the score when ``_maximize`` is ``False``."""
        # setup
        session = MagicMock(spec_set=BTBSession)
        session._maximize = False

        # run
        normalized = BTBSession._normalize(session, 1)

        # assert
        assert normalized == -1
예제 #17
0
    def test_propose_raise_error(self):
        """``propose`` raises ``ValueError`` when the tuner raises ``StopTuning``."""
        # setup
        tuner = MagicMock()
        tuner.propose.side_effect = [StopTuning('test')]

        session = MagicMock(spec_set=BTBSession)
        session._make_id.return_value = 1
        session._normalized_scores.__len__.return_value = 1
        session._selector.select.return_value = 'test_tunable'

        session._tunable_names = ['test_tunable']
        session._tunables = {'test_tunable': 'test_spec'}
        session._tuners = {'test_tunable': tuner}

        # run / assert
        self.assertRaises(ValueError, BTBSession.propose, session)
예제 #18
0
    def test_record_score_gt_best_tuner_none(self):
        """A new best score is stored even when the tunable has no tuner."""
        # setup
        session = MagicMock(spec_set=BTBSession)
        session.proposals = [{'test': 'test'}]
        session.best_proposal = None
        session._tuners = {'test': None}
        session._make_id.return_value = 0
        session._normalize.return_value = 1
        session._normalized_scores = defaultdict(list)
        session._best_normalized = 0

        # run
        BTBSession.record(session, 'test', 'config', 1)

        # assert
        assert session._normalized_scores == defaultdict(list, {'test': [1]})
        assert session.best_proposal == {'test': 'test', 'score': 1}
        assert session._best_normalized == 1
예제 #19
0
    def test_allow_errors(self):
        """With ``max_errors=10`` the session still finds the only config that scores."""

        def scorer(name, proposal):
            # A parameter value of 0 always fails; any other value scores 1.
            if proposal['a_parameter'] != 0:
                return 1

            raise Exception()

        parameter_spec = {'type': 'int', 'default': 0, 'range': [0, 1]}
        tunables = {'a_tunable': {'a_parameter': parameter_spec}}

        session = BTBSession(tunables, scorer, max_errors=10)
        best = session.run(10)

        assert best['name'] == 'a_tunable'
        assert best['config'] == {'a_parameter': 1}
예제 #20
0
    def test_multiple(self):
        """With two identical tunables, ``another_tunable`` yields the best proposal."""
        tunables = {
            tunable_name: {
                'a_parameter': {'type': 'int', 'default': 0, 'range': [0, 2]}
            }
            for tunable_name in ('a_tunable', 'another_tunable')
        }

        session = BTBSession(tunables, self.scorer)
        best = session.run(6)

        assert best['name'] == 'another_tunable'
        assert best['config'] == {'a_parameter': 2}
예제 #21
0
    def test_run_score(self):
        """``run`` scores the proposal, records the score and returns the best proposal."""
        # setup
        best_proposal = {'test': 'config'}

        session = MagicMock(spec_set=BTBSession)
        session.iterations = 0
        session._range = range
        session.best_proposal = best_proposal
        session.propose.return_value = ('test', 'config')
        session._scorer.return_value = 1

        # run
        returned = BTBSession.run(session, 1)

        # assert
        assert returned == {'test': 'config'}
        assert session.iterations == 1
        session._scorer.assert_called_once_with('test', 'config')
        session.record.assert_called_once_with('test', 'config', 1)
예제 #22
0
    def test_run_score_none(self):
        """When the scorer raises, ``run`` records ``None`` as the score."""
        # setup
        config = {'hp': 'test'}

        session = MagicMock(spec_set=BTBSession)
        session.iterations = 0
        session._range = range
        session.best_proposal = {'test': 'config'}
        session.propose.return_value = ('test', config)
        session._scorer.side_effect = Exception()

        # run
        returned = BTBSession.run(session, 1)

        # assert
        assert returned == {'test': 'config'}
        assert session.iterations == 1
        session._scorer.assert_called_once_with('test', {'hp': 'test'})
        session.record.assert_called_once_with('test', {'hp': 'test'}, None)
예제 #23
0
    def test__make_dumpable(self):
        """``_make_dumpable`` turns keys and numpy values into plain Python objects.

        The expected result has string keys, a built-in ``int`` instead of the
        numpy integer, ``None`` instead of the string ``'None'`` and a list
        instead of the numpy array.
        """
        # setup
        # ``np.integer`` is an abstract scalar type; passing it as ``dtype`` is
        # deprecated in NumPy, so a concrete numpy integer type is requested.
        randint = np.random.randint(1, dtype=np.int64)
        to_dump = {
            1: randint,
            'str': 'None',
            'array': np.array([1, 2, 3]),
        }

        # run
        result = BTBSession._make_dumpable(MagicMock(), to_dump)

        # assert
        expected_result = {
            '1': int(randint),
            'str': None,
            'array': [1, 2, 3]
        }

        assert result == expected_result
예제 #24
0
    def test__get_next_tunable_name_normalized_scores(self, mock_np_random_choice):
        """With normalized scores available, the selector picks the next tunable."""
        # setup
        mock_np_random_choice.return_value = 'test_name'

        normalized_scores = [('test_name', 0.1), ('second_test_name', 0.2)]
        expected_select_argument = list(normalized_scores)

        selector = MagicMock()
        selector.select.return_value = 'test_name'

        session = MagicMock(spec_set=BTBSession)
        session._selector = selector
        session._normalized_scores = normalized_scores

        # run
        chosen_name = BTBSession._get_next_tunable_name(session)

        # assert: the selector is used and the random choice is not
        assert chosen_name == 'test_name'
        mock_np_random_choice.assert_not_called()
        selector.select.assert_called_once_with(expected_select_argument)
예제 #25
0
    def test__get_next_tunable_name_normalized_scores_none(self, mock_np_random_choice):
        """Without normalized scores, the next tunable is a random choice of names."""
        # setup
        mock_np_random_choice.return_value = 'test_name'

        session = MagicMock(spec_set=BTBSession)
        session._normalized_scores = None
        session._tunables = {
            'test_name': 'test',
            'second_test_name': 'second_test',
        }

        # python3.5 randomness issue: the names are read the same way as in
        # `btb.session`, so the expected order matches.
        expected_names = list(session._tunables)

        # run
        chosen_name = BTBSession._get_next_tunable_name(session)

        # assert
        assert chosen_name == 'test_name'
        mock_np_random_choice.assert_called_once_with(expected_names)
예제 #26
0
    def test_propose_normalized_scores_lt_tunable_names(self, mock_tunable, mock_isinstance):
        """``propose`` builds the tuner for an untried tunable and proposes its defaults."""
        # setup
        mock_tunable.from_dict.return_value.get_defaults.return_value = 'defaults'
        mock_isinstance.return_value = True

        tuner = MagicMock()

        instance = MagicMock(spec_set=BTBSession)
        instance._tuner_class = tuner
        instance.proposals = {}
        instance._normalized_scores.__len__.return_value = 0
        instance._tunables = {'test_tunable': 'test_spec'}
        instance._tunable_names = ['test_tunable']

        instance._make_id.return_value = 1

        # run
        res_name, res_config = BTBSession.propose(instance)

        # assert
        assert res_name == 'test_tunable'
        assert res_config == 'defaults'

        expected_proposals = {
            1: {
                'id': 1,
                'name': 'test_tunable',
                'config': 'defaults'
            }
        }
        assert instance.proposals == expected_proposals

        instance._make_id.assert_called_once_with('test_tunable', 'defaults')
        mock_tunable.from_dict.assert_called_once_with('test_spec')
        tuner.assert_called_once_with(mock_tunable.from_dict.return_value)
        mock_tunable.from_dict.return_value.get_defaults.assert_called_once_with()

        # ``has_calls`` is not part of the Mock API: calling it only invokes an
        # auto-created child mock and verifies nothing. ``assert_has_calls`` is
        # the method that actually checks the recorded calls.
        # NOTE(review): the second expected call assumes ``propose`` type-checks
        # the object returned by ``Tunable.from_dict`` (the same object the
        # tuner class receives above) -- confirm against ``BTBSession.propose``.
        expected_isinstance_calls = [
            call('test_spec', dict),
            call(mock_tunable.from_dict.return_value, mock_tunable),
        ]
        mock_isinstance.assert_has_calls(expected_isinstance_calls)
예제 #27
0
    def test_propose_tunable_cardinality_eq_one(self, mock_tunable, mock_isinstance):
        """A tunable with cardinality 1 gets no tuner and proposes its defaults."""
        # setup
        built_tunable = mock_tunable.from_dict.return_value
        built_tunable.cardinality = 1
        built_tunable.get_defaults.return_value = 'parameters'
        mock_isinstance.return_value = True

        session = MagicMock(spec_set=BTBSession)
        session.proposals = {}
        session._tuners = {}
        session._tunable_names = ['test_tunable']
        session._make_id.return_value = 1

        # run
        tunable_name, config = BTBSession.propose(session)

        # assert
        session._make_id.assert_called_once_with('test_tunable', 'parameters')
        session._tuner_class.assert_not_called()

        assert session._tuners == {'test_tunable': None}
        assert 'test_tunable' == tunable_name
        assert 'parameters' == config
예제 #28
0
    def test___init__default(self):
        """A session built with only tunables and scorer exposes the default state."""
        # setup
        scorer = 'my_scorer'
        tunable = {'my_test_tuner': {'my_tunable_hp': {}}}

        # run
        session = BTBSession(tunable, scorer)

        # assert: constructor arguments and defaults
        assert session._tunables is tunable
        assert session._scorer is scorer
        assert session._tuner_class is GPTuner
        assert session._best_normalized == -np.inf
        assert session._normalized_scores == defaultdict(list)
        assert session._tuners == {}
        assert session._tunable_names == ['my_test_tuner']
        assert session._range is range
        assert session._max_errors == 1
        assert session._maximize

        # assert: initial public state
        assert session.best_score is None
        assert session.best_proposal is None
        assert session.proposals == {}
        assert session.iterations == 0
        assert session.errors == Counter()
예제 #29
0
def train_btb(X_train,X_test,y_train,y_test,mtype,common_name_model,problemtype,classes,default_featurenames,transform_model,settings,model_session):
	"""Tune candidate models with a ``BTBSession``, fit the best one and pickle it.

	A ``btb_session`` working folder is (re)created in the current directory,
	the session's train/test CSV files are copied into it, and a dataset
	folder is prepared via ``create_json``. Depending on ``mtype`` a
	classification (``'c'``) or regression (``'r'``) search is run for 100
	iterations; the best model is refit on the training data, evaluated on the
	test data and written to ``<common_name_model>.pickle`` in the directory
	the function was called from.

	For any other ``mtype`` no model is trained or saved, and the working
	directory is left inside the dataset folder.

	Args:
		X_train, y_train: training data used for cross-validation and the final fit.
		X_test, y_test: held-out data used to report accuracy / R2.
		mtype: ``'c'`` for classification or ``'r'`` for regression.
		common_name_model: base name of the model; the part before the first
			``'_'`` is used as the CSV file prefix.
		problemtype, classes, default_featurenames, transform_model, settings:
			not used by this function.
		model_session: name of the session folder (relative to the current
			directory) that contains the ``data`` subfolder.

	Returns:
		tuple: ``(model_name, model_dir, files)`` where ``files`` lists the
		pickle name and the working folder name and ``model_dir`` is the
		current working directory at return time.
	"""
	# create file names
	model_name = common_name_model + '.pickle'
	folder = 'btb_session'
	csvname = common_name_model.split('_')[0]
	curdir = os.getcwd()
	files = []

	# make a temporary folder for the training session, replacing a stale one
	try:
		os.mkdir(folder)
		os.chdir(folder)
	except OSError:
		shutil.rmtree(folder)
		os.mkdir(folder)
		os.chdir(folder)

	# get training and testing data, preferring the transformed versions
	data_prefix = curdir + '/' + model_session + '/data/' + csvname
	try:
		shutil.copy(data_prefix + '_train_transformed.csv', os.getcwd() + '/train.csv')
		shutil.copy(data_prefix + '_test_transformed.csv', os.getcwd() + '/test.csv')
	except OSError:
		shutil.copy(data_prefix + '_train.csv', os.getcwd() + '/train.csv')
		shutil.copy(data_prefix + '_test.csv', os.getcwd() + '/test.csv')

	# create required .JSON
	dataset_id, filename = create_json(folder, 'train.csv')
	os.mkdir(dataset_id)
	os.chdir(dataset_id)
	os.mkdir('tables')
	shutil.copy(curdir + '/' + folder + '/train.csv', os.getcwd() + '/tables/train.csv')

	if mtype == 'c':

		models = {'RF': RandomForestClassifier,
				'SVC': SVC}

		def build_model(name, hyperparameters):
			model_class = models[name]
			return model_class(random_state=0, **hyperparameters)

		def score_model(name, hyperparameters):
			model = build_model(name, hyperparameters)
			scores = cross_val_score(model, X_train, y_train)
			return scores.mean()

		rf_hyperparams = {'n_estimators': IntHyperParam(min=10, max=500),
				'max_depth': IntHyperParam(min=10, max=500)}

		rf_tunable = Tunable(rf_hyperparams)
		print(rf_tunable)

		svc_hyperparams = {'C': FloatHyperParam(min=0.01, max=10.0),
				'gamma': FloatHyperParam(0.000000001, 0.0000001)}

		svc_tunable = Tunable(svc_hyperparams)
		print(svc_tunable)

		tuners = {'RF': rf_tunable,
				'SVC': svc_tunable}

		print(tuners)

		session = BTBSession(tuners, score_model, verbose=True)
		best_proposal = session.run(iterations=100)
		best_model = build_model(best_proposal['name'], best_proposal['config'])
		best_model.fit(X_train, y_train)
		accuracy = best_model.score(X_test, y_test)

		print('ACCURACY:')
		print(accuracy)

		# now save the model in .pickle (the handle is closed even on error)
		os.chdir(curdir)
		with open(model_name, 'wb') as f:
			pickle.dump(best_model, f)

	elif mtype == 'r':

		tunables = {
			'random_forest': {
				'n_estimators': {'type': 'int', 'default': 2, 'range': [1, 1000]},
				'max_features': {'type': 'str', 'default': 'log2', 'range': [None, 'auto', 'log2', 'sqrt']},
				'min_samples_split': {'type': 'int', 'default': 2, 'range': [2, 20]},
				'min_samples_leaf': {'type': 'int', 'default': 2, 'range': [1, 20]},
			},
			'extra_trees': {
				'n_estimators': {'type': 'int', 'default': 2, 'range': [1, 1000]},
				'max_features': {'type': 'str', 'default': 'log2', 'range': [None, 'auto', 'log2', 'sqrt']},
				'min_samples_split': {'type': 'int', 'default': 2, 'range': [2, 20]},
				'min_samples_leaf': {'type': 'int', 'default': 2, 'range': [1, 20]},
			}
		}

		models = {
			'random_forest': RandomForestRegressor,
			'extra_trees': ExtraTreesRegressor,
		}

		def build_model(name, hyperparameters):
			model_class = models[name]
			return model_class(random_state=0, **hyperparameters)

		def score_model(name, hyperparameters):
			model = build_model(name, hyperparameters)
			r2_scorer = make_scorer(r2_score)
			scores = cross_val_score(model, X_train, y_train, scoring=r2_scorer)
			return scores.mean()

		session = BTBSession(tunables, score_model, verbose=True)
		best_proposal = session.run(iterations=100)
		best_model = build_model(best_proposal['name'], best_proposal['config'])

		best_model.fit(X_train, y_train)
		pred = best_model.predict(X_test)

		# Do not rebind the name ``r2_score`` here: assigning to it would make
		# it a local of ``train_btb`` and leave it unbound both in
		# ``score_model`` and on this very line.
		test_r2 = r2_score(y_test, pred)

		print('R2 score!!')
		print(test_r2)

		# now save the model in .pickle (the handle is closed even on error)
		os.chdir(curdir)
		with open(model_name, 'wb') as f:
			pickle.dump(best_model, f)

	files.append(model_name)
	files.append(folder)
	model_dir = os.getcwd()

	return model_name, model_dir, files
예제 #30
0
def test_session():
    """Run two ``BTBSession`` iterations over two tree-based regressors."""

    def tree_hyperparameters():
        # Both regressors are tuned over the same hyperparameter space; a fresh
        # dict is built for each of them.
        return {
            'n_estimators': {'type': 'int', 'default': 2, 'range': [1, 1000]},
            'max_features': {
                'type': 'str',
                'default': 'log2',
                'range': [None, 'auto', 'log2', 'sqrt'],
            },
            'min_samples_split': {'type': 'int', 'default': 2, 'range': [2, 20]},
            'min_samples_leaf': {'type': 'int', 'default': 2, 'range': [1, 20]},
        }

    models = {
        'random_forest': RandomForestRegressor,
        'extra_trees': ExtraTreesRegressor,
    }

    def build_model(name, hyperparameters):
        return models[name](random_state=0, **hyperparameters)

    def score_model(name, hyperparameters):
        # Mean cross-validated R2 of the candidate on the training split.
        candidate = build_model(name, hyperparameters)
        cv_scores = cross_val_score(
            candidate, X_train, y_train, scoring=make_scorer(r2_score))
        return cv_scores.mean()

    dataset = load_dataset()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, test_size=0.3, random_state=0)

    tunables = {
        'random_forest': tree_hyperparameters(),
        'extra_trees': tree_hyperparameters(),
    }

    session = BTBSession(tunables, score_model, verbose=True)
    session.run(2)