Python BTBSession.run Examples, btb.session.BTBSession.run Python Examples

Example #1

0

Show file

    def test_normalized_score_becomes_none(self):
        """Check that a tunable discarded after too many errors is not tried again.

        After commit ``6a08dc3cf1b68b35630cae6a87783aec4e2c9f83`` the following
        scenario has been observed:

        - One tunable produces a score at least once and then fails on later trials.
        - None of the other tunables ever produces a score.
        - Once all the tuners are created, only the one that produced a score is used.
        - After enough errors, this one is discarded, so `_normalized_errors` is empty.
        - Since a random.choice is used over the list of tunables, which still contains
          the one that has been discarded, at some point the discarded one is tried again.

        This test guards against that regression by validating that the number of
        errors recorded for each tunable is exactly ``max_errors`` once tuning stops.
        """
        # A list is mutated rather than rebinding a boolean flag, so the nested
        # scorer does not need a ``nonlocal``/``global`` declaration.
        produced_scores = []

        def scorer(name, proposal):
            """Return a score on the very first call and raise on every later call."""
            if produced_scores:
                raise Exception()

            produced_scores.append(1)
            return 1

        tunable_names = ('a_tunable', 'another_tunable')
        tunables = {
            tunable_name: {
                'a_parameter': {'type': 'int', 'default': 0, 'range': [0, 10]},
            }
            for tunable_name in tunable_names
        }

        session = BTBSession(tunables, scorer, max_errors=3)

        with pytest.raises(StopTuning):
            session.run(8)

        expected_errors = dict.fromkeys(tunable_names, 3)
        assert session.errors == expected_errors

Example #2

0

Show file

    def test_stop(self):
        """Running without an iteration limit is expected to raise ``StopTuning``."""
        parameter_spec = {'type': 'int', 'default': 0, 'range': [0, 2]}
        tunables = {'a_tunable': {'a_parameter': parameter_spec}}

        unbounded_session = BTBSession(tunables, self.scorer)

        with pytest.raises(StopTuning):
            unbounded_session.run()

Example #3

0

Show file

    def test_errors(self):
        """A tunable whose scorer always raises must not hide the best working proposal."""
        failing_name = 'another_tunable'

        def scorer(name, proposal):
            """Score with ``a_parameter``, except for the failing tunable, which raises."""
            if name != failing_name:
                return proposal['a_parameter']

            raise Exception()

        # Both tunables share the same single integer parameter definition.
        tunables = {
            tunable_name: {
                'a_parameter': {'type': 'int', 'default': 0, 'range': [0, 2]},
            }
            for tunable_name in ('a_tunable', failing_name)
        }

        session = BTBSession(tunables, scorer)
        winner = session.run(4)

        assert winner['name'] == 'a_tunable'
        assert winner['config'] == {'a_parameter': 2}

Example #4

0

Show file

File: modeler.py Project: srravula1/Cardea

    def tune(self, X, y, max_evals=10, scoring=None, verbose=False):
        """Search the pipeline hyper-parameters and apply the best configuration found.

        A ``BTBSession`` is run for ``max_evals`` iterations over the flat tunable
        hyper-parameters of the pipeline, scoring every proposal with
        ``self.k_fold_validation``. The ``'config'`` of the best proposal is then
        set on the pipeline.

        Args:
            X (pandas.DataFrame or ndarray):
                Inputs to the pipeline.
            y (pandas.Series or ndarray):
                Target values.
            max_evals (int):
                Maximum number of hyper-parameter optimization iterations. The same
                value is also passed to the session as ``max_errors``.
            scoring (str):
                The name of the scoring function.
            verbose (bool):
                Whether to log information during processing.
        """
        def score_proposal(_, hyparam):
            # The tunable name is ignored: a single tunable ('0') is registered.
            return self.k_fold_validation(hyparam, X=X, y=y, scoring=scoring)

        flat_hyperparameters = self._pipeline.get_tunable_hyperparameters(flat=True)
        session = BTBSession(
            {'0': flat_hyperparameters},
            score_proposal,
            max_errors=max_evals,
            verbose=verbose,
        )

        winner = session.run(max_evals)
        self._pipeline.set_hyperparameters(winner['config'])

Example #5

0

Show file

    def test_allow_duplicates(self):
        """With ``allow_duplicates=True`` a session can run more iterations than configs.

        The only tunable has three possible configurations (``a_parameter`` in
        ``[0, 2]``), yet ten iterations are requested, so proposals must repeat.
        """
        tunables = {
            'a_tunable': {
                'a_parameter': {
                    'type': 'int',
                    'default': 0,
                    'range': [0, 2]
                }
            }
        }

        session = BTBSession(tunables, self.scorer, allow_duplicates=True)

        best = session.run(10)

        # Only 'a_tunable' is registered, so it is the only name the best
        # proposal can carry; expecting 'another_tunable' could never pass.
        assert best['name'] == 'a_tunable'
        assert best['config'] == {'a_parameter': 2}

Example #6

0

Show file

    def test_run_score_none(self):
        """When the scorer raises, ``run`` records a ``None`` score and still returns."""
        # setup
        session_mock = MagicMock(spec_set=BTBSession)
        session_mock.propose.return_value = ('test', {'hp': 'test'})
        session_mock._scorer.side_effect = Exception()
        session_mock.best_proposal = {'test': 'config'}
        session_mock._range = range
        session_mock.iterations = 0

        # run: call the real method unbound, with the mock standing in for ``self``
        returned = BTBSession.run(session_mock, 1)

        # assert
        expected_name, expected_config = 'test', {'hp': 'test'}
        session_mock._scorer.assert_called_once_with(expected_name, expected_config)
        session_mock.record.assert_called_once_with(expected_name, expected_config, None)
        assert returned == {'test': 'config'}
        assert session_mock.iterations == 1

Example #7

0

Show file

    def test_run_score(self):
        """A successful scorer call is recorded with its score and ``run`` returns the best."""
        # setup
        session_mock = MagicMock(spec_set=BTBSession)
        session_mock.propose.return_value = ('test', 'config')
        session_mock._scorer.return_value = 1
        session_mock.best_proposal = {'test': 'config'}
        session_mock._range = range
        session_mock.iterations = 0

        # run: call the real method unbound, with the mock standing in for ``self``
        returned = BTBSession.run(session_mock, 1)

        # assert
        expected_proposal = ('test', 'config')
        session_mock._scorer.assert_called_once_with(*expected_proposal)
        session_mock.record.assert_called_once_with(*expected_proposal, 1)
        assert returned == {'test': 'config'}
        assert session_mock.iterations == 1

Example #8

0

Show file

    def test_minimize(self):
        """With ``maximize=False`` the best proposal is the one with ``a_parameter`` 0."""
        parameter_spec = {'type': 'int', 'default': 0, 'range': [0, 2]}
        tunables = {'a_tunable': {'a_parameter': parameter_spec}}

        session = BTBSession(tunables, self.scorer, maximize=False)
        winner = session.run(3)

        # The value returned by ``run`` must be what the session stores as its best.
        assert winner == session.best_proposal
        assert winner['name'] == 'a_tunable'
        assert winner['config'] == {'a_parameter': 0}

Example #9

0

Show file

    def test_allow_errors(self):
        """With ``max_errors=10`` a partly failing tunable still yields its working config."""
        def scorer(name, proposal):
            """Return 1 for any non-zero ``a_parameter`` and raise when it is 0."""
            if proposal['a_parameter'] != 0:
                return 1

            raise Exception()

        parameter_spec = {'type': 'int', 'default': 0, 'range': [0, 1]}
        tunables = {'a_tunable': {'a_parameter': parameter_spec}}

        session = BTBSession(tunables, scorer, max_errors=10)
        winner = session.run(10)

        assert winner['name'] == 'a_tunable'
        assert winner['config'] == {'a_parameter': 1}

Example #10

0

Show file

    def test_multiple(self):
        """Six iterations over two identical tunables must select ``another_tunable``."""
        # Each tunable gets its own copy of the same single-parameter definition.
        tunables = {
            tunable_name: {
                'a_parameter': {'type': 'int', 'default': 0, 'range': [0, 2]},
            }
            for tunable_name in ('a_tunable', 'another_tunable')
        }

        session = BTBSession(tunables, self.scorer)
        winner = session.run(6)

        assert winner['name'] == 'another_tunable'
        assert winner['config'] == {'a_parameter': 2}

Example #11

0

Show file

def train_btb(X_train,X_test,y_train,y_test,mtype,common_name_model,problemtype,classes,default_featurenames,transform_model,settings,model_session):
	"""Tune candidate models with a BTBSession, refit the best one and pickle it.

	A scratch folder ``btb_session`` is (re)created in the current working
	directory, the train/test CSV files of ``model_session`` are copied into
	it, and a dataset sub-folder named by ``create_json`` is prepared. For
	``mtype == 'c'`` a random forest and an SVC classifier are tuned; for
	``mtype == 'r'`` a random forest and an extra-trees regressor are tuned.
	The best proposal is refit on the training data, evaluated on the test
	data and dumped to ``<common_name_model>.pickle`` in the directory that
	was current when the function was called.

	Args:
		X_train, y_train: training features and targets used for the
			cross-validated tuning and for the final fit.
		X_test, y_test: held-out features and targets used to report the
			final accuracy (classification) or R2 score (regression).
		mtype (str): ``'c'`` for classification, ``'r'`` for regression.
			Any other value skips tuning and no pickle is written.
		common_name_model (str): base name of the pickle file; the text
			before its first ``'_'`` is the prefix of the CSV files.
		problemtype, classes, default_featurenames, transform_model, settings:
			not used by this function.
		model_session (str): folder (relative to the current working
			directory) whose ``data`` sub-folder holds the CSV files.

	Returns:
		tuple: ``(model_name, model_dir, files)`` where ``model_name`` is the
		pickle file name, ``model_dir`` is the working directory at return
		time and ``files`` lists the pickle name and the scratch folder name.
	"""
	# create file names
	model_name=common_name_model+'.pickle'
	folder='btb_session'
	csvname=common_name_model.split('_')[0]
	curdir=os.getcwd()
	files=[]

	# make a temporary folder for the training session, replacing a stale one
	try:
		os.mkdir(folder)
	except FileExistsError:
		shutil.rmtree(folder)
		os.mkdir(folder)
	os.chdir(folder)

	# get training and testing data, preferring the transformed files and
	# falling back to the raw ones when the transformed copies cannot be read
	data_prefix=curdir+'/'+model_session+'/data/'+csvname
	workdir=os.getcwd()
	try:
		shutil.copy(data_prefix+'_train_transformed.csv',workdir+'/train.csv')
		shutil.copy(data_prefix+'_test_transformed.csv',workdir+'/test.csv')
	except OSError:
		shutil.copy(data_prefix+'_train.csv',workdir+'/train.csv')
		shutil.copy(data_prefix+'_test.csv',workdir+'/test.csv')

	# create required .JSON
	dataset_id, _filename=create_json(folder, 'train.csv')
	os.mkdir(dataset_id)
	os.chdir(dataset_id)
	os.mkdir('tables')
	shutil.copy(curdir+'/'+folder+'/train.csv', os.getcwd()+'/tables/train.csv')

	if mtype=='c':

		rf_hyperparams = {'n_estimators': IntHyperParam(min=10, max=500),
						'max_depth': IntHyperParam(min=10, max=500)}

		rf_tunable = Tunable(rf_hyperparams)
		print(rf_tunable)

		svc_hyperparams = {'C': FloatHyperParam(min=0.01, max=10.0),
							'gamma': FloatHyperParam(0.000000001, 0.0000001)}

		svc_tunable = Tunable(svc_hyperparams)
		print(svc_tunable)

		tuners = {'RF': rf_tunable,
				  'SVC': svc_tunable}

		print(tuners)

		models = {'RF': RandomForestClassifier,
				  'SVC': SVC}

		def build_model(name, hyperparameters):
			"""Instantiate the classifier registered under ``name``."""
			model_class = models[name]
			return model_class(random_state=0, **hyperparameters)

		def score_model(name, hyperparameters):
			"""Return the mean cross-validation score of a proposal."""
			model = build_model(name, hyperparameters)
			scores = cross_val_score(model, X_train, y_train)
			return scores.mean()

		session = BTBSession(tuners, score_model, verbose=True)
		best_proposal = session.run(iterations=100)
		best_model = build_model(best_proposal['name'], best_proposal['config'])
		best_model.fit(X_train, y_train)
		accuracy = best_model.score(X_test, y_test)

		print('ACCURACY:')
		print(accuracy)

		# now save the model in .pickle (written next to the caller's cwd)
		os.chdir(curdir)
		with open(model_name,'wb') as f:
			pickle.dump(best_model, f)

	elif mtype == 'r':

		tunables = {
			'random_forest': {
				'n_estimators': {'type': 'int', 'default': 2, 'range': [1, 1000]},
				'max_features': {'type': 'str', 'default': 'log2', 'range': [None, 'auto', 'log2', 'sqrt']},
				'min_samples_split': {'type': 'int', 'default': 2, 'range': [2, 20]},
				'min_samples_leaf': {'type': 'int', 'default': 2, 'range': [1, 20]},
			},
			'extra_trees': {
				'n_estimators': {'type': 'int', 'default': 2, 'range': [1, 1000]},
				'max_features': {'type': 'str', 'default': 'log2', 'range': [None, 'auto', 'log2', 'sqrt']},
				'min_samples_split': {'type': 'int', 'default': 2, 'range': [2, 20]},
				'min_samples_leaf': {'type': 'int', 'default': 2, 'range': [1, 20]},
			}
		}

		models = {
			'random_forest': RandomForestRegressor,
			'extra_trees': ExtraTreesRegressor,
		}

		def build_model(name, hyperparameters):
			"""Instantiate the regressor registered under ``name``."""
			model_class = models[name]
			return model_class(random_state=0, **hyperparameters)

		def score_model(name, hyperparameters):
			"""Return the mean cross-validated R2 score of a proposal."""
			model = build_model(name, hyperparameters)
			r2_scorer = make_scorer(r2_score)
			scores = cross_val_score(model, X_train, y_train, scoring=r2_scorer)
			return scores.mean()

		session = BTBSession(tunables, score_model, verbose=True)
		best_proposal = session.run(iterations=100)
		best_model = build_model(best_proposal['name'], best_proposal['config'])

		best_model.fit(X_train, y_train)
		pred = best_model.predict(X_test)

		# The result must NOT be bound to the name ``r2_score``: assigning to it
		# anywhere in this function makes it a local, which leaves the metric
		# unbound here and inside the ``score_model`` closure above.
		test_r2 = r2_score(y_test, pred)

		print('R2 score!!')
		print(test_r2)

		# now save the model in .pickle (written next to the caller's cwd)
		os.chdir(curdir)
		with open(model_name,'wb') as f:
			pickle.dump(best_model, f)

	files.append(model_name)
	files.append(folder)
	model_dir=os.getcwd()

	return model_name, model_dir, files

Example #12

0

Show file

def test_session():
    """Smoke-test a two-iteration ``BTBSession`` run over two tree-ensemble regressors."""
    def tree_ensemble_space():
        """Return a fresh copy of the search space used for both estimators."""
        return {
            'n_estimators': {'type': 'int', 'default': 2, 'range': [1, 1000]},
            'max_features': {
                'type': 'str',
                'default': 'log2',
                'range': [None, 'auto', 'log2', 'sqrt'],
            },
            'min_samples_split': {'type': 'int', 'default': 2, 'range': [2, 20]},
            'min_samples_leaf': {'type': 'int', 'default': 2, 'range': [1, 20]},
        }

    dataset = load_dataset()
    X_train, _X_test, y_train, _y_test = train_test_split(
        dataset.data,
        dataset.target,
        test_size=0.3,
        random_state=0,
    )

    estimators = {
        'random_forest': RandomForestRegressor,
        'extra_trees': ExtraTreesRegressor,
    }
    tunables = {estimator_name: tree_ensemble_space() for estimator_name in estimators}

    def score_model(name, hyperparameters):
        """Return the mean cross-validated R2 score of the proposed estimator."""
        estimator = estimators[name](random_state=0, **hyperparameters)
        fold_scores = cross_val_score(
            estimator, X_train, y_train, scoring=make_scorer(r2_score))
        return fold_scores.mean()

    session = BTBSession(tunables, score_model, verbose=True)
    session.run(2)