def run_decisiontree(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_train: pd.DataFrame,
    y_test: pd.DataFrame,
    config: Dict[str, Any],
) -> Any:
    """Search decision tree hyperparameters, cross-validate and return the model.

    Args:
        X_train: Training features.
        X_test: Held-out features; only read when ``config["test"]`` is truthy.
        y_train: Training labels.
        y_test: Held-out labels; only read when ``config["test"]`` is truthy.
        config: Run configuration. It is forwarded to ``hyperopt_search`` and
            its ``"test"`` entry switches the held-out report on.

    Returns:
        The pipeline wrapping the tuned ``DecisionTreeClassifier``, fitted on
        the training data.
    """
    LOGGER.info("Finding best decision tree..")
    search_space = {
        "type": "decision_tree",
        "max_depth": hp.uniformint("max_depth", 2, 15),
        # The hyperopt label has to match the parameter it samples; it used to
        # be "n_estimators", which is not a decision tree parameter.
        "min_samples_split": hp.uniformint("min_samples_split", 2, 20),
        # "class_weight": hp.choice("class_weight", ["balanced"]),
    }
    best_params = hyperopt_search(X_train, y_train, search_space, config)

    # Fit explicitly (as run_gradient_boosting_classifier does) so that the
    # returned pipeline and the predict() call below work on a fitted model.
    model = make_pipeline(DecisionTreeClassifier(**best_params)).fit(
        X_train, y_train)

    mean_cross_val_score = cross_validate_model(model, X_train, y_train)
    LOGGER.info(
        f"Decision tree cross validation score: {mean_cross_val_score}")

    if config["test"]:
        # classification_report expects (y_true, y_pred); the reversed order
        # swaps precision and recall in the printed report.
        print(classification_report(y_test, model.predict(X_test)))
    return model
class KMeansOptimizationJob(HyperparameterOptimizationJob):
    """Hyperparameter optimization job that clusters URL vectors with k-means."""

    app_name: str = "KMeansOptimizationJob"

    # Single-option choice wrapping the k-means parameter priors.
    search_space: Apply = hp.choice('model', [
        ('kmeans', {
            'k': hp.uniformint('k', 4, 20),
            'distance_measure': hp.choice("distance_measure",
                                          ['euclidean', 'cosine']),
            'window_size': hp.uniformint('window_size', 2, 5),
            's': hp.uniform('s', .5, 5),
        }),
    ])

    def __init__(
        self,
        get_cluster_score: Callable[[DataFrame], float] = None,
        additional_weight_function: Callable[[int], float] = lambda e: 1.0
    ) -> None:
        """Store the weight function and, if given, a custom cluster scorer.

        Args:
            get_cluster_score: Optional scorer; when truthy it is set on the
                instance as ``get_cluster_score``.
            additional_weight_function: Callable stored on the instance; the
                default returns 1.0 for every input.
        """
        self.additional_weight_function = additional_weight_function
        if not get_cluster_score:
            return
        self.get_cluster_score = get_cluster_score

    def get_clusters(self, parameters: dict,
                     urls_and_vectors: DataFrame) -> DataFrame:
        """Build a k-means model from ``parameters`` and transform the input.

        Args:
            parameters: Sampled values; ``"k"`` and ``"distance_measure"``
                are read.
            urls_and_vectors: DataFrame handed to ``KMeansJob.get_model`` and
                to the resulting model's ``transform``.

        Returns:
            The DataFrame produced by the model's ``transform``.
        """
        cached_vectors = urls_and_vectors.cache()
        kmeans_model = KMeansJob.get_model(
            cached_vectors,
            parameters["k"],
            parameters["distance_measure"],
        )
        clustered = kmeans_model.transform(cached_vectors)
        cached_vectors.unpersist()
        return clustered
def test_uniformint_arguments(arguments):
    """Check that uniformint accepts positional as well as keyword arguments.

    ``arguments`` is either a list (unpacked positionally) or a dict
    (unpacked as keywords). Related to PR #704.
    """
    if isinstance(arguments, list):
        space = hp.uniformint(*arguments)
    elif isinstance(arguments, dict):
        space = hp.uniformint(**arguments)

    expected = [7, 3, 2, 6, 7, 4, 10, 7, 5, 4]
    rng = np.random.RandomState(123)
    values = []
    for _ in range(len(expected)):
        values.append(sample(space, rng=rng))
    assert values == expected
def test_uniformint_arguments(arguments):
    """Check that uniformint accepts positional as well as keyword arguments.

    A list is forwarded positionally and a dict as keywords; ten draws from a
    seeded PCG64 generator are compared with fixed values. Related to PR #704.
    """
    if isinstance(arguments, list):
        positional, keywords = arguments, {}
    elif isinstance(arguments, dict):
        positional, keywords = [], arguments
    space = hp.uniformint(*positional, **keywords)

    rng = np.random.default_rng(np.random.PCG64(123))
    draws = [sample(space, rng=rng) for _ in range(10)]
    expected_draws = [7, 1, 2, 2, 2, 8, 9, 3, 8, 9]
    assert draws == expected_draws
def testConvertHyperOpt(self):
    """Compare a converted Tune search space with a hand-written hyperopt one.

    Two identically seeded searchers, one per space, must suggest the same
    configuration; the suggestion is then range-checked. A Tune run with the
    raw config and a mixed Tune/hyperopt space are exercised afterwards.
    """
    from ray.tune.suggest.hyperopt import HyperOptSearch
    from hyperopt import hp

    # Grid search not supported, should raise ValueError
    with self.assertRaises(ValueError):
        HyperOptSearch.convert_search_space(
            {"grid": tune.grid_search([0, 1])})

    config = {
        "a": tune.sample.Categorical([2, 3, 4]).uniform(),
        "b": {
            "x": tune.sample.Integer(-15, -10),
            "y": 4,
            "z": tune.sample.Float(1e-4, 1e-2).loguniform(),
        },
    }
    converted_space = HyperOptSearch.convert_search_space(config)
    native_space = {
        "a": hp.choice("a", [2, 3, 4]),
        "b": {
            "x": hp.uniformint("x", -15, -10),
            "y": 4,
            "z": hp.loguniform("z", np.log(1e-4), np.log(1e-2)),
        },
    }

    # Both searchers share the seed so their first suggestions are comparable.
    shared_kwargs = dict(random_state_seed=1234, metric="a", mode="max")
    converted_searcher = HyperOptSearch(space=converted_space,
                                        **shared_kwargs)
    native_searcher = HyperOptSearch(space=native_space, **shared_kwargs)

    suggested = converted_searcher.suggest("0")
    reference = native_searcher.suggest("0")

    self.assertEqual(suggested, reference)
    self.assertIn(suggested["a"], [2, 3, 4])
    nested = suggested["b"]
    self.assertIn(nested["x"], list(range(-15, -10)))
    self.assertEqual(nested["y"], 4)
    self.assertLess(1e-4, nested["z"])
    self.assertLess(nested["z"], 1e-2)

    searcher = HyperOptSearch(metric="a", mode="max")
    analysis = tune.run(
        _mock_objective,
        config=config,
        search_alg=searcher,
        num_samples=1,
    )
    first_trial = analysis.trials[0]
    assert first_trial.config["a"] in [2, 3, 4]

    mixed_config = {"a": tune.uniform(5, 6), "b": hp.uniform("b", 8, 9)}
    searcher = HyperOptSearch(space=mixed_config, metric="a", mode="max")
    config = searcher.suggest("0")
    self.assertTrue(5 <= config["a"] <= 6)
    self.assertTrue(8 <= config["b"] <= 9)
def _search_space_from_dict(dict_hyperparams): hyperparams = {} if not isinstance(dict_hyperparams, dict): raise TypeError('Hyperparams must be a dictionary.') for name, hyperparam in dict_hyperparams.items(): hp_type = hyperparam['type'] if hp_type == 'int': hp_range = hyperparam.get('range') or hyperparam.get('values') hp_min = min(hp_range) if hp_range else None hp_max = max(hp_range) if hp_range else None hp_instance = hp.uniformint(name, hp_min, hp_max) elif hp_type == 'float': hp_range = hyperparam.get('range') or hyperparam.get('values') hp_min = min(hp_range) hp_max = max(hp_range) hp_instance = hp.uniform(name, hp_min, hp_max) elif hp_type == 'bool': hp_instance = hp.choice(name, [True, False]) elif hp_type == 'str': hp_choices = hyperparam.get('range') or hyperparam.get('values') hp_instance = hp.choice(name, hp_choices) hyperparams[name] = hp_instance return hyperparams
def full_hyper_space(self):
    """Extend the parent hyperparameter space with the n-gram upper bound.

    Returns:
        The ``(hyper_space, hyper_choices)`` pair obtained from the parent
        class, with a ``"fex_ngram_max"`` entry (``hp.uniformint`` with
        bounds 1 and 3) added to ``hyper_space``.
    """
    from hyperopt import hp

    parent_result = super(Tfidf, self).full_hyper_space()
    hyper_space, hyper_choices = parent_result
    hyper_space["fex_ngram_max"] = hp.uniformint("fex_ngram_max", 1, 3)
    return hyper_space, hyper_choices
def run_knn(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_train: pd.DataFrame,
    y_test: pd.DataFrame,
    config: Dict[str, Any],
) -> Any:
    """Find KNN hyperparameters, cross-validate and return the fitted model.

    Args:
        X_train: Training features.
        X_test: Held-out features; only read when ``config["test"]`` is truthy.
        y_train: Training labels.
        y_test: Held-out labels; only read when ``config["test"]`` is truthy.
        config: Run configuration. It is forwarded to ``hyperopt_search`` and
            ``get_scaler``; its ``"test"`` entry switches the held-out report
            on.

    Returns:
        The pipeline (scaler followed by the tuned ``KNeighborsClassifier``),
        fitted on the training data.
    """
    LOGGER.info("Finding best knn..")
    search_space = {
        "type": "knn",
        "n_neighbors": hp.uniformint("n_neighbors", 2, 15),
        "weights": hp.choice("weights", ["uniform", "distance"]),
    }
    best_params = hyperopt_search(X_train, y_train, search_space, config)

    # Fit explicitly (as run_gradient_boosting_classifier does) so that the
    # returned pipeline and the predict() call below work on a fitted model.
    model = make_pipeline(
        get_scaler(config),
        KNeighborsClassifier(**best_params),
    ).fit(X_train, y_train)

    mean_cross_val_score = cross_validate_model(model, X_train, y_train)
    LOGGER.info(f"KNN cross validation score: {mean_cross_val_score}")

    if config["test"]:
        # classification_report expects (y_true, y_pred); the reversed order
        # swaps precision and recall in the printed report.
        print(classification_report(y_test, model.predict(X_test)))
    return model
def run_gradient_boosting_classifier(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_train: pd.DataFrame,
    y_test: pd.DataFrame,
    config: Dict[str, Any],
) -> Tuple[Any, Any]:
    """Train a gradient boosting classifier, optionally tuning it first.

    When ``config["find_optimal_model"]`` is truthy the hyperparameters come
    from ``hyperopt_search`` and the fitted model is cross-validated;
    otherwise they are read from ``config["models"]["gradient_boosting"]``.

    Args:
        X_train: Training features.
        X_test: Held-out features; only read when ``config["test"]`` is truthy.
        y_train: Training labels.
        y_test: Held-out labels; only read when ``config["test"]`` is truthy.
        config: Run configuration (``"find_optimal_model"``, ``"models"`` and
            ``"test"`` entries are read here).

    Returns:
        ``(model, y_pred)`` when ``config["test"]`` is truthy, where
        ``y_pred`` are the predictions for ``X_test``; ``(model, None)``
        otherwise.
    """
    LOGGER.info("Running Gradient boosting classifier..")

    if config["find_optimal_model"]:
        search_space = {
            "type": "gradient_boosting",
            "max_depth": hp.uniformint("max_depth", 2, 15),
            "n_estimators": hp.uniformint("n_estimators", 50, 300),
            # NOTE(review): "auto" is no longer accepted for max_features by
            # recent scikit-learn releases - confirm against the pinned
            # version.
            "max_features": hp.choice("max_features", ("auto", "sqrt", None)),
            "learning_rate": hp.quniform("learning_rate", 0.025, 0.5, 0.025),
        }
        best_params = hyperopt_search(X_train, y_train, search_space, config)
        model = make_pipeline(GradientBoostingClassifier(**best_params)).fit(
            X_train, y_train)
        mean_cross_val_score = cross_validate_model(model, X_train, y_train)
        LOGGER.info(
            f"Gradient boosting classifier cross validation score: {mean_cross_val_score}"
        )
    else:
        model = make_pipeline(
            GradientBoostingClassifier(
                **config["models"]["gradient_boosting"])).fit(
                    X_train, y_train)

    if not config["test"]:
        return model, None

    # Predict once and reuse the result for the report and the accuracy.
    y_pred = model.predict(X_test)
    # classification_report expects (y_true, y_pred); the reversed order swaps
    # precision and recall in the printed report.
    print(classification_report(y_test, y_pred))
    score = accuracy_score(y_pred=y_pred, y_true=y_test)
    # The score is computed on the held-out split, so report it as such.
    LOGGER.info(
        f"The gradient boosting classifier has a test accuracy of {score}"
    )
    return model, y_pred
def test_remove_allpaths_int():
    """Both "a" and the integer "z" it depends on keep the root condition.

    "z" appears in every option of the choice "a", so expr_to_config is
    expected to record only the ``(True,)`` condition for each of them.
    """
    z = hp.uniformint("z", 0, 10)
    options = [z + 1, z - 1]
    a = hp.choice("a", options)

    hps = {}
    expr_to_config(a, (True,), hps)

    aconds, zconds = (hps[label]["conditions"] for label in ("a", "z"))
    root_only = {(True,)}
    assert aconds == root_only, aconds
    assert zconds == root_only, zconds
def test_remove_allpaths_int():
    """expr_to_config keeps a single root condition for "a" and for "z".

    The integer hyperparameter "z" is used by both branches of the choice
    "a"; each label must end up with exactly the ``(True,)`` condition.
    """
    expected_conditions = {(True,)}

    z = hp.uniformint('z', 0, 10)
    a = hp.choice('a', [z + 1, z - 1])

    hps = dict()
    expr_to_config(a, (True,), hps)

    aconds = hps['a']['conditions']
    zconds = hps['z']['conditions']
    assert aconds == expected_conditions, aconds
    assert zconds == expected_conditions, zconds
def test_remove_allpaths_int():
    """An integer hyperparameter shared by all choice branches stays unconditional.

    After expr_to_config runs on the choice "a", both "a" and "z" must carry
    only the ``(True,)`` condition.
    """
    z = hp.uniformint('z', 0, 10)
    plus_one = z + 1
    minus_one = z - 1
    a = hp.choice('a', [plus_one, minus_one])

    hps = {}
    expr_to_config(a, (True,), hps)

    conditions = {label: hps[label]['conditions'] for label in ('a', 'z')}
    assert conditions['a'] == {(True,)}, conditions['a']
    assert conditions['z'] == {(True,)}, conditions['z']
def full_hyper_space(self):
    """Extend the parent hyperparameter space with n-gram and stop-word entries.

    Returns:
        The ``(hyper_space, hyper_choices)`` pair obtained from the parent
        class. ``hyper_choices`` gains the ``"fex_stop_words"`` options and
        ``hyper_space`` gains the ``"fex_ngram_max"`` and
        ``"fex_stop_words"`` priors.
    """
    from hyperopt import hp

    hyper_space, hyper_choices = super(Tfidf, self).full_hyper_space()

    hyper_choices["fex_stop_words"] = ["english", "none"]

    ngram_prior = hp.uniformint("fex_ngram_max", 1, 3)
    stop_words_prior = hp.choice('fex_stop_words',
                                 hyper_choices["fex_stop_words"])
    hyper_space["fex_ngram_max"] = ngram_prior
    hyper_space["fex_stop_words"] = stop_words_prior

    return hyper_space, hyper_choices
def run_random_forest(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_train: pd.DataFrame,
    y_test: pd.DataFrame,
    config: Dict[str, Any],
) -> Tuple[Any, Any]:
    """Train a random forest classifier, optionally tuning it first.

    When ``config["find_optimal_model"]`` is truthy the hyperparameters come
    from ``hyperopt_search`` and the fitted model is cross-validated;
    otherwise they are read from ``config["models"]["random_forest"]``.

    Args:
        X_train: Training features.
        X_test: Held-out features; only read when ``config["test"]`` is truthy.
        y_train: Training labels.
        y_test: Held-out labels; only read when ``config["test"]`` is truthy.
        config: Run configuration (``"find_optimal_model"``, ``"models"`` and
            ``"test"`` entries are read here).

    Returns:
        ``(model, y_pred)`` when ``config["test"]`` is truthy, where
        ``y_pred`` are the predictions for ``X_test``; ``(model, None)``
        otherwise.
    """
    LOGGER.info("Running Random Forest model..")

    if config["find_optimal_model"]:
        search_space = {
            "type": "random_forest",
            "max_depth": hp.uniformint("max_depth", 2, 30),
            "n_estimators": hp.uniformint("n_estimators", 10, 1000),
            # NOTE(review): "auto" is no longer accepted for max_features by
            # recent scikit-learn releases - confirm against the pinned
            # version.
            "max_features": hp.choice("max_features", ("auto", "sqrt", None)),
        }
        best_params = hyperopt_search(X_train, y_train, search_space, config)
        # Fit here too: the else-branch and the gradient boosting runner both
        # return a fitted pipeline, and predict() below requires one.
        model = make_pipeline(RandomForestClassifier(**best_params)).fit(
            X_train, y_train)
        mean_cross_val_score = cross_validate_model(model, X_train, y_train)
        LOGGER.info(
            f"Random Forest classifier cross validation score: {mean_cross_val_score}"
        )
    else:
        model = make_pipeline(
            RandomForestClassifier(**config["models"]["random_forest"])).fit(
                X_train, y_train)

    if not config["test"]:
        return model, None

    # Predict once and reuse the result for the report and the accuracy.
    y_pred = model.predict(X_test)
    # classification_report expects (y_true, y_pred); the reversed order swaps
    # precision and recall in the printed report.
    print(classification_report(y_test, y_pred))
    score = accuracy_score(y_pred=y_pred, y_true=y_test)
    # The score is computed on the held-out split, so report it as such.
    LOGGER.info(f"Random forest model has a test accuracy of {score}")
    return model, y_pred
def tune_custom_model_a_hyperparameters(
        episodes_folder: str,
        save_folder: str,
        potential_training_file_nbs: List[int],
        potential_validation_file_nbs: List[int],
        cpickled_trials_path: str = None):
    """Run (or resume) TPE tuning of model A, one evaluation at a time.

    The hyperopt ``Trials`` object is written to ``cpickled_trials_path``
    (lzma-compressed via compress-pickle) after every evaluation, so an
    interrupted tuning session can be resumed by pointing at the same file.
    When no path is given, ``Trials.xz`` inside ``save_folder`` is used.

    Args:
        episodes_folder: Passed through to ``tune_model_a`` inside the
            search space.
        save_folder: Passed through to ``tune_model_a``; also the default
            location of the trials file.
        potential_training_file_nbs: Passed through to ``tune_model_a``.
        potential_validation_file_nbs: Passed through to ``tune_model_a``.
        cpickled_trials_path: Optional path of the trials file to resume from
            and save to.
    """
    if cpickled_trials_path is None:
        cpickled_trials_path = os.path.join(save_folder, "Trials.xz")

    if os.path.exists(cpickled_trials_path):
        # NOTE: this unpickles the file - only resume from trusted files.
        with open(cpickled_trials_path, 'rb') as file:
            trials = compress_pickle.load(file, compression="lzma")
        current_nb_runs = len(trials.trials)
    else:
        trials = Trials()
        current_nb_runs = 0

    # The single-option choices are not real choices: they only carry the
    # extra arguments through to tune_model_a.
    search_space = (
        hp.loguniform('learning_rate', math.log(10**-5), math.log(10**-3)),
        hp.loguniform('regularization_strength', math.log(10**-4),
                      math.log(10**-2)),
        hp.uniformint('nb_frames_to_stack', 2, 25),
        hp.choice('episodes_folder', [episodes_folder]),
        hp.choice('save_folder', [save_folder]),
        hp.choice('potential_training_file_nbs',
                  [potential_training_file_nbs]),
        hp.choice('potential_validation_file_nbs',
                  [potential_validation_file_nbs]),
    )

    best_hyperparameters = None
    while current_nb_runs < TUNING_NB_RUNS:
        # Raising max_evals by one per call makes fmin perform exactly one
        # more evaluation (note: this messes with the progress bar).
        best_hyperparameters = fmin(
            tune_model_a,
            space=search_space,
            algo=tpe.suggest,
            max_evals=current_nb_runs + 1,
            trials=trials)
        current_nb_runs += 1  # now equal to len(trials.trials)

        # Save after every tuning run
        with open(cpickled_trials_path, "wb") as file:
            compress_pickle.dump(trials, file, compression="lzma")

    print(best_hyperparameters)
    print(trials.best_trial["result"]["loss"])
class BisectingKMeansOptimizationJob(HyperparameterOptimizationJob):
    """Hyperparameter optimization job that clusters URL vectors with bisecting k-means."""

    app_name: str = "BisectingKMeansOptimizationJob"

    # Single-option choice wrapping the bisecting k-means parameter priors.
    search_space: Apply = hp.choice('model', [
        ('bisecting_kmeans', {
            'k': hp.uniformint('k', 4, 20),
            'distance_measure': hp.choice("distance_measure",
                                          ['euclidean', 'cosine']),
            'window_size': hp.uniformint('window_size', 2, 5),
            's': hp.uniform('s', .5, 5),
        }),
    ])

    def __init__(self,
                 get_cluster_score: Callable[[DataFrame], float] = None,
                 additional_weight_function: Callable[[int], float] = lambda e: 1.0) -> None:
        """Store the weight function and, if given, a custom cluster scorer.

        Args:
            get_cluster_score: Optional scorer; when truthy it is set on the
                instance as ``get_cluster_score``.
            additional_weight_function: Callable stored on the instance; the
                default returns 1.0 for every input.
        """
        self.additional_weight_function = additional_weight_function
        if not get_cluster_score:
            return
        self.get_cluster_score = get_cluster_score

    def get_clusters(self, parameters: dict,
                     urls_and_vectors: DataFrame) -> DataFrame:
        """Fit bisecting k-means on the input and return the transformed data.

        Args:
            parameters: Sampled values; ``'k'`` and ``'distance_measure'``
                are read.
            urls_and_vectors: DataFrame to fit on and transform; features are
                read from its "vector" column.

        Returns:
            The DataFrame produced by the fitted model's ``transform``, with
            predictions in the "cluster_id" column.
        """
        cached_vectors = urls_and_vectors.cache()

        estimator = BisectingKMeans().setK(parameters['k'])
        estimator = estimator.setDistanceMeasure(
            parameters['distance_measure'])
        estimator = estimator.setFeaturesCol("vector")
        estimator = estimator.setPredictionCol("cluster_id")

        fitted = estimator.fit(cached_vectors)
        clustered = fitted.transform(cached_vectors)
        cached_vectors.unpersist()
        return clustered
def _get_advisor(self):
    """Fill ``self._advisor`` with one hyperopt prior per training parameter.

    Each value in ``self._train_params`` is a sequence whose first item is a
    kind tag and whose second item holds the options (for ``'choice'``) or
    the two bounds (otherwise). Bounded parameters become ``hp.uniformint``
    when the model class reports the type ``'int'`` for them and
    ``hp.uniform`` in every other case. Any exception is logged and
    swallowed.
    """
    logger.log(logging.INFO, 'Start Getting Train job Advisor')
    try:
        param_types = self._model_class.get_param_type()
        for param_key, param_value_list in self._train_params.items():
            if param_value_list[0] == 'choice':
                prior = hp.choice(param_key, param_value_list[1])
            else:
                is_int = param_types.get(param_key) == 'int'
                build_prior = hp.uniformint if is_int else hp.uniform
                prior = build_prior(param_key,
                                    param_value_list[1][0],
                                    param_value_list[1][1])
            self._advisor[param_key] = prior
        logger.log(logging.INFO, 'Finish Getting Train job Advisor')
    except Exception as e:
        logger.log(logging.ERROR, repr(e))
'promotion_category': str, 'marketing_campaign': str, 'mouse_y': str, 'marketing_channel': str, 'marketing_creative_sub': str, 'site_level': str, 'acquired_date': str } ENGINEERING_PARAM_GRID = { 'preprocessor__numeric_transformer__log_creator__take_log': hp.choice( 'preprocessor__numeric_transformer__log_creator__take_log', ['yes', 'no']), 'preprocessor__categorical_transformer__category_combiner__combine_categories': hp.choice( 'preprocessor__categorical_transformer__category_combiner__combine_categories', ['yes', 'no']), 'preprocessor__categorical_transformer__feature_selector__percentile': hp.uniformint( 'preprocessor__categorical_transformer__feature_selector__percentile', 1, 100), 'preprocessor__numeric_transformer__feature_selector__percentile': hp.uniformint( 'preprocessor__numeric_transformer__feature_selector__percentile', 1, 100), } FOREST_PARAM_GRID = { 'model__base_estimator__max_depth': hp.uniformint('model__base_estimator__max_depth', 3, 16), 'model__base_estimator__min_samples_leaf': hp.uniform('model__base_estimator__min_samples_leaf', 0.001, 0.01), 'model__base_estimator__max_features': hp.choice('model__base_estimator__max_features', ['log2', 'sqrt']), } XGBOOST_PARAM_GRID = { 'model__base_estimator__learning_rate': hp.uniform('model__base_estimator__learning_ratee', 0.01, 0.5), 'model__base_estimator__n_estimators': hp.randint('model__base_estimator__n_estimators', 75, 150), 'model__base_estimator__max_depth': hp.randint('model__base_estimator__max_depth', 3, 16), 'model__base_estimator__min_child_weight': hp.uniformint('model__base_estimator__min_child_weight', 2, 16),
'logsig': True, 'sig_depth': 2, 'add_time': True, "use_timestamps": True, "t_max": hp.uniform('t_max', 0, 1), # not to explode activations "t_scale": hp.uniform('t_scale', 86400, 604800), # days and weeks 'leadlag': True, "split_paths": False, "min_count": 5, "batch_size": 128, "d_embedding": scope.int(hp.quniform('d_embedding', 16, 64, 1)), "epochs": 20, "lr": hp.loguniform('lr', np.log(1e-5), np.log(1e-2)), "wd": hp.loguniform('wd', np.log(1e-7), np.log(1e-4)), "patience": 5, "feedforward_num_layers": hp.uniformint('feedforward_num_layers', 1, 2), "embedding_dropout_p": 0, "verbose": True, # "testing_subsample_size": 1000 "feedforward_hidden_dims": scope.int(hp.quniform('feedforward_hidden_dims', 32, 256, 4)), "feedforward_activations": "relu", "feedforward_dropout": hp.uniform('feedforward_dropout', 0, 0.7), "training_proportion": p, "evaluate_on_test": False } space.update({ "name": "{name}_{version}_logsig{logsig}_sigdepth{sig_depth}_leadlag{leadlag}_addtime_{add_time}_timestamps{use_timestamps}_allcode{all_code_types}_trainprop{training_proportion}".format_map( space)}) # Set mongo trail name
kf = StratifiedShuffleSplit(10, test_size=0.5, random_state=42) ep_fake = np.empty((288, 22, 500)) lb_fake = np.r_[1 * np.ones(int(len(ep_fake) / 2)), 2 * np.ones(int(len(ep_fake) / 2))] for idx_search, idx_test in kf.split(ep_fake, lb_fake): # print('Search:', idx_search, '\n\n') # print('Test:', idx_test, '\n\n') ACC = [] for suj in subjects: for class_ids in classes: H = H.iloc[0:0] # cleaning df print(f'###### {suj} {class_ids} ######') fl_ = hp.uniformint("fl", 0, 15) fh_ = hp.uniformint("fh", 10, 25) space = ( { "fl": fl_ }, { "fh": (fh_ + fl_) }, # fl_ + 20, # hp.uniform("fh", (10 + fl_), 40), hp.quniform('tmin', 0, 2, 0.5), hp.quniform('tmax', 2, 4, 0.5), # hp.quniform('ncomp', 2, 10, 2), hp.choice('ncomp', [2, 4, 6, 8, 22]), hp.uniformint('nbands', 1, 25), # hp.quniform('reg', -5, 0, 1), hp.pchoice(
from ray.tune.suggest.hyperopt import HyperOptSearch from tune_demo.train import get_iris_data, rf_cv # %% def eval_model(config): X, y = get_iris_data() for i in range(5): acc = rf_cv(config, X, y) tune.track.log(acc=acc) # %% space = { "max_depth": hp.uniformint("max_depth", 1, 20), "n_estimators": hp.uniformint("n_estimators", 10, 1000), } hyperopt_search = HyperOptSearch( space=space, max_concurrent=4, metric="acc", gamma=0.2, ) analysis = tune.run( eval_model, num_samples=50, scheduler=ASHAScheduler(metric="acc", mode="max"), search_alg=hyperopt_search,
'clf_details', 'as_train', 'as_test', 'sb_dft', 'sb_iir', 'cla_dft', 'cla_iir' ] R = pd.DataFrame(columns=header) ##%% ########################################################################### for suj in subjects: sname = 'A0' + str(suj) + '' data, events, info = np.load('/mnt/dados/eeg_data/IV2a/npy/' + sname + '.npy', allow_pickle=True) for class_ids in classes: # data, events, info = np.load('/mnt/dados/eeg_data/IV2a/npy/'+sname+'T.npy', allow_pickle=True) print(f'###### {suj} {class_ids} ######') space = ( hp.uniformint('fl', 0, 20), hp.uniformint('fh', 21, 50), hp.quniform('tmin', 0, 2, 0.5), hp.quniform('tmax', 2, 4, 0.5), hp.quniform('ncomp', 2, 22, 2), hp.uniformint('nbands', 1, 50), # hp.choice('clf', [{ 'model': 'LDA' }, { 'model': 'SVM', 'C': hp.quniform('C', -8, 0, 1), 'kernel': hp.choice('kernel', [{ 'kf': 'linear'
def optimization_space():
    """Return the hyperopt search space for the bot balancing parameters.

    Every entry is an ``hp.uniformint`` prior whose label equals its
    dictionary key.
    """
    # (name, low, high); the *_weight parameters are currently disabled.
    bounds = (
        ('torch_health', 1, 10),
        ('torch_dmg', 1, 10),
        # ('torch_weight', 1, 10),
        ('torch_torch_range', 1, 4),
        ('torch_duration', 1, 6),
        ('torch_cooldown', 1, 6),
        ('torch_ticks_between_moves', 1, 6),
        # SawBot parameters
        ('saw_health', 1, 10),
        ('saw_dmg_min', 1, 10),
        ('saw_dmg_max', 1, 10),
        # ('saw_weight', 1, 10),
        ('saw_duration', 1, 6),
        ('saw_cooldown', 1, 6),
        ('saw_ticks_between_moves', 1, 6),
        # NailBot parameters
        ('nail_health', 1, 10),
        ('nail_dmg', 1, 10),
        # ('nail_weight', 1, 10),
        ('nail_cooldown', 1, 6),
        ('nail_ticks_between_moves', 1, 6),
    )
    return {
        name: hp.uniformint(name, low, high)
        for name, low, high in bounds
    }
cpahis[53:] ]))) * 2.64 # rt in different period return r1, report5Q, cpahis, periodRt def SimulationScenarioOrigin(): config = [0, True, False, 0, False, 0, True] result = list( map(lambda x: SimulateBaselineModel(op, auxdata, config, TargetDay=81), range(0, 500))) r1, report5Q, S_cHIS, periodRt = CreateSummary(result, 'originalResult') space = { 'inik': hp.uniformint('inik', 1, 10), 'iniexp': hp.uniformint('iniexp', 1, 10), 'baseline': hp.uniform('x', 0.03, 0.07), # baseline 'socialconnection': hp.uniform('y', 0.3, 0.7), # socialconnection 'gamma1': hp.uniform('gamma1', 1 / 14, 1 / 9), # gamma1 'gamma2': hp.uniform('gamma2', 1 / 14, 1 / 6), # gamma2 'gamma3': hp.uniform('gamma3', 1 / 6, 1 / 4), # gamma3 'gamma4': hp.uniform('gamma4', 1 / 5, 1 / 3), # gamma4 'gamma5': hp.uniform('gamma5', 1 / 4, 1 / 2) # gamma5 } auxdata = list(GetLaiTestData(fed)) if OptimizeTheConfig: config = [0, True, False, 0, False, 0, True] best = fmin(fn=lambda x: BatchEvaluationBaselineModel(x, auxdata, config),
previsoes[rodada] = {} resultados = [] resultado = {} for clf in classificadores: neg_pos_rate = 20 # espaco de busca if type(clf).__name__ == 'XGBClassifier': X_train = X_train_tree.copy() y_train = y_train_tree.copy() X_test = X_test_tree.copy() y_test = y_test_tree.copy() space = { 'n_estimators': hp.uniformint('n_estimators', 50, 250), 'max_depth': hp.uniformint('max_depth', 1, 14), 'learning_rate': hp.loguniform('learning_rate', -5, 0), 'min_child_weight': hp.uniformint('min_child_weight', 1, 10), 'subsample': hp.uniform('subsample', 0.7, 1.0), 'gamma': hp.uniform('gamma', 0.5, 1.2), 'colsample_bytree': hp.uniform('colsample_bytree', 0.7, 1.0), 'alpha': hp.uniformint('alpha', 1, 2), 'lambda': hp.uniform('lambda', 1.0, 2.0), 'scale_pos_weight': neg_pos_rate } func = xgb_cv if type(clf).__name__ == 'LogisticRegression': X_train = X_train_linear.copy()
logger.debug(f"Running using params:\n{fullParams}") return -1 * run(fullParams) if __name__ == "__main__": templatesDir = sys.argv[1] paramsFile = sys.argv[2] device = sys.argv[3] deviceIdx = sys.argv[4] # used for loguniform c = log(10.0) space = { "basisSize": hp.uniformint("basisSize", 3, 10), "embeddingSize": hp.uniformint("embeddingSize", 500, 5000), "l1_residuCoeff": hp.uniform("l1_residuCoeff", 0.1, 10), } trials = Trials() best = fmin(objective, space, algo=tpe.suggest, catch_eval_exceptions=True, max_evals=200) logger.info(f"Best configuration:\n{best}") logger.debug(f"Trials:\n{trials}")
def svm_using_jda_feature(cls):
    """Tune JDA feature extraction plus an SVM with hyperopt's TPE.

    Every trial loads JDA-transformed data through ``cls.loader`` with the
    sampled JDA settings, fits a standardised SVC on the source data and
    predicts the target data. The minimised loss is the MMD between the
    transformed source and target features minus the macro-average recall.
    The raw ``fmin`` result and the best trial are printed at the end.
    """
    # Priors are created in the same order as they appear in the space.
    c_prior = hp.uniform('C', 0.001, 100)
    svm_kernels = [
        {
            'name': 'rbf',
            'gamma': hp.uniform('rbf_gamma_uniform', 0.001, 10),
        },
        {
            'name': 'linear',
        },
        {
            'name': 'sigmoid',
            'gamma': hp.uniform('sigmoid_gamma_uniform', 0.001, 100),
            'coef0': hp.uniform('sigmoid_coef0', 0, 10),
        },
        # {
        #     'name'  : 'poly',
        #     'gamma' : hp.uniform('poly_gamma_uniform', 0.001, 100),
        #     'coef0' : hp.uniform('poly_coef0', 0, 10),
        #     'degree': hp.uniformint('poly_degree', 2, 3),
        # }
    ]
    jda_kernels = [
        {'name': 'primal', 'gamma': 1},
        {'name': 'linear', 'gamma': 1},
        {'name': 'rbf', 'gamma': hp.uniform('jda_gamma', 0.001, 10)},
    ]
    params = utils.ConfigDict(
        C=c_prior,
        svm_kernel=hp.choice('svm_kernel', svm_kernels),
        jda_kernel=hp.choice('jda_kernel', jda_kernels),
        lamb=hp.uniform('jda_lamb', 0.001, 1),
        T=hp.uniformint('jda_iterations', 1, 20),
    )

    def evaluate_trial(config):
        """Score one sampled configuration and return the hyperopt result."""
        print('\n---------------------------------------------------------------------------------------')
        print('Params: ')
        print(config)

        # 'name' is popped so the remaining entries can be passed as keywords.
        jda_options = config['jda_kernel']
        jda_kernel_name = jda_options.pop('name')
        Xs_new, Ys_new, Xt_new, Yt_new = cls.loader.load_jda_data(
            cls.Xs, cls.Ys, cls.Xt, cls.Yt,
            kernel=jda_kernel_name,
            **jda_options,
            lamb=config['lamb'],
            T=config['T'])

        svm_options = config['svm_kernel']
        svm_kernel_name = svm_options.pop('name')
        svc = SVC(kernel=svm_kernel_name,
                  tol=0.001,
                  random_state=666,
                  shrinking=True,
                  C=config['C'],
                  **svm_options)
        # uar
        clf = make_pipeline(StandardScaler(), svc)
        clf.fit(Xs_new, Ys_new)
        Yt_pred = clf.predict(Xt_new)

        distance = mmd(Xs_new, Xt_new)
        matrix = confusion_matrix(Yt_new, Yt_pred)
        report = classification_report(Yt_new, Yt_pred)
        print('Result:')
        print(matrix)
        print(report)
        print('---------------------------------------------------------------------------------------\n')

        report_as_dict = classification_report(Yt_new, Yt_pred,
                                               output_dict=True)
        uar = report_as_dict['macro avg']['recall']
        return {'loss': distance - uar, 'status': STATUS_OK}

    trials = Trials()
    best = fmin(
        fn=evaluate_trial,
        space=params,
        algo=tpe.suggest,
        max_evals=100,
        trials=trials,
    )
    print(best)
    print('best: ', trials.best_trial)
"leadlag": False, "batch_size": 128, "verbose": True, "epochs": 20, "lr": hp.loguniform('lr', np.log(1e-5), np.log(1e-1)), "wd": hp.loguniform('wd', np.log(1e-7), np.log(1e-2)), "hidden_rnn_sz": scope.int(hp.quniform('hidden_rnn_sz', 32, 128, 1)), "rnn_num_layers": hp.uniformint('rnn_num_layers', 1, 2), "patience": 10, "rnn_dropout": hp.uniform('rnn_dropout', 0, 0.9), "feedforward_num_layers": hp.uniformint('feedforward_num_layers', 1, 3), "min_count": 5, "testing_subsample_size": None, # "testing_subsample_size": 1000 "feedforward_hidden_dims": scope.int(hp.quniform('feedforward_hidden_dims', 32, 256, 4)), "feedforward_activations": "relu",
import argparse import os import pickle import sys import numpy as np from hyperopt import hp, STATUS_OK, trials_from_docs, Trials, partial, tpe, fmin from hyperopt.pyll import scope from gym_locm.agents import MaxAttackBattleAgent, GreedyBattleAgent, MaxAttackDraftAgent from gym_locm.toolbox.trainer import AsymmetricSelfPlay, model_builder_mlp, model_builder_lstm hyperparameter_space = { 'switch_freq': hp.choice('switch_freq', [10, 100, 1000]), 'layers': hp.uniformint('layers', 1, 3), 'neurons': hp.uniformint('neurons', 24, 256), 'activation': hp.choice('activation', ['tanh', 'relu', 'elu']), 'n_steps': scope.int(hp.quniform('n_steps', 30, 300, 30)), 'nminibatches': scope.int(hp.quniform('nminibatches', 1, 300, 1)), 'noptepochs': scope.int(hp.quniform('noptepochs', 3, 20, 1)), 'cliprange': hp.quniform('cliprange', 0.1, 0.3, 0.1), 'vf_coef': hp.quniform('vf_coef', 0.5, 1.0, 0.5), 'ent_coef': hp.uniform('ent_coef', 0, 0.01), 'learning_rate': hp.loguniform('learning_rate', np.log(0.00005), np.log(0.01)), } _counter = 0 def get_arg_parser():
def bo_tpe_lightgbm(X, y):
    """Tune a LightGBM regressor with hyperopt's TPE and score it on a test split.

    The data is split into train/validation/test parts, the search minimises
    the 3-fold cross-validated MSE on the training part, and the final model
    is refit with the best parameters (early stopping on the validation part)
    and evaluated on the test part. The fitted model is handed to
    ``save_model_object``.

    Args:
        X: Feature matrix.
        y: Regression target.

    Returns:
        tuple: ``(test_mse, elapsed, best_params)`` - the test-set MSE, the
        elapsed time as a ``datetime.timedelta`` and the best sampled
        hyperparameters as evaluated by ``space_eval``.
    """
    # Reference:
    # https://qiita.com/TomokIshii/items/3729c1b9c658cc48b5cb
    data = X
    target = y

    # Split twice so the data ends up in three parts: test, train, validation.
    X_intermediate, X_test, y_intermediate, y_test = train_test_split(
        data, target, shuffle=True, test_size=0.2, random_state=1)
    # train/validation split (gives us train and validation sets)
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_intermediate,
        y_intermediate,
        shuffle=False,
        test_size=0.25,
        random_state=1)
    # delete intermediate variables
    del X_intermediate, y_intermediate

    # Show how the data was distributed over the three parts.
    print('train: {}% | validation: {}% | test {}%'.format(
        round((len(y_train) / len(target)) * 100, 2),
        round((len(y_validation) / len(target)) * 100, 2),
        round((len(y_test) / len(target)) * 100, 2)))

    starttime = datetime.datetime.now()

    space = {
        "lambda_l1": hp.uniform("lambda_l1", 1e-8, 1.0),
        "lambda_l2": hp.uniform("lambda_l2", 1e-8, 1.0),
        "min_child_samples": hp.uniformint("min_child_samples", 5, 100),
        'learning_rate': hp.uniform("learning_rate", 0.001, 0.5),
        # n_estimators is the number of estimators per parameter set, i.e.
        # the number of boosting iterations.
        "n_estimators": hp.uniformint("n_estimators", 10, 100),
        "num_leaves": hp.uniformint("num_leaves", 5, 35)
    }
    # Fixed settings shared by every trial and by the final model
    # (n_estimators, num_leaves and learning_rate are tuned above).
    default_params = {
        "random_state": 1,
        "objective": "regression",
        "boosting_type": "gbdt",
        "feature_fraction": 0.9,
        "bagging_fraction": 0.8,
        "bagging_freq": 5,
        "verbose": -1,
    }

    def objective(params):
        """Return the 3-fold cross-validated MSE for one sampled parameter set."""
        clf = lgb.LGBMRegressor(**{**params, **default_params})
        score = -np.mean(
            cross_val_score(clf,
                            X_train,
                            y_train,
                            cv=3,
                            n_jobs=-1,
                            scoring="neg_mean_squared_error"))
        return {'loss': score, 'status': STATUS_OK}

    trials_lgb = Trials()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        best = fmin(
            fn=objective,
            space=space,
            algo=tpe.suggest,
            # max_evals sets how many parameter combinations are tried; more
            # may improve accuracy but takes longer to train.
            max_evals=50,
            trials=trials_lgb)

    best_params = space_eval(space, best)

    # Refit with the same fixed settings every trial was scored with;
    # previously the final model silently dropped default_params and was
    # therefore not the configuration that had been tuned.
    final_params = {**best_params, **default_params}
    lgb_model = lgb.LGBMRegressor(**final_params).fit(
        X_train,
        y_train,
        eval_set=[(X_validation, y_validation)],
        verbose=-1,
        # With early stopping, training may finish in fewer iterations than
        # n_estimators allows.
        early_stopping_rounds=2)

    y_pred = lgb_model.predict(X_test)
    test_MSE_lgb = mean_squared_error(y_pred, y_test)
    print("LightGBM MSE score:%.4f" % test_MSE_lgb)

    endtime = datetime.datetime.now()
    process_time_lgb = endtime - starttime
    print("程序执行时间(秒):{}".format(process_time_lgb))
    print("最佳超参数值集合:", best_params)

    # NOTE(review): the model is LightGBM but is saved under 'NGBoost'
    # labels - looks like a copy-paste leftover; confirm before renaming, as
    # the labels may determine where the model is stored.
    save_model_object(lgb_model, 'BO-TPE', 'NGBoost', 'NGBoost')
    return test_MSE_lgb, process_time_lgb, best_params
from tune_sklearn import TuneSearchCV
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from hyperopt import hp

# Search space: two hyperopt integer priors plus one plain (low, high) tuple.
space = {
    "n_estimators": hp.uniformint("n_estimators", 100, 200),
    "min_weight_fraction_leaf": (0.0, 0.5),
    "min_samples_leaf": hp.uniformint("min_samples_leaf", 1, 5),
}

# Digits dataset, 80/20 train/test split.
digits = datasets.load_digits()
X, y = digits.data, digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Three trials of hyperopt-driven search over a random forest.
tune_search = TuneSearchCV(
    RandomForestClassifier(),
    space,
    search_optimization="hyperopt",
    n_trials=3,
)
tune_search.fit(X_train, y_train)

print(tune_search.cv_results_)
print(tune_search.best_params_)