def test_searchcv_reproducibility():
    """
    Test whether results of BayesSearchCV can be reproduced with a fixed
    random state.
    """
    # load_iris(True) relied on a positional boolean that was deprecated in
    # sklearn 0.23 and removed in 1.1; pass return_X_y by keyword instead.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.75, random_state=0)

    random_state = 42
    opt = BayesSearchCV(
        SVC(random_state=random_state),
        {
            'C': Real(1e-6, 1e+6, prior='log-uniform'),
            'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
            'degree': Integer(1, 8),
            'kernel': Categorical(['linear', 'poly', 'rbf']),
        },
        n_iter=11,
        random_state=random_state,
    )

    opt.fit(X_train, y_train)
    best_est = opt.best_estimator_
    optim_res = opt.optimizer_results_[0].x

    # A clone seeded with the same random_state must reach the same optimum.
    opt2 = clone(opt).fit(X_train, y_train)
    best_est2 = opt2.best_estimator_
    optim_res2 = opt2.optimizer_results_[0].x

    assert getattr(best_est, 'C') == getattr(best_est2, 'C')
    assert getattr(best_est, 'gamma') == getattr(best_est2, 'gamma')
    assert getattr(best_est, 'degree') == getattr(best_est2, 'degree')
    assert getattr(best_est, 'kernel') == getattr(best_est2, 'kernel')
    # dict is sorted by alphabet: C, degree, gamma, kernel
    assert optim_res[0] == getattr(best_est, 'C')
    assert optim_res[2] == getattr(best_est, 'gamma')
    assert optim_res[1] == getattr(best_est, 'degree')
    assert optim_res[3] == getattr(best_est, 'kernel')
    assert optim_res2[0] == getattr(best_est, 'C')
    assert optim_res2[2] == getattr(best_est, 'gamma')
    assert optim_res2[1] == getattr(best_est, 'degree')
    assert optim_res2[3] == getattr(best_est, 'kernel')
def test_dimensions_names():
    from skopt.space import Real, Categorical, Integer
    # Search space with an explicit name on every dimension.
    space = [
        Real(0, 1, name='real'),
        Categorical(['a', 'b', 'c'], name='cat'),
        Integer(0, 1, name='int'),
    ]
    opt = Optimizer(space, n_initial_points=1)
    # Tell the optimizer one observation; dimension names must survive the
    # round trip into the result object.
    result = opt.tell([(0.5, 'a', 0.5)], [3])
    names = [dim.name for dim in result.space.dimensions]
    assert len(names) == 3
    assert "real" in names
    assert "cat" in names
    assert "int" in names
    assert None not in names
def params_to_skopt(param_space: ParamSpace):
    """ Converts a parameter space to a list of Dimension objects that can be
    used with a skopt Optimizer.

    A skopt Optimizer only receives 3 types of Dimensions: Categorical, Real,
    or Integer; each parameter in our parameter space is mapped onto one of
    those 3 types. Only parameters that either have bounds or a categorical
    domain with more than 1 value are converted: constant values in the
    parameter space don't need to be optimized anyway. Another function is
    provided to convert skopt output values back into a dictionary with a
    full configuration according to the parameter space
    (@see values_to_params).

    Args:
        param_space: a ParameterSpace where we can get the domain of each
            parameter

    Returns:
        a list of Dimension that can be passed to a skopt Optimizer
    """
    dimensions = []
    for name in param_space.param_names():
        spec = param_space.domain(name)
        domain = spec["domain"]
        dtype = DTypes.from_type(spec["dtype"])
        if len(domain) <= 1:
            # Constant parameter: nothing to optimize.
            continue
        if dtype == DTypes.INT:
            dimensions.append(Integer(min(domain), max(domain), name=name))
        elif dtype == DTypes.FLOAT:
            prior = spec.get("prior", None)
            dimensions.append(
                Real(min(domain), max(domain), prior=prior, name=name))
        elif dtype == DTypes.CATEGORICAL:
            prior = spec.get("prior", None)
            dimensions.append(
                Categorical(domain, prior, transform="onehot", name=name))
    return dimensions
def param_search():
    """Build the three hyperparameter search spaces for a bagging ensemble.

    Returns:
        (param_dist, param_grid, bayes_space): value lists for randomized
        search, an explicit grid for exhaustive search, and skopt dimensions
        for Bayesian optimization.
    """
    # SIMPLE grid search: parameter names (str) -> lists of settings to try.
    param_grid = {
        # The number of base estimators in the ensemble.
        # If base_estimator=None, the base estimator is a
        # DecisionTreeRegressor.
        "n_estimators": [10, 50, 100],
        # subsample: default=1. Lower ratios avoid over-fitting.
        "max_samples": [0.6, 0.8, 1],
        # colsample: default=1. Lower ratios avoid over-fitting.
        "max_features": [0.8, 0.9, 1],
        # Whether samples are drawn with replacement (default=True);
        # if False, sampling without replacement is performed.
        "bootstrap": [True, False],
        # Whether features are drawn with replacement (default=False).
        "bootstrap_features": [True, False],
    }

    # RANDOMIZED search: names -> distributions or lists sampled uniformly.
    # list(range(...)) replaces the redundant [x for x in range(...)] form.
    param_dist = {
        "n_estimators": list(range(10, 101, 10)),
        "max_samples": [x / 100 for x in range(6, 101, 1)],
        "max_features": [x / 100 for x in range(7, 101, 1)],
        "bootstrap": [True, False],
        "bootstrap_features": [True, False],
    }

    # Bayesian optimization: when a region of the space turns out to be
    # good, it should be explored more.
    # Real: continuous, Integer: discrete, Categorical: categorical space.
    bayes_space = {
        "n_estimators": Integer(50, 100),
        "max_samples": Real(0.5, 1.0),
        "max_features": Real(0.7, 1.0),
        "bootstrap": Categorical([True, False]),
        "bootstrap_features": Categorical([True, False]),
    }

    return param_dist, param_grid, bayes_space
def run_parameter_search(URM_train, ICM_all, W_sparse_CF, evaluator_test,
                         metric_to_optimize="MAP", n_cases=10,
                         n_random_starts=3,
                         output_folder_path="result_experiments/"):
    """Bayesian hyperparameter search for CFW_D_Similarity_Linalg.

    Args:
        URM_train: user-rating matrix used for fitting (and the last-test
            refit).
        ICM_all: item content matrix passed to the recommender constructor.
        W_sparse_CF: collaborative similarity matrix passed to the
            recommender constructor.
        evaluator_test: evaluator used as validation during the search.
        metric_to_optimize: metric name the search maximizes.
        n_cases: total number of configurations to evaluate.
        n_random_starts: random configurations before the surrogate model
            takes over.
        output_folder_path: directory where search artifacts are written.
    """
    recommender_class = CFW_D_Similarity_Linalg
    parameterSearch = SearchBayesianSkopt(
        recommender_class, evaluator_validation=evaluator_test)

    hyperparameters_range_dictionary = {
        "topK": Integer(1, 2000),
        "add_zeros_quota": Real(low=0, high=0.1, prior='uniform'),
        "normalize_similarity": Categorical([True, False]),
    }

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_all, W_sparse_CF],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    # exist_ok avoids the check-then-create race of the old
    # os.path.exists() + os.makedirs() pair.
    os.makedirs(output_folder_path, exist_ok=True)

    # Clone data structure to perform the fitting with the best hyper
    # parameters on train + validation data.
    recommender_input_args_last_test = recommender_input_args.copy()
    recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train

    parameterSearch.search(
        recommender_input_args,
        recommender_input_args_last_test=recommender_input_args_last_test,
        parameter_search_space=hyperparameters_range_dictionary,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        save_model="no",
        output_folder_path=output_folder_path,
        output_file_name_root=recommender_class.RECOMMENDER_NAME,
        metric_to_optimize=metric_to_optimize)
def __init__(self, configspace, **kwargs):
    """Wrap a ConfigSpace search space in a skopt Optimizer.

    Builds one skopt dimension per hyperparameter (in a fixed order) and
    forwards any recognized skopt keyword arguments to the Optimizer.
    """
    super().__init__(configspace,
                     reward_attribute=kwargs.get('reward_attribute'))
    # Fix the order of hyperparameters so skopt dimensions always line up
    # with configspace lookups.
    self.hp_ordering = configspace.get_hyperparameter_names()
    skopt_hpspace = []
    with warning_filter():
        try_import_skopt()
        from skopt import Optimizer
        from skopt.space import Integer, Real, Categorical

        for hp_name in self.hp_ordering:
            hp_obj = configspace.get_hyperparameter(hp_name)
            hp_type = str(type(hp_obj)).lower()  # type of hyperparam
            if 'integer' in hp_type:
                dim = Integer(low=int(hp_obj.lower),
                              high=int(hp_obj.upper),
                              name=hp_name)
            elif 'float' in hp_type:
                if hp_obj.log:  # log10-scale hyperparameter
                    dim = Real(low=float(hp_obj.lower),
                               high=float(hp_obj.upper),
                               prior='log-uniform',
                               name=hp_name)
                else:
                    dim = Real(low=float(hp_obj.lower),
                               high=float(hp_obj.upper),
                               name=hp_name)
            elif 'categorical' in hp_type:
                dim = Categorical(hp_obj.choices, name=hp_name)
            elif 'ordinal' in hp_type:
                dim = Categorical(hp_obj.sequence, name=hp_name)
            else:
                raise ValueError("unknown hyperparameter type: %s" % hp_name)
            skopt_hpspace.append(dim)

        # Only forward kwargs that skopt's Optimizer actually understands.
        skopt_keys = {
            'base_estimator', 'n_random_starts', 'n_initial_points',
            'acq_func', 'acq_optimizer', 'random_state', 'model_queue_size',
            'acq_func_kwargs', 'acq_optimizer_kwargs'
        }
        skopt_kwargs = self._filter_skopt_kwargs(kwargs, skopt_keys)
        self.bayes_optimizer = Optimizer(dimensions=skopt_hpspace,
                                         **skopt_kwargs)
def run_parameter_search_mixed_similarity_item(
        recommender_object: HybridMixedSimilarityRecommender, URM_train,
        output_folder_path="result_experiments/", evaluator_validation=None,
        evaluator_test=None, n_cases=35, n_random_starts=5,
        metric_to_optimize="MAP"):
    """Bayesian search over the mixing weights of a hybrid item recommender.

    Args:
        recommender_object: already-constructed hybrid recommender to tune.
        URM_train: user-rating matrix (kept for interface compatibility).
        output_folder_path: directory where search artifacts are written.
        evaluator_validation: evaluator used during the search.
        evaluator_test: unused here; kept for interface compatibility.
        n_cases: total number of configurations to evaluate.
        n_random_starts: random configurations before the surrogate model.
        metric_to_optimize: metric name the search maximizes.
    """
    print("Start search")

    # exist_ok avoids the check-then-create race of the old
    # os.path.exists() + os.makedirs() pair.
    os.makedirs(output_folder_path, exist_ok=True)

    output_file_name_root = recommender_object.RECOMMENDER_NAME

    hyperparameters_range_dictionary = {
        "topK": Integer(1, 2000),
        "alpha1": Real(0, 1),
        "alpha2": Real(0, 1),
        "alpha3": Real(0, 1),
        "alpha4": Real(0, 1),
    }

    # The recommender instance is passed directly to the searcher, so the
    # constructor/fit argument containers stay empty.
    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    parameterSearch = SearchBayesianSkoptObject(
        recommender_object, evaluator_validation=evaluator_validation)

    parameterSearch.search(
        recommender_input_args,
        parameter_search_space=hyperparameters_range_dictionary,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        save_model="no")
def gpminimize_knn(X, y):
    """Tune KNeighborsRegressor's n_neighbors with gp_minimize.

    Returns:
        (best MSE, wall-clock duration, best hyperparameter values).
    """
    start = datetime.datetime.now()
    reg = KNeighborsRegressor()
    space = [Integer(1, 20, name='n_neighbors')]

    @use_named_args(space)
    def objective(**params):
        # Negate the CV score so that minimizing finds the best model.
        reg.set_params(**params)
        scores = cross_val_score(reg, X, y, cv=3, n_jobs=-1,
                                 scoring="neg_mean_squared_error")
        return -np.mean(scores)

    res_gp_knn = gp_minimize(objective, space, n_calls=10, random_state=0)
    print("KNN MSE score:%.4f" % res_gp_knn.fun)
    elapsed = datetime.datetime.now() - start
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", res_gp_knn.x)
    save_model_object(res_gp_knn.models, 'gp_minimize', 'KNN', 'KNN')
    return res_gp_knn.fun, elapsed, res_gp_knn.x
def bo_knn(X, y):
    """Tune KNeighborsRegressor's n_neighbors with BayesSearchCV.

    Returns:
        (best MSE as a string, wall-clock duration, best params dict).
    """
    search_space = {
        'n_neighbors': Integer(1, 20),
    }
    start = datetime.datetime.now()
    estimator = KNeighborsRegressor()
    Bayes_knn = BayesSearchCV(estimator, search_space, cv=3, n_iter=10,
                              scoring='neg_mean_squared_error')
    Bayes_knn.fit(X, y)
    print("KNN MSE score:" + str(-Bayes_knn.best_score_))
    elapsed = datetime.datetime.now() - start
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", Bayes_knn.best_params_)
    save_model_object(Bayes_knn, 'BO-GP', 'KNN', 'KNN')
    # NOTE: the score is returned as a string, mirroring the original
    # contract; callers expecting a float must convert it themselves.
    return str(-Bayes_knn.best_score_), elapsed, Bayes_knn.best_params_
class MockEstimator(Estimator):
    """Minimal Estimator stub for tests; fit() is a no-op returning self."""
    name = "Mock Classifier"
    model_family = ModelFamily.NONE
    # Declares support for every (time-series) classification problem type.
    supported_problem_types = [
        ProblemTypes.BINARY, ProblemTypes.MULTICLASS,
        ProblemTypes.TIME_SERIES_MULTICLASS, ProblemTypes.TIME_SERIES_BINARY
    ]
    # Search ranges for the two dummy hyperparameters.
    hyperparameter_ranges = {'a': Integer(0, 10), 'b': Real(0, 10)}

    def __init__(self, a=1, b=0, random_seed=0):
        # No underlying model object is wrapped (component_obj=None).
        super().__init__(parameters={
            "a": a,
            "b": b
        }, component_obj=None, random_seed=random_seed)

    def fit(self, X, y):
        # Intentionally does nothing beyond returning self for chaining.
        return self
def indicator_space() -> List[Dimension]:
    """
    Define your Hyperopt space for searching buy strategy parameters.
    """
    # Encode each (short, medium) pair as a single "short.medium" string so
    # it can serve as one categorical trigger value.
    buy_triggers = [
        f"{short}.{medium}"
        for short in range(shortRangeBegin, shortRangeEnd, 5)
        for medium in range(mediumRangeBegin, mediumRangeEnd, 10)
    ]
    bb_windows = list(range(bbWindowBegin, bbWindowEnd, 10))
    return [
        Integer(60, 90, name='rsi-value'),
        Categorical(buy_triggers, name='trigger'),
        Categorical(bb_windows, name='bb-window'),
    ]
def runParameterSearch_SpectralCF(recommender_class, URM_train,
                                  earlystopping_parameters,
                                  output_file_name_root,
                                  n_cases=35,
                                  evaluator_validation=None,
                                  evaluator_test=None,
                                  metric_to_optimize="RECALL",
                                  output_folder_path="result_experiments/"):
    """Bayesian hyperparameter search for the SpectralCF wrapper.

    Args:
        recommender_class: must be SpectralCF_RecommenderWrapper.
        URM_train: user-rating matrix passed to the recommender constructor.
        earlystopping_parameters: forwarded as fit keyword arguments.
        output_file_name_root: base name for the search output files.
        n_cases: total number of configurations to evaluate.
        evaluator_validation / evaluator_test: evaluators for the search.
        metric_to_optimize: metric name the search maximizes.
        output_folder_path: directory where search artifacts are written.

    Raises:
        ValueError: if recommender_class is not supported.
    """
    # exist_ok avoids the check-then-create race of the old
    # os.path.exists() + os.makedirs() pair.
    os.makedirs(output_folder_path, exist_ok=True)

    parameterSearch = SearchBayesianSkopt(
        recommender_class,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test)

    ##########################################################################

    if recommender_class is SpectralCF_RecommenderWrapper:
        hyperparameters_range_dictionary = {
            "batch_size": Categorical([1024]),
            "embedding_size": Categorical([4, 8, 16, 32]),
            "decay": Real(low=1e-5, high=1e-1, prior='log-uniform'),
            "learning_rate": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "k": Integer(low=1, high=6),
        }
        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS=earlystopping_parameters)
    else:
        # Previously an unsupported class fell through to a NameError on
        # the search() call below; fail fast with a clear message instead.
        raise ValueError(
            "runParameterSearch_SpectralCF: unsupported recommender_class "
            "{}".format(recommender_class))

    ##########################################################################

    parameterSearch.search(
        recommender_parameters,
        parameter_search_space=hyperparameters_range_dictionary,
        n_cases=n_cases,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize)
def sell_indicator_space() -> List[Dimension]:
    """Ranges and options for the SELL strategy search space."""
    return [
        Integer(kshortStart, kshortEnd, name='sell-kama-short-period'),
        Integer(klongStart, klongEnd, name='sell-kama-long-period'),
        Integer(cciStart, cciEnd, name='sell-cci-period'),
        Integer(rsiStart, rsiEnd, name='sell-rsi-period'),
        Integer(-200, -100, name='sell-cci-limit'),
        Integer(40, 90, name='sell-rsi-limit'),
        Categorical([True, False], name='sell-cci-enabled'),
        Categorical([True, False], name='sell-rsi-enabled'),
        Categorical(['cross', 'slope'], name='sell-kama-trigger'),
    ]
def test_searchcv_run():
    """Smoke-test that BayesSearchCV fits and scores well on iris."""
    # load_iris(True) relied on a positional boolean that was deprecated in
    # sklearn 0.23 and removed in 1.1; pass return_X_y by keyword instead.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.75, random_state=0)
    opt = BayesSearchCV(
        SVC(),
        {
            "C": Real(1e-6, 1e6, prior="log-uniform"),
            "gamma": Real(1e-6, 1e1, prior="log-uniform"),
            "degree": Integer(1, 8),
            "kernel": Categorical(["linear", "poly", "rbf"]),
        },
        n_iter=11,
        cv=None,
    )
    opt.fit(X_train, y_train)
    assert opt.score(X_test, y_test) > 0.9
def get_model(self, X, y):
    """Fit a Bayesian-optimized SVC on (X, y) and return the fitted search.

    Args:
        X: feature matrix.
        y: target labels.

    Returns:
        The fitted BayesSearchCV object (exposes best_estimator_ etc.).
    """
    search_space = {
        'C': Real(0.01, 1.0),
        # BUG FIX: 'precomputed' is excluded -- it requires X to be a square
        # Gram matrix, so SVC.fit raises whenever that kernel is sampled on
        # an ordinary feature matrix.
        'kernel': Categorical(['linear', 'poly', 'rbf', 'sigmoid']),
        'degree': Integer(2, 10),
        'gamma': Categorical(['scale', 'auto']),
        'tol': Real(0.00001, 0.0001)
    }
    model = BayesSearchCV(SVC(random_state=0), search_space, random_state=0,
                          n_iter=1, cv=3, n_jobs=-1)
    model.fit(X, y)
    return model
def indicator_space() -> List[Dimension]:
    """
    Define your Hyperopt space for searching buy strategy parameters.
    """
    # Operations legend -- CA: Crossed Above, CB: Crossed Below,
    # I: Integer operand, R: Real operand, D: Disabled.
    operations = ["D", ">", "<", "=", "CA", "CB",
                  ">I", "=I", "<I", ">R", "=R", "<R"]
    gene = []
    for i in range(DNA_SIZE):
        gene.append(Categorical(GodGenes, name=f'buy-indicator-{i}'))
        gene.append(Categorical(GodGenes, name=f'buy-cross-{i}'))
        gene.append(Integer(-1, 101, name=f'buy-int-{i}'))
        gene.append(Real(-1.1, 1.1, name=f'buy-real-{i}'))
        gene.append(Categorical(operations, name=f'buy-oper-{i}'))
    return gene
def create_nn_pdf_experiment(cov_type, data_loader, num_workers, num_samples,
                             num_samples_best_eval):
    """Set up (and load or run) a hyperparameter search for the NN-PDF model.

    Args:
        cov_type: covariance parameterization; "param_cov" adds a dedicated
            covariance sub-network to the search space.
        data_loader: provides train_x; a zero-width input disables the first
            sub-network (arch1) and its search dimensions.
        num_workers: forwarded to Runner.
        num_samples: forwarded to Runner.
        num_samples_best_eval: forwarded to Runner.

    Returns:
        The Runner instance after load_best_model_or_run().
    """

    def params_converter(args):
        # Expand each (layer_size, num_layers) pair into an explicit
        # per-layer architecture list.
        if data_loader.train_x.shape[1] > 0:
            args['arch1'] = [args['layer_size1']] * args['num_layers1']
        args['arch2'] = [args['layer_size2']] * args['num_layers2']
        args['positive_transform'] = "square"
        args['batch_size'] = 200
        args['cov_type'] = cov_type
        # args['activation'] = tf.nn.relu
        if cov_type == "param_cov":
            args['arch_cov'] = [args['layer_size_cov']
                                ] * args['num_layers_cov']
        return args

    # Always search over the second sub-network and the learning rate.
    opt_space = [
        Integer(1, 5, name='num_layers2'),
        Integer(10, 200, name="layer_size2"),
        Real(10**-4, 10**-2, "log-uniform", name='learning_rate'),
        # Categorical(["relu","tanh","leaky_relu"], name="activation")
        # Integer(100, 500, name="batch_size"),
    ]
    # First sub-network only exists when there are input features.
    if data_loader.train_x.shape[1] > 0:
        opt_space.extend([
            Integer(0, 3, name='num_layers1'),
            Integer(10, 200, name="layer_size1")
        ])
    # Optional covariance sub-network.
    if cov_type == "param_cov":
        opt_space.extend([
            Integer(1, 3, name='num_layers_cov'),
            Integer(10, 200, name="layer_size_cov")
        ])

    opt = Runner(num_workers=num_workers,
                 num_samples=num_samples,
                 num_samples_best_eval=num_samples_best_eval,
                 space=opt_space,
                 params_converter=params_converter,
                 train_eval_model=nn_pdf_train_eval_model_factory(
                     data_loader, cov_type))
    opt.load_best_model_or_run()
    return opt
def __init__(self):
    # Best performing ensemble: ~0.77 (~1 MB size)
    # estimated R^2 of compact simulator: ~0.75 (~0.003 MB in size)
    # ... this will probably be gone soon and normal model will be used
    self.simulator = simulators.dnn_sim
    # skopt search space over DNN training hyperparameters; keys follow
    # sklearn's "step__param" naming (here, the 'model' pipeline step).
    self.search_space = {
        'model__lr': Real(1e-6, 1.0, prior='log-uniform'),
        'model__mom': Real(0.01, 1.0, prior='log-uniform'),
        'model__l1': Integer(1, 16),
        'model__l2': Integer(1, 16),
        'model__l3': Integer(1, 16),
        'model__l4': Integer(1, 16),
        'model__batch_size': Integer(32, 256),
        'model__epochs': Integer(1, 128),
    }
    # Ordered list-of-dimensions form, as required by the plain skopt API.
    self.space = dimensions_aslist(self.search_space)
def indicator_space() -> List[Dimension]:
    """Hyperopt dimensions for the buy-side indicators."""
    return [
        # Base timeframe
        Integer(5, 15, name='base-yolo'),
        # Informative timeframe guard
        Categorical(['lower', 'upper', 'both', 'none'], name='inf-guard'),
        Real(0.70, 0.99, name='inf-pct-adr-top'),
        Real(0.01, 0.20, name='inf-pct-adr-bot'),
        # Extra BTC/ETH stakes
        Integer(10, 70, name='xtra-inf-stake-rmi'),
        Integer(10, 70, name='xtra-base-stake-rmi'),
        Integer(10, 70, name='xtra-base-fiat-rmi'),
        # Extra BTC/STAKE if not in whitelist
        Integer(10, 70, name='xbtc-base-rmi'),
        Integer(10, 70, name='xbtc-inf-rmi'),
    ]
def roi_space() -> List[Dimension]:
    # ROI-table search space: roi_t* are time buckets, roi_p* are the
    # profit deltas stacked per bucket. Exact bounds are tuning constants.
    return [
        Integer(1, 5, name='roi_t6'),
        Integer(1, 10, name='roi_t5'),
        Integer(1, 15, name='roi_t4'),
        Integer(15, 20, name='roi_t3'),
        Integer(20, 25, name='roi_t2'),
        Integer(25, 60, name='roi_t1'),
        Real(0.005, 0.10, name='roi_p6'),
        Real(0.005, 0.07, name='roi_p5'),
        Real(0.005, 0.05, name='roi_p4'),
        Real(0.005, 0.025, name='roi_p3'),
        Real(0.005, 0.01, name='roi_p2'),
        Real(0.003, 0.007, name='roi_p1'),
    ]
def roi_space() -> List[Dimension]:
    # ROI-table search space with wider time buckets than the default.
    # roi_t* are time bounds, roi_p* are per-bucket profit deltas.
    return [
        Integer(1, 15, name='roi_t6'),
        Integer(1, 45, name='roi_t5'),
        Integer(1, 90, name='roi_t4'),
        Integer(45, 120, name='roi_t3'),
        Integer(45, 180, name='roi_t2'),
        Integer(90, 300, name='roi_t1'),
        Real(0.005, 0.10, name='roi_p6'),
        Real(0.005, 0.07, name='roi_p5'),
        Real(0.005, 0.05, name='roi_p4'),
        Real(0.005, 0.025, name='roi_p3'),
        Real(0.005, 0.01, name='roi_p2'),
        Real(0.003, 0.007, name='roi_p1'),
    ]
def roi_space() -> List[Dimension]:
    # ROI-table search space with identical wide time ranges per bucket and
    # deliberately small profit deltas.
    return [
        Integer(1, 300, name='roi_t6'),
        Integer(1, 300, name='roi_t5'),
        Integer(1, 300, name='roi_t4'),
        Integer(1, 300, name='roi_t3'),
        Integer(1, 300, name='roi_t2'),
        Integer(1, 300, name='roi_t1'),
        Real(0.001, 0.005, name='roi_p6'),
        Real(0.001, 0.005, name='roi_p5'),
        Real(0.001, 0.005, name='roi_p4'),
        Real(0.001, 0.005, name='roi_p3'),
        Real(0.0001, 0.005, name='roi_p2'),
        Real(0.0001, 0.005, name='roi_p1'),
    ]
def roi_space() -> List[Dimension]:
    # Custom ROI-table search space. The trailing comments give the
    # cumulative time window each bucket covers and its min/max profit.
    #                                  min / max        min / max
    return [
        # 0            : 0.100 / 0.205
        Integer(1, 20, name='roi_t6'),      # 1 -> 20    : 0.050 / 0.105
        Integer(10, 20, name='roi_t5'),     # 11 -> 40   : 0.030 / 0.055
        Integer(10, 20, name='roi_t4'),     # 21 -> 60   : 0.015 / 0.035
        Integer(15, 30, name='roi_t3'),     # 36 -> 90   : 0.010 / 0.020
        Integer(264, 630, name='roi_t2'),   # 300 -> 720 : 0.005 / 0.010
        Integer(420, 720, name='roi_t1'),   # 720 -> 1440: 0
        Real(0.05, 0.10, name='roi_p6'),
        Real(0.02, 0.05, name='roi_p5'),
        Real(0.015, 0.020, name='roi_p4'),
        Real(0.005, 0.015, name='roi_p3'),
        Real(0.005, 0.01, name='roi_p2'),
        Real(0.005, 0.01, name='roi_p1'),
    ]
def optimise(self, parameter_definitions, function, run_schedule):
    """Run Bayesian optimisation (GP + expected improvement) over the given
    parameters.

    Args:
        parameter_definitions: mapping of name -> (low, high); an int upper
            bound produces an Integer dimension, anything else a Real one.
        function: objective to minimize; receives a point in the space.
        run_schedule: (n_calls, n_random_starts) pair.

    Returns:
        The skopt OptimizeResult object from gp_minimize.
    """
    # Parse parameter definitions to a list of skopt dimensions.
    dimensions = []
    for name, bounds in parameter_definitions.items():
        # isinstance is the idiomatic type check (type(x) is int rejects
        # int subclasses). Note that Python bools are ints, so a boolean
        # upper bound would also map to an Integer dimension here.
        if isinstance(bounds[1], int):
            dimensions.append(Integer(low=bounds[0], high=bounds[1],
                                      name=name))
        else:
            dimensions.append(Real(low=bounds[0], high=bounds[1], name=name))

    # Bayesian Optimisation using a gaussian process (GP) with expected
    # improvement (EI) as the acquisition function.
    results_object = gp_minimize(function, dimensions,
                                 acq_func='EI',
                                 noise=1e-10,
                                 n_calls=run_schedule[0],
                                 n_random_starts=run_schedule[1],
                                 n_jobs=-1,
                                 verbose=True)
    return results_object
def opt_mf():
    """Optimize matrix-factorization hyperparameters with gbrt_minimize.

    Search dimensions (in order): components, user_reg, pos_item_reg,
    neg_item_reg, l_rate. Prints the best p@k found and the optimal
    parameter values.
    """
    space = [Integer(1, 10),    # components
             Real(1e-9, 0.8),   # user_reg
             Real(1e-9, 0.8),   # pos_item_reg
             Real(1e-9, 0.8),   # neg_item_reg
             Real(1e-5, 1),     # l_rate
             ]

    # Warm-start points: a few hand-picked configurations.
    x0 = [2, 1e-2, 1e-3, 1e-3, 5e-3]
    x1 = [3, 1e-2, 1e-3, 1e-2, 5e-3]
    x2 = [4, 1e-2, 1e-3, 1e-2, 5e-1]
    x3 = [4, 1e-2, 1e-2, 5e-3, 1e-1]
    x4 = [5, 1e-2, 1e-3, 1e-3, 5e-2]
    x0s = [x0, x1, x2, x3, x4]

    # get the current fold
    res = gbrt_minimize(objective, space, x0=x0s, verbose=True,
                        n_random_starts=20, n_calls=1000, xi=0.01, n_jobs=-1,
                        callback=result)

    print('Maximimum p@k found: {:6.5f}'.format(-res.fun))
    print('Optimal parameters:')
    # BUG FIX: the label list previously held only 3 names (one of them
    # wrong: 'CSLIM'), so zip() silently dropped the last two optimized
    # values. List all 5 names, matching the space order above.
    params = ['components', 'user_reg', 'pos_item_reg', 'neg_item_reg',
              'l_rate']
    for (p, x_) in zip(params, res.x):
        print('{}: {}'.format(p, x_))
def skopt_main():
    """Resume (or start) a pooled skopt optimization of the pendulum task.

    Loads a checkpointed Optimizer from disk when available, otherwise
    creates a fresh one; then runs three ask/tell iterations with a worker
    pool, checkpointing after each.
    """
    from skopt import Optimizer, dump, load, Space
    from skopt.learning import GaussianProcessRegressor
    from skopt.space import Real, Integer

    fname = 'optimizer-exp-pendulum-4.pkl'
    dims = [Integer(15, 500), Real(0.025, 0.1, prior="log-uniform")]
    try:
        optimizer = load(fname)
        # Refresh the space in case the dimension definitions changed
        # between runs of this script.
        optimizer.space = Space(dims)
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallows
        # KeyboardInterrupt/SystemExit. Exception still covers a missing
        # or corrupt checkpoint file.
        optimizer = Optimizer(dimensions=dims, random_state=1)

    n_jobs = 2
    for i in range(3):
        pool = Pool(n_jobs, initializer=mute)
        x = optimizer.ask(n_points=n_jobs)  # x is a list of n_points points
        print(x)
        y = pool.map(f, x)
        pool.close()
        optimizer.tell(x, y)
        print('Iteration %d. Best yi %.2f' % (i, min(optimizer.yi)))
        # Checkpoint after every iteration so progress survives a crash.
        dump(optimizer, fname)
def get_param_space(model_type, optimazer_type):
    """Build a hyperparameter search space for the chosen optimizer library.

    Args:
        model_type: model identifier forwarded to set_param().
        optimazer_type: 'hyperopt' for a dict of hyperopt distributions,
            'skopt' for a list of skopt dimensions. (Name kept for
            backward compatibility.)

    Returns:
        dict (hyperopt) or list (skopt) describing the search space.

    Raises:
        ValueError: if optimazer_type is not recognized (previously this
            fell through to an UnboundLocalError on return).
    """
    # param_dict = set_param('lightgbm')
    param_dict = set_param(model_type)

    if optimazer_type == 'hyperopt':
        param_space = dict()
        for param_name in list(param_dict.keys()):
            value_type = param_dict[param_name][0]
            value_range = param_dict[param_name][1]
            if value_type == 'int':
                # NOTE(review): hp.randint samples from [0, upper) -- the
                # lower bound value_range[0] is ignored here; confirm this
                # is intentional.
                param_space[param_name] = hp.randint(param_name,
                                                     value_range[1])
            elif value_type == 'float':
                param_space[param_name] = hp.uniform(param_name,
                                                     value_range[0],
                                                     value_range[1])
            elif value_type == 'str':
                param_space[param_name] = hp.choice(param_name,
                                                    list(value_range))
    elif optimazer_type == 'skopt':
        param_space = []
        for param_name in list(param_dict.keys()):
            value_type = param_dict[param_name][0]
            value_range = param_dict[param_name][1]
            if value_type == 'int':
                param_space.append(
                    Integer(value_range[0], value_range[1], name=param_name))
            elif value_type == 'float':
                # Small offset keeps the log-uniform lower bound positive.
                param_space.append(
                    Real(value_range[0] + 10**-6, value_range[1],
                         "log-uniform", name=param_name))
            elif value_type == 'str':
                param_space.append(
                    Categorical(categories=list(value_range),
                                name=param_name))
    else:
        raise ValueError("unknown optimizer type: %r" % (optimazer_type,))
    return param_space
def roi_space() -> List[Dimension]:
    # Custom ROI-table search space. The trailing comments give the
    # cumulative time window each bucket covers and its min/max profit.
    #                                  min / max        min / max
    return [
        # 0            : 0.100 / 0.200
        Integer(1, 20, name='roi_t6'),      # 1 -> 20    : 0.050 / 0.100
        Integer(10, 20, name='roi_t5'),     # 11 -> 40   : 0.030 / 0.050
        Integer(10, 20, name='roi_t4'),     # 21 -> 60   : 0.015 / 0.030
        Integer(15, 30, name='roi_t3'),     # 36 -> 90   : 0.010 / 0.015
        Integer(45, 90, name='roi_t2'),     # 81 -> 180  : 0.003 / 0.005
        Integer(90, 180, name='roi_t1'),    # 171 -> 360 : 0.0025
        #                                     (should be 0 but changed above)
        Real(0.05, 0.10, name='roi_p6'),
        Real(0.02, 0.05, name='roi_p5'),
        Real(0.015, 0.020, name='roi_p4'),
        Real(0.005, 0.015, name='roi_p3'),
        Real(0.007, 0.01, name='roi_p2'),
        Real(0.003, 0.005, name='roi_p1'),
    ]
class DecisionTreeClassifier(Estimator):
    """Decision Tree Classifier."""
    name = "Decision Tree Classifier"
    # AutoML search ranges: lists are categorical choices, Integer a range.
    hyperparameter_ranges = {
        "criterion": ["gini", "entropy"],
        "max_features": ["auto", "sqrt", "log2"],
        "max_depth": Integer(4, 10)
    }
    model_family = ModelFamily.DECISION_TREE
    supported_problem_types = [
        ProblemTypes.BINARY, ProblemTypes.MULTICLASS,
        ProblemTypes.TIME_SERIES_BINARY, ProblemTypes.TIME_SERIES_MULTICLASS
    ]

    def __init__(self,
                 criterion="gini",
                 max_features="auto",
                 max_depth=6,
                 min_samples_split=2,
                 min_weight_fraction_leaf=0.0,
                 random_state=None,
                 random_seed=0,
                 **kwargs):
        # Parameters recorded on the component and forwarded to sklearn.
        parameters = {
            "criterion": criterion,
            "max_features": max_features,
            "max_depth": max_depth,
            "min_samples_split": min_samples_split,
            "min_weight_fraction_leaf": min_weight_fraction_leaf
        }
        parameters.update(kwargs)
        # random_state is deprecated in favor of random_seed; deprecate_arg
        # resolves which of the two supplied values to use.
        random_seed = deprecate_arg("random_state", "random_seed",
                                    random_state, random_seed)
        dt_classifier = SKDecisionTreeClassifier(random_state=random_seed,
                                                 **parameters)
        super().__init__(parameters=parameters,
                         component_obj=dt_classifier,
                         random_seed=random_seed)
def _convert_PHOTON_to_skopt_space(self, hyperparam: object, name: str):
    """Translate a PHOTON hyperparameter description into a skopt dimension.

    Records the name in self.hyperparameter_list and returns the matching
    skopt dimension, or None for a falsy/unsupported hyperparam.
    """
    if not hyperparam:
        return None
    self.hyperparameter_list.append(name)
    if isinstance(hyperparam, PhotonCategorical):
        return skoptCategorical(hyperparam.values, name=name)
    if isinstance(hyperparam, list):
        # A plain list is treated as a categorical choice set.
        return skoptCategorical(hyperparam, name=name)
    if isinstance(hyperparam, FloatRange):
        # linspace -> uniform prior, logspace -> log-uniform prior,
        # anything else -> skopt's default prior.
        prior_by_range = {"linspace": "uniform", "logspace": "log-uniform"}
        prior = prior_by_range.get(hyperparam.range_type)
        if prior is None:
            return Real(hyperparam.start, hyperparam.stop, name=name)
        return Real(hyperparam.start, hyperparam.stop, name=name, prior=prior)
    if isinstance(hyperparam, IntegerRange):
        return Integer(hyperparam.start, hyperparam.stop, name=name)