def full_grid(self):
    """Parse the user-supplied ``param_grid`` and build the skopt search space.

    Populates:
        self.param_types: the type tag (first element) of each param spec.
        self.param_lst:   the value spec of each parameter (choices for
                          categorical, [low, high] for numeric).
        self.param_names: parameter names in ``param_grid`` order.
        self.func_args:   argument names of ``self.fit``.
        self.dimensions:  skopt Integer/Real/Categorical dimensions.

    Raises:
        Exception: if a parameter's type tag is not one of int/discrete,
            float/continuous or grid/categorical.
    """
    self.param_types = [
        self.param_grid[item][0] for item in self.param_grid
    ]
    self.param_lst = []
    for i, item in enumerate(self.param_grid):
        if self.param_types[i] in ['grid', 'categorical']:
            # categorical specs carry their choices as one sequence
            self.param_lst.append(self.param_grid[item][1])
        else:
            # numeric specs carry [low, high] after the type tag
            self.param_lst.append(self.param_grid[item][1:])
    self.param_names = [item for item in self.param_grid]
    self.dimensions = []
    self.func_args = self.get_func_args(self.fit)
    for types, vals, names in zip(self.param_types, self.param_lst,
                                  self.param_names):
        if types in ['int', 'discrete']:
            lb = vals[0]
            ub = vals[1]
            self.dimensions.append(Integer(low=lb, high=ub, name=names))
        elif types in ['float', 'continuous']:
            lb = vals[0]
            ub = vals[1]
            self.dimensions.append(Real(low=lb, high=ub, name=names))
        elif types in ['grid', 'categorical']:
            real_grid = vals
            self.dimensions.append(
                Categorical(categories=tuple(real_grid), name=names))
        else:
            # BUG FIX: corrected the typo 'avaiable' -> 'available' in the
            # error message; everything else is unchanged.
            raise Exception(
                '--error: the param types must be one of int/discrete or '
                'float/continuous or grid/categorical, this type is not '
                'available: `{}`'.format(types))
def __init__(self, skopt_args=None, space=None):
    """Build a skopt ``Optimizer`` over the named parameters of ``space``.

    Parameters
    ----------
    skopt_args : dict, optional
        Extra keyword arguments forwarded to ``skopt.Optimizer``. The
        ``dimensions`` entry is overwritten here, and ``random_state``
        defaults to the current unix time when not supplied.
    space : optional
        Search space forwarded to the parent class; its ``named_params()``
        pairs are converted into skopt dimensions.

    Raises
    ------
    ValueError
        If scikit-optimize is not installed (module-level ``skopt`` is None).
    """
    super().__init__(space)
    if skopt is None:
        raise ValueError('scikit-optimize is not installed')
    skopt_dims = []
    param_names = []
    for n, p in self.space.named_params():
        if isinstance(p, Numeric):
            # integer-valued numerics map to Integer, the rest to Real
            if p.is_int():
                sd = Integer(*p.bound, name=n)
            else:
                sd = Real(*p.bound, name=n)
        elif isinstance(p, ParamCategorical):
            sd = Categorical(p.choices, name=n)
        else:
            # any other parameter kind is silently skipped
            continue
        skopt_dims.append(sd)
        param_names.append(n)
    skopt_args = skopt_args or {}
    skopt_args['dimensions'] = skopt_dims
    # default seed: current unix time (non-reproducible by design here)
    if 'random_state' not in skopt_args:
        skopt_args['random_state'] = int(time.time())
    self.param_names = param_names
    self.skoptim = Optimizer(**skopt_args)
def run(self):
    """Run Bayesian optimisation (``gp_minimize``) over one Real dimension
    per matrix name and store the skopt result in ``self.res``.
    """
    # no warm-start points are provided
    self.x0 = None
    self.y0 = None
    # one Real in [start, end] per matrix, named after the matrix
    space = [
        Real(self.start, self.end, name=x) for x in self.matrices_names
    ]
    self.res = gp_minimize(
        self.obiettivo,  # objective function to minimise
        space,
        base_estimator=None,
        n_calls=self.n_calls,
        n_random_starts=self.n_random_starts,
        acq_func='gp_hedge',
        acq_optimizer='auto',
        x0=self.x0,
        y0=self.y0,
        random_state=None,
        verbose=self.verbose,
        callback=None,
        n_points=self.n_points,
        n_restarts_optimizer=10,
        # NOTE(review): xi (exploration/exploitation trade-off) is driven
        # by self.step — confirm that is the intended mapping.
        xi=self.step,
        kappa=1.96,
        noise='gaussian',
        n_jobs=self.n_cpu)
def orion_space_to_skopt_space(orion_space):
    """Convert Oríon's definition of problem's domain to a skopt compatible."""
    dimensions = []
    for name, dim in orion_space.items():
        low, high = dim.interval()
        # NOTE: orion priors use a non-inclusive upper bound while
        # scikit-optimize bounds are inclusive, so step just below `high`.
        # pylint: disable = assignment-from-no-return
        high = numpy.nextafter(high, high - 1)
        shape = dim.shape
        assert not shape or len(shape) == 1
        size = shape[0] if shape else 1
        # Flatten the (possibly vector-valued) dimension into scalar Reals,
        # suffixing the name with the component index.
        dimensions.extend(
            Real(name='{}_{}'.format(name, i),
                 prior='uniform',
                 low=low,
                 high=high) for i in range(size))
    return Space(dimensions)
def getPipeRC(num_features):
    """
    Return a pipeline and a search space for a RidgeClassifier.

    Parameters
    ----------
    num_features: int
        The number of features that the estimator will be trained on.

    Returns
    -------
    Pipeline, dict
        A standard-scaler + RidgeClassifier pipeline together with the
        Bayesian search space over its regularisation strength.
    """
    from skopt.space import Real
    classifier = RidgeClassifier(solver='auto')
    steps = [('ss', StandardScaler()), ('ridgeclassifier', classifier)]
    search_space = {
        'ridgeclassifier__alpha': Real(MIN_SEARCH, 1.0, prior="uniform")
    }
    return (Pipeline(steps), search_space)
def indicator_space() -> List[Dimension]:
    """Hyperopt dimensions for the buy-side indicator parameters,
    grouped by the timeframe/stake they apply to."""
    base_timeframe = [
        Real(0.005, 0.015, name='base-bbdelta'),
        Real(0.01, 0.03, name='base-closedelta'),
        Real(0.10, 0.75, name='base-tail'),
        Real(0.75, 1.1, name='base-bblower'),
        Integer(5, 50, name='base-vms'),
    ]
    informative_timeframe = [
        Categorical(['lower', 'upper', 'both', 'none'], name='inf-guard'),
        Real(0.70, 0.99, name='inf-pct-adr-top'),
        Real(0.01, 0.20, name='inf-pct-adr-bot'),
    ]
    extra_btc_eth_stakes = [
        Integer(10, 70, name='xtra-inf-stake-rmi'),
        Integer(10, 70, name='xtra-base-stake-rmi'),
        Integer(10, 70, name='xtra-base-fiat-rmi'),
    ]
    extra_btc_stake = [
        Integer(10, 70, name='xbtc-base-rmi'),
        Integer(10, 70, name='xbtc-inf-rmi'),
    ]
    return (base_timeframe + informative_timeframe + extra_btc_eth_stakes +
            extra_btc_stake)
def optimize_xg(model_xg, X_tr, y_tr, X_vl=None, y_vl=None):
    """Tune ``model_xg``'s hyperparameters with Bayesian optimisation.

    The objective is the mean absolute error: 4-fold cross-validated on the
    training data when no validation split is given, otherwise the MAE on
    (X_vl, y_vl). Returns the skopt result object of ``gp_minimize``.
    """
    space = [
        Integer(50, 500, name='n_estimators'),
        Integer(1, 5, name='max_depth'),
        Integer(1, 10, name='min_child_weight'),
        Real(10 ** -5, 1, "log-uniform", name='learning_rate'),
        Integer(1, 10, name='max_delta_step'),
        Real(.1, 1, name='subsample'),
        Real(.1, 1, name='colsample_bytree'),
        Real(10 ** -5, 1, "log-uniform", name='reg_alpha'),
        Real(10 ** -5, 1, "log-uniform", name='reg_lambda'),
        Real(10 ** -5, 10, "log-uniform", name='gamma'),
    ]

    @use_named_args(space)
    def objective(**params):
        # Score one hyperparameter combination (lower is better).
        model_xg.set_params(**params)
        if X_vl is None or y_vl is None:
            # cross_val_score yields negative MAE; negate back to an error
            scores = cross_val_score(model_xg, X_tr, y_tr, cv=4,
                                     scoring="neg_mean_absolute_error")
            score = -np.mean(scores)
        else:
            model_xg.fit(X_tr, y_tr)
            y_vl_pred = model_xg.predict(X_vl)
            score = mean_absolute_error(y_vl, y_vl_pred)
        return score

    resp = gp_minimize(
        objective,
        space,
        n_calls=100,
        random_state=42,
        verbose=True,
        n_jobs=-1,
    )
    return resp
def test_space_consistency():
    """Equivalent ways of specifying a dimension must sample identically."""
    # Reals (uniform): explicit Real objects, int bounds, bare tuples and
    # tuples with an explicit "uniform" prior all define the same space.
    s1 = Space([Real(0.0, 1.0)]).rvs(n_samples=10, random_state=0)
    s2 = Space([Real(0.0, 1.0)]).rvs(n_samples=10, random_state=0)
    s3 = Space([Real(0, 1)]).rvs(n_samples=10, random_state=0)
    s4 = Space([(0.0, 1.0)]).rvs(n_samples=10, random_state=0)
    s5 = Space([(0.0, 1.0, "uniform")]).rvs(n_samples=10, random_state=0)
    assert_array_equal(s1, s2)
    assert_array_equal(s1, s3)
    assert_array_equal(s1, s4)
    assert_array_equal(s1, s5)

    # Reals (log-uniform): same checks with a log-uniform prior
    s1 = Space([Real(10**-3.0, 10**3.0,
                     prior="log-uniform")]).rvs(n_samples=10, random_state=0)
    s2 = Space([Real(10**-3.0, 10**3.0,
                     prior="log-uniform")]).rvs(n_samples=10, random_state=0)
    s3 = Space([Real(10**-3, 10**3,
                     prior="log-uniform")]).rvs(n_samples=10, random_state=0)
    s4 = Space([(10**-3.0, 10**3.0,
                 "log-uniform")]).rvs(n_samples=10, random_state=0)
    assert_array_equal(s1, s2)
    assert_array_equal(s1, s3)
    assert_array_equal(s1, s4)

    # Integers: int bounds, float bounds and bare int tuples agree
    s1 = Space([Integer(1, 5)]).rvs(n_samples=10, random_state=0)
    s2 = Space([Integer(1.0, 5.0)]).rvs(n_samples=10, random_state=0)
    s3 = Space([(1, 5)]).rvs(n_samples=10, random_state=0)
    assert_array_equal(s1, s2)
    assert_array_equal(s1, s3)

    # Categoricals: identical choice lists sample identically
    s1 = Space([Categorical(["a", "b", "c"])]).rvs(n_samples=10,
                                                   random_state=0)
    s2 = Space([Categorical(["a", "b", "c"])]).rvs(n_samples=10,
                                                   random_state=0)
    assert_array_equal(s1, s2)
def _hyperparameter_optimization(self,
                                 num_iterations=30,
                                 save_results=True,
                                 display_plot=False,
                                 batch_size=20,
                                 n_random_starts=10,
                                 use_TPU=False,
                                 transfer_model='Inception',
                                 cutoff_regularization=False,
                                 min_accuracy=None):
    """Bayesian hyperparameter search (gp_minimize) over the training
    hyperparameters of this model.

    min_accuracy: minimum value of categorical accuracy we want after 1 iteration
    num_iterations: number of hyperparameter combinations we try
    n_random_starts: number of random combinations of hyperparameters first tried
    """
    self.min_accuracy = min_accuracy
    self.batch_size = batch_size
    self.use_TPU = use_TPU
    self.transfer_model = transfer_model
    self.cutoff_regularization = cutoff_regularization

    # import scikit-optimize libraries
    from skopt import gp_minimize
    from skopt.space import Real, Categorical, Integer
    from skopt.plots import plot_convergence
    from skopt.utils import use_named_args

    # declare the hyperparameters search space
    dim_epochs = Integer(low=1, high=10, name='epochs')
    dim_hidden_size = Integer(low=6, high=2048, name='hidden_size')
    dim_learning_rate = Real(low=1e-6, high=1e-2, prior='log-uniform',
                             name='learning_rate')
    dim_dropout = Real(low=0, high=0.9, name='dropout')
    dim_fine_tuning = Categorical(categories=[True, False],
                                  name='fine_tuning')
    dim_nb_layers = Integer(low=1, high=3, name='nb_layers')
    dim_activation = Categorical(categories=['relu', 'tanh'],
                                 name='activation')
    dim_include_class_weight = Categorical(categories=[True, False],
                                           name='include_class_weight')
    dimensions = [
        dim_epochs, dim_hidden_size, dim_learning_rate, dim_dropout,
        dim_fine_tuning, dim_nb_layers, dim_activation,
        dim_include_class_weight
    ]

    # Warm start from the previous optimization, if one was saved.
    # BUG FIX: the original read from '.../trained_model/...' while results
    # are saved below to '.../trained_models/...', and it accessed `.x` on
    # the loaded object even though only the plain list `search_result.x`
    # is pickled — so the warm start could never succeed. Read from the
    # save path and use the loaded list directly.
    try:
        with open(
                parentdir +
                '/data/trained_models/hyperparameters_search.pickle',
                'rb') as f:
            default_parameters = dill.load(f)
        print('parameters of previous optimization loaded!')
    except Exception:
        # fall back default values
        default_parameters = [5, 1024, 1e-4, 0, True, 1, 'relu', True]

    self.number_iterations = 0

    # declare the fitness function
    @use_named_args(dimensions=dimensions)
    def fitness(epochs, hidden_size, learning_rate, dropout, fine_tuning,
                nb_layers, activation, include_class_weight):
        self.number_iterations += 1

        # print the hyper-parameters
        print('epochs:', epochs)
        print('hidden_size:', hidden_size)
        print('learning rate:', learning_rate)
        print('dropout:', dropout)
        print('fine_tuning:', fine_tuning)
        print('nb_layers:', nb_layers)
        print('activation:', activation)
        print('include_class_weight', include_class_weight)
        print()

        # fit the model
        self.fit(epochs=epochs,
                 hidden_size=hidden_size,
                 learning_rate=learning_rate,
                 dropout=dropout,
                 fine_tuning=fine_tuning,
                 nb_layers=nb_layers,
                 activation=activation,
                 include_class_weight=include_class_weight,
                 batch_size=self.batch_size,
                 use_TPU=self.use_TPU,
                 transfer_model=self.transfer_model,
                 min_accuracy=self.min_accuracy,
                 cutoff_regularization=self.cutoff_regularization)

        # extract fitness
        fitness = self.fitness
        print('CALCULATED FITNESS AT ITERATION', self.number_iterations,
              'OF:', fitness)
        print()

        # free the keras model/session between evaluations
        del self.model
        K.clear_session()
        # gp_minimize minimises, so negate the accuracy-like fitness
        return -1 * fitness

    # optimization
    self.search_result = gp_minimize(
        func=fitness,
        dimensions=dimensions,
        acq_func='EI',  # Expected Improvement.
        n_calls=num_iterations,
        n_random_starts=n_random_starts,
        x0=default_parameters)

    if save_results:
        if not os.path.exists(parentdir + '/data/trained_models'):
            os.makedirs(parentdir + '/data/trained_models')
        with open(
                parentdir +
                '/data/trained_models/hyperparameters_dimensions.pickle',
                'wb') as f:
            dill.dump(dimensions, f)
        with open(
                parentdir +
                '/data/trained_models/hyperparameters_search.pickle',
                'wb') as f:
            dill.dump(self.search_result.x, f)
        print("Hyperparameter search saved!")

    if display_plot:
        plot_convergence(self.search_result)

    # build results dictionary
    results_dict = {
        dimensions[i].name: self.search_result.x[i]
        for i in range(len(dimensions))
    }
    print('Optimal hyperameters found of:')
    print(results_dict)
    print()
    print('Optimal fitness value of:', -float(self.search_result.fun))
    # --- tail of a (truncated) training loop: accept the new state, advance
    # the iteration counter, then score on the validation split ---
    S = Snew
    it = it + 1

    # predicted ratings: row-wise dot product of user/item factor rows
    yhat_valid = np.sum(
        np.multiply((P[x_valid[:, 0], :]), (Q[x_valid[:, 1], :])), 1)
    #yhat_valid=np.round(yhat_valid,decimals=4)
    RMSE = sqrt(
        (y_valid - yhat_valid) @ (y_valid - yhat_valid) / y_valid.size)
    #MAE=sum(abs((y_valid-yhat_valid)))/y_valid.size
    return RMSE, yhat_valid


# 2-layered gssvd rmse focused k =4,...,10

# In[ ]:

# Twelve uniform regularisation weights in (1e-7, 1] forming the skopt
# search space; lower-case l* and upper-case L* name the two layers.
lp = Real(low=1e-7, high=1, prior='uniform', name='lp')
ls = Real(low=1e-7, high=1, prior='uniform', name='ls')
la = Real(low=1e-7, high=1, prior='uniform', name='la')
lq = Real(low=1e-7, high=1, prior='uniform', name='lq')
lt = Real(low=1e-7, high=1, prior='uniform', name='lt')
lb = Real(low=1e-7, high=1, prior='uniform', name='lb')
Lp = Real(low=1e-7, high=1, prior='uniform', name='Lp')
Ls = Real(low=1e-7, high=1, prior='uniform', name='Ls')
La = Real(low=1e-7, high=1, prior='uniform', name='La')
Lq = Real(low=1e-7, high=1, prior='uniform', name='Lq')
Lt = Real(low=1e-7, high=1, prior='uniform', name='Lt')
# NOTE(review): Lb uses low=1e-6 while every other weight uses 1e-7 —
# confirm this asymmetry is intentional.
Lb = Real(low=1e-6, high=1, prior='uniform', name='Lb')

dimensions = [lp, ls, la, lq, lt, lb, Lp, Ls, La, Lq, Lt, Lb]

# In[ ]:
def setup_dimensions(self, x_min, x_max, y_min, y_max, n_blocks):
    """Return a flat skopt search space holding one (x, y) pair of Real
    dimensions per block: [x0, y0, x1, y1, ...]."""
    space = []
    for _ in range(n_blocks):
        space += [Real(x_min, x_max), Real(y_min, y_max)]
    return space
def get_hyperspace(self):
    """ Create integer HyperSpaces. """
    # Both dimensions share the same prior and transform settings.
    shared = dict(prior=self.prior, transform=self.transform)
    space0 = Real(self.space0_low, self.space0_high, **shared)
    space1 = Real(self.space1_low, self.space1_high, **shared)
    return space0, space1
    # --- tail of a (truncated) Keras model builder: 3-way softmax head ---
    model.add(Dense(3, activation = 'softmax'))
    return model


def model_fit(model, x_train, y_train, x_valid, y_valid, b_size):
    """Compile with a fixed Adam learning rate, train with early stopping on
    validation accuracy, and return the final *training* accuracy."""
    adam = Adam(lr = 0.005)
    model.compile(loss = 'categorical_crossentropy',
                  optimizer = adam,
                  metrics = ['accuracy'])
    callback = EarlyStopping(monitor = 'val_acc',
                             patience = 10,
                             verbose = 1,
                             mode = 'auto')
    model.fit(x_train,
              np_utils.to_categorical(y_train),
              epochs = 1000,
              batch_size = b_size,
              validation_data = (x_valid, np_utils.to_categorical(y_valid)),
              callbacks = [callback])
    loss, acc = model.evaluate(x_train, np_utils.to_categorical(y_train))
    return acc


# Search space: two filter-count choices, two dropout rates, batch size.
space = [
    Categorical([20, 16, 12, 8], name = 'filter0'),
    Categorical([48, 32, 24, 20], name = 'filter1'),
    Real(0, 0.5, name = 'rate0'),
    Real(0, 0.5, name = 'rate1'),
    Categorical([32, 64, 128], name = 'b_size')
]


@use_named_args(space)
def objective0(filter0, filter1, rate0, rate1, b_size):
    """skopt objective: negated accuracy of an LSTM model."""
    from sklearn.model_selection import StratifiedKFold
    skf = StratifiedKFold(n_splits = 3)
    for train_index, valid_index in skf.split(train_x, train_y):
        x_train, x_valid = train_x[train_index], train_x[valid_index]
        y_train, y_valid = train_y[train_index], train_y[valid_index]
        model = lstmmodel(filter0, filter1, rate0, rate1)
        fitting = model_fit(model, x_train, y_train, x_valid, y_valid,
                            b_size)
        # NOTE(review): returning here means only the first CV split is
        # evaluated — confirm whether an average over folds was intended.
        return -fitting
def hyperparams_tuning(recommender_class, URM_train, URM_validation,
                       URM_test):
    """Bayesian (skopt) hyperparameter tuning for a recommender class.

    Validates at cutoff 5 during the search, reports on URM_test at
    cutoffs 5 and 10, and returns the best parameters found.
    """
    # Step 1: Import the evaluator objects
    print("Evaluator objects ... ")
    cutoff = 5
    evaluator_validation = EvaluatorHoldout(URM_validation,
                                            cutoff_list=[cutoff])
    evaluator_test = EvaluatorHoldout(URM_test,
                                      cutoff_list=[cutoff, cutoff + 5])
    # evaluator_validation_earlystopping = EvaluatorHoldout(URM_train, cutoff_list=[cutoff, cutoff+5], exclude_seen=False)

    # Step 2: Create BayesianSearch object
    print("BayesianSearch objects ... ")
    parameterSearch = SearchBayesianSkopt(
        recommender_class,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test)

    # Step 3: Define parameters range
    print("Parameters range ...")
    # n_cases = 8
    # n_random_starts = int(n_cases / 3)  # 5
    n_cases = 2
    metric_to_optimize = "MAP"
    output_file_name_root = "{}_metadata.zip".format(
        recommender_class.RECOMMENDER_NAME)

    # search space: topK plus the two elastic-net style regularisers
    hyperparameters_range_dictionary = {}
    hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
    hyperparameters_range_dictionary["l1_ratio"] = Real(low=1e-5,
                                                        high=1.0,
                                                        prior='log-uniform')
    hyperparameters_range_dictionary["alpha"] = Real(low=1e-3,
                                                     high=1.0,
                                                     prior='uniform')

    # earlystopping_keywargs = {"validation_every_n": 5,
    #                           "stop_on_validation": True,
    #                           "evaluator_object": evaluator_validation_earlystopping,  # or evaluator_validation
    #                           "lower_validations_allowed": 5,
    #                           "validation_metric": metric_to_optimize,
    #                           }

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={}  # earlystopping_keywargs
    )

    output_folder_path = "../results/result_experiments/"
    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Step 4: run
    best_parameters = parameterSearch.search(
        recommender_input_args,  # the function to minimize
        parameter_search_space=hyperparameters_range_dictionary,  # the bounds on each dimension of x
        n_cases=n_cases,  # the number of evaluations of f
        n_random_starts=1,  # the number of random initialization points
        #n_random_starts = int(n_cases/3),
        save_model="no",
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize)

    print("best_parameters", best_parameters)

    # Step 5: return best_parameters
    # from utils.DataIO import DataIO
    # data_loader = DataIO(folder_path=output_folder_path)
    # search_metadata = data_loader.load_data(recommender_class.RECOMMENDER_NAME + "_metadata.zip")
    # print("search_metadata", search_metadata)
    # best_parameters = search_metadata["hyperparameters_best"]
    return best_parameters
def doGradBoost(X, y, X_test, y_test, n_jobs, feature_labels, class_labels,
                bayesOpt=False, acqFunc='', num_iter=25):
    """Run a boosted-tree experiment through ``executeML`` (optionally with
    Bayesian optimisation) and print the fitted feature importances."""
    # (previous gradient-boosting search space kept for reference)
    # MIN_SEARCH = 2e-12
    # num_features = 67
    # search_space = {
    #     'gb__n_estimators': Integer(50, 150),
    #     'gb__max_features': Real(MIN_SEARCH, 1.0, prior='uniform'),
    #     'gb__criterion': Categorical(['friedman_mse', 'mse']),  # mae is very slow.
    #     'gb__min_samples_split': Real(MIN_SEARCH, 1.0, prior='log-uniform'),
    #     'gb__min_samples_leaf': Real(MIN_SEARCH, 0.5, prior='log-uniform'),
    #     'gb__max_depth': Integer(2, num_features),
    # }
    # NOTE(review): the active search space targets 'rf__' pipeline steps
    # and the pipeline comes from getPipeRF(), although this function is
    # named for gradient boosting and `parameters` uses 'gb__' keys —
    # confirm which estimator is intended.
    search_space = {
        'rf__max_depth': Integer(1, 55),
        "rf__max_features": Real(.05, 1.0, prior='log-uniform')
    }
    parameters = {
        'gb__max_depth': [i for i in range(5, 60, 5)],
        "gb__max_features": [i / 10.0 for i in range(1, 11)]
    }
    best_estimator = executeML(X, y, X_test, y_test, n_jobs,
                               feature_labels, class_labels, getPipeRF(),
                               parameters, "GB" + acqFunc,
                               bayesOpt=bayesOpt,
                               search_space=search_space,
                               n_iter=num_iter,
                               acq_func=acqFunc)
    try:
        # the fitted pipeline is expected to expose a 'gb' step
        imp = best_estimator.named_steps['gb'].feature_importances_
        feature_importances = sorted(zip(feature_labels, imp),
                                     key=lambda fi: fi[1],
                                     reverse=True)
        print("")
        print("Grad boost feature importances.")
        i = 1
        for item in feature_importances:
            print(i, str(item))
            i += 1
        print("")
        sys.stdout.flush()
    except AttributeError:
        # best-effort reporting only: a missing step/attribute is not fatal
        print("How to get the feature importances for GB?")
    return best_estimator
#INPATH = '/imdata/error_log_analysis/data/' OUTPATH = '/nfshome/llayer/AIErrorLogAnalysis/experiments/' # Average the vectors AVG_W2V = False FOLDS = 3 # Include counts MSG_ONLY = False PRUNING = 'Neg' if AVG_W2V == False: # Skopt dimensions SKOPT_DIM = [ Real( low=1e-5, high=1e-3, prior='log-uniform', name='learning_rate' ), Real( low=1e-3, high=0.1, prior='log-uniform', name='dropout' ), #Real( low=1e-4, high=0.9, prior="log-uniform", name='l2_regulizer' ), Integer( low=5, high=32, name='embedding' ), Integer( low=2, high=32, name='rnn_units' ), #Integer( low=2, high = 20, name = 'units_site' ), Integer( low=1, high=5, name='dense_layers' ), Integer( low=10, high=50, name='dense_units' ), #Integer( low=2, high=20, name='att_units' ), #Integer( low=0, high=1, name='encode_sites' ), #Integer( low=0, high=1, name='train_embedding' ), ] # batch_size and epochs BATCH_SIZE = 1 MAX_EPOCHS = 12
    # --- tail of a (truncated) confusion-matrix metrics function ---
    print('tp:', true_positive, 'fp:', false_positive)
    print('fn:', false_negative, 'tn:', true_negative)
    precision = float(true_positive) / (float(true_positive) +
                                        float(false_positive))
    recall = float(true_positive) / (float(true_positive) +
                                     float(false_negative))
    f1_score = (2 * precision * recall) / (precision + recall)
    acc = (true_positive + true_negative) / (true_positive + false_positive +
                                             false_negative + true_negative)
    TNR = float(true_negative) / (float(false_positive) +
                                  float(true_negative))
    return precision, recall, f1_score, acc, TNR


# target params — skopt dimensions for the tree-model hyperparameters
max_depth = Integer(low=1, high=32, name='max_depth')
min_samples_split = Real(low=0.1, high=1, name='min_samples_split')
min_samples_leaf = Real(low=0.1, high=0.5, name='min_samples_leaf')
max_features = Integer(low=1, high=65, name="max_features")
dimensions = [max_depth, min_samples_split, min_samples_leaf, max_features]
default_parameters = [5, 0.2, 0.2, 15]

# input prepare
# Input limits: number of articles and sentences/paragraphs per article
file_num_limit = 45614  # total 45614
paras_limit = 20
onehotlabels, stats_features = prepare_input(file_num_limit)

# standardise stats_features
scaler = preprocessing.StandardScaler()  # instantiate
@pytest.mark.fast_test @pytest.mark.parametrize("dimensions, normalizations", [ (((1, 3), (1., 3.)), ('normalize', 'normalize')), (((1, 3), ('a', 'b', 'c')), ('normalize', 'onehot')), ]) def test_normalize_dimensions(dimensions, normalizations): space = normalize_dimensions(dimensions) for dimension, normalization in zip(space, normalizations): assert dimension.transform_ == normalization @pytest.mark.fast_test @pytest.mark.parametrize( "dimension, name", [(Real(1, 2, name="learning rate"), "learning rate"), (Integer(1, 100, name="no of trees"), "no of trees"), (Categorical(["red, blue"], name="colors"), "colors")]) def test_normalize_dimensions(dimension, name): space = normalize_dimensions([dimension]) assert space.dimensions[0].name == name @pytest.mark.fast_test def test_use_named_args(): """ Test the function wrapper @use_named_args which is used for wrapping an objective function with named args so it can be called by the optimizers which only pass a single list as the arg.
class CatBoostClassifier(Estimator):
    """
    CatBoost Classifier, a classifier that uses gradient-boosting on decision trees.
    CatBoost is an open-source library and natively supports categorical features.

    For more information, check out https://catboost.ai/
    """
    name = "CatBoost Classifier"
    # Bayesian-search ranges for the tunable hyperparameters
    hyperparameter_ranges = {
        "n_estimators": Integer(4, 100),
        "eta": Real(0.000001, 1),
        "max_depth": Integer(4, 10),
    }
    model_family = ModelFamily.CATBOOST
    supported_problem_types = [
        ProblemTypes.BINARY, ProblemTypes.MULTICLASS,
        ProblemTypes.TIME_SERIES_BINARY, ProblemTypes.TIME_SERIES_MULTICLASS
    ]
    # bounds used to derive a valid catboost random seed from random_state
    SEED_MIN = 0
    SEED_MAX = SEED_BOUNDS.max_bound

    def __init__(self,
                 n_estimators=10,
                 eta=0.03,
                 max_depth=6,
                 bootstrap_type=None,
                 silent=True,
                 allow_writing_files=False,
                 random_state=0,
                 **kwargs):
        """Create the underlying catboost.CatBoostClassifier.

        ``bootstrap_type`` is only forwarded to catboost when explicitly
        provided, so that catboost can pick its own default otherwise.
        Raises an import error (via import_or_raise) if catboost is
        missing.
        """
        random_seed = get_random_seed(random_state, self.SEED_MIN,
                                      self.SEED_MAX)
        parameters = {
            "n_estimators": n_estimators,
            "eta": eta,
            "max_depth": max_depth,
            'bootstrap_type': bootstrap_type,
            'silent': silent,
            'allow_writing_files': allow_writing_files
        }
        parameters.update(kwargs)

        cb_error_msg = "catboost is not installed. Please install using `pip install catboost.`"
        catboost = import_or_raise("catboost", error_msg=cb_error_msg)
        # set by fit() when binary labels get encoded
        self._label_encoder = None
        # catboost will choose an intelligent default for bootstrap_type, so only set if provided
        cb_parameters = copy.copy(parameters)
        if bootstrap_type is None:
            cb_parameters.pop('bootstrap_type')
        cb_classifier = catboost.CatBoostClassifier(**cb_parameters,
                                                    random_seed=random_seed)
        super().__init__(parameters=parameters,
                         component_obj=cb_classifier,
                         random_state=random_state)

    def fit(self, X, y=None):
        """Fit on woodwork-convertible X/y; columns typed 'category' are
        passed to catboost as cat_features. Returns self."""
        X = _convert_to_woodwork_structure(X)
        cat_cols = list(X.select('category').columns)
        X = _convert_woodwork_types_wrapper(X.to_dataframe())
        y = _convert_to_woodwork_structure(y)
        y = _convert_woodwork_types_wrapper(y.to_series())

        # For binary classification, catboost expects numeric values, so
        # encoding before.
        if y.nunique() <= 2:
            self._label_encoder = LabelEncoder()
            y = pd.Series(self._label_encoder.fit_transform(y))
        self._component_obj.fit(X, y, silent=True, cat_features=cat_cols)
        return self

    def predict(self, X):
        """Predict labels: flattens catboost's (n, 1) output and inverts the
        label encoding applied during fit, when one was used."""
        X = _convert_to_woodwork_structure(X)
        X = _convert_woodwork_types_wrapper(X.to_dataframe())
        predictions = self._component_obj.predict(X)
        if predictions.ndim == 2 and predictions.shape[1] == 1:
            predictions = predictions.flatten()
        if self._label_encoder:
            predictions = self._label_encoder.inverse_transform(
                predictions.astype(np.int64))
        return _convert_to_woodwork_structure(predictions)

    @property
    def feature_importance(self):
        # feature importances reported by the fitted catboost model
        return self._component_obj.get_feature_importance()
############################################################################### if __name__ == "__main__": ################################### # Select Optimization Options # ################################### #=== Number of Iterations ===# n_calls = 10 #=== Select Hyperparameters of Interest ===# hyperp_of_interest_dict = {} hyperp_of_interest_dict['num_hidden_layers_encoder'] = Integer( 5, 10, name='num_hidden_layers_encoder') hyperp_of_interest_dict['num_hidden_nodes_encoder'] = Integer( 100, 1000, name='num_hidden_nodes_encoder') # hyperp_of_interest_dict['activation'] = Categorical(['relu', 'elu', 'sigmoid', 'tanh'], name='activation') hyperp_of_interest_dict['penalty_js'] = Real(0, 1, name='penalty_js') #hyperp_of_interest_dict['batch_size'] = Integer(100, 500, name='batch_size') ##################### # Initial Setup # ##################### #=== Generate skopt 'space' list ===# space = [] for key, val in hyperp_of_interest_dict.items(): space.append(val) #=== Hyperparameters ===# with open('../config_files/hyperparameters_vae.yaml') as f: hyperp = yaml.safe_load(f) hyperp = AttrDict(hyperp)
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=30):
    """Run a Bayesian (skopt) hyperparameter search for one collaborative
    recommender class.

    The function dispatches on ``recommender_class``: non-personalised
    baselines (TopPop/Random) are fit and evaluated directly; KNN variants
    are searched once per similarity type (optionally in parallel); every
    other supported class gets a hyperparameter range dictionary and a
    single ``parameterSearch.search`` run. Results and errors are written
    under ``output_folder_path``.
    """
    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    try:
        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = BayesianSkoptSearch(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        # Non-personalised baselines: fit once, evaluate on validation and
        # test, persist the results, and return.
        if recommender_class in [TopPop, Random]:
            recommender = recommender_class(URM_train)
            recommender.fit()
            output_file = open(
                output_folder_path + output_file_name_root +
                "_BayesianSearch.txt", "a")
            result_dict, result_baseline = \
                evaluator_validation.evaluateRecommender(recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_validation. Results: {}"
                .format(result_baseline))
            pickle.dump(result_dict.copy(),
                        open(
                            output_folder_path + output_file_name_root +
                            "_best_result_validation", "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)
            result_dict, result_baseline = \
                evaluator_test.evaluateRecommender(recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_test. Results: {}"
                .format(result_baseline))
            pickle.dump(result_dict.copy(),
                        open(
                            output_folder_path + output_file_name_root +
                            "_best_result_test", "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)
            output_file.close()
            return

        ##########################################################################################################

        if recommender_class is UserKNNCFRecommender:
            similarity_type_list = [
                'cosine', 'jaccard', "asymmetric", "dice", "tversky"
            ]
            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize)
            if parallelizeKNN:
                # one subprocess per similarity type, two at a time
                pool = PoolWithSubprocess(processes=int(2),
                                          maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)
            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)
            return

        ##########################################################################################################

        if recommender_class is ItemKNNCFRecommender:
            similarity_type_list = [
                'cosine', 'jaccard', "asymmetric", "dice", "tversky"
            ]
            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize)
            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2),
                                          maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)
            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)
            return

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = Integer(5, 800)
            hyperparamethers_range_dictionary["alpha"] = Real(
                low=0, high=2, prior='uniform')
            hyperparamethers_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])
            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = Integer(5, 800)
            hyperparamethers_range_dictionary["alpha"] = Real(
                low=0, high=2, prior='uniform')
            hyperparamethers_range_dictionary["beta"] = Real(
                low=0, high=2, prior='uniform')
            hyperparamethers_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])
            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = Categorical(
                ["adagrad", "adam"])
            #hyperparamethers_range_dictionary["epochs"] = Integer(1, 150)
            hyperparamethers_range_dictionary["num_factors"] = Integer(1, 150)
            hyperparamethers_range_dictionary["reg"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            hyperparamethers_range_dictionary["learning_rate"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            # NOTE(review): the key 'lower_validatons_allowed' below is a
            # runtime dict key consumed elsewhere — its spelling must not
            # be "fixed" here without changing the consumer too.
            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_BPR_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = Categorical(
                ["adagrad", "adam"])
            #hyperparamethers_range_dictionary["epochs"] = Integer(1, 150)
            hyperparamethers_range_dictionary["num_factors"] = Integer(1, 150)
            hyperparamethers_range_dictionary["batch_size"] = Categorical([1])
            hyperparamethers_range_dictionary["positive_reg"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            hyperparamethers_range_dictionary["negative_reg"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            hyperparamethers_range_dictionary["learning_rate"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'positive_threshold': 0
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["num_factors"] = Integer(1, 250)
            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = Integer(5, 800)
            #hyperparamethers_range_dictionary["epochs"] = Integer(1, 150)
            hyperparamethers_range_dictionary["sgd_mode"] = Categorical(
                ["adagrad", "adam"])
            hyperparamethers_range_dictionary["lambda_i"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            hyperparamethers_range_dictionary["lambda_j"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'train_with_sparse_weights': False,
                    'symmetric': False,
                    'positive_threshold': 0
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 10,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is SLIMElasticNetRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = Integer(5, 800)
            hyperparamethers_range_dictionary["l1_ratio"] = Real(
                low=1e-5, high=1.0, prior='log-uniform')
            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        best_parameters = parameterSearch.search(
            recommenderDictionary,
            n_cases=n_cases,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize)

    except Exception as e:
        # log the failure both to stdout and to a shared error file, then
        # continue (the caller may be iterating over many classes)
        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        error_file = open(output_folder_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(
            recommender_class, str(e)))
        error_file.close()
# NOTE: Python 2 syntax (print statements) — this module is Python 2 only.
# Wrap the external training script as an objective with one named
# parameter per dimension.
externalize = externalfunc(prog='python run_train_ex.py',
                           names = ['par%s'%d for d in range(n_par)])

run_for = 20
use_func = externalize
# optional command-line override of the objective backend
if len(sys.argv)>1:
    do = sys.argv[1]
    if do=='threaded':
        use_func = dummy_func
    elif do=='external':
        use_func = externalize

# one unnamed Real in [-20, 20] per parameter
dim = [Real(-20, 20) for i in range(n_par)]
start = time.mktime(time.gmtime())
res = gp_minimize(
    func=use_func,
    dimensions=dim,
    n_calls = run_for,
)
print "GPM best value",res.fun,"at",res.x
#print res
print "took",time.mktime(time.gmtime())-start,"[s]"

# (truncated) the Optimizer construction continues beyond this excerpt
o = Optimizer(
    n_initial_points =5,
class CatBoostRegressor(Estimator):
    """CatBoost Regressor, a regressor that uses gradient-boosting on decision trees.
    CatBoost is an open-source library and natively supports categorical features.

    For more information, check out https://catboost.ai/
    """
    name = "CatBoost Regressor"
    hyperparameter_ranges = {
        "n_estimators": Integer(4, 100),
        "eta": Real(0.000001, 1),
        "max_depth": Integer(4, 10),
    }
    model_family = ModelFamily.CATBOOST
    supported_problem_types = [ProblemTypes.REGRESSION,
                               ProblemTypes.TIME_SERIES_REGRESSION]
    # Bounds used to map an arbitrary random_state into a catboost-safe seed.
    SEED_MIN = 0
    SEED_MAX = SEED_BOUNDS.max_bound

    def __init__(self,
                 n_estimators=10,
                 eta=0.03,
                 max_depth=6,
                 bootstrap_type=None,
                 silent=False,
                 allow_writing_files=False,
                 random_state=0,
                 **kwargs):
        seed = get_random_seed(random_state, self.SEED_MIN, self.SEED_MAX)

        component_parameters = {
            "n_estimators": n_estimators,
            "eta": eta,
            "max_depth": max_depth,
            "bootstrap_type": bootstrap_type,
            "silent": silent,
            "allow_writing_files": allow_writing_files,
        }
        component_parameters.update(kwargs)

        cb_error_msg = "catboost is not installed. Please install using `pip install catboost.`"
        catboost = import_or_raise("catboost", error_msg=cb_error_msg)

        # catboost will choose an intelligent default for bootstrap_type, so
        # only forward it when the caller supplied one explicitly.
        cb_kwargs = copy.copy(component_parameters)
        if bootstrap_type is None:
            cb_kwargs.pop("bootstrap_type")
        estimator = catboost.CatBoostRegressor(**cb_kwargs, random_seed=seed)

        super().__init__(parameters=component_parameters,
                         component_obj=estimator,
                         random_state=random_state)

    def fit(self, X, y=None):
        """Fit the underlying catboost model.

        Columns selected by the woodwork 'category' type are forwarded to
        catboost as `cat_features`.
        """
        X_ww = _convert_to_woodwork_structure(X)
        categorical_columns = list(X_ww.select('category').columns)
        X_frame = _convert_woodwork_types_wrapper(X_ww.to_dataframe())
        y_series = _convert_woodwork_types_wrapper(
            _convert_to_woodwork_structure(y).to_series())
        self._component_obj.fit(X_frame, y_series, silent=True,
                                cat_features=categorical_columns)
        return self

    @property
    def feature_importance(self):
        """Feature importances as reported by the fitted catboost model."""
        return self._component_obj.get_feature_importance()
# Output artifacts: pickled skopt result checkpoint and CSV evaluation log.
filename_skopt = "400_RP3_12.pkl"
filename_csv = "400_RP3.csv"

# Build the evaluation datasets in parallel. NOTE(review): `gen_dataset`,
# `parallelism`, `n_dataset`, `type_of_user`, `n_load_and_rerun`, `acq_func`,
# `n_calls` and the imports are defined outside this chunk.
dataset_list = Parallel(n_jobs=parallelism)(delayed(gen_dataset)(x + 20)
                                            for x in range(n_dataset))
eval = Evaluator(dataset_list=dataset_list,  # NOTE(review): name shadows builtin `eval`
                 type_of_user=type_of_user,
                 parallelism=parallelism,
                 filename_csv=filename_csv)

# Search space: three integer hyperparameters plus one real in [0, 0.75].
hyperparameters = [ Integer(2, 7), Integer(0, 4), Integer(0, 30), Real(0, 0.75) ]

# Repeatedly reload the pickled checkpoint and resume the optimization.
for _ in range(n_load_and_rerun):
    try:
        with open(filename_skopt, "rb") as f:
            res_loaded = load(f)
            f.close()  # NOTE(review): redundant — the `with` block already closes f
        res = forest_minimize(
            eval.eval,  # the function to minimize
            hyperparameters,  # the bounds on each dimension of x
            acq_func=acq_func,  # the acquisition function
            # acq_optimizer=acq_optimizer,  # the acquisition function
            n_calls=n_calls,  # the number of evaluations of f
            n_random_starts=
            # NOTE(review): call truncated — the remaining arguments (and the
            # `except` matching this `try`) continue past the visible chunk.
# Keras / TF model-building imports (tensorflow.python.keras internal paths).
from tensorflow.python.keras.layers import Conv2D, Dense, Flatten, CuDNNLSTM, ConvLSTM2D
from tensorflow.python.keras.callbacks import TensorBoard
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.models import save_model, load_model, Model
from tensorflow.python.keras.utils import multi_gpu_model
from sklearn.preprocessing import MinMaxScaler

# Scikit Optimizer
import skopt
from skopt import gp_minimize, forest_minimize
from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args

# This is where you set your hyperparameters for the model
# Hyperparameter tuning
# The learning rate is searched on a log scale; the remaining dimensions are
# integer-valued architecture/training knobs.
dim_learning_rate = Real(low=1e-5, high=1e-3, prior='log-uniform',
                         name='learning_rate')
dim_num_dense_layers = Integer(low=0, high=2, name='num_dense_layers')
dim_num_epochs = Integer(low=50, high=250, name='num_epochs')
dim_num_conv_layers = Integer(low=1, high=4, name='num_conv_layers')
dim_kernel_size = Integer(low=3, high=10, name='kernel_size')
dim_num_filters = Integer(low=16, high=64, name='num_filters')

# NOTE(review): this list is truncated — the remaining entries continue past
# the end of the visible chunk.
dimensions = [dim_learning_rate, dim_num_dense_layers, dim_num_epochs,
# Regular Expression to parse score from text output REGEX = re.compile(r'Score: (\d\.\d+)') # Make sure user calls tune.py in the right way if len(sys.argv) == 2: TARGET = int(sys.argv[1]) else: print("usage: python tune.py <1/2/3>") exit() # Hyperparameter Space # space = [ Integer(50, 250, name="population_size"), Integer(2, 6, name="tournament_size"), Integer(1, 5, name="crossover_points"), Real(0.1, 0.35, name="mutation_probability"), Real(1, 2, name="max_sigma"), Real(0, 1, name="learning_rate"), Real(0, 10, name="novelty_threshold"), Real(0, 1, name="linearblend"), Real(0, 0.0001, name="linearblend_delta"), Integer(2, 6, name="nearestNeighbours") ] @use_named_args(space) def evaluate(**parameters: dict): """ Evalute Evolutionary Algorithm using 'parameters' as Hyperparameters Since we're dealing with a Stochastic Algorithm, take the mean over several runs as score """
def indicator_space() -> List[Dimension]:
    """Return the hyperopt search-space dimensions for the indicator guards.

    Dimensions:
        macd     -- continuous threshold in [-1.0, 1.0]
        macdhist -- continuous threshold in [-1.0, 1.0]
        rmi      -- integer threshold in [40, 90]
    """
    return [
        Real(-1.00, 1.00, name='macd'),
        # FIX: was Integer(-1.00, 1.00) — skopt's Integer expects integer
        # bounds, and an integer dimension would quantize this continuous
        # threshold to {-1, 0, 1}. Use Real, consistent with 'macd'.
        Real(-1.00, 1.00, name='macdhist'),
        Integer(40, 90, name='rmi'),
    ]
# Search over two different model families inside one Pipeline: the 'model'
# step itself is part of the search space, so each candidate space pins the
# model class via a single categorical value of the 'model' parameter.
pipe = Pipeline([('model', SVC())])

# Dimensions can be given explicitly as skopt space objects...
svc_search = dict(
    model=Categorical([SVC()]),
    model__C=Real(1e-6, 1e+6, prior='log-uniform'),
    model__gamma=Real(1e-6, 1e+1, prior='log-uniform'),
    model__degree=Integer(1, 8),
    model__kernel=Categorical(['linear', 'poly', 'rbf']),
)

# ...or as plain values/tuples that the search infers dimensions from.
# We will get ConvergenceWarnings because the problem is not well-conditioned.
# But that's fine, this is just an example.
linsvc_search = dict(
    model=[LinearSVC(max_iter=1000)],
    model__C=(1e-6, 1e+6, 'log-uniform'),
)

# Each (space, n_evaluations) pair gets its own share of the search budget.
search_spaces = [(svc_search, 40), (linsvc_search, 16)]
opt = WeightedBayesSearchCV(pipe, search_spaces, cv=3)

opt.fit(X_train, y_train)
print("val. score: %s" % opt.best_score_)
print("test score: %s" % opt.score(X_test, y_test))
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from utility import HyperParameters, Runner
from model import load_clean_sample_data_frame, ordinal_data_mapper

# Experiment configuration. NOTE(review): `sample` is unused in the visible
# chunk — presumably consumed elsewhere in the file; verify before removing.
sample = None
iterations = 24

# Search space over the 'dt' (DecisionTreeClassifier) pipeline step.
hyper_parameters = HyperParameters({
    'dt__criterion': Categorical(['gini', 'entropy']),
    'dt__max_depth': Integer(4, 24),
    'dt__min_samples_leaf': Real(0.000001, 0.001),
    'dt__min_samples_split': Real(0.000002, 0.002)
})

# Pipeline: ordinal-encode the features, then fit a decision tree.
decision_tree_basic = Pipeline([('mapper', ordinal_data_mapper),
                                ('dt', DecisionTreeClassifier())])

def test_decision_tree():
    # Run the hyperparameter-search experiment, optimizing ROC-AUC on the
    # 'arrest' target, writing results under model/experiment/output/.
    runner = Runner('model/experiment/output/decision_tree_basic',
                    load_clean_sample_data_frame(),
                    'arrest',
                    decision_tree_basic,
                    hyper_parameters=hyper_parameters)
    runner.run_classification_search_experiment('roc_auc',
    # NOTE(review): call truncated — the remaining arguments continue past
    # the end of the visible chunk.
def get_sk_dimensions(api_config, transform="normalize"):
    """Help routine to setup skopt search space in constructor.

    Take api_config as argument so this can be static.
    """
    # Sort the parameter names so the iteration order is deterministic and
    # consistent with space.py.
    param_list = sorted(api_config.keys())

    sk_dims = []
    sk_types = []
    for param_name in param_list:
        cfg = api_config[param_name]
        ptype = cfg["type"]
        pspace = cfg.get("space", None)
        pvalues = cfg.get("values", None)
        prange = cfg.get("range", None)

        # A whitelist of values for a non-categorical type is collapsed into
        # a (min, max) range over the unique values.
        if pvalues is not None and ptype not in ("cat", "ordinal"):
            assert prange is None
            pvalues = np.unique(pvalues)
            prange = (pvalues[0], pvalues[-1])

        if ptype == "int":
            # Integer space in sklearn does not support any warping => Need
            # to leave the warping as linear in skopt.
            dim = Integer(prange[0], prange[-1],
                          transform=transform, name=param_name)
        elif ptype == "bool":
            assert prange is None
            assert pvalues is None
            dim = Integer(0, 1, transform=transform, name=param_name)
        elif ptype in ("cat", "ordinal"):
            assert prange is None
            # Leave x-form to one-hot as per skopt default.
            dim = Categorical(pvalues, name=param_name)
        elif ptype == "real":
            # Skopt doesn't support all our warpings, so pick the closest
            # substitute. NOTE(review): stock skopt Real only accepts
            # 'uniform'/'log-uniform' priors — 'logit-uniform' presumably
            # relies on a patched/forked skopt; confirm before changing.
            if pspace == "log":
                prior = "log-uniform"
            elif pspace == "logit":
                prior = "logit-uniform"
            else:
                prior = "uniform"
            dim = Real(prange[0], prange[-1], prior=prior,
                       transform=transform, name=param_name)
        else:
            assert False, "type %s not handled in API" % ptype

        sk_dims.append(dim)
        sk_types.append(ptype)

    return sk_dims, sk_types, param_list