def _dimension_space_from_dict(dict_hyperparams): hyperparams = list() if not isinstance(dict_hyperparams, dict): raise TypeError('Hyperparams must be a dictionary.') for name, hyperparam in dict_hyperparams.items(): hp_type = hyperparam['type'] if hp_type == 'int': hp_range = hyperparam.get('range') or hyperparam.get('values') hp_min = min(hp_range) if hp_range else None hp_max = max(hp_range) if hp_range else None hp_instance = space.Integer(hp_min, hp_max, name=name) elif hp_type == 'float': hp_range = hyperparam.get('range') or hyperparam.get('values') hp_min = min(hp_range) hp_max = max(hp_range) hp_instance = space.Real(hp_min, hp_max, name=name) elif hp_type == 'bool': hp_instance = space.Categorical([True, False], name=name) elif hp_type == 'str': hp_choices = hyperparam.get('range') or hyperparam.get('values') hp_instance = space.Categorical(hp_choices, name=name) hyperparams.append(hp_instance) return hyperparams
def __init__(self, model_callable, param_space, x_train, y_train, kfold_n_splits=5, score_sign=-1, score_measure=None, x_test=None, y_test=None): """ @param model_callable: @param param_space: @param x_train: @param y_train: @param n_calls: @param kfold_n_splits: this is used when no x_test, y_test given, cross validate score, but if x_test, y_test are given, not used @param score_sign: -1 if we want to max the value return by score_measure, 1 if we want to min it @param score_measure: default None for f1_score with avg is macro, callable for score calculation, take y_true as first arg, y_pred as second arg @param x_test: test data set data @param y_test: test data set label """ self.model = model_callable self.x_train = x_train self.y_train = y_train self.x_test = x_test self.y_test = y_test self.param_space = [] self.param_names = [] for param_config in param_space: self.param_names.append(param_config[-1]) if isinstance(param_config[0], list): self.param_space.append( space.Categorical(param_config[0], name=param_config[-1])) elif isinstance(param_config[0], float): self.param_space.append( space.Real(low=param_config[0], high=param_config[1], prior='uniform', name=param_config[-1])) elif isinstance(param_config[0], int): self.param_space.append( space.Integer(low=param_config[0], high=param_config[1], name=param_config[-1])) else: raise self.kfold_n_splits = kfold_n_splits if score_measure is not None: self.score_sign = score_sign self.score_measure = score_measure else: self.score_measure = partial(f1_score, average='macro') self.score_sign = -1
def skopt_bayesian_optimization(X, y):
    """Search four hyperparameters with ``gp_minimize`` and print the best.

    The objective is ``optimize`` (defined elsewhere) with ``param_names``,
    ``x`` and ``y`` bound through ``functools.partial``. The search runs 15
    calls, 10 of them random starts, and prints a ``{name: value}`` dict
    built from ``result.x``. Nothing is returned.

    Args:
        X: Feature data forwarded to ``optimize`` as ``x``.
        y: Target data forwarded to ``optimize`` as ``y``.
    """
    # Insertion order defines both the dimension order and the name order,
    # so the two lists handed to skopt can never drift apart.
    dimensions_by_name = {
        'max_depth': space.Integer(3, 15, name='max_depth'),
        'n_estimators': space.Integer(100, 600, name='n_estimators'),
        'criterion': space.Categorical(['gini', 'entropy'],
                                       name='criterion'),
        'max_features': space.Real(0.01, 1, prior='uniform',
                                   name='max_features'),
    }
    names = list(dimensions_by_name)
    search_dimensions = list(dimensions_by_name.values())

    objective = partial(optimize, param_names=names, x=X, y=y)
    outcome = gp_minimize(
        objective,
        dimensions=search_dimensions,
        n_calls=15,
        n_random_starts=10,
        verbose=10,
    )

    best_params = dict(zip(names, outcome.x))
    print(best_params)
print( "Found new optimum of {:.2f} in iteration {:d} by using parameters {}!" .format( r["fun"], len(r["func_vals"]), np.array2string(np.array(r["x"]), precision=6), )) if __name__ == "__main__": try: optimization_space = [ space.Real(0.3, 1.), # a2-scale space.Real(1, 5.), # theta_scale space.Real(3., 8.), # theta_dot_scale + space.Integer(65, 95), # ac memory + space.Integer(50, 70), # pm memory + space.Real(0.03, 0.3), # theta-spread + space.Real(0.3, 1.), # thetadot-spread + space.Integer(1, 3), # exploration space.Real(5., 10.), # theta-reward + space.Real(0.1, 2.), # thetadot_reward + space.Real(3., 6.), # a2-reward + space.Integer(16, 30), # k_a + space.Integer(18, 35), # k_c + space.Integer(9, 30), # k_pm + ] res = FUNCTIONS[parsed_args.f]( objective, optimization_space,
x = tf.keras.layers.Dense(hidden, activation="elu")(x) output_layer = tf.keras.layers.Dense(1)(x) xception_model = tf.keras.Model(inputs=base_xception.input, outputs=output_layer) xception_model.compile(tf.keras.optimizers.Adam(lr=lr), tf.keras.losses.MeanSquaredError(), ["mae", "accuracy"]) return xception_model search_space = [ space.Real(2.5, 5.5, name='lr'), space.Integer(200, 1000, name='hidden'), space.Real(0, 0.7, name='drop'), space.Integer(1, 32, name='batch_size') ] #Funktion, die von gp_minimize aufgerufen wird. Enthält die fit Funktion @use_named_args(search_space) def evaluate_func(**kwargs): model = tf.keras.wrappers.scikit_learn.KerasRegressor( build_fn=init_xception, epochs=2) model.set_params(**kwargs) x_train, x_test, y_train, y_test = train_test_split(x_data, y_data,
def add_hm_dimensions(self):
    """Extend ``self.dimensions`` with the model hyperparameter dimensions.

    One ``space.Integer(low, high)`` is appended for every ``(low, high)``
    pair in ``self.space_list``, in order.
    """
    self.dimensions.extend(
        space.Integer(lower, upper) for lower, upper in self.space_list
    )
xtrain = x[train_idx] ytrain = y[train_idx] xtest = x[test_idx] ytest = y[test_idx] model.fit(xtrain, ytrain) preds = model.predict(xtest) fold_acc = metrics.accuracy_score(ytest, preds) accuracies.append(fold_acc) return -1 * np.mean(accuracies) param_space = [ space.Integer(3, 15, name="max_depth"), space.Integer(100, 600, name="n_estimators"), space.Categorical(["gini", "entropy"], name="criterion"), space.Real(0.01, 1, prior="uniform", name="max_features"), ] param_names = ["max_depth", "n_estimators", "criterion", "max_features"] optimization_function = partial(optimize, param_names=param_names, x=X, y=y) result = gp_minimize( optimization_function, dimensions=param_space, n_calls=15, n_random_starts=10, verbose=10,
X_test = X_test.loc[:, selected_features] print(f"There are {X.shape[1]} features in the training dataset.") cat_cols = ["shop_id", "item_id", "item_category_id", "item_subname", "city", "shop_type", "shop_subname", "item_subcategory_name", "item_supcategory_name"] cat_cols = [col for col in X.columns if col in cat_cols] X[cat_cols] = X[cat_cols].fillna("None") X_test[cat_cols] = X_test[cat_cols].fillna("None") if args.tune: if args.model == "catboost": param_space = [ space.Integer(100, 5000, name="iterations"), space.Real(0.01, 0.3, prior="uniform", name="learning_rate"), space.Integer(4, 12, name="depth"), space.Integer(2, 30, name="l2_leaf_reg"), space.Integer(1, 255, name="border_count"), space.Real(1e-2, 10, prior="log-uniform", name="random_strength"), space.Real(0, 2, prior="uniform", name="bagging_temperature"), ] param_names = ["iterations", "learning_rate", "depth", "l2_leaf_reg", "border_count", "random_strength", "bagging_temperature"] optimization_function = partial(optimize, param_names=param_names, X=X, y=y, folds=folds) result = gp_minimize(optimization_function, dimensions=param_space, n_calls=40, n_random_starts=10, verbose=10) best_params = dict(zip(param_names, result.x))
from skopt import space from skopt.utils import use_named_args from skopt import gp_minimize """ We will tune the following hyperparameters of the SVM model: - C, the regularization parameter. - kernel, the type of kernel used in the model. - degree, used for the polynomial kernel. - gamma, used in most other kernels. """ search_space = list() search_space.append(space.Real(1e-6, 100.0, prior="log-uniform", name="C")) search_space.append( space.Categorical(["linear", "poly", "rbf", "sigmoid"], name="kernel")) search_space.append(space.Integer(1, 5, name="degree")) search_space.append(space.Real(1e-6, 100.0, prior="log-uniform", name="gamma")) # define the function used to evaluate a given configuration @use_named_args(search_space) def evaluate_model(**params): # configure the model with specific hyperparameters model = SVC() model.set_params(**params) # define test harness cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1) # calculate 10-fold cross validation result = cross_val_score(model, X, y, cv=cv, n_jobs=-1, scoring='accuracy') # calculate the mean of the scores estimate = np.mean(result)
# ,'n_iter':1 , 'verbose': True, 'random_state': 0 }, 'variable': { 'learning_rate': [0.1, 0.01, 0.005], 'num_leaves': linspace(10, 1010, 100, dtype=int), 'max_depth': linspace(2, 8, 6, dtype=int), 'min_samples_split': linspace(200, 2200, 10, dtype=int), 'min_samples_leaf': linspace(50, 550, 10, dtype=int) } }, 'skopt_params': [ space.Real(0.01, 0.5, name='learning_rate', prior='log-uniform'), space.Integer(1, 30, name='max_depth'), space.Integer(2, 100, name='num_leaves'), space.Integer(200, 2000, name='min_samples_split'), space.Integer(50, 500, name='min_samples_leaf'), ], 'fit_params': { 'verbose': True } }, 'fund_vars': { '_fund_value_st': 1000000 #£10,000 , '_trade_cost': 250 #£2.50 , '_investment_limit_min_val': 100000 #£1,000 ,
y_pred = model.predict(x_val) rmse = mean_squared_error(y_val, y_pred, squared=False) errors.append(rmse) return np.mean(errors) if __name__ == '__main__': df = pd.read_csv('input/train.csv') df = df.drop('id', axis=1) x = df.drop('target', axis=1).values y = df.target.values param_space = [ space.Real(0.01, 0.1, name='eta'), space.Real(0.05, 1.0, name='gamma'), space.Integer(3, 25, name ='max_depth'), space.Integer(1, 7, name='min_child_weight'), space.Real(0.6, 1.0, name='subsample'), space.Real(0.6, 1.0, name='colsample_bytree'), space.Real(0.01, 1.0, name='lambda'), space.Real(0.0, 1.0, name='alpha') ] param_names = ['eta', 'gamma', 'max_depth', 'min_child_weight', 'subsample', 'colsample_bytree', 'lambda', 'alpha'] optimization_function = partial( optimize, param_names=param_names, x=x, y=y
def init_search_space_dict(test=False) -> dict:
    """Build the hyperparameter search space as a dict of skopt dimensions.

    Keys are ``dim_*`` identifiers; each value is a named ``space``
    dimension.

    Args:
        test: When true, the batch size, number of training steps, window
            size and stride dimensions are replaced by much narrower ranges.

    Returns:
        The dict of dimensions, in declaration order.
    """
    search_space = {
        'dim_batch_sampling_method': space.Categorical(
            categories=['random_geometric', 'random_uniform',
                        'systematic_uniform'],
            name='batch_sampling_method'),
        'dim_window_size': space.Integer(low=10, high=1000,
                                         name='window_size'),
        'dim_stride': space.Integer(low=1, high=10, name='stride'),
        'dim_batch_size': space.Integer(low=10, high=1000,
                                        name='batch_size'),
        'dim_num_training_steps': space.Integer(low=10000, high=5000000,
                                                name='num_training_steps'),
        'dim_learning_rate': space.Real(low=1e-6, high=1e-2,
                                        prior='log-uniform',
                                        name='learning_rate'),
        'dim_geometric_decay': space.Real(low=1e-6, high=1,
                                          prior='log-uniform',
                                          name='geometric_decay'),
        # NOTE: the key spelling ("seperable") differs from the dimension
        # name; it is kept as-is because it is a lookup key.
        'dim_conv_layers_seperable': space.Categorical(
            categories=[True, False], name='conv_layers_separable'),
        'dim_len_conv1_filters': space.Integer(low=2, high=10,
                                               name='len_conv1_filters'),
        'dim_num_conv1_features': space.Integer(low=1, high=64,
                                                name='num_conv1_features'),
        'dim_num_conv2_features': space.Integer(low=8, high=128,
                                                name='num_conv2_features'),
        'dim_num_fc1_neurons': space.Integer(low=8, high=32,
                                             name='num_fc1_neurons'),
        # Third conv layer doesn't work yet
        'dim_model_ending': space.Categorical(
            categories=['one_fc_layer', 'two_fc_layers',
                        'third_conv_layer'],
            name='model_ending'),
        'dim_dropout_keep_prob': space.Real(low=.1, high=.9,
                                            name='dropout_keep_prob'),
    }

    if test:
        # Overwrite in place so the keys keep their original position.
        search_space['dim_batch_size'] = space.Integer(
            low=10, high=30, name='batch_size')
        search_space['dim_num_training_steps'] = space.Integer(
            low=2, high=4, name='num_training_steps')
        search_space['dim_window_size'] = space.Integer(
            low=10, high=50, name='window_size')
        search_space['dim_stride'] = space.Integer(
            low=1, high=2, name='stride')

    return search_space
# return the negative mean fold accuracy (since we minimize) return -1.0 * np.array(accuracies).mean() # Everything needs to be done in a cross validation loop if __name__ == '__main__': # Bayesian (Gaussian Process Optimisation) # https://scikit-optimize.github.io/stable/modules/generated/skopt.gp_minimize.html df = pd.read_csv('../data/datasets_11167_15520_train.csv') print('%i predictive features'%(len(df.columns)-1)) X = df.drop('price_range', axis=1).values y = df['price_range'].values param_space =[ space.Real(0.1, 1, prior= 'uniform', name='max_features'), space.Integer(100,1000, name='n_estimators'), space.Integer(5, 25, name='min_samples_leaf'), space.Categorical(['gini', 'entropy'], name='criterion') ] param_names = [ 'max_features', 'n_estimators', 'min_samples_leaf', 'criterion' ] optimization_function = partial( optimize, param_names = param_names, x=X,
# split predictors from label train_predictors, train_targets = splitTargetFromLabels(train_data, target_column=target_column) # make an instance of dataSplitter class splitData = DataSplitter(train_predictors, train_targets, target_column) # get monthly splits cv_dict = splitData.splitByMonth() cv_splits = list(zip(cv_dict['train_indices'], cv_dict['test_indices'])) # tune RF using Bayes over monthly cv splits #----------------------------------------------------------------------------------------# rf_hyperparameter_space = [ space.Integer(100, 1000, name='n_estimators'), space.Integer(2, 10, name='max_depth'), space.Real(0.5, 1, prior='uniform', name='max_features'), space.Real(0.00001, 10.0, prior='log-uniform', name='learning_rate'), space.Real(0.5, 1, prior='uniform', name='subsample'), ] param_names = ['n_estimators', 'max_depth', 'max_features', 'learning_rate', 'subsample'] optimization_function = partial( optimize_skopt, param_names=param_names, examples=train_predictors[predictor_columns], targets=train_targets, splits=cv_splits,
base_resnet152 = tf.keras.applications.ResNet152V2(input_shape=(600, 200, 3), include_top=False) x = base_resnet152.output x = tf.keras.layers.Flatten()(x) x = tf.keras.layers.Dropout(drop)(x) x = tf.keras.layers.Dense(hidden, activation="elu")(x) x = tf.keras.layers.Dropout(drop)(x) output_layer = tf.keras.layers.Dense(1)(x) resnet152v2 = tf.keras.Model(inputs=base_resnet152.input, outputs=output_layer) resnet152v2.compile(tf.keras.optimizers.Adam(lr=lr), tf.keras.losses.MeanSquaredError(), ["mae", "accuracy"]) search_space = [space.Real(2,6, name='lr'), space.Integer(300,2000, name='hidden'), space.Real(0.01,0.7,name='drop'), space.Integer(1,4,name='batch_size')] @use_named_args(search_space) def evaluate_func(**kwargs): model = tf.keras.wrappers.scikit_learn.KerasRegressor(build_fn=init_resnet50v2, epochs=1) model.set_params(**kwargs) x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.33, random_state=42) fit_model = model.fit(x_train, y_train, validation_split=0.2, callbacks=[callb, callb2], shuffle=True) score = mean_squared_error(y_test, model.predict(x_test)) print("score", score)
return -1 * np.mean(accuracies) if __name__ == "__main__": # read the training data df = pd.read_csv('dataset/train.csv') # here we have training features x = df.drop('price_range' , axis = 1).values # and the targets y = df.price_range.values # define a parameter space param_space = [ # max_depth is an integer between 3 and 15 space.Integer(3,15, name='max_depth'), # n_estimators is an integer between 100 and 1500 space.Integer(100, 1500 , name="n_estimators"), # criterion is a category. here we define list of categories space.Categorical(['gini' , 'entropy'] , name='criterion'), # you can also have a real-numbered space and define a # distribution you want to pick it from space.Real(0.01 , 1 , prior='uniform' , name='max_features') ] # make a list of param names # this has to be in the same order as the search space # inside the main function param_names = [ 'max_depth', 'n_estimators',
model.fit(xtrain,ytrain) pred = model.predict(xtest) fold_acc = metrics.accuracy_score(ytest,pred) accuracies.append(fold_acc) return -1.0 * np.mean(accuracies) #read the dataset if __name__ == '__main__': df = pd.read_csv('input/train.csv') X = df.drop('price_range',axis = 1).values y = df.price_range.values param_space = [ space.Integer(3,15, name = 'max_depth'), space.Integer(100,600, name = 'n_estimators'), space.Categorical(['gini','entropy'],name = 'criterion'), space.Real(0.01,1,prior = 'uniform',name = 'max_features') ] param_names = [ 'max_depth', 'n_estimators', 'criterion', 'max_features' ] optimization_function = partial( optimize, param_names = param_names, x=X, y = y )
), metric=parsed_args.metric, ) training_message = "Finished training with cumulative z-error {:.2f}".format( result.get_cum_state_error().flatten()[1]) print(training_message) return result.get_cum_state_error().flatten()[1] if __name__ == "__main__": try: optimization_space = [ space.Real(2., 35.0), # z-scale space.Real(0.3, 1.0), # zdot_reward space.Real(2., 5.), # action reward space.Integer(1, 3), # exploration space.Real(1e-5, 3e-2, prior="log-uniform"), # tolerance space.Integer(16, 32), # Maxmem * 50 space.Integer(13, 18), # ka space.Real(1e-3, 1., prior="log-uniform"), # alpha a space.Integer(18, 24), # kc space.Real(1e-3, 1., prior="log-uniform"), # alpha c space.Integer(10, 20), # Maxmem * 50 space.Integer(9, 12), # kpm space.Real(3e-8, 3e-4, prior="log-uniform"), # pred_tol pm space.Real(0.75, 0.95), # lambda space.Real(0.90, 0.97), # gamma ] print("Starting new optimization run.")
tf.keras.layers.Flatten(), tf.keras.layers.Dense(1, activation="softmax") ]) model.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError(), metrics=['accuracy']) return model""" # Search space for the hyperparameter optimization search_space = [ space.Real(2, 6, name='lr'), # 1e-7, 1e-2 # Consider lr between 10^-6 and 1 space.Real(0, 0.7, name='drop1'), space.Real(0, 0.7, name='drop2'), space.Real(0, 0.15, name='loss1'), space.Real(0, 0.15, name='loss2'), space.Integer(1, 32, name='batch_size') ] # Splits the data to create the test and validation sets and trains the model. Evaluates a given Configuration and # creates a .txt file that stores the current values of the parameters after every iteration. Returns the minimal # reached loss across all iterations. @use_named_args(search_space) def evaluate_func(**kwargs): model = tf.keras.wrappers.scikit_learn.KerasRegressor(build_fn=alexnet, epochs=2) model.set_params(**kwargs) x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.33,
# split predictors from label train_predictors, train_targets = splitTargetFromLabels( train_data, target_column=target_column) # make an instance of dataSplitter class splitData = DataSplitter(train_predictors, train_targets, target_column) # get monthly splits cv_dict = splitData.splitByMonth() cv_splits = list(zip(cv_dict['train_indices'], cv_dict['test_indices'])) # tune RF using Bayes over monthly cv splits #----------------------------------------------------------------------------------------# rf_hyperparameter_space = [ space.Integer(2, 500, name='min_samples_leaf'), space.Integer(100, 500, name='n_estimators'), space.Integer(3, 25, name='max_depth'), space.Real(0.01, 1, prior='uniform', name='max_features'), space.Real(0.01, 1, prior='uniform', name='ccp_alpha') ] param_names = [ 'min_samples_leaf', 'n_estimators', 'max_depth', 'max_features', 'ccp_alpha' ] optimization_function = partial( optimize_skopt, param_names=param_names, examples=train_predictors[predictor_columns],
accuracies.append(fold_accuracy) return -1 * np.mean(accuracies) if __name__ == "__main__": df = pd.read_csv(file) # Price range is the targe variable X= df.drop("price_range", axis = 1).values y = df.price_range.values # define a param space param_space = [ # max_depth is an integer between 3 and 10 space.Integer(3, 15, name="max_depth"), # n_estimators is an integer between 50 and 1500 spcae.Integer(100, 1500, name="n_estimators") #criterion is a category space.Categorical(["gini", "entropy"], name ="criterion") # You can also have a real numbered space and define a distribution you want to pick it from space.Real(0.01, 1, prior="uniform", name = "max_features") ] # make a list of param names # SAME ORDER AS THE PARAM SPACE param_names = [ "max_depth", "n_estimators]", "criterion",