def init_search_space_dict(test=False) -> dict:
    search_space_dict = dict(
        dim_batch_sampling_method=space.Categorical(
            categories=['random_geometric', 'random_uniform', 'systematic_uniform'],
            name='batch_sampling_method'),
        dim_window_size=space.Integer(low=10, high=1000, name='window_size'),
        dim_stride=space.Integer(low=1, high=10, name='stride'),
        dim_batch_size=space.Integer(low=10, high=1000, name='batch_size'),
        dim_num_training_steps=space.Integer(low=10000, high=5000000, name='num_training_steps'),
        dim_learning_rate=space.Real(low=1e-6, high=1e-2, prior='log-uniform', name='learning_rate'),
        dim_geometric_decay=space.Real(low=1e-6, high=1, prior='log-uniform', name='geometric_decay'),
        dim_conv_layers_seperable=space.Categorical(categories=[True, False], name='conv_layers_separable'),
        dim_len_conv1_filters=space.Integer(low=2, high=10, name='len_conv1_filters'),
        dim_num_conv1_features=space.Integer(low=1, high=64, name='num_conv1_features'),
        dim_num_conv2_features=space.Integer(low=8, high=128, name='num_conv2_features'),
        dim_num_fc1_neurons=space.Integer(low=8, high=32, name='num_fc1_neurons'),
        ### Third conv layer doesn't work yet
        dim_model_ending=space.Categorical(
            categories=['one_fc_layer', 'two_fc_layers', 'third_conv_layer'],
            name='model_ending'),
        dim_dropout_keep_prob=space.Real(low=.1, high=.9, name='dropout_keep_prob'),
    )
    if test:
        # Shrink the expensive dimensions so test runs finish quickly.
        search_space_dict.update({
            'dim_batch_size': space.Integer(low=10, high=30, name='batch_size'),
            'dim_num_training_steps': space.Integer(low=2, high=4, name='num_training_steps'),
            'dim_window_size': space.Integer(low=10, high=50, name='window_size'),
            'dim_stride': space.Integer(low=1, high=2, name='stride'),
        })
    return search_space_dict
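A minimal usage sketch (not from the original module): since Python dicts preserve insertion order, the dict's values can be handed to gp_minimize directly, with names recovered from each dimension. train_and_score is a hypothetical stand-in for the actual training routine.

from skopt import gp_minimize

dims = list(init_search_space_dict(test=True).values())

def objective(values):
    # Recover named hyperparameters from the positional values skopt passes in.
    params = {dim.name: value for dim, value in zip(dims, values)}
    return train_and_score(params)  # hypothetical training/scoring routine

result = gp_minimize(objective, dimensions=dims, n_calls=20)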
def _dimension_space_from_dict(dict_hyperparams):
    """Build a list of skopt dimensions from a {name: config} dictionary."""
    hyperparams = list()
    if not isinstance(dict_hyperparams, dict):
        raise TypeError('Hyperparams must be a dictionary.')
    for name, hyperparam in dict_hyperparams.items():
        hp_type = hyperparam['type']
        if hp_type == 'int':
            hp_range = hyperparam.get('range') or hyperparam.get('values')
            hp_min = min(hp_range)
            hp_max = max(hp_range)
            hp_instance = space.Integer(hp_min, hp_max, name=name)
        elif hp_type == 'float':
            hp_range = hyperparam.get('range') or hyperparam.get('values')
            hp_min = min(hp_range)
            hp_max = max(hp_range)
            hp_instance = space.Real(hp_min, hp_max, name=name)
        elif hp_type == 'bool':
            hp_instance = space.Categorical([True, False], name=name)
        elif hp_type == 'str':
            hp_choices = hyperparam.get('range') or hyperparam.get('values')
            hp_instance = space.Categorical(hp_choices, name=name)
        else:
            # Fail loudly instead of hitting a NameError on hp_instance below.
            raise ValueError('Unsupported hyperparameter type: {}'.format(hp_type))
        hyperparams.append(hp_instance)
    return hyperparams
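A quick illustration of the input format this helper expects (the dictionary below is invented for the example):

dims = _dimension_space_from_dict({
    'n_estimators': {'type': 'int', 'range': [100, 600]},
    'learning_rate': {'type': 'float', 'values': [0.01, 0.3]},
    'bootstrap': {'type': 'bool'},
    'criterion': {'type': 'str', 'values': ['gini', 'entropy']},
})
# -> [Integer(100, 600), Real(0.01, 0.3),
#     Categorical([True, False]), Categorical(['gini', 'entropy'])]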
def __init__(self, model_callable, param_space, x_train, y_train,
             kfold_n_splits=5, score_sign=-1, score_measure=None,
             x_test=None, y_test=None):
    """
    @param model_callable: callable that returns an unfitted model instance
    @param param_space: list of (low, high, ..., name) tuples, or tuples whose
        first element is a list of categorical choices
    @param x_train: training data
    @param y_train: training labels
    @param kfold_n_splits: used for cross-validated scoring when no x_test/y_test
        are given; ignored if x_test and y_test are provided
    @param score_sign: -1 to maximize the value returned by score_measure,
        1 to minimize it
    @param score_measure: callable for score calculation, taking y_true as the
        first argument and y_pred as the second; defaults to f1_score with
        average='macro' when None
    @param x_test: test set data
    @param y_test: test set labels
    """
    self.model = model_callable
    self.x_train = x_train
    self.y_train = y_train
    self.x_test = x_test
    self.y_test = y_test
    self.param_space = []
    self.param_names = []
    for param_config in param_space:
        self.param_names.append(param_config[-1])
        if isinstance(param_config[0], list):
            self.param_space.append(
                space.Categorical(param_config[0], name=param_config[-1]))
        elif isinstance(param_config[0], float):
            self.param_space.append(
                space.Real(low=param_config[0], high=param_config[1],
                           prior='uniform', name=param_config[-1]))
        elif isinstance(param_config[0], int):
            self.param_space.append(
                space.Integer(low=param_config[0], high=param_config[1],
                              name=param_config[-1]))
        else:
            raise TypeError(
                'Unsupported param_config type: {}'.format(type(param_config[0])))
    self.kfold_n_splits = kfold_n_splits
    if score_measure is not None:
        self.score_sign = score_sign
        self.score_measure = score_measure
    else:
        self.score_measure = partial(f1_score, average='macro')
        self.score_sign = -1
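For orientation, a sketch of how this constructor might be called. BayesTuner is an invented name for the class this __init__ belongs to; the tuple convention follows the isinstance checks above:

from sklearn.ensemble import RandomForestClassifier

# BayesTuner is a hypothetical name for the enclosing class.
tuner = BayesTuner(
    model_callable=RandomForestClassifier,
    param_space=[
        (3, 15, 'max_depth'),                # int bounds -> space.Integer
        (0.01, 1.0, 'max_features'),         # float bounds -> space.Real
        (['gini', 'entropy'], 'criterion'),  # list -> space.Categorical
    ],
    x_train=x_train,
    y_train=y_train,
)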
def __init__(self, objective_callable, param_space):
    self.objective_callable = objective_callable
    self.param_space = []
    self.param_names = []
    for param_config in param_space:
        self.param_names.append(param_config[-1])
        self.param_space.append(
            space.Real(low=param_config[0], high=param_config[1],
                       prior='uniform', name=param_config[-1]))
def skopt_bayesian_optimization(X, y):
    param_space = [
        space.Integer(3, 15, name='max_depth'),
        space.Integer(100, 600, name='n_estimators'),
        space.Categorical(['gini', 'entropy'], name='criterion'),
        space.Real(0.01, 1, prior='uniform', name='max_features')
    ]
    param_names = ['max_depth', 'n_estimators', 'criterion', 'max_features']
    optimization_function = partial(optimize, param_names=param_names, x=X, y=y)
    result = gp_minimize(optimization_function,
                         dimensions=param_space,
                         n_calls=15,
                         n_random_starts=10,
                         verbose=10)
    print(dict(zip(param_names, result.x)))
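The optimize callable handed to partial above is not shown here. A minimal sketch of what such an objective typically looks like, consistent with the negated-accuracy fragments appearing later in this section; the model choice and fold count are assumptions:

import numpy as np
from sklearn import ensemble, metrics, model_selection

def optimize(params, param_names, x, y):
    # Rebuild named hyperparameters from the positional values skopt passes in.
    params = dict(zip(param_names, params))
    model = ensemble.RandomForestClassifier(**params)
    kf = model_selection.StratifiedKFold(n_splits=5)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        model.fit(x[train_idx], y[train_idx])
        preds = model.predict(x[test_idx])
        accuracies.append(metrics.accuracy_score(y[test_idx], preds))
    # gp_minimize minimizes, so return the negative mean accuracy.
    return -1.0 * np.mean(accuracies)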
    x = tf.keras.layers.Dropout(drop)(x)
    x = tf.keras.layers.Dense(hidden, activation="elu")(x)
    output_layer = tf.keras.layers.Dense(1)(x)
    xception_model = tf.keras.Model(inputs=base_xception.input, outputs=output_layer)
    xception_model.compile(tf.keras.optimizers.Adam(lr=lr),
                           tf.keras.losses.MeanSquaredError(),
                           ["mae", "accuracy"])
    return xception_model

search_space = [
    space.Real(2.5, 5.5, name='lr'),
    space.Integer(200, 1000, name='hidden'),
    space.Real(0, 0.7, name='drop'),
    space.Integer(1, 32, name='batch_size')
]

# Function called by gp_minimize; contains the fit logic.
@use_named_args(search_space)
def evaluate_func(**kwargs):
    model = tf.keras.wrappers.scikit_learn.KerasRegressor(
        build_fn=init_xception, epochs=2)
    model.set_params(**kwargs)
    x_train, x_test, y_train, y_test = train_test_split(x_data,
        model.fit(x_train, y_train)
        y_pred = model.predict(x_val)
        rmse = mean_squared_error(y_val, y_pred, squared=False)
        errors.append(rmse)
    return np.mean(errors)

if __name__ == '__main__':
    df = pd.read_csv('input/train.csv')
    df = df.drop('id', axis=1)
    x = df.drop('target', axis=1).values
    y = df.target.values
    param_space = [
        space.Real(0.01, 0.1, name='eta'),
        space.Real(0.05, 1.0, name='gamma'),
        space.Integer(3, 25, name='max_depth'),
        space.Integer(1, 7, name='min_child_weight'),
        space.Real(0.6, 1.0, name='subsample'),
        space.Real(0.6, 1.0, name='colsample_bytree'),
        space.Real(0.01, 1.0, name='lambda'),
        space.Real(0.0, 1.0, name='alpha')
    ]
    param_names = ['eta', 'gamma', 'max_depth', 'min_child_weight',
                   'subsample', 'colsample_bytree', 'lambda', 'alpha']
    optimization_function = partial(
        optimize,
        param_names=param_names,
        xtest = x[test_idx]
        ytest = y[test_idx]
        model.fit(xtrain, ytrain)
        preds = model.predict(xtest)
        fold_acc = metrics.accuracy_score(ytest, preds)
        accuracies.append(fold_acc)
    return -1 * np.mean(accuracies)

param_space = [
    space.Integer(3, 15, name="max_depth"),
    space.Integer(100, 600, name="n_estimators"),
    space.Categorical(["gini", "entropy"], name="criterion"),
    space.Real(0.01, 1, prior="uniform", name="max_features"),
]
param_names = ["max_depth", "n_estimators", "criterion", "max_features"]
optimization_function = partial(optimize, param_names=param_names, x=X, y=y)
result = gp_minimize(
    optimization_function,
    dimensions=param_space,
    n_calls=15,
    n_random_starts=10,
    verbose=10,
)
print(dict(zip(param_names, result.x)))
    target_column=target_column)
# make an instance of DataSplitter class
splitData = DataSplitter(train_predictors, train_targets, target_column)
# get monthly splits
cv_dict = splitData.splitByMonth()
cv_splits = list(zip(cv_dict['train_indices'], cv_dict['test_indices']))

# tune RF using Bayes over monthly cv splits
#----------------------------------------------------------------------------------------#
rf_hyperparameter_space = [
    space.Integer(100, 1000, name='n_estimators'),
    space.Integer(2, 10, name='max_depth'),
    space.Real(0.5, 1, prior='uniform', name='max_features'),
    space.Real(0.00001, 10.0, prior='log-uniform', name='learning_rate'),
    space.Real(0.5, 1, prior='uniform', name='subsample'),
]
param_names = ['n_estimators', 'max_depth', 'max_features', 'learning_rate', 'subsample']
optimization_function = partial(
    optimize_skopt,
    param_names=param_names,
    examples=train_predictors[predictor_columns],
    targets=train_targets,
    splits=cv_splits,
    scoring_func=scoring_func
)
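optimize_skopt itself is defined elsewhere; purely as orientation, a sketch of what an objective with this signature could look like. GradientBoostingRegressor is an assumption (the space mixes learning_rate/subsample, which plain random forests don't take), as is the y_true-first calling convention for scoring_func:

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

def optimize_skopt(params, param_names, examples, targets, splits, scoring_func):
    # Rebuild named hyperparameters from the positional values skopt passes in.
    params = dict(zip(param_names, params))
    scores = []
    for train_idx, test_idx in splits:
        model = GradientBoostingRegressor(**params)
        model.fit(examples.iloc[train_idx], targets.iloc[train_idx])
        preds = model.predict(examples.iloc[test_idx])
        scores.append(scoring_func(targets.iloc[test_idx], preds))
    # If scoring_func is an error metric, return the mean directly;
    # flip the sign instead when it is a "higher is better" score.
    return np.mean(scores)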
print(f"There are {X.shape[1]} features in the training dataset.") cat_cols = ["shop_id", "item_id", "item_category_id", "item_subname", "city", "shop_type", "shop_subname", "item_subcategory_name", "item_supcategory_name"] cat_cols = [col for col in X.columns if col in cat_cols] X[cat_cols] = X[cat_cols].fillna("None") X_test[cat_cols] = X_test[cat_cols].fillna("None") if args.tune: if args.model == "catboost": param_space = [ space.Integer(100, 5000, name="iterations"), space.Real(0.01, 0.3, prior="uniform", name="learning_rate"), space.Integer(4, 12, name="depth"), space.Integer(2, 30, name="l2_leaf_reg"), space.Integer(1, 255, name="border_count"), space.Real(1e-2, 10, prior="log-uniform", name="random_strength"), space.Real(0, 2, prior="uniform", name="bagging_temperature"), ] param_names = ["iterations", "learning_rate", "depth", "l2_leaf_reg", "border_count", "random_strength", "bagging_temperature"] optimization_function = partial(optimize, param_names=param_names, X=X, y=y, folds=folds) result = gp_minimize(optimization_function, dimensions=param_space, n_calls=40, n_random_starts=10, verbose=10) best_params = dict(zip(param_names, result.x)) print(best_params)
            # 'cv': 2
            # , 'n_iter': 1
            , 'verbose': True
            , 'random_state': 0
        },
        'variable': {
            'learning_rate': [0.1, 0.01, 0.005],
            'num_leaves': linspace(10, 1010, 100, dtype=int),
            'max_depth': linspace(2, 8, 6, dtype=int),
            'min_samples_split': linspace(200, 2200, 10, dtype=int),
            'min_samples_leaf': linspace(50, 550, 10, dtype=int)
        }
    },
    'skopt_params': [
        space.Real(0.01, 0.5, name='learning_rate', prior='log-uniform'),
        space.Integer(1, 30, name='max_depth'),
        space.Integer(2, 100, name='num_leaves'),
        space.Integer(200, 2000, name='min_samples_split'),
        space.Integer(50, 500, name='min_samples_leaf'),
    ],
    'fit_params': {
        'verbose': True
    }
},
'fund_vars': {
    '_fund_value_st': 1000000  # £10,000 (monetary values appear to be in pence)
    , '_trade_cost': 250  # £2.50
    , '_investment_limit_min_val': 100000  # £1,000
        ytest = y[test_idx]
        model.fit(xtrain, ytrain)
        preds = model.predict(xtest)
        fold_acc = accuracy_score(ytest, preds)
        accuracies.append(fold_acc)
    return -1.0 * np.mean(accuracies)

# Parameter space for XGBoost
param_space = [
    space.Integer(3, 15, name='max_depth'),
    space.Integer(100, 600, name='n_estimators'),
    space.Categorical(['gini', 'entropy'], name='criterion'),
    space.Real(0.01, 1, prior='uniform', name='colsample_bytree'),
    space.Real(0.001, 1, prior='uniform', name='learning_rate')
]
param_names = [
    "max_depth", "n_estimators", "criterion",
    "colsample_bytree", "learning_rate"
]

# Optimization function
optimization_function = partial(optimize, param_names=param_names,
                                x=X, y=label_encoder_y)
result = gp_minimize(optimization_function,
                     dimensions=param_space,
                     n_calls=10,
    x = base_alexnet.output
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dropout(drop)(x)
    x = tf.keras.layers.Dense(hidden, activation="elu")(x)
    output_layer = tf.keras.layers.Dense(1)(x)
    alexnet = tf.keras.Model(inputs=base_alexnet.input, outputs=output_layer)
    alexnet.compile(tf.keras.optimizers.Adam(lr=lr),
                    tf.keras.losses.MeanSquaredError(),
                    ["mae", "accuracy"])
    return alexnet

search_space = [space.Real(2.5, 5.5, name='lr'),
                space.Integer(200, 1000, name='hidden'),
                space.Real(0, 0.7, name='drop'),
                space.Integer(1, 32, name='batch_size')]

# Function called by gp_minimize; contains the fit logic.
@use_named_args(search_space)
def evaluate_func(**kwargs):
    model = tf.keras.wrappers.scikit_learn.KerasRegressor(
        build_fn=init_xception, epochs=2)
    model.set_params(**kwargs)
    x_train, x_test, y_train, y_test = train_test_split(x_data, y_data,
                                                        test_size=0.33,
                                                        random_state=42)
    y_train = np.random.random((10, 1))
    y_test = np.random.random((5, 1))
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.svm import SVC
from skopt import space
from skopt.utils import use_named_args
from skopt import gp_minimize

"""
We will tune the following hyperparameters of the SVM model:
- C, the regularization parameter.
- kernel, the type of kernel used in the model.
- degree, used for the polynomial kernel.
- gamma, used in most other kernels.
"""

search_space = list()
search_space.append(space.Real(1e-6, 100.0, prior="log-uniform", name="C"))
search_space.append(
    space.Categorical(["linear", "poly", "rbf", "sigmoid"], name="kernel"))
search_space.append(space.Integer(1, 5, name="degree"))
search_space.append(space.Real(1e-6, 100.0, prior="log-uniform", name="gamma"))

# define the function used to evaluate a given configuration
@use_named_args(search_space)
def evaluate_model(**params):
    # configure the model with specific hyperparameters
    model = SVC()
    model.set_params(**params)
    # define test harness
    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    # calculate 10-fold cross validation
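The snippet breaks off inside evaluate_model. A minimal sketch of how this pattern is typically finished; the scoring choice and n_calls are assumptions, and X, y are assumed to be loaded already. The commented lines belong inside evaluate_model, after the cv definition above:

from sklearn.model_selection import cross_val_score

#     scores = cross_val_score(model, X, y, cv=cv, scoring="accuracy", n_jobs=-1)
#     return 1.0 - scores.mean()  # gp_minimize minimizes, so invert accuracy

result = gp_minimize(evaluate_model, search_space, n_calls=30)
print("Best accuracy: %.3f" % (1.0 - result.fun))
print("Best parameters:", dict(zip([d.name for d in search_space], result.x)))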
def callback(r):
    if r["func_vals"][-1] == min(r["func_vals"]):
        print(
            "Found new optimum of {:.2f} in iteration {:d} by using parameters {}!"
            .format(
                r["fun"],
                len(r["func_vals"]),
                np.array2string(np.array(r["x"]), precision=6),
            ))

if __name__ == "__main__":
    try:
        optimization_space = [
            space.Real(0.3, 1.),    # a2-scale
            space.Real(1, 5.),      # theta_scale
            space.Real(3., 8.),     # theta_dot_scale
            space.Integer(65, 95),  # ac memory
            space.Integer(50, 70),  # pm memory
            space.Real(0.03, 0.3),  # theta-spread
            space.Real(0.3, 1.),    # thetadot-spread
            space.Integer(1, 3),    # exploration
            space.Real(5., 10.),    # theta-reward
            space.Real(0.1, 2.),    # thetadot_reward
            space.Real(3., 6.),     # a2-reward
            space.Integer(16, 30),  # k_a
            space.Integer(18, 35),  # k_c
            space.Integer(9, 30),   # k_pm
        ]
import skopt.space as space

from deephyper.problem import NaProblem
from nas_big_data.covertype.dense_skipco import create_search_space
from nas_big_data.covertype.load_data import load_data

dim = space.Real(1, 1000, prior="log-uniform")

Problem = NaProblem(seed=2019)

Problem.load_data(load_data)

Problem.search_space(create_search_space, num_layers=10)

Problem.hyperparameters(
    batch_size=space.Categorical([32, 64, 128, 256, 512, 1024]),
    learning_rate=space.Real(1e-3, 1e-1, prior="log-uniform"),
    optimizer="adam",
    num_epochs=20,  # maximal bound
    verbose=0,
    callbacks=dict(
        CSVExtendedLogger=dict(),
        ModelCheckpoint=dict(
            monitor="val_acc",
            mode="max",
            save_best_only=True,
            verbose=0,
            filepath="model.h5",
            save_weights_only=True,
        ),
    lr = 1 / np.power(10, lr)  # lr is sampled as an exponent; convert to a rate
    base_resnet152 = tf.keras.applications.ResNet152V2(input_shape=(600, 200, 3),
                                                       include_top=False)
    x = base_resnet152.output
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dropout(drop)(x)
    x = tf.keras.layers.Dense(hidden, activation="elu")(x)
    x = tf.keras.layers.Dropout(drop)(x)
    output_layer = tf.keras.layers.Dense(1)(x)
    resnet152v2 = tf.keras.Model(inputs=base_resnet152.input, outputs=output_layer)
    resnet152v2.compile(tf.keras.optimizers.Adam(lr=lr),
                        tf.keras.losses.MeanSquaredError(),
                        ["mae", "accuracy"])
    return resnet152v2

search_space = [space.Real(2, 6, name='lr'),
                space.Integer(300, 2000, name='hidden'),
                space.Real(0.01, 0.7, name='drop'),
                space.Integer(1, 4, name='batch_size')]

@use_named_args(search_space)
def evaluate_func(**kwargs):
    model = tf.keras.wrappers.scikit_learn.KerasRegressor(
        build_fn=init_resnet50v2, epochs=1)
    model.set_params(**kwargs)
    x_train, x_test, y_train, y_test = train_test_split(x_data, y_data,
                                                        test_size=0.33,
                                                        random_state=42)
    fit_model = model.fit(x_train, y_train, validation_split=0.2,
                          callbacks=[callb, callb2], shuffle=True)
    score = mean_squared_error(y_test, model.predict(x_test))
    print("score", score)
# here we have training features
x = df.drop('price_range', axis=1).values
# and the targets
y = df.price_range.values

# define a parameter space
param_space = [
    # max_depth is an integer between 3 and 15
    space.Integer(3, 15, name='max_depth'),
    # n_estimators is an integer between 100 and 1500
    space.Integer(100, 1500, name="n_estimators"),
    # criterion is a category; here we define a list of categories
    space.Categorical(['gini', 'entropy'], name='criterion'),
    # you can also have a real-numbered space and define the
    # distribution you want to sample it from
    space.Real(0.01, 1, prior='uniform', name='max_features')
]

# make a list of param names;
# this has to be in the same order as the search space
# inside the main function
param_names = [
    'max_depth',
    'n_estimators',
    'criterion',
    'max_features'
]

"""
By using functools.partial, I am creating a new function which has the same
parameters as the optimize function except for the
drop_cols = [
    'scheduled_year', 'scheduled_weekofyear', 'scheduled_month',
    'scheduled_dayofweek', 'scheduled_weekend',
    'delivery_year', 'delivery_weekofyear', 'delivery_month',
    'delivery_dayofweek', 'delivery_weekend',
    "City", "Code"
]
df = df.drop(drop_cols, axis=1)
X = df.drop(["Cost"], axis=1).values
y = df.Cost.values

param_space = [
    space.Integer(3, 15, name="max_depth"),
    space.Integer(100, 800, name="n_estimators"),
    space.Real(0.01, 0.1, name="learning_rate")
]
param_names = [
    "max_depth",
    "n_estimators",
    "learning_rate"
]

optimization_function = partial(optimize, param_names=param_names, x=X, y=y)
result = gp_minimize(
    optimization_function,
    dimensions=param_space,
    n_calls=15,
    n_random_starts=10,
        cs.lookback_result(
            LOOK_BACK_WINDOW,
            look_back_metric="median",
        ),
        metric=parsed_args.metric,
    )
    training_message = "Finished training with cumulative z-error {:.2f}".format(
        result.get_cum_state_error().flatten()[1])
    print(training_message)
    return result.get_cum_state_error().flatten()[1]

if __name__ == "__main__":
    try:
        optimization_space = [
            space.Real(2., 35.0),                         # z-scale
            space.Real(0.3, 1.0),                         # zdot_reward
            space.Real(2., 5.),                           # action reward
            space.Integer(1, 3),                          # exploration
            space.Real(1e-5, 3e-2, prior="log-uniform"),  # tolerance
            space.Integer(16, 32),                        # Maxmem * 50
            space.Integer(13, 18),                        # ka
            space.Real(1e-3, 1., prior="log-uniform"),    # alpha a
            space.Integer(18, 24),                        # kc
            space.Real(1e-3, 1., prior="log-uniform"),    # alpha c
            space.Integer(10, 20),                        # Maxmem * 50
            space.Integer(9, 12),                         # kpm
            space.Real(3e-8, 3e-4, prior="log-uniform"),  # pred_tol pm
            space.Real(0.75, 0.95),                       # lambda
            space.Real(0.90, 0.97),                       # gamma
        ]
### Get data
data = pd.read_csv(folder_path_data + r'/final_proc_dat_labjansen.csv')

### Get function
from model_functions import VIEW_INDIPENDENTxCONTEXT, VIEW_DEPENDENT, VIEW_DEPENDENTxCONTEXT_DEPENDENT
from model_functions import VIEW_INDEPENDENT, VIEW_INDEPENDENTxVIEW_DEPENDENT
from model_functions import VIEW_INDEPENDENTxVIEW_DEPENDENTxCONTEXT

#### get unique IDs
sample_answer_clms = [i for i in data.columns.values.tolist() if 'answer' in i]
sample_perspective_clms = [i for i in data.columns.values.tolist() if 'perspective' in i]

## idiosyncratic param space
alpha_skl = space.Real(name='alpha', low=0, high=1)    # {0,1} rate at which familiarity was acquired
sigma_skl = space.Real(name='sigma', low=0, high=1)    # {0,1} context-dependent learning rate
beta_skl = space.Real(name='beta', low=0.1, high=20)   # {0,20} general disposition of VPs towards stochasticity of actions
lamda_skl = space.Real(name='lamd_a', low=0, high=2)   # {0,2} maximum familiarity

alpha_raw = np.around(np.linspace(0, 0.9, num=100), decimals=2)
sigma_raw = np.around(np.linspace(0, 0.9, num=100), decimals=2)
beta_raw = np.around(np.linspace(0.1, 19.9, num=200), decimals=2)
lamda_raw = np.around(np.linspace(0, 1.9, num=200), decimals=2)

alpha_cat = space.Categorical(categories=alpha_raw, name='alpha_cat', transform='identity')  # {0,1} rate at which familiarity was acquired
sigma_cat = space.Categorical(categories=sigma_raw, name='sigma_cat', transform='identity')  # {0,1} context-dependent learning rate
beta_cat = space.Categorical(categories=beta_raw, name='beta_cat', transform='identity')     # {0,20} general disposition of VPs towards stochasticity of actions
lamda_cat = space.Categorical(categories=lamda_raw, name='lamda_cat', transform='identity')  # {0,2} maximum familiarity
alpha_cat_1 = space.Categorical(categories=alpha_raw, name='alpha_cat_1', transform='identity')  # {0,1} rate at which familiarity was acquired
lamda_cat_1 = space.Categorical(categories=lamda_raw, name='lamda_cat_1', transform='identity')  # {0,2} maximum familiarity
        train_data, target_column=target_column)
# make an instance of DataSplitter class
splitData = DataSplitter(train_predictors, train_targets, target_column)
# get monthly splits
cv_dict = splitData.splitByMonth()
cv_splits = list(zip(cv_dict['train_indices'], cv_dict['test_indices']))

# tune RF using Bayes over monthly cv splits
#----------------------------------------------------------------------------------------#
rf_hyperparameter_space = [
    space.Integer(2, 500, name='min_samples_leaf'),
    space.Integer(100, 500, name='n_estimators'),
    space.Integer(3, 25, name='max_depth'),
    space.Real(0.01, 1, prior='uniform', name='max_features'),
    space.Real(0.01, 1, prior='uniform', name='ccp_alpha')
]
param_names = [
    'min_samples_leaf', 'n_estimators', 'max_depth', 'max_features', 'ccp_alpha'
]
optimization_function = partial(
    optimize_skopt,
    param_names=param_names,
    examples=train_predictors[predictor_columns],
    targets=train_targets,
    splits=cv_splits,
    scoring_func=scoring_func)
metrics=["accuracy"]) return model """model = tf.keras.models.Sequential([ tf.keras.layers.Conv2D(64, kernel_size=3, activation="relu", input_shape=(600, 200, 3)), tf.keras.layers.Conv2D(32, kernel_size=3, activation="relu"), tf.keras.layers.Flatten(), tf.keras.layers.Dense(1, activation="softmax") ]) model.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError(), metrics=['accuracy']) return model""" # Search space for the hyperparameter optimization search_space = [ space.Real(2, 6, name='lr'), # 1e-7, 1e-2 # Consider lr between 10^-6 and 1 space.Real(0, 0.7, name='drop1'), space.Real(0, 0.7, name='drop2'), space.Real(0, 0.15, name='loss1'), space.Real(0, 0.15, name='loss2'), space.Integer(1, 32, name='batch_size') ] # Splits the data to create the test and validation sets and trains the model. Evaluates a given Configuration and # creates a .txt file that stores the current values of the parameters after every iteration. Returns the minimal # reached loss across all iterations. @use_named_args(search_space) def evaluate_func(**kwargs): model = tf.keras.wrappers.scikit_learn.KerasRegressor(build_fn=alexnet, epochs=2)