Example #1
def _dimension_space_from_dict(dict_hyperparams):
    hyperparams = list()

    if not isinstance(dict_hyperparams, dict):
        raise TypeError('Hyperparams must be a dictionary.')

    for name, hyperparam in dict_hyperparams.items():
        hp_type = hyperparam['type']

        if hp_type == 'int':
            hp_range = hyperparam.get('range') or hyperparam.get('values')
            hp_min = min(hp_range) if hp_range else None
            hp_max = max(hp_range) if hp_range else None
            hp_instance = space.Integer(hp_min, hp_max, name=name)

        elif hp_type == 'float':
            hp_range = hyperparam.get('range') or hyperparam.get('values')
            hp_min = min(hp_range)
            hp_max = max(hp_range)
            hp_instance = space.Real(hp_min, hp_max, name=name)

        elif hp_type == 'bool':
            hp_instance = space.Categorical([True, False], name=name)

        elif hp_type == 'str':
            hp_choices = hyperparam.get('range') or hyperparam.get('values')
            hp_instance = space.Categorical(hp_choices, name=name)

        else:
            raise ValueError('Unknown hyperparameter type: {}'.format(hp_type))

        hyperparams.append(hp_instance)

    return hyperparams
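A minimal usage sketch for the helper above; the dict layout and the `from skopt import space` import are inferred from the function body rather than shown in the excerpt:

from skopt import space  # assumed import; the excerpt only references `space`

# each entry carries a 'type' plus either a 'range' or a 'values' key
dict_hyperparams = {
    'n_estimators': {'type': 'int', 'range': [100, 600]},
    'learning_rate': {'type': 'float', 'range': [0.01, 0.3]},
    'bootstrap': {'type': 'bool'},
    'criterion': {'type': 'str', 'values': ['gini', 'entropy']},
}

dimensions = _dimension_space_from_dict(dict_hyperparams)
# -> [Integer(100, 600), Real(0.01, 0.3), Categorical([True, False]), Categorical(['gini', 'entropy'])]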
Example #2
    def __init__(self,
                 model_callable,
                 param_space,
                 x_train,
                 y_train,
                 kfold_n_splits=5,
                 score_sign=-1,
                 score_measure=None,
                 x_test=None,
                 y_test=None):
        """

        @param model_callable:
        @param param_space:
        @param x_train:
        @param y_train:
        @param n_calls:
        @param kfold_n_splits: number of cross-validation folds; only used when x_test and y_test are not given
        @param score_sign: -1 to maximize the value returned by score_measure, 1 to minimize it
        @param score_measure: callable for score calculation taking y_true as the first argument and y_pred as the
            second; defaults to f1_score with average='macro' when None
        @param x_test: test data set data
        @param y_test: test data set label
        """
        self.model = model_callable
        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test
        self.y_test = y_test
        self.param_space = []
        self.param_names = []
        for param_config in param_space:
            self.param_names.append(param_config[-1])
            if isinstance(param_config[0], list):
                self.param_space.append(
                    space.Categorical(param_config[0], name=param_config[-1]))
            elif isinstance(param_config[0], float):
                self.param_space.append(
                    space.Real(low=param_config[0],
                               high=param_config[1],
                               prior='uniform',
                               name=param_config[-1]))
            elif isinstance(param_config[0], int):
                self.param_space.append(
                    space.Integer(low=param_config[0],
                                  high=param_config[1],
                                  name=param_config[-1]))
            else:
                raise TypeError(
                    'Unsupported parameter configuration for {}'.format(param_config[-1]))
        self.kfold_n_splits = kfold_n_splits

        if score_measure is not None:
            self.score_sign = score_sign
            self.score_measure = score_measure
        else:
            self.score_measure = partial(f1_score, average='macro')
            self.score_sign = -1
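A hedged sketch of the `param_space` layout this constructor expects, inferred from the type checks above; the class name `BayesTuner` and the training data are placeholders, not part of the excerpt:

from sklearn.ensemble import RandomForestClassifier  # placeholder model

# each entry ends with the parameter name; the first element decides the dimension type:
# a list -> Categorical, a float -> Real(low, high), an int -> Integer(low, high)
param_space = [
    (3, 15, 'max_depth'),                # -> space.Integer
    (0.01, 0.3, 'learning_rate'),        # -> space.Real
    (['gini', 'entropy'], 'criterion'),  # -> space.Categorical
]
tuner = BayesTuner(model_callable=RandomForestClassifier,  # BayesTuner is a hypothetical name
                   param_space=param_space,
                   x_train=x_train, y_train=y_train)       # x_train, y_train assumed to exist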
Example #3
def skopt_bayesian_optimization(X, y):

    param_space = [
        space.Integer(3, 15, name='max_depth'),
        space.Integer(100, 600, name='n_estimators'),
        space.Categorical(['gini', 'entropy'], name='criterion'),
        space.Real(0.01, 1, prior='uniform', name='max_features')
    ]

    param_names = ['max_depth', 'n_estimators', 'criterion', 'max_features']

    optimization_function = partial(optimize,
                                    param_names=param_names,
                                    x=X,
                                    y=y)

    result = gp_minimize(optimization_function,
                         dimensions=param_space,
                         n_calls=15,
                         n_random_starts=10,
                         verbose=10)
    print(dict(zip(param_names, result.x)))
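The `optimize` callable wrapped by `partial` is not part of this excerpt; a minimal sketch of the usual signature, assuming a random forest scored by stratified k-fold accuracy (the same fold loop appears in Example #7 below):

import numpy as np
from sklearn import ensemble, metrics, model_selection

def optimize(params, param_names, x, y):
    # gp_minimize passes a list of values in the same order as `dimensions`
    params = dict(zip(param_names, params))
    model = ensemble.RandomForestClassifier(**params)
    kf = model_selection.StratifiedKFold(n_splits=5)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        model.fit(x[train_idx], y[train_idx])
        preds = model.predict(x[test_idx])
        accuracies.append(metrics.accuracy_score(y[test_idx], preds))
    return -1.0 * np.mean(accuracies)  # negate because gp_minimize minimizes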
Example #4
        print(
            "Found new optimum of {:.2f} in iteration {:d} by using parameters {}!"
            .format(
                r["fun"],
                len(r["func_vals"]),
                np.array2string(np.array(r["x"]), precision=6),
            ))
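The print above reads like the body of a per-iteration callback; a hedged sketch of how such a callback is typically registered, where the function name `on_step` and the call values are assumptions and `gp_minimize` stands in for whatever `FUNCTIONS[parsed_args.f]` dispatches to:

def on_step(r):
    # skopt passes the partial OptimizeResult after every evaluation
    if r["func_vals"][-1] <= r["fun"]:  # latest evaluation is the new best
        print("Found new optimum of {:.2f} in iteration {:d} by using parameters {}!".format(
            r["fun"], len(r["func_vals"]), np.array2string(np.array(r["x"]), precision=6)))

# illustration only: wire the callback into the optimizer
res = gp_minimize(objective, optimization_space, n_calls=50, callback=[on_step])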


if __name__ == "__main__":
    try:
        optimization_space = [
            space.Real(0.3, 1.),  # a2-scale
            space.Real(1, 5.),  # theta_scale
            space.Real(3., 8.),  # theta_dot_scale +
            space.Integer(65, 95),  # ac memory +
            space.Integer(50, 70),  # pm memory +
            space.Real(0.03, 0.3),  # theta-spread +
            space.Real(0.3, 1.),  # thetadot-spread +
            space.Integer(1, 3),  # exploration
            space.Real(5., 10.),  # theta-reward +
            space.Real(0.1, 2.),  # thetadot_reward +
            space.Real(3., 6.),  # a2-reward +
            space.Integer(16, 30),  # k_a +
            space.Integer(18, 35),  # k_c +
            space.Integer(9, 30),  # k_pm +
        ]

        res = FUNCTIONS[parsed_args.f](
            objective,
            optimization_space,
Example #5
    x = tf.keras.layers.Dense(hidden, activation="elu")(x)

    output_layer = tf.keras.layers.Dense(1)(x)
    xception_model = tf.keras.Model(inputs=base_xception.input,
                                    outputs=output_layer)

    xception_model.compile(tf.keras.optimizers.Adam(lr=lr),
                           tf.keras.losses.MeanSquaredError(),
                           ["mae", "accuracy"])
    return xception_model


search_space = [
    space.Real(2.5, 5.5, name='lr'),
    space.Integer(200, 1000, name='hidden'),
    space.Real(0, 0.7, name='drop'),
    space.Integer(1, 32, name='batch_size')
]


# Function called by gp_minimize; contains the fit call
@use_named_args(search_space)
def evaluate_func(**kwargs):

    model = tf.keras.wrappers.scikit_learn.KerasRegressor(
        build_fn=init_xception, epochs=2)
    model.set_params(**kwargs)

    x_train, x_test, y_train, y_test = train_test_split(x_data,
                                                        y_data,
Example #6
 def add_hm_dimensions(self):
     """Add model hyperparameters to the dimension list.
     """
     for low, high in self.space_list:
         self.dimensions.append(space.Integer(low, high))
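A small usage sketch of the `space_list` layout this method assumes; the instance name and both attributes outside the excerpt are hypothetical:

tuner.space_list = [(50, 500), (2, 10)]  # (low, high) bounds, e.g. n_estimators and max_depth
tuner.dimensions = []
tuner.add_hm_dimensions()
# tuner.dimensions -> [Integer(low=50, high=500), Integer(low=2, high=10)]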
Example #7
        xtrain = x[train_idx]
        ytrain = y[train_idx]

        xtest = x[test_idx]
        ytest = y[test_idx]

        model.fit(xtrain, ytrain)
        preds = model.predict(xtest)
        fold_acc = metrics.accuracy_score(ytest, preds)
        accuracies.append(fold_acc)

    return -1 * np.mean(accuracies)


param_space = [
    space.Integer(3, 15, name="max_depth"),
    space.Integer(100, 600, name="n_estimators"),
    space.Categorical(["gini", "entropy"], name="criterion"),
    space.Real(0.01, 1, prior="uniform", name="max_features"),
]

param_names = ["max_depth", "n_estimators", "criterion", "max_features"]

optimization_function = partial(optimize, param_names=param_names, x=X, y=y)

result = gp_minimize(
    optimization_function,
    dimensions=param_space,
    n_calls=15,
    n_random_starts=10,
    verbose=10,
Example #8
    X_test = X_test.loc[:, selected_features]
    print(f"There are {X.shape[1]} features in the training dataset.")

    cat_cols = ["shop_id", "item_id", "item_category_id", "item_subname", "city", "shop_type", "shop_subname",
                "item_subcategory_name", "item_supcategory_name"]
    cat_cols = [col for col in X.columns if col in cat_cols]

    X[cat_cols] = X[cat_cols].fillna("None")
    X_test[cat_cols] = X_test[cat_cols].fillna("None")

    if args.tune:

        if args.model == "catboost":

            param_space = [
                space.Integer(100, 5000, name="iterations"),
                space.Real(0.01, 0.3, prior="uniform", name="learning_rate"),
                space.Integer(4, 12, name="depth"),
                space.Integer(2, 30, name="l2_leaf_reg"),
                space.Integer(1, 255, name="border_count"),
                space.Real(1e-2, 10, prior="log-uniform", name="random_strength"),
                space.Real(0, 2, prior="uniform", name="bagging_temperature"),
            ]

            param_names = ["iterations", "learning_rate", "depth", "l2_leaf_reg", "border_count", "random_strength",
                           "bagging_temperature"]

            optimization_function = partial(optimize, param_names=param_names, X=X, y=y, folds=folds)
            result = gp_minimize(optimization_function, dimensions=param_space, n_calls=40, n_random_starts=10,
                                 verbose=10)
            best_params = dict(zip(param_names, result.x))
Example #9
from skopt import space
from skopt.utils import use_named_args
from skopt import gp_minimize
"""
We will tune the following hyperparameters of the SVM model:

- C, the regularization parameter.
- kernel, the type of kernel used in the model.
- degree, used for the polynomial kernel.
- gamma, used in most other kernels.
"""
search_space = list()
search_space.append(space.Real(1e-6, 100.0, prior="log-uniform", name="C"))
search_space.append(
    space.Categorical(["linear", "poly", "rbf", "sigmoid"], name="kernel"))
search_space.append(space.Integer(1, 5, name="degree"))
search_space.append(space.Real(1e-6, 100.0, prior="log-uniform", name="gamma"))


# define the function used to evaluate a given configuration
@use_named_args(search_space)
def evaluate_model(**params):
    # configure the model with specific hyperparameters
    model = SVC()
    model.set_params(**params)
    # define test harness
    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    # calculate 10-fold cross validation
    result = cross_val_score(model, X, y, cv=cv, n_jobs=-1, scoring='accuracy')
    # calculate the mean of the scores
    estimate = np.mean(result)
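    # (hedged continuation, not part of the excerpt) return an error value for gp_minimize to minimize
    return 1.0 - estimate


# the decorated objective is then passed straight to the optimizer, e.g.:
result = gp_minimize(evaluate_model, search_space, n_calls=30, random_state=1)
print('Best accuracy: %.3f' % (1.0 - result.fun))
print('Best parameters:', dict(zip([dim.name for dim in search_space], result.x)))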
Example #10
             # ,'n_iter':1
             ,
             'verbose': True,
             'random_state': 0
         },
         'variable': {
             'learning_rate': [0.1, 0.01, 0.005],
             'num_leaves': linspace(10, 1010, 100, dtype=int),
             'max_depth': linspace(2, 8, 6, dtype=int),
             'min_samples_split': linspace(200, 2200, 10, dtype=int),
             'min_samples_leaf': linspace(50, 550, 10, dtype=int)
         }
     },
     'skopt_params': [
         space.Real(0.01, 0.5, name='learning_rate', prior='log-uniform'),
         space.Integer(1, 30, name='max_depth'),
         space.Integer(2, 100, name='num_leaves'),
         space.Integer(200, 2000, name='min_samples_split'),
         space.Integer(50, 500, name='min_samples_leaf'),
     ],
     'fit_params': {
         'verbose': True
     }
 },
 'fund_vars': {
     '_fund_value_st': 1000000  #£10,000
     ,
     '_trade_cost': 250  #£2.50
     ,
     '_investment_limit_min_val': 100000  #£1,000
     ,
Example #11
        y_pred = model.predict(x_val)
        rmse = mean_squared_error(y_val, y_pred, squared=False)
        errors.append(rmse)
    return np.mean(errors)
        

if __name__ == '__main__':
    df = pd.read_csv('input/train.csv')
    df = df.drop('id', axis=1)
    x = df.drop('target', axis=1).values
    y = df.target.values

    param_space = [
        space.Real(0.01, 0.1, name='eta'),
        space.Real(0.05, 1.0, name='gamma'),
        space.Integer(3, 25, name ='max_depth'),
        space.Integer(1, 7, name='min_child_weight'),
        space.Real(0.6, 1.0, name='subsample'),
        space.Real(0.6, 1.0, name='colsample_bytree'),
        space.Real(0.01, 1.0, name='lambda'),
        space.Real(0.0, 1.0, name='alpha')
    ]

    param_names = ['eta', 'gamma', 'max_depth', 'min_child_weight',
                   'subsample', 'colsample_bytree', 'lambda', 'alpha']
    
    optimization_function = partial(
        optimize,
        param_names=param_names,
        x=x,
        y=y
Example #12
def init_search_space_dict(test=False) -> dict:
	search_space_dict = dict(
		dim_batch_sampling_method=space.Categorical(categories=['random_geometric', 'random_uniform', 'systematic_uniform'],
													name='batch_sampling_method'),
		dim_window_size=space.Integer(low=10, high=1000, name='window_size'),
		dim_stride=space.Integer(low=1, high=10, name='stride'),
		dim_batch_size=space.Integer(low=10, high=1000, name='batch_size'),
		dim_num_training_steps=space.Integer(low=10000, high=5000000, name='num_training_steps'),
		dim_learning_rate=space.Real(low=1e-6, high=1e-2, prior='log-uniform', name='learning_rate'),
		dim_geometric_decay=space.Real(low=1e-6, high=1, prior='log-uniform', name='geometric_decay'),
		dim_conv_layers_seperable=space.Categorical(categories=[True, False], name='conv_layers_separable'),
		dim_len_conv1_filters=space.Integer(low=2, high=10, name='len_conv1_filters'),
		dim_num_conv1_features=space.Integer(low=1, high=64, name='num_conv1_features'),
		dim_num_conv2_features=space.Integer(low=8, high=128, name='num_conv2_features'),
		dim_num_fc1_neurons=space.Integer(low=8, high=32, name='num_fc1_neurons'),
		### Third conv layer doesn't work yet
		dim_model_ending=space.Categorical(categories=['one_fc_layer', 'two_fc_layers', 'third_conv_layer'], name='model_ending'),
		dim_dropout_keep_prob=space.Real(low=.1, high=.9, name='dropout_keep_prob'),
	)
	if test:
		search_space_dict.update({'dim_batch_size': space.Integer(low=10, high=30, name='batch_size'),
								  'dim_num_training_steps': space.Integer(low=2, high=4, name='num_training_steps'),
								  'dim_window_size': space.Integer(low=10, high=50, name='window_size'),
								  'dim_stride': space.Integer(low=1, high=2, name='stride')})
	return search_space_dict
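A hedged usage note: gp_minimize takes a flat list of dimensions, so a dict built this way is typically unpacked before the call; the import and the objective name below are assumptions:

from skopt import gp_minimize  # assumed import

search_space_dict = init_search_space_dict(test=True)
dimensions = list(search_space_dict.values())
result = gp_minimize(train_and_evaluate, dimensions=dimensions, n_calls=12)  # train_and_evaluate is hypothetical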
Example #13
    # return the negative mean fold accuracy (since we minimize)
    return -1.0 * np.array(accuracies).mean()

# Everything needs to be done in a cross validation loop
if __name__ == '__main__':

    # Bayesian (Gaussian Process Optimisation)
    # https://scikit-optimize.github.io/stable/modules/generated/skopt.gp_minimize.html
    df = pd.read_csv('../data/datasets_11167_15520_train.csv')
    print('%i predictive features'%(len(df.columns)-1))
    X = df.drop('price_range', axis=1).values
    y = df['price_range'].values

    param_space =[
        space.Real(0.1, 1, prior= 'uniform', name='max_features'),
        space.Integer(100,1000, name='n_estimators'),
        space.Integer(5, 25, name='min_samples_leaf'),
        space.Categorical(['gini', 'entropy'], name='criterion')
    ]

    param_names = [
        'max_features',
        'n_estimators',
        'min_samples_leaf',
        'criterion'
    ]

    optimization_function = partial(
        optimize,
        param_names = param_names,
        x=X,
Example #14
    # split predictors from label
    train_predictors, train_targets = splitTargetFromLabels(train_data,
                                                            target_column=target_column)

    # make an instance of dataSplitter class
    splitData = DataSplitter(train_predictors, train_targets, target_column)

    # get monthly splits
    cv_dict = splitData.splitByMonth()
    cv_splits = list(zip(cv_dict['train_indices'], cv_dict['test_indices']))

    # tune RF using Bayes over monthly cv splits

    #----------------------------------------------------------------------------------------#
    rf_hyperparameter_space = [
        space.Integer(100, 1000, name='n_estimators'),
        space.Integer(2, 10, name='max_depth'),
        space.Real(0.5, 1, prior='uniform', name='max_features'),
        space.Real(0.00001, 10.0, prior='log-uniform', name='learning_rate'),
        space.Real(0.5, 1, prior='uniform', name='subsample'),
    ]

    param_names = ['n_estimators', 'max_depth', 'max_features', 'learning_rate',
                   'subsample']

    optimization_function = partial(
        optimize_skopt,
        param_names=param_names,
        examples=train_predictors[predictor_columns],
        targets=train_targets,
        splits=cv_splits,
Example #15
    base_resnet152 = tf.keras.applications.ResNet152V2(input_shape=(600, 200, 3), include_top=False)
    x = base_resnet152.output
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dropout(drop)(x)

    x = tf.keras.layers.Dense(hidden, activation="elu")(x)
    x = tf.keras.layers.Dropout(drop)(x)

    output_layer = tf.keras.layers.Dense(1)(x)
    resnet152v2 = tf.keras.Model(inputs=base_resnet152.input, outputs=output_layer)

    resnet152v2.compile(tf.keras.optimizers.Adam(lr=lr), tf.keras.losses.MeanSquaredError(), ["mae", "accuracy"])
    return resnet152v2


search_space = [space.Real(2,6, name='lr'),
         space.Integer(300,2000, name='hidden'),
         space.Real(0.01,0.7,name='drop'),
         space.Integer(1,4,name='batch_size')]

@use_named_args(search_space)
def evaluate_func(**kwargs):
    model = tf.keras.wrappers.scikit_learn.KerasRegressor(build_fn=init_resnet50v2, epochs=1)
    model.set_params(**kwargs)

    x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.33, random_state=42)

    fit_model = model.fit(x_train, y_train, validation_split=0.2, callbacks=[callb, callb2], shuffle=True)

    score = mean_squared_error(y_test, model.predict(x_test))
    print("score", score)
Example #16
    return -1 * np.mean(accuracies)


if __name__ == "__main__":
    # read the training data
    df = pd.read_csv('dataset/train.csv')

    # here we have training features
    x = df.drop('price_range' , axis = 1).values
    # and the targets
    y = df.price_range.values

    # define a parameter space
    param_space = [
        # max_depth is an integer between 3 and 15
        space.Integer(3,15, name='max_depth'),
        # n_estimators is an integer between 100 and 1500
        space.Integer(100, 1500 , name="n_estimators"),
        # criterion is a category. here we define list of categories
        space.Categorical(['gini' , 'entropy'] ,  name='criterion'),
        # you can also have a real-numbered space and define a
        # distribution you want to pick it from
        space.Real(0.01 , 1 , prior='uniform' , name='max_features')
    ]

    # make a list of param names
    # this has to be in the same order as the search space
    # inside the main function
    param_names = [
        'max_depth',
        'n_estimators',
Example #17
        model.fit(xtrain,ytrain)
        pred = model.predict(xtest)
        fold_acc = metrics.accuracy_score(ytest,pred)
        accuracies.append(fold_acc)

    return -1.0 * np.mean(accuracies) 


#read the dataset
if __name__ == '__main__':
    df = pd.read_csv('input/train.csv')
    X = df.drop('price_range',axis = 1).values
    y = df.price_range.values

    param_space = [
        space.Integer(3,15, name = 'max_depth'),
        space.Integer(100,600, name = 'n_estimators'),
        space.Categorical(['gini','entropy'],name = 'criterion'),
        space.Real(0.01,1,prior = 'uniform',name = 'max_features')
    ]

    param_names = [
        'max_depth',
        'n_estimators',
        'criterion',
        'max_features'
    ]

    optimization_function = partial(
        optimize, param_names = param_names, x=X, y = y
    )
Example #18
        ),
        metric=parsed_args.metric,
    )
    training_message = "Finished training with cumulative z-error {:.2f}".format(
        result.get_cum_state_error().flatten()[1])
    print(training_message)
    return result.get_cum_state_error().flatten()[1]


if __name__ == "__main__":
    try:
        optimization_space = [
            space.Real(2., 35.0),  # z-scale
            space.Real(0.3, 1.0),  # zdot_reward
            space.Real(2., 5.),  # action reward
            space.Integer(1, 3),  # exploration
            space.Real(1e-5, 3e-2, prior="log-uniform"),  # tolerance
            space.Integer(16, 32),  # Maxmem * 50
            space.Integer(13, 18),  # ka
            space.Real(1e-3, 1., prior="log-uniform"),  # alpha a
            space.Integer(18, 24),  # kc
            space.Real(1e-3, 1., prior="log-uniform"),  # alpha c
            space.Integer(10, 20),  # Maxmem * 50
            space.Integer(9, 12),  # kpm
            space.Real(3e-8, 3e-4, prior="log-uniform"),  # pred_tol pm
            space.Real(0.75, 0.95),  # lambda
            space.Real(0.90, 0.97),  # gamma
        ]

        print("Starting new optimization run.")
Example #19
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1, activation="softmax")
    ])
    model.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError(), metrics=['accuracy'])
    return model"""


# Search space for the hyperparameter optimization
search_space = [
    space.Real(2, 6,
               name='lr'),  # 1e-7, 1e-2   # Consider lr between 10^-6 and 1
    space.Real(0, 0.7, name='drop1'),
    space.Real(0, 0.7, name='drop2'),
    space.Real(0, 0.15, name='loss1'),
    space.Real(0, 0.15, name='loss2'),
    space.Integer(1, 32, name='batch_size')
]


# Splits the data to create the test and validation sets and trains the model. Evaluates a given configuration and
# creates a .txt file that stores the current parameter values after every iteration. Returns the minimum loss
# reached across all iterations.
@use_named_args(search_space)
def evaluate_func(**kwargs):
    model = tf.keras.wrappers.scikit_learn.KerasRegressor(build_fn=alexnet,
                                                          epochs=2)
    model.set_params(**kwargs)

    x_train, x_test, y_train, y_test = train_test_split(x_data,
                                                        y_data,
                                                        test_size=0.33,
Example #20
    # split predictors from label
    train_predictors, train_targets = splitTargetFromLabels(
        train_data, target_column=target_column)

    # make an instance of dataSplitter class
    splitData = DataSplitter(train_predictors, train_targets, target_column)

    # get monthly splits
    cv_dict = splitData.splitByMonth()
    cv_splits = list(zip(cv_dict['train_indices'], cv_dict['test_indices']))

    # tune RF using Bayes over monthly cv splits
    #----------------------------------------------------------------------------------------#
    rf_hyperparameter_space = [
        space.Integer(2, 500, name='min_samples_leaf'),
        space.Integer(100, 500, name='n_estimators'),
        space.Integer(3, 25, name='max_depth'),
        space.Real(0.01, 1, prior='uniform', name='max_features'),
        space.Real(0.01, 1, prior='uniform', name='ccp_alpha')
    ]

    param_names = [
        'min_samples_leaf', 'n_estimators', 'max_depth', 'max_features',
        'ccp_alpha'
    ]

    optimization_function = partial(
        optimize_skopt,
        param_names=param_names,
        examples=train_predictors[predictor_columns],
Example #21
        accuracies.append(fold_accuracy)

    return -1 * np.mean(accuracies)

if __name__ == "__main__":
    df = pd.read_csv(file)

    # Price range is the target variable
    X= df.drop("price_range", axis = 1).values
    y = df.price_range.values

    # define a param space 

    param_space = [
            # max_depth is an integer between 3 and 15
            space.Integer(3, 15, name="max_depth"),
            # n_estimators is an integer between 100 and 1500
            space.Integer(100, 1500, name="n_estimators"),
            # criterion is a category
            space.Categorical(["gini", "entropy"], name="criterion"),
            # You can also have a real numbered space and define a distribution you want to pick it from
            space.Real(0.01, 1, prior="uniform", name="max_features"),
    ]
    # make a list of param names
    # SAME ORDER AS THE PARAM SPACE 

    param_names = [
            "max_depth",
            "n_estimators]",
            "criterion",