Example #1
    def full_grid(self):
        # This function parses the param_grid variable from the user and sets up the
        # parameter space for Bayesian search.

        self.param_types = [
            self.param_grid[item][0] for item in self.param_grid
        ]
        self.param_lst = []
        for i, item in enumerate(self.param_grid):
            if self.param_types[i] in ['grid', 'categorical']:
                self.param_lst.append(self.param_grid[item][1])
            else:
                self.param_lst.append(self.param_grid[item][1:])

        self.param_names = [item for item in self.param_grid]
        self.dimensions = []
        self.func_args = self.get_func_args(self.fit)

        for types, vals, names in zip(self.param_types, self.param_lst,
                                      self.param_names):
            if types in ['int', 'discrete']:
                lb = vals[0]
                ub = vals[1]
                self.dimensions.append(Integer(low=lb, high=ub, name=names))
            elif types in ['float', 'continuous']:
                lb = vals[0]
                ub = vals[1]
                self.dimensions.append(Real(low=lb, high=ub, name=names))
            elif types in ['grid', 'categorical']:
                real_grid = vals
                self.dimensions.append(
                    Categorical(categories=tuple(real_grid), name=names))
            else:
                raise Exception(
                    '--error: the param type must be one of int/discrete, float/continuous, or grid/categorical; this type is not available: `{}`'
                    .format(types))
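For context, a minimal sketch (hypothetical names and values) of the param_grid format this helper appears to expect, and the equivalent skopt dimensions it would build:

from skopt.space import Real, Integer, Categorical

# Hypothetical param_grid: value[0] is the type tag, the remaining entries are
# the bounds (or, for grid/categorical, a list of choices).
param_grid = {
    'learning_rate': ['float', 1e-4, 1e-1],
    'n_layers': ['int', 1, 5],
    'activation': ['grid', ['relu', 'tanh']],
}

# full_grid() would then produce dimensions equivalent to:
dimensions = [
    Real(low=1e-4, high=1e-1, name='learning_rate'),
    Integer(low=1, high=5, name='n_layers'),
    Categorical(categories=('relu', 'tanh'), name='activation'),
]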
Example #2
 def __init__(self, skopt_args=None, space=None):
     super().__init__(space)
     if skopt is None:
         raise ValueError('scikit-optimize is not installed')
     skopt_dims = []
     param_names = []
     for n, p in self.space.named_params():
         if isinstance(p, Numeric):
             if p.is_int():
                 sd = Integer(*p.bound, name=n)
             else:
                 sd = Real(*p.bound, name=n)
         elif isinstance(p, ParamCategorical):
             sd = Categorical(p.choices, name=n)
         else:
             continue
         skopt_dims.append(sd)
         param_names.append(n)
     skopt_args = skopt_args or {}
     skopt_args['dimensions'] = skopt_dims
     if 'random_state' not in skopt_args:
         skopt_args['random_state'] = int(time.time())
     self.param_names = param_names
     self.skoptim = Optimizer(**skopt_args)
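For reference, a self-contained sketch (stand-in objective, not tied to the class above) of the ask/tell loop that typically drives a skopt Optimizer instance such as self.skoptim:

from skopt import Optimizer
from skopt.space import Real

opt = Optimizer(dimensions=[Real(-2.0, 2.0, name='x')], random_state=0)
for _ in range(10):
    x = opt.ask()              # next point suggested by the surrogate model
    y = (x[0] - 0.5) ** 2      # evaluate a stand-in objective
    result = opt.tell(x, y)    # report the observation back to the optimizer
print(result.x, result.fun)    # best point and best value seen so far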
 def run(self):
     self.x0 = None
     self.y0 = None
     space = [
         Real(self.start, self.end, name=x) for x in self.matrices_names
     ]
     self.res = gp_minimize(self.obiettivo,
                            space,
                            base_estimator=None,
                            n_calls=self.n_calls,
                            n_random_starts=self.n_random_starts,
                            acq_func='gp_hedge',
                            acq_optimizer='auto',
                            x0=self.x0,
                            y0=self.y0,
                            random_state=None,
                            verbose=self.verbose,
                            callback=None,
                            n_points=self.n_points,
                            n_restarts_optimizer=10,
                            xi=self.step,
                            kappa=1.96,
                            noise='gaussian',
                            n_jobs=self.n_cpu)
Example #4
def orion_space_to_skopt_space(orion_space):
    """Convert Oríon's definition of problem's domain to a skopt compatible."""
    dimensions = []
    for key, dimension in orion_space.items():
        #  low = dimension._args[0]
        #  high = low + dimension._args[1]
        low, high = dimension.interval()
        # NOTE: A hack, because Oríon priors have a non-inclusive upper bound
        #       while scikit-optimize bounds are inclusive.
        # pylint: disable = assignment-from-no-return
        high = numpy.nextafter(high, high - 1)
        shape = dimension.shape
        assert not shape or len(shape) == 1
        if not shape:
            shape = (1, )
        # Unpack dimension
        for i in range(shape[0]):
            dimensions.append(
                Real(name=key + '_' + str(i),
                     prior='uniform',
                     low=low,
                     high=high))

    return Space(dimensions)
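A small self-contained check of the numpy.nextafter trick used above: it returns the largest float strictly below high, which is how the exclusive upper bound becomes one that skopt can treat as inclusive.

import numpy

high = 1.0
adjusted = numpy.nextafter(high, high - 1)
print(adjusted < high)   # True
print(high - adjusted)   # ~1.1e-16, i.e. one ULP below 1.0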
def getPipeRC(num_features):
    """
    Return a pipeline and a search space for a RidgeClassifier.

    Parameters
    ----------
    num_features: int
        The number of features that the estimator will be trained on.

    Returns
    -------
    Pipeline, dict
        A pipeline object representing an estimator followed
        by a dictionary representing a search space for Bayesian
        hyperparameter optimization.

    """
    from skopt.space import Real
    rc = RidgeClassifier(solver='auto')
    search_space = {
        'ridgeclassifier__alpha': Real(MIN_SEARCH, 1.0, prior="uniform")
    }
    return (Pipeline([('ss', StandardScaler()),
                      ('ridgeclassifier', rc)]), search_space)
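One common way to consume such a (pipeline, search space) pair is skopt's BayesSearchCV; below is a self-contained sketch on toy data, with MIN_SEARCH stood in as 1e-6:

from sklearn.datasets import make_classification
from sklearn.linear_model import RidgeClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from skopt import BayesSearchCV
from skopt.space import Real

MIN_SEARCH = 1e-6  # stand-in for the module-level constant used above

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
pipe = Pipeline([('ss', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier(solver='auto'))])
search_space = {'ridgeclassifier__alpha': Real(MIN_SEARCH, 1.0, prior='uniform')}

opt = BayesSearchCV(pipe, search_space, n_iter=16, cv=3, random_state=0)
opt.fit(X, y)
print(opt.best_params_, opt.best_score_)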
 def indicator_space() -> List[Dimension]:
     return [
         # Base Timeframe
         Real(0.005, 0.015, name='base-bbdelta'),
         Real(0.01, 0.03, name='base-closedelta'),
         Real(0.10, 0.75, name='base-tail'),
         Real(0.75, 1.1, name='base-bblower'),
         Integer(5, 50, name='base-vms'),
         # Informative Timeframe
         Categorical(['lower', 'upper', 'both', 'none'], name='inf-guard'),
         Real(0.70, 0.99, name='inf-pct-adr-top'),
         Real(0.01, 0.20, name='inf-pct-adr-bot'),
         # Extra BTC/ETH Stakes
         Integer(10, 70, name='xtra-inf-stake-rmi'),
         Integer(10, 70, name='xtra-base-stake-rmi'),
         Integer(10, 70, name='xtra-base-fiat-rmi'),
         # Extra BTC/STAKE if not in whitelist
         Integer(10, 70, name='xbtc-base-rmi'),
         Integer(10, 70, name='xbtc-inf-rmi')
     ]
Example #7
def optimize_xg(model_xg, X_tr, y_tr, X_vl=None, y_vl=None):
    space = [
        Integer(50, 500, name='n_estimators'),
        Integer(1, 5, name='max_depth'),
        Integer(1, 10, name='min_child_weight'),
        Real(10 ** -5, 1, "log-uniform", name='learning_rate'),
        Integer(1, 10, name='max_delta_step'),

        Real(.1, 1, name='subsample'),
        Real(.1, 1, name='colsample_bytree'),

        Real(10 ** -5, 1, "log-uniform", name='reg_alpha'),
        Real(10 ** -5, 1, "log-uniform", name='reg_lambda'),
        Real(10 ** -5, 10, "log-uniform", name='gamma'),
    ]

    @use_named_args(space)
    def objective(**params):
        model_xg.set_params(**params)

        if X_vl is None or y_vl is None:
            scores = cross_val_score(model_xg, X_tr, y_tr, cv=4, scoring="neg_mean_absolute_error")
            score = -np.mean(scores)
        else:
            model_xg.fit(X_tr, y_tr)
            y_vl_pred = model_xg.predict(X_vl)
            score = mean_absolute_error(y_vl, y_vl_pred)

        return score

    resp = gp_minimize(
        objective,
        space,
        n_calls=100,
        random_state=42,
        verbose=True,
        n_jobs=-1,
    )

    return resp
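A hypothetical call to the function above, assuming xgboost is installed and the snippet's unshown imports (numpy as np, gp_minimize, use_named_args, Integer, Real, cross_val_score, mean_absolute_error) are in scope; with n_calls=100 this is slow:

import numpy as np
from xgboost import XGBRegressor

rng = np.random.default_rng(0)
X_tr = rng.normal(size=(200, 5))
y_tr = X_tr @ rng.normal(size=5) + rng.normal(scale=0.1, size=200)

res = optimize_xg(XGBRegressor(objective='reg:squarederror'), X_tr, y_tr)
print(res.x, res.fun)   # best hyperparameters and lowest cross-validated MAE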
def test_space_consistency():
    # Reals (uniform)
    s1 = Space([Real(0.0, 1.0)]).rvs(n_samples=10, random_state=0)
    s2 = Space([Real(0.0, 1.0)]).rvs(n_samples=10, random_state=0)
    s3 = Space([Real(0, 1)]).rvs(n_samples=10, random_state=0)
    s4 = Space([(0.0, 1.0)]).rvs(n_samples=10, random_state=0)
    s5 = Space([(0.0, 1.0, "uniform")]).rvs(n_samples=10, random_state=0)
    assert_array_equal(s1, s2)
    assert_array_equal(s1, s3)
    assert_array_equal(s1, s4)
    assert_array_equal(s1, s5)

    # Reals (log-uniform)
    s1 = Space([Real(10**-3.0, 10**3.0,
                     prior="log-uniform")]).rvs(n_samples=10, random_state=0)
    s2 = Space([Real(10**-3.0, 10**3.0,
                     prior="log-uniform")]).rvs(n_samples=10, random_state=0)
    s3 = Space([Real(10**-3, 10**3, prior="log-uniform")]).rvs(n_samples=10,
                                                               random_state=0)
    s4 = Space([(10**-3.0, 10**3.0, "log-uniform")]).rvs(n_samples=10,
                                                         random_state=0)
    assert_array_equal(s1, s2)
    assert_array_equal(s1, s3)
    assert_array_equal(s1, s4)

    # Integers
    s1 = Space([Integer(1, 5)]).rvs(n_samples=10, random_state=0)
    s2 = Space([Integer(1.0, 5.0)]).rvs(n_samples=10, random_state=0)
    s3 = Space([(1, 5)]).rvs(n_samples=10, random_state=0)
    assert_array_equal(s1, s2)
    assert_array_equal(s1, s3)

    # Categoricals
    s1 = Space([Categorical(["a", "b", "c"])]).rvs(n_samples=10,
                                                   random_state=0)
    s2 = Space([Categorical(["a", "b", "c"])]).rvs(n_samples=10,
                                                   random_state=0)
    assert_array_equal(s1, s2)
    def _hyperparameter_optimization(self,
                                     num_iterations=30,
                                     save_results=True,
                                     display_plot=False,
                                     batch_size=20,
                                     n_random_starts=10,
                                     use_TPU=False,
                                     transfer_model='Inception',
                                     cutoff_regularization=False,
                                     min_accuracy=None):
        """
        min_accuracy: minimum value of categorical accuracy we want after 1 iteration
        num_iterations: number of hyperparameter combinations we try
        n_random_starts: number of random combinations of hyperparameters first tried
        """
        self.min_accuracy = min_accuracy
        self.batch_size = batch_size
        self.use_TPU = use_TPU
        self.transfer_model = transfer_model
        self.cutoff_regularization = cutoff_regularization

        #import scikit-optimize libraries
        from skopt import gp_minimize
        from skopt.space import Real, Categorical, Integer
        from skopt.plots import plot_convergence
        from skopt.utils import use_named_args

        #declare the hyperparameters search space
        dim_epochs = Integer(low=1, high=10, name='epochs')
        dim_hidden_size = Integer(low=6, high=2048, name='hidden_size')
        dim_learning_rate = Real(low=1e-6,
                                 high=1e-2,
                                 prior='log-uniform',
                                 name='learning_rate')
        dim_dropout = Real(low=0, high=0.9, name='dropout')
        dim_fine_tuning = Categorical(categories=[True, False],
                                      name='fine_tuning')
        dim_nb_layers = Integer(low=1, high=3, name='nb_layers')
        dim_activation = Categorical(categories=['relu', 'tanh'],
                                     name='activation')
        dim_include_class_weight = Categorical(categories=[True, False],
                                               name='include_class_weight')

        dimensions = [
            dim_epochs, dim_hidden_size, dim_learning_rate, dim_dropout,
            dim_fine_tuning, dim_nb_layers, dim_activation,
            dim_include_class_weight
        ]

        #read default parameters from last optimization
        try:
            with open(
                    parentdir +
                    '/data/trained_models/hyperparameters_search.pickle',
                    'rb') as f:
                # the save step below stores search_result.x (a plain list)
                default_parameters = dill.load(f)
            print('parameters of previous optimization loaded!')

        except Exception:
            # fall back to default values
            default_parameters = [5, 1024, 1e-4, 0, True, 1, 'relu', True]

        self.number_iterations = 0

        #declare the fitness function
        @use_named_args(dimensions=dimensions)
        def fitness(epochs, hidden_size, learning_rate, dropout, fine_tuning,
                    nb_layers, activation, include_class_weight):

            self.number_iterations += 1

            #print the hyper-parameters
            print('epochs:', epochs)
            print('hidden_size:', hidden_size)
            print('learning rate:', learning_rate)
            print('dropout:', dropout)
            print('fine_tuning:', fine_tuning)
            print('nb_layers:', nb_layers)
            print('activation:', activation)
            print('include_class_weight', include_class_weight)
            print()

            #fit the model
            self.fit(epochs=epochs,
                     hidden_size=hidden_size,
                     learning_rate=learning_rate,
                     dropout=dropout,
                     fine_tuning=fine_tuning,
                     nb_layers=nb_layers,
                     activation=activation,
                     include_class_weight=include_class_weight,
                     batch_size=self.batch_size,
                     use_TPU=self.use_TPU,
                     transfer_model=self.transfer_model,
                     min_accuracy=self.min_accuracy,
                     cutoff_regularization=self.cutoff_regularization)

            #extract fitness
            fitness = self.fitness

            print('CALCULATED FITNESS AT ITERATION', self.number_iterations,
                  'OF:', fitness)
            print()

            del self.model
            K.clear_session()

            return -1 * fitness

        # optimization
        self.search_result = gp_minimize(
            func=fitness,
            dimensions=dimensions,
            acq_func='EI',  # Expected Improvement.
            n_calls=num_iterations,
            n_random_starts=n_random_starts,
            x0=default_parameters)

        if save_results:
            if not os.path.exists(parentdir + '/data/trained_models'):
                os.makedirs(parentdir + '/data/trained_models')

            with open(
                    parentdir +
                    '/data/trained_models/hyperparameters_dimensions.pickle',
                    'wb') as f:
                dill.dump(dimensions, f)

            with open(
                    parentdir +
                    '/data/trained_models/hyperparameters_search.pickle',
                    'wb') as f:
                dill.dump(self.search_result.x, f)

            print("Hyperparameter search saved!")

        if display_plot:
            plot_convergence(self.search_result)

        #build results dictionary
        results_dict = {
            dimensions[i].name: self.search_result.x[i]
            for i in range(len(dimensions))
        }
        print('Optimal hyperparameters found:')
        print(results_dict)
        print()
        print('Optimal fitness value:', -float(self.search_result.fun))
Example #10
            S = Snew
        it = it + 1

    yhat_valid = np.sum(
        np.multiply((P[x_valid[:, 0], :]), (Q[x_valid[:, 1], :])), 1)
    #yhat_valid=np.round(yhat_valid,decimals=4)
    RMSE = sqrt((y_valid - yhat_valid) @ (y_valid - yhat_valid) / y_valid.size)
    #MAE=sum(abs((y_valid-yhat_valid)))/y_valid.size
    return RMSE, yhat_valid


# 2-layered gssvd rmse focused k =4,...,10

# In[ ]:

lp = Real(low=1e-7, high=1, prior='uniform', name='lp')
ls = Real(low=1e-7, high=1, prior='uniform', name='ls')
la = Real(low=1e-7, high=1, prior='uniform', name='la')
lq = Real(low=1e-7, high=1, prior='uniform', name='lq')
lt = Real(low=1e-7, high=1, prior='uniform', name='lt')
lb = Real(low=1e-7, high=1, prior='uniform', name='lb')
Lp = Real(low=1e-7, high=1, prior='uniform', name='Lp')
Ls = Real(low=1e-7, high=1, prior='uniform', name='Ls')
La = Real(low=1e-7, high=1, prior='uniform', name='La')
Lq = Real(low=1e-7, high=1, prior='uniform', name='Lq')
Lt = Real(low=1e-7, high=1, prior='uniform', name='Lt')
Lb = Real(low=1e-6, high=1, prior='uniform', name='Lb')
dimensions = [lp, ls, la, lq, lt, lb, Lp, Ls, La, Lq, Lt, Lb]
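A hedged sketch of how a dimensions list like this is typically handed to gp_minimize; the quadratic below is only a stand-in for the GSSVD training/validation routine above:

from skopt import gp_minimize

def stand_in_objective(params):
    # params is a flat list ordered like `dimensions`
    return sum((p - 0.01) ** 2 for p in params)

res = gp_minimize(stand_in_objective, dimensions, n_calls=15, random_state=0)
print(res.x, res.fun)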

# In[ ]:
 def setup_dimensions(self, x_min, x_max, y_min, y_max, n_blocks):
     dimensions = []
     for i in range(n_blocks):
         dimensions.append(Real(x_min, x_max))
         dimensions.append(Real(y_min, y_max))
     return dimensions
Example #12
 def get_hyperspace(self):
     """
     Create Real HyperSpaces.
     """
     return Real(self.space0_low, self.space0_high, prior=self.prior, transform=self.transform), \
            Real(self.space1_low, self.space1_high, prior=self.prior, transform=self.transform)
Example #13
    model.add(Dense(3, activation = 'softmax'))
    return model

def model_fit(model, x_train, y_train, x_valid, y_valid, b_size):
    adam = Adam(lr = 0.005)  
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])
    callback = EarlyStopping(monitor = 'val_acc', patience = 10, verbose = 1, mode = 'auto')
    model.fit(x_train, np_utils.to_categorical(y_train), epochs = 1000, batch_size = b_size, validation_data = (x_valid, np_utils.to_categorical(y_valid)), 
              callbacks = [callback])
    loss, acc = model.evaluate(x_train, np_utils.to_categorical(y_train))
    return acc

space = [
        Categorical([20, 16, 12, 8], name = 'filter0'), 
        Categorical([48, 32, 24, 20], name = 'filter1'),
        Real(0, 0.5, name = 'rate0'),
        Real(0, 0.5, name = 'rate1'),
        Categorical([32, 64, 128], name = 'b_size')
        ]

@use_named_args(space)
def objective0(filter0, filter1, rate0, rate1, b_size):
    from sklearn.model_selection import StratifiedKFold
    skf = StratifiedKFold(n_splits = 3)
    for train_index, valid_index in skf.split(train_x, train_y):
        x_train, x_valid = train_x[train_index], train_x[valid_index]
        y_train, y_valid = train_y[train_index], train_y[valid_index]
        model = lstmmodel(filter0, filter1, rate0, rate1)
        fitting = model_fit(model, x_train, y_train, x_valid, y_valid, b_size) 
    return -fitting
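A hedged sketch of how objective0 and space would typically be passed to gp_minimize (assuming train_x/train_y and the Keras model builders above are defined; each call trains several models, so this is slow):

from skopt import gp_minimize

result = gp_minimize(objective0, space, n_calls=20, random_state=0)
print(result.x)     # best [filter0, filter1, rate0, rate1, b_size]
print(-result.fun)  # best cross-validated accuracy (the objective returns its negative)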
Example #14
def hyperparams_tuning(recommender_class, URM_train, URM_validation, URM_test):

    # Step 1: Import the evaluator objects

    print("Evaluator objects ... ")

    cutoff = 5
    evaluator_validation = EvaluatorHoldout(URM_validation,
                                            cutoff_list=[cutoff])
    evaluator_test = EvaluatorHoldout(URM_test,
                                      cutoff_list=[cutoff, cutoff + 5])
    # evaluator_validation_earlystopping = EvaluatorHoldout(URM_train, cutoff_list=[cutoff, cutoff+5], exclude_seen=False)

    # Step 2: Create BayesianSearch object
    print("BayesianSearch objects ... ")

    parameterSearch = SearchBayesianSkopt(
        recommender_class,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test)

    # Step 3: Define parameters range
    print("Parameters range ...")

    # n_cases = 8
    # n_random_starts =  int(n_cases / 3) # 5
    n_cases = 2
    metric_to_optimize = "MAP"
    output_file_name_root = "{}_metadata.zip".format(
        recommender_class.RECOMMENDER_NAME)

    hyperparameters_range_dictionary = {}
    hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
    hyperparameters_range_dictionary["l1_ratio"] = Real(low=1e-5,
                                                        high=1.0,
                                                        prior='log-uniform')
    hyperparameters_range_dictionary["alpha"] = Real(low=1e-3,
                                                     high=1.0,
                                                     prior='uniform')

    # earlystopping_keywargs = {"validation_every_n": 5,
    #                           "stop_on_validation": True,
    #                           "evaluator_object": evaluator_validation_earlystopping, # or evaluator_validation
    #                           "lower_validations_allowed": 5,
    #                           "validation_metric": metric_to_optimize,
    #                           }

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={}  # earlystopping_keywargs 
    )

    output_folder_path = "../results/result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Step 4: run

    best_parameters = parameterSearch.search(
        recommender_input_args,  # constructor/fit arguments for the recommender
        parameter_search_space=hyperparameters_range_dictionary,  # bounds on each hyperparameter
        n_cases=n_cases,  # the number of evaluations
        n_random_starts=1,  # the number of random initialization points
        #n_random_starts = int(n_cases/3),
        save_model="no",
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize)

    print("best_parameters", best_parameters)

    # Step 5: return best_parameters
    # from utils.DataIO import DataIO
    # data_loader = DataIO(folder_path=output_folder_path)
    # search_metadata = data_loader.load_data(recommender_class.RECOMMENDER_NAME + "_metadata.zip")
    # print("search_metadata", search_metadata)

    # best_parameters = search_metadata["hyperparameters_best"]

    return best_parameters
def doGradBoost(X,
                y,
                X_test,
                y_test,
                n_jobs,
                feature_labels,
                class_labels,
                bayesOpt=False,
                acqFunc='',
                num_iter=25):
    #     MIN_SEARCH = 2e-12
    #     num_features = 67
    #     search_space = {
    #         'gb__n_estimators': Integer(50, 150),
    #         'gb__max_features': Real(MIN_SEARCH, 1.0, prior='uniform'),
    #         'gb__criterion': Categorical(['friedman_mse',
    #                                              'mse']),  # mae is very slow.
    #         'gb__min_samples_split': Real(MIN_SEARCH,
    #                                              1.0,
    #                                              prior='log-uniform'),
    #         'gb__min_samples_leaf': Real(MIN_SEARCH,
    #                                             0.5,
    #                                             prior='log-uniform'),
    #         'gb__max_depth': Integer(2, num_features),
    #     }
    search_space = {
        'rf__max_depth': Integer(1, 55),
        "rf__max_features": Real(.05, 1.0, prior='log-uniform')
    }
    parameters = {
        'gb__max_depth': [i for i in range(5, 60, 5)],
        "gb__max_features": [i / 10.0 for i in range(1, 11)]
    }
    best_estimator = executeML(X,
                               y,
                               X_test,
                               y_test,
                               n_jobs,
                               feature_labels,
                               class_labels,
                               getPipeRF(),
                               parameters,
                               "GB" + acqFunc,
                               bayesOpt=bayesOpt,
                               search_space=search_space,
                               n_iter=num_iter,
                               acq_func=acqFunc)
    try:
        imp = best_estimator.named_steps['gb'].feature_importances_
        feature_importances = sorted(zip(feature_labels, imp),
                                     key=lambda fi: fi[1],
                                     reverse=True)
        print("")
        print("Grad boost feature importances.")
        i = 1
        for item in feature_importances:
            print(i, str(item))
            i += 1
        print("")
        sys.stdout.flush()
    except AttributeError:
        print("How to get the feature importances for GB?")
    return best_estimator
#INPATH = '/imdata/error_log_analysis/data/'
OUTPATH = '/nfshome/llayer/AIErrorLogAnalysis/experiments/'

# Average the vectors
AVG_W2V = False
FOLDS = 3

# Include counts
MSG_ONLY = False
PRUNING = 'Neg'

if AVG_W2V == False:
    
    # Skopt dimensions
    SKOPT_DIM = [
        Real(low=1e-5, high=1e-3, prior='log-uniform', name='learning_rate'),
        Real(low=1e-3, high=0.1, prior='log-uniform', name='dropout'),
        #Real(low=1e-4, high=0.9, prior="log-uniform", name='l2_regulizer'),
        Integer(low=5, high=32, name='embedding'),
        Integer(low=2, high=32, name='rnn_units'),
        #Integer(low=2, high=20, name='units_site'),
        Integer(low=1, high=5, name='dense_layers'),
        Integer(low=10, high=50, name='dense_units'),
        #Integer(low=2, high=20, name='att_units'),
        #Integer(low=0, high=1, name='encode_sites'),
        #Integer(low=0, high=1, name='train_embedding'),
        ]

    # batch_size and epochs 
    BATCH_SIZE = 1
    MAX_EPOCHS = 12
Example #17
    print('tp:', true_positive, 'fp:', false_positive)
    print('fn:', false_negative, 'tn:', true_negative)
    precision = float(true_positive) / (float(true_positive) +
                                        float(false_positive))
    recall = float(true_positive) / (float(true_positive) +
                                     float(false_negative))
    f1_score = (2 * precision * recall) / (precision + recall)
    acc = (true_positive + true_negative) / (true_positive + false_positive +
                                             false_negative + true_negative)
    TNR = float(true_negative) / (float(false_positive) + float(true_negative))
    return precision, recall, f1_score, acc, TNR


# target params
max_depth = Integer(low=1, high=32, name='max_depth')
min_samples_split = Real(low=0.1, high=1, name='min_samples_split')
min_samples_leaf = Real(low=0.1, high=0.5, name='min_samples_leaf')
max_features = Integer(low=1, high=65, name="max_features")

dimensions = [max_depth, min_samples_split, min_samples_leaf, max_features]
default_parameters = [5, 0.2, 0.2, 15]

# input prepare
# input limits: number of articles and number of sentences/paragraphs fed in per article
file_num_limit = 45614  # total 45614
paras_limit = 20

onehotlabels, stats_features = prepare_input(file_num_limit)

# standardize stats_features
scaler = preprocessing.StandardScaler()  # instantiate
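A self-contained sketch (toy data, hypothetical objective) of feeding the dimensions and default_parameters defined above into gp_minimize; the real objective built from onehotlabels/stats_features is not shown here:

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from skopt import gp_minimize
from skopt.utils import use_named_args

X_toy, y_toy = make_classification(n_samples=300, n_features=65, random_state=0)

@use_named_args(dimensions)
def objective(max_depth, min_samples_split, min_samples_leaf, max_features):
    clf = DecisionTreeClassifier(max_depth=max_depth,
                                 min_samples_split=min_samples_split,
                                 min_samples_leaf=min_samples_leaf,
                                 max_features=max_features,
                                 random_state=0)
    return -cross_val_score(clf, X_toy, y_toy, cv=3, scoring='f1').mean()

res = gp_minimize(objective, dimensions, x0=default_parameters,
                  n_calls=15, random_state=0)
print(res.x, -res.fun)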
Example #18

@pytest.mark.fast_test
@pytest.mark.parametrize("dimensions, normalizations", [
    (((1, 3), (1., 3.)), ('normalize', 'normalize')),
    (((1, 3), ('a', 'b', 'c')), ('normalize', 'onehot')),
])
def test_normalize_dimensions(dimensions, normalizations):
    space = normalize_dimensions(dimensions)
    for dimension, normalization in zip(space, normalizations):
        assert dimension.transform_ == normalization


@pytest.mark.fast_test
@pytest.mark.parametrize(
    "dimension, name", [(Real(1, 2, name="learning rate"), "learning rate"),
                        (Integer(1, 100, name="no of trees"), "no of trees"),
                        (Categorical(["red, blue"], name="colors"), "colors")])
def test_normalize_dimensions(dimension, name):
    space = normalize_dimensions([dimension])
    assert space.dimensions[0].name == name


@pytest.mark.fast_test
def test_use_named_args():
    """
    Test the function wrapper @use_named_args which is used
    for wrapping an objective function with named args so it
    can be called by the optimizers which only pass a single
    list as the arg.
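A minimal illustration of the wrapper described in that docstring: the decorated objective takes named arguments, but an optimizer calls it with a single flat list.

from skopt.space import Real
from skopt.utils import use_named_args

space = [Real(0.0, 1.0, name='a'), Real(0.0, 1.0, name='b')]

@use_named_args(space)
def objective(a, b):
    return (a - 0.25) ** 2 + (b - 0.75) ** 2

print(objective([0.25, 0.75]))  # called with one list, as an optimizer would -> 0.0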
Example #19
class CatBoostClassifier(Estimator):
    """
    CatBoost Classifier, a classifier that uses gradient-boosting on decision trees.
    CatBoost is an open-source library and natively supports categorical features.

    For more information, check out https://catboost.ai/
    """
    name = "CatBoost Classifier"
    hyperparameter_ranges = {
        "n_estimators": Integer(4, 100),
        "eta": Real(0.000001, 1),
        "max_depth": Integer(4, 10),
    }
    model_family = ModelFamily.CATBOOST
    supported_problem_types = [
        ProblemTypes.BINARY, ProblemTypes.MULTICLASS,
        ProblemTypes.TIME_SERIES_BINARY, ProblemTypes.TIME_SERIES_MULTICLASS
    ]

    SEED_MIN = 0
    SEED_MAX = SEED_BOUNDS.max_bound

    def __init__(self,
                 n_estimators=10,
                 eta=0.03,
                 max_depth=6,
                 bootstrap_type=None,
                 silent=True,
                 allow_writing_files=False,
                 random_state=0,
                 **kwargs):
        random_seed = get_random_seed(random_state, self.SEED_MIN,
                                      self.SEED_MAX)
        parameters = {
            "n_estimators": n_estimators,
            "eta": eta,
            "max_depth": max_depth,
            'bootstrap_type': bootstrap_type,
            'silent': silent,
            'allow_writing_files': allow_writing_files
        }
        parameters.update(kwargs)

        cb_error_msg = "catboost is not installed. Please install using `pip install catboost`."
        catboost = import_or_raise("catboost", error_msg=cb_error_msg)
        self._label_encoder = None
        # catboost will choose an intelligent default for bootstrap_type, so only set if provided
        cb_parameters = copy.copy(parameters)
        if bootstrap_type is None:
            cb_parameters.pop('bootstrap_type')
        cb_classifier = catboost.CatBoostClassifier(**cb_parameters,
                                                    random_seed=random_seed)
        super().__init__(parameters=parameters,
                         component_obj=cb_classifier,
                         random_state=random_state)

    def fit(self, X, y=None):
        X = _convert_to_woodwork_structure(X)
        cat_cols = list(X.select('category').columns)
        X = _convert_woodwork_types_wrapper(X.to_dataframe())
        y = _convert_to_woodwork_structure(y)
        y = _convert_woodwork_types_wrapper(y.to_series())

        # For binary classification, catboost expects numeric values, so encoding before.
        if y.nunique() <= 2:
            self._label_encoder = LabelEncoder()
            y = pd.Series(self._label_encoder.fit_transform(y))
        self._component_obj.fit(X, y, silent=True, cat_features=cat_cols)
        return self

    def predict(self, X):
        X = _convert_to_woodwork_structure(X)
        X = _convert_woodwork_types_wrapper(X.to_dataframe())
        predictions = self._component_obj.predict(X)
        if predictions.ndim == 2 and predictions.shape[1] == 1:
            predictions = predictions.flatten()
        if self._label_encoder:
            predictions = self._label_encoder.inverse_transform(
                predictions.astype(np.int64))
        return _convert_to_woodwork_structure(predictions)

    @property
    def feature_importance(self):
        return self._component_obj.get_feature_importance()
Example #20
###############################################################################
if __name__ == "__main__":
    ###################################
    #   Select Optimization Options   #
    ###################################
    #=== Number of Iterations ===#
    n_calls = 10

    #=== Select Hyperparameters of Interest ===#
    hyperp_of_interest_dict = {}
    hyperp_of_interest_dict['num_hidden_layers_encoder'] = Integer(
        5, 10, name='num_hidden_layers_encoder')
    hyperp_of_interest_dict['num_hidden_nodes_encoder'] = Integer(
        100, 1000, name='num_hidden_nodes_encoder')
    # hyperp_of_interest_dict['activation'] = Categorical(['relu', 'elu', 'sigmoid', 'tanh'], name='activation')
    hyperp_of_interest_dict['penalty_js'] = Real(0, 1, name='penalty_js')
    #hyperp_of_interest_dict['batch_size'] = Integer(100, 500, name='batch_size')

    #####################
    #   Initial Setup   #
    #####################
    #=== Generate skopt 'space' list ===#
    space = []
    for key, val in hyperp_of_interest_dict.items():
        space.append(val)

    #=== Hyperparameters ===#
    with open('../config_files/hyperparameters_vae.yaml') as f:
        hyperp = yaml.safe_load(f)
    hyperp = AttrDict(hyperp)
Example #21
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=30):

    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = BayesianSkoptSearch(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, Random]:

            recommender = recommender_class(URM_train)

            recommender.fit()

            output_file = open(
                output_folder_path + output_file_name_root +
                "_BayesianSearch.txt", "a")
            result_dict, result_baseline = evaluator_validation.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_validation. Results: {}"
                .format(result_baseline))

            pickle.dump(result_dict.copy(),
                        open(
                            output_folder_path + output_file_name_root +
                            "_best_result_validation", "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)

            result_dict, result_baseline = evaluator_test.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_test. Results: {}"
                .format(result_baseline))

            pickle.dump(result_dict.copy(),
                        open(
                            output_folder_path + output_file_name_root +
                            "_best_result_test", "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)

            output_file.close()

            return

        ##########################################################################################################

        if recommender_class is UserKNNCFRecommender:

            similarity_type_list = [
                'cosine', 'jaccard', "asymmetric", "dice", "tversky"
            ]

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        if recommender_class is ItemKNNCFRecommender:

            similarity_type_list = [
                'cosine', 'jaccard', "asymmetric", "dice", "tversky"
            ]

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

    ##########################################################################################################

        if recommender_class is P3alphaRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = Integer(5, 800)
            hyperparamethers_range_dictionary["alpha"] = Real(low=0,
                                                              high=2,
                                                              prior='uniform')
            hyperparamethers_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = Integer(5, 800)
            hyperparamethers_range_dictionary["alpha"] = Real(low=0,
                                                              high=2,
                                                              prior='uniform')
            hyperparamethers_range_dictionary["beta"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparamethers_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = Categorical(
                ["adagrad", "adam"])
            #hyperparamethers_range_dictionary["epochs"] = Integer(1, 150)
            hyperparamethers_range_dictionary["num_factors"] = Integer(1, 150)
            hyperparamethers_range_dictionary["reg"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            hyperparamethers_range_dictionary["learning_rate"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_BPR_Cython:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = Categorical(
                ["adagrad", "adam"])
            #hyperparamethers_range_dictionary["epochs"] = Integer(1, 150)
            hyperparamethers_range_dictionary["num_factors"] = Integer(1, 150)
            hyperparamethers_range_dictionary["batch_size"] = Categorical([1])
            hyperparamethers_range_dictionary["positive_reg"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            hyperparamethers_range_dictionary["negative_reg"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            hyperparamethers_range_dictionary["learning_rate"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'positive_threshold': 0
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["num_factors"] = Integer(1, 250)

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = Integer(5, 800)
            #hyperparamethers_range_dictionary["epochs"] = Integer(1, 150)
            hyperparamethers_range_dictionary["sgd_mode"] = Categorical(
                ["adagrad", "adam"])
            hyperparamethers_range_dictionary["lambda_i"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            hyperparamethers_range_dictionary["lambda_j"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'train_with_sparse_weights': False,
                    'symmetric': False,
                    'positive_threshold': 0
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 10,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is SLIMElasticNetRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = Integer(5, 800)
            hyperparamethers_range_dictionary["l1_ratio"] = Real(
                low=1e-5, high=1.0, prior='log-uniform')

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

    #########################################################################################################

    ## Final step, after the hyperparameter range has been defined for each type of algorithm
        best_parameters = parameterSearch.search(
            recommenderDictionary,
            n_cases=n_cases,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        error_file = open(output_folder_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(
            recommender_class, str(e)))
        error_file.close()
Example #22
    externalize = externalfunc(prog='python run_train_ex.py',
                               names = ['par%s'%d for d in range(n_par)])
    
    run_for = 20

    use_func = externalize
    if len(sys.argv)>1:
        do = sys.argv[1]
        if do=='threaded':
            use_func = dummy_func
        elif do=='external':
            use_func = externalize


    dim = [Real(-20, 20) for i in range(n_par)]
    start = time.mktime(time.gmtime())
    res = gp_minimize(
        func=use_func,
        dimensions=dim,
        n_calls = run_for,
        
    )

    print "GPM best value",res.fun,"at",res.x
    #print res
    print "took",time.mktime(time.gmtime())-start,"[s]"
    
    
    o = Optimizer(
        n_initial_points =5,
Example #23
class CatBoostRegressor(Estimator):
    """
    CatBoost Regressor, a regressor that uses gradient-boosting on decision trees.
    CatBoost is an open-source library and natively supports categorical features.

    For more information, check out https://catboost.ai/
    """
    name = "CatBoost Regressor"
    hyperparameter_ranges = {
        "n_estimators": Integer(4, 100),
        "eta": Real(0.000001, 1),
        "max_depth": Integer(4, 10),
    }
    model_family = ModelFamily.CATBOOST
    supported_problem_types = [
        ProblemTypes.REGRESSION, ProblemTypes.TIME_SERIES_REGRESSION
    ]

    SEED_MIN = 0
    SEED_MAX = SEED_BOUNDS.max_bound

    def __init__(self,
                 n_estimators=10,
                 eta=0.03,
                 max_depth=6,
                 bootstrap_type=None,
                 silent=False,
                 allow_writing_files=False,
                 random_state=0,
                 **kwargs):
        random_seed = get_random_seed(random_state, self.SEED_MIN,
                                      self.SEED_MAX)
        parameters = {
            "n_estimators": n_estimators,
            "eta": eta,
            "max_depth": max_depth,
            'bootstrap_type': bootstrap_type,
            'silent': silent,
            'allow_writing_files': allow_writing_files
        }
        parameters.update(kwargs)

        cb_error_msg = "catboost is not installed. Please install using `pip install catboost`."
        catboost = import_or_raise("catboost", error_msg=cb_error_msg)
        # catboost will choose an intelligent default for bootstrap_type, so only set if provided
        cb_parameters = copy.copy(parameters)
        if bootstrap_type is None:
            cb_parameters.pop('bootstrap_type')
        cb_regressor = catboost.CatBoostRegressor(**cb_parameters,
                                                  random_seed=random_seed)
        super().__init__(parameters=parameters,
                         component_obj=cb_regressor,
                         random_state=random_state)

    def fit(self, X, y=None):
        X = _convert_to_woodwork_structure(X)
        cat_cols = list(X.select('category').columns)
        X = _convert_woodwork_types_wrapper(X.to_dataframe())

        y = _convert_to_woodwork_structure(y)
        y = _convert_woodwork_types_wrapper(y.to_series())

        self._component_obj.fit(X, y, silent=True, cat_features=cat_cols)
        return self

    @property
    def feature_importance(self):
        return self._component_obj.get_feature_importance()
    filename_skopt = "400_RP3_12.pkl"
    filename_csv = "400_RP3.csv"

    dataset_list = Parallel(n_jobs=parallelism)(delayed(gen_dataset)(x + 20)
                                                for x in range(n_dataset))

    eval = Evaluator(dataset_list=dataset_list,
                     type_of_user=type_of_user,
                     parallelism=parallelism,
                     filename_csv=filename_csv)

    hyperparameters = [
        Integer(2, 7),
        Integer(0, 4),
        Integer(0, 30),
        Real(0, 0.75)
    ]

    for _ in range(n_load_and_rerun):

        try:
            with open(filename_skopt, "rb") as f:
                res_loaded = load(f)
                f.close()
            res = forest_minimize(
                eval.eval,  # the function to minimize
                hyperparameters,  # the bounds on each dimension of x
                acq_func=acq_func,  # the acquisition function
                # acq_optimizer=acq_optimizer,  # the acquisition function
                n_calls=n_calls,  # the number of evaluations of f
                n_random_starts=
Example #25
from tensorflow.python.keras.layers import Conv2D, Dense, Flatten, CuDNNLSTM, ConvLSTM2D 
from tensorflow.python.keras.callbacks import TensorBoard
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.models import save_model, load_model, Model
from tensorflow.python.keras.utils import multi_gpu_model
from sklearn.preprocessing import MinMaxScaler
# Scikit Optimizer
import skopt
from skopt import gp_minimize, forest_minimize
from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args

# This is where you set your hyperparameters for the model

# Hyperparameter tuning
dim_learning_rate = Real(low=1e-5, high=1e-3, prior='log-uniform',
                         name='learning_rate')


dim_num_dense_layers = Integer(low=0, high=2, name='num_dense_layers')

dim_num_epochs = Integer(low=50, high=250, name='num_epochs')

dim_num_conv_layers = Integer(low=1, high=4, name='num_conv_layers')

dim_kernel_size = Integer(low=3, high=10, name='kernel_size')

dim_num_filters = Integer(low=16, high=64, name='num_filters')

dimensions = [dim_learning_rate,
              dim_num_dense_layers,
              dim_num_epochs,
Example #26
# Regular Expression to parse score from text output
REGEX = re.compile(r'Score: (\d\.\d+)')

# Make sure user calls tune.py in the right way
if len(sys.argv) == 2:
    TARGET = int(sys.argv[1])
else:
    print("usage: python tune.py <1/2/3>")
    exit()

# Hyperparameter Space #
space = [
    Integer(50, 250, name="population_size"),
    Integer(2, 6, name="tournament_size"),
    Integer(1, 5, name="crossover_points"),
    Real(0.1, 0.35, name="mutation_probability"),
    Real(1, 2, name="max_sigma"),
    Real(0, 1, name="learning_rate"),
    Real(0, 10, name="novelty_threshold"),
    Real(0, 1, name="linearblend"),
    Real(0, 0.0001, name="linearblend_delta"),
    Integer(2, 6, name="nearestNeighbours")
]


@use_named_args(space)
def evaluate(**parameters: dict):
    """
	Evalute Evolutionary Algorithm using 'parameters' as Hyperparameters
	Since we're dealing with a Stochastic Algorithm, take the mean over several runs as score
	"""
Example #27
 def indicator_space() -> List[Dimension]:
     return [
         Real(-1.00, 1.00, name='macd'),
         Real(-1.00, 1.00, name='macdhist'),
         Integer(40, 90, name='rmi')
     ]
Example #28
# search over different model types
pipe = Pipeline([('model', SVC())])

# a single categorical value of the 'model' parameter
# sets the model class
# We will get ConvergenceWarnings because the problem is not well-conditioned.
# But that's fine, this is just an example.
linsvc_search = {
    'model': [LinearSVC(max_iter=1000)],
    'model__C': (1e-6, 1e+6, 'log-uniform'),
}

# explicit dimension classes can be specified like this
svc_search = {
    'model': Categorical([SVC()]),
    'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
    'model__gamma': Real(1e-6, 1e+1, prior='log-uniform'),
    'model__degree': Integer(1, 8),
    'model__kernel': Categorical(['linear', 'poly', 'rbf']),
}

opt = WeightedBayesSearchCV(
    pipe,
    # (parameter space, # of evaluations)
    [(svc_search, 40), (linsvc_search, 16)],
    cv=3)

opt.fit(X_train, y_train)

print("val. score: %s" % opt.best_score_)
print("test score: %s" % opt.score(X_test, y_test))
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

from utility import HyperParameters, Runner
from model import load_clean_sample_data_frame, ordinal_data_mapper

sample = None
iterations = 24

hyper_parameters = HyperParameters({
    'dt__criterion':
    Categorical(['gini', 'entropy']),
    'dt__max_depth':
    Integer(4, 24),
    'dt__min_samples_leaf':
    Real(0.000001, 0.001),
    'dt__min_samples_split':
    Real(0.000002, 0.002)
})

decision_tree_basic = Pipeline([('mapper', ordinal_data_mapper),
                                ('dt', DecisionTreeClassifier())])


def test_decision_tree():
    runner = Runner('model/experiment/output/decision_tree_basic',
                    load_clean_sample_data_frame(),
                    'arrest',
                    decision_tree_basic,
                    hyper_parameters=hyper_parameters)
    runner.run_classification_search_experiment('roc_auc',
Example #30
    def get_sk_dimensions(api_config, transform="normalize"):
        """Help routine to setup skopt search space in constructor.

        Take api_config as argument so this can be static.
        """
        # The ordering of iteration probably makes no difference, but to be
        # safe and consistent with space.py, iterate in sorted order.
        param_list = sorted(api_config.keys())

        sk_types = []
        sk_dims = []
        for param_name in param_list:
            param_config = api_config[param_name]

            param_type = param_config["type"]
            param_space = param_config.get("space", None)
            param_range = param_config.get("range", None)
            param_values = param_config.get("values", None)

            # Some setup for the case that a whitelist of values is provided:
            values_only_type = param_type in ("cat", "ordinal")
            if (param_values is not None) and (not values_only_type):
                assert param_range is None
                param_values = np.unique(param_values)
                param_range = (param_values[0], param_values[-1])
            if param_type == "int":
                # Integer space in sklearn does not support any warping => Need
                # to leave the warping as linear in skopt.
                sk_dims.append(
                    Integer(param_range[0],
                            param_range[-1],
                            transform=transform,
                            name=param_name))
            elif param_type == "bool":
                assert param_range is None
                assert param_values is None
                sk_dims.append(
                    Integer(0, 1, transform=transform, name=param_name))
            elif param_type in ("cat", "ordinal"):
                assert param_range is None
                # Leave x-form to one-hot as per skopt default
                sk_dims.append(Categorical(param_values, name=param_name))
            elif param_type == "real":
                # Skopt doesn't support all our warpings, so need to pick
                # closest substitute it does support.
                # prior = "log-uniform" if param_space in ("log", "logit") else "uniform"
                if param_space in ("log", "logit"):
                    # skopt Real only supports "uniform" and "log-uniform",
                    # so "logit" warping is approximated with "log-uniform".
                    prior = "log-uniform"
                else:
                    prior = "uniform"
                sk_dims.append(
                    Real(param_range[0],
                         param_range[-1],
                         prior=prior,
                         transform=transform,
                         name=param_name))
            else:
                assert False, "type %s not handled in API" % param_type
            sk_types.append(param_type)
        return sk_dims, sk_types, param_list
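For illustration, a hypothetical api_config in the format this helper parses (assuming the method is reachable as a plain/static function and numpy is imported as np, as the body requires):

api_config = {
    'lr':     {'type': 'real', 'space': 'log', 'range': (1e-4, 1e-1)},
    'layers': {'type': 'int', 'range': (1, 4)},
    'act':    {'type': 'cat', 'values': ['relu', 'tanh']},
    'bias':   {'type': 'bool'},
}
sk_dims, sk_types, param_list = get_sk_dimensions(api_config)
print(param_list)  # ['act', 'bias', 'layers', 'lr'] (sorted)
print(sk_dims)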