Example #1
    def __init__(
        self,
        space: Optional[Dict] = None,
        metric: Optional[str] = None,
        mode: Optional[str] = None,
        points_to_evaluate: Optional[List[Dict]] = None,
        n_initial_points: int = 20,
        random_state_seed: Optional[int] = None,
        gamma: float = 0.25,
        max_concurrent: Optional[int] = None,
        use_early_stopped_trials: Optional[bool] = None,
    ):
        assert hpo is not None, (
            "HyperOpt must be installed! Run `pip install hyperopt`.")
        if mode:
            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
        from hyperopt.fmin import generate_trials_to_calculate
        super(HyperOptSearch,
              self).__init__(metric=metric,
                             mode=mode,
                             max_concurrent=max_concurrent,
                             use_early_stopped_trials=use_early_stopped_trials)
        self.max_concurrent = max_concurrent
        # hyperopt internally minimizes, so "max" => -1
        if mode == "max":
            self.metric_op = -1.
        elif mode == "min":
            self.metric_op = 1.

        if n_initial_points is None:
            self.algo = hpo.tpe.suggest
        else:
            self.algo = partial(hpo.tpe.suggest,
                                n_startup_jobs=n_initial_points)
        if gamma is not None:
            self.algo = partial(self.algo, gamma=gamma)
        if points_to_evaluate is None:
            self._hpopt_trials = hpo.Trials()
            self._points_to_evaluate = 0
        else:
            assert isinstance(points_to_evaluate, (list, tuple))
            self._hpopt_trials = generate_trials_to_calculate(
                points_to_evaluate)
            self._hpopt_trials.refresh()
            self._points_to_evaluate = len(points_to_evaluate)
        self._live_trial_mapping = {}
        if random_state_seed is None:
            self.rstate = np.random.RandomState()
        else:
            self.rstate = np.random.RandomState(random_state_seed)

        self.domain = None
        if isinstance(space, dict) and space:
            resolved_vars, domain_vars, grid_vars = parse_spec_vars(space)
            if domain_vars or grid_vars:
                logger.warning(
                    UNRESOLVED_SEARCH_SPACE.format(par="space",
                                                   cls=type(self)))
                space = self.convert_search_space(space)
            self.domain = hpo.Domain(lambda spc: spc, space)
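A minimal, self-contained sketch of the hyperopt pattern this constructor wires up: pre-seed a Trials object with a known starting point via generate_trials_to_calculate, then let TPE continue from there. The toy objective and search space below are illustrative assumptions, not taken from the example.

from hyperopt import fmin, hp, tpe
from hyperopt.fmin import generate_trials_to_calculate

# Pre-seed the Trials object with one known point; the dict key must match
# the label used in the search space below.
trials = generate_trials_to_calculate([{"x": 0.0}])

best = fmin(
    fn=lambda x: (x - 1.0) ** 2,        # toy objective to minimize
    space=hp.uniform("x", -5.0, 5.0),
    algo=tpe.suggest,
    max_evals=10,                       # the pre-seeded point counts toward this total
    trials=trials,
    show_progressbar=False,
)
print(best)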
Example #2
    def __init__(self,
                 space,
                 max_concurrent=10,
                 reward_attr="episode_reward_mean",
                 points_to_evaluate=None,
                 **kwargs):
        _import_hyperopt()
        assert hpo is not None, "HyperOpt must be installed!"
        from hyperopt.fmin import generate_trials_to_calculate
        assert type(max_concurrent) is int and max_concurrent > 0
        self._max_concurrent = max_concurrent
        self._reward_attr = reward_attr
        self.algo = hpo.tpe.suggest
        self.domain = hpo.Domain(lambda spc: spc, space)
        if points_to_evaluate is None:
            self._hpopt_trials = hpo.Trials()
            self._points_to_evaluate = 0
        else:
            assert type(points_to_evaluate) == list
            self._hpopt_trials = generate_trials_to_calculate(
                points_to_evaluate)
            self._hpopt_trials.refresh()
            self._points_to_evaluate = len(points_to_evaluate)
        self._live_trial_mapping = {}
        self.rstate = np.random.RandomState()

        super(HyperOptSearch, self).__init__(**kwargs)
Example #3
def get_iter(fn, space, algo, max_evals, trials=None, rstate=None,
             pass_expr_memo_ctrl=None,
             catch_eval_exceptions=False,
             verbose=0,
             points_to_evaluate=None,
             max_queue_len=1,
             show_progressbar=False,
             ):
    if rstate is None:
        env_rseed = os.environ.get('HYPEROPT_FMIN_SEED', '')
        if env_rseed:
            rstate = np.random.RandomState(int(env_rseed))
        else:
            rstate = np.random.RandomState()

    if trials is None:
        if points_to_evaluate is None:
            trials = base.Trials()
        else:
            assert type(points_to_evaluate) == list
            trials = generate_trials_to_calculate(points_to_evaluate)

    domain = base.Domain(fn, space,
                         pass_expr_memo_ctrl=pass_expr_memo_ctrl)

    rval = FMinIter(algo, domain, trials, max_evals=max_evals,
                    rstate=rstate,
                    verbose=verbose,
                    max_queue_len=max_queue_len,
                    show_progressbar=show_progressbar)
    rval.catch_eval_exceptions = catch_eval_exceptions
    return rval
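A hedged usage sketch for the get_iter helper above: the returned FMinIter can be driven to completion with exhaust() and its trials inspected afterwards. The objective and space here are illustrative placeholders, not part of the snippet.

from hyperopt import hp, tpe

# Illustrative objective and space (assumptions, not part of get_iter itself).
fn = lambda x: (x - 0.5) ** 2
space = hp.uniform("x", -2.0, 2.0)

it = get_iter(fn, space, tpe.suggest, max_evals=20)
it.exhaust()               # run evaluations until max_evals is reached
print(it.trials.argmin)    # best point found so far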
Example #4
    def __init__(self,
                 space,
                 max_concurrent=10,
                 reward_attr="episode_reward_mean",
                 points_to_evaluate=None,
                 **kwargs):
        assert hpo is not None, "HyperOpt must be installed!"
        from hyperopt.fmin import generate_trials_to_calculate
        assert type(max_concurrent) is int and max_concurrent > 0
        self._max_concurrent = max_concurrent
        self._reward_attr = reward_attr
        self.algo = hpo.tpe.suggest
        self.domain = hpo.Domain(lambda spc: spc, space)
        if points_to_evaluate is None:
            self._hpopt_trials = hpo.Trials()
            self._points_to_evaluate = 0
        else:
            assert type(points_to_evaluate) == list
            self._hpopt_trials = generate_trials_to_calculate(
                points_to_evaluate)
            self._hpopt_trials.refresh()
            self._points_to_evaluate = len(points_to_evaluate)
        self._live_trial_mapping = {}
        self.rstate = np.random.RandomState()

        super(HyperOptSearch, self).__init__(**kwargs)
Example #5
def generate_trial(isDef=False):
    '''
    Generate an initial Trial object.
    Inputs:
        isDef: Bool, if True, an initial guess is pre-defined
    Returns:
        empty or pre-defined trials object
    '''
    # Modify pts to have a different initial guess
    # NOTE: Hyperopt reads dict keys and values separately and organizes
    # keys in alphabetical order, while the order of values remains the
    # same. Thus, place keys in alphabetical order.
    if isDef:
        pts = [{
                'aint_dense': 1,
                'batch_size': 0,
                'nint_dense': 2,
                'optim_type': 0,
                'sint_dense': 0,
                'train_loss': 0
                }]
        new_trials = generate_trials_to_calculate(pts)
    else:
        new_trials = Trials()
    return new_trials
Example #6
    def process_meta(self, fn_name, space, algo, max_evals):
        fn = getattr(self, fn_name)
        if fn_name == 'xgb_reg':
            trials = generate_trials_to_calculate([self.meta_param_xgb_reg()])
        else:
            trials = generate_trials_to_calculate([self.meta_param_xgb_clf()])

        try:
            result = fmin(fn=fn,
                          space=space,
                          algo=algo,
                          max_evals=max_evals,
                          trials=trials)
        except Exception as e:
            return {'status': STATUS_FAIL, 'exception': str(e)}

        return trials
Example #7
    def __init__(self,
                 space,
                 max_concurrent=10,
                 reward_attr=None,
                 metric="episode_reward_mean",
                 mode="max",
                 points_to_evaluate=None,
                 n_initial_points=20,
                 random_state_seed=None,
                 gamma=0.25,
                 **kwargs):
        assert hpo is not None, "HyperOpt must be installed!"
        from hyperopt.fmin import generate_trials_to_calculate
        assert type(max_concurrent) is int and max_concurrent > 0
        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"

        if reward_attr is not None:
            mode = "max"
            metric = reward_attr
            logger.warning(
                "`reward_attr` is deprecated and will be removed in a future "
                "version of Tune. "
                "Setting `metric={}` and `mode=max`.".format(reward_attr))

        self._max_concurrent = max_concurrent
        self._metric = metric
        # hyperopt internally minimizes, so "max" => -1
        if mode == "max":
            self._metric_op = -1.
        elif mode == "min":
            self._metric_op = 1.
        if n_initial_points is None:
            self.algo = hpo.tpe.suggest
        else:
            self.algo = partial(hpo.tpe.suggest,
                                n_startup_jobs=n_initial_points)
        if gamma is not None:
            self.algo = partial(self.algo, gamma=gamma)
        self.domain = hpo.Domain(lambda spc: spc, space)
        if points_to_evaluate is None:
            self._hpopt_trials = hpo.Trials()
            self._points_to_evaluate = 0
        else:
            assert type(points_to_evaluate) == list
            self._hpopt_trials = generate_trials_to_calculate(
                points_to_evaluate)
            self._hpopt_trials.refresh()
            self._points_to_evaluate = len(points_to_evaluate)
        self._live_trial_mapping = {}
        if random_state_seed is None:
            self.rstate = np.random.RandomState()
        else:
            self.rstate = np.random.RandomState(random_state_seed)

        super(HyperOptSearch, self).__init__(metric=self._metric,
                                             mode=mode,
                                             **kwargs)
Example #8
def test_early_stop_no_progress_loss():
    trials = generate_trials_to_calculate([{'x': -100}])
    fmin(fn=lambda x: x,
         space=hp.uniform("x", -5, 5),
         algo=rand.suggest,
         max_evals=500,
         trials=trials,
         early_stop_fn=no_progress_loss(10))

    assert len(trials) == 10
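no_progress_loss comes from hyperopt.early_stop; the same callback contract can be written by hand. A sketch under that assumption (stop_below is a made-up helper): an early_stop_fn receives the Trials object plus any extra state it returned on its previous call, and returns a (stop, state) pair.

def stop_below(threshold):
    # Made-up helper: stop once any completed trial reaches a loss below `threshold`.
    def early_stop(trials, *state):
        losses = [l for l in trials.losses() if l is not None]
        return bool(losses) and min(losses) < threshold, []
    return early_stop

# Usage: fmin(..., early_stop_fn=stop_below(0.01))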
Example #9
    def __init__(
            self,
            space=None,
            metric=None,
            mode=None,
            points_to_evaluate=None,
            n_initial_points=20,
            random_state_seed=None,
            gamma=0.25,
            max_concurrent=None,
            use_early_stopped_trials=None,
    ):
        assert hpo is not None, (
            "HyperOpt must be installed! Run `pip install hyperopt`.")
        if mode:
            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
        from hyperopt.fmin import generate_trials_to_calculate
        super(HyperOptSearch, self).__init__(
            metric=metric,
            mode=mode,
            max_concurrent=max_concurrent,
            use_early_stopped_trials=use_early_stopped_trials)
        self.max_concurrent = max_concurrent
        # hyperopt internally minimizes, so "max" => -1
        if mode == "max":
            self.metric_op = -1.
        elif mode == "min":
            self.metric_op = 1.

        if n_initial_points is None:
            self.algo = hpo.tpe.suggest
        else:
            self.algo = partial(
                hpo.tpe.suggest, n_startup_jobs=n_initial_points)
        if gamma is not None:
            self.algo = partial(self.algo, gamma=gamma)
        if points_to_evaluate is None:
            self._hpopt_trials = hpo.Trials()
            self._points_to_evaluate = 0
        else:
            assert isinstance(points_to_evaluate, (list, tuple))
            self._hpopt_trials = generate_trials_to_calculate(
                points_to_evaluate)
            self._hpopt_trials.refresh()
            self._points_to_evaluate = len(points_to_evaluate)
        self._live_trial_mapping = {}
        if random_state_seed is None:
            self.rstate = np.random.RandomState()
        else:
            self.rstate = np.random.RandomState(random_state_seed)

        self.domain = None
        if space:
            self.domain = hpo.Domain(lambda spc: spc, space)
Example #10
    def process_xgb_clf(self, max_evals):
        trials = generate_trials_to_calculate([self.meta_param_xgb_clf()])
        try:
            result = fmin(fn=self.xgb_clf,
                          space=xgb_para,
                          trials=trials,
                          algo=tpe.suggest,
                          max_evals=max_evals)
        except Exception as e:
            return {'status': STATUS_FAIL, 'exception': str(e)}

        return result
Example #11
    def gen_trials(self):
        '''
        Generate an initial Trial object.
        Redefine this function if you want custom guesses.
        Returns:
            trials: empty or pre-defined trials object
        '''
        # Modify pts to have a different initial guess
        # NOTE: Hyperopt reads dict keys and values separately and organizes
        # keys in alphabetical order, while the order of values remains the
        # same. Thus, place keys in alphabetical order.
        if self.hps_guess is not None:
            trials = generate_trials_to_calculate(self.hps_guess)
        else:
            trials = Trials()
        return trials
Example #12
    def hyper_optimization(self, train_mask, val_mask):
        def objective(hyperparams):
            model = GCN({**self.params, **hyperparams, 'timer': self.timer}).to(self.device)
            pred, pred_val, flag = model.train_predict(self.data, train_mask=train_mask, val_mask=val_mask)
            if flag:
                self.flag_end = True
            score = accuracy_score(self.data.y[val_mask].cpu().numpy(), (pred_val.max(1)[1]).cpu().numpy())
            return {'loss': -score, 'status': STATUS_OK, 'pred': pred.cpu().numpy(), 'flag': self.flag_end}
        trials = generate_trials_to_calculate(self.points)
        if self.timer.remain_time() < 5 or self.flag_end:
            self.flag_end = True
            return None, -1.0, None
        best = fmin(fn=objective, space=self.space, trials=trials,
                    algo=tpe.suggest, max_evals=5, verbose=0,
                    timeout=self.timer.remain_time() - 5)
        hyperparams = space_eval(self.space, best)
        best_score = -trials.best_trial['result']['loss']
        pprint.pprint(hyperparams, width=1)
        print('>>>>>>> ', best_score)
        pred = trials.best_trial['result']['pred']
        return pred, best_score, hyperparams
Example #13
    def _setup_hyperopt(self) -> None:
        from hyperopt.fmin import generate_trials_to_calculate

        if self._metric is None and self._mode:
            # If only a mode was passed, use anonymous metric
            self._metric = DEFAULT_METRIC

        if self._points_to_evaluate is None:
            self._hpopt_trials = hpo.Trials()
            self._points_to_evaluate = 0
        else:
            assert isinstance(self._points_to_evaluate, (list, tuple))

            for i in range(len(self._points_to_evaluate)):
                config = self._points_to_evaluate[i]
                self._convert_categories_to_indices(config)
            # HyperOpt treats initial points as LIFO, reverse to get FIFO
            self._points_to_evaluate = list(reversed(self._points_to_evaluate))
            self._hpopt_trials = generate_trials_to_calculate(self._points_to_evaluate)
            self._hpopt_trials.refresh()
            self._points_to_evaluate = len(self._points_to_evaluate)

        self.domain = hpo.Domain(lambda spc: spc, self._space)
Example #14
def get_new_params(experiment: Experiment,
                   rstate,
                   algo=tpe.suggest,
                   n_points=1):
    params = [{p.name: p.value
               for p in result.params} for result in experiment.results]
    trials = generate_trials_to_calculate(params)
    trials.refresh()
    space = convert_parameter_space(experiment.parameter_spaces)
    domain = base.Domain(
        lambda args: experiment.results[params.index(args)].value, space)
    FMinIter(algo, domain, trials, rstate=rstate).serial_evaluate()
    new_ids = trials.new_trial_ids(n_points)
    new_points = algo(new_ids, domain, trials, rstate.randint(2**31 - 1))
    new_params = [[
        Parameter(name=k, value=v[0])
        for k, v in point['misc']['vals'].items()
    ] for point in new_points]
    if experiment.results:
        experiment.best_params = [
            Parameter(name=k, value=v[0])
            for k, v in trials.best_trial['misc']['vals'].items()
        ]
    return new_params
Example #15
def main():

    # Imports the dataset and labels and turns them into numpy arrays.
    X = pd.read_csv('train_values.csv', index_col = 0).to_numpy()
    y = np.array(pd.read_csv('train_labels.csv', index_col = 0).to_numpy().T[0])
    
    # Preprocesses data (one hot or ordinal encoding)
    X = preProcess(X, 'OneHotEncoding')

    # Performs PCA on the dataset for better visualization
    print("Do you want to visualize dataset with PCA?\n")
    pca_inp = input('(y/n): ').lower()

    if pca_inp == 'y':

        size = int(input("Define amount of samples to visualize with PCA: "))
        
        pca = PCA(n_components = 2)
        pca.fit(X)
        
        PCX = pca.transform(X)

        plt.scatter(PCX[:size,0], PCX[:size,1], c = y[:size])
        plt.show()
        print()
    else:
        print()

    # Option to use entire dataset or a reduced set with a more balanced amount of each label.
    print("Use reduced set?\n")
    full = input('(y/n): ')
    print()

    # Create training set and test set
    if full == 'y':
                
        X1 = [X[i] for i in range(len(X)) if y[i] == 1]
        X2 = [X[i] for i in range(len(X)) if y[i] == 2]
        X3 = [X[i] for i in range(len(X)) if y[i] == 3]

        y1 = [y[i] for i in range(len(y)) if y[i] == 1]
        y2 = [y[i] for i in range(len(y)) if y[i] == 2]
        y3 = [y[i] for i in range(len(y)) if y[i] == 3]

        size = min(len(X1),len(X2),len(X3))

        Xp = np.concatenate((X1[:size], X2[:size], X3[:size]))
        yp = np.concatenate((y1[:size], y2[:size], y3[:size]))

        train_X, test_X, train_y, test_y = train_test_split(
            Xp, yp, test_size = 0.2)
        
    else:
      
        train_X, test_X, train_y, test_y = train_test_split(
            X, y, test_size = 0.2, random_state=42)


    # Initializes last_classifier variable.
    last_classifier = None

    # 'Front-end'
    while True:
    
        print("Choose the training model: ")
        print(' - Network\n - GBM\n - LGBM\n - GridSearch\n - Hyperopt\n')
        inp = input('>> ').lower()
        print()

################################################################################################################################################################
        
        if inp == 'network':

            print('-- SkLearn MLP Classifier (neural network) --\n')

            it = int(input('Define maximum number of iterations: '))
            print()

            layers = [int(x) for x in input("Define network architecture: ").replace(' ', '').split(',')]
            print()

            alpha = input("Define regularization term alpha (default = 0.0001): ")
            if alpha == '':
                alpha = 0.0001
            print()

            eps = input("Define stability term epsilon (default = 1e-8): ")
            if eps == '':
                eps = 0.00000001
            print()

            activation = input("Define activation function (default = 'relu'): ")
            if activation == '':
                activation = 'relu'
            print()

            l_rate = input("Define initial learning rate (default = 0.001): ")
            if l_rate == '':
                l_rate = 0.001
            print()

            solver = 'adam'
            decay = input("Define learning rate decay (default = 'constant'): ")
            if decay == '':
                decay = 'constant'
            print()

            beta_1 = input("Define beta 1 (default = 0.999): ")
            if beta_1 == '':
                beta_1 = 0.999
            print()

            beta_2 = input("Define beta 2 (default = 0.999): ")
            if beta_2 == '':
                beta_2 = 0.999
            print()

            print("Want early stopping (default = False)?")
            early = input('(y/n): ').lower()
            if early == 'y':
                early = True
            else:
                early = False
            print()

            print("Want warm start (default = False)?")
            warm = input('(y/n): ').lower()
            if warm == 'y':
                warm = True
            else:
                warm = False
            print()

            num = input("Define number of iterations without change, to declare convergence (default = 10): ")
            if num == '':
                num = 10
            print()

            
            tol = input("Finally, define the tolerance (default = 0.0001): ")
            if tol == '':
                tol = 0.0001
            
            print('\n-- Training neural network --\n')

            mlp = MLPClassifier(
                hidden_layer_sizes = layers, max_iter = it, alpha = float(alpha), activation = activation,
                learning_rate = decay, learning_rate_init = float(l_rate), verbose = True,
                early_stopping = early, epsilon = float(eps), validation_fraction = 0.2, solver = solver,
                beta_1 = float(beta_1), beta_2 = float(beta_2), warm_start = warm, tol = float(tol), n_iter_no_change = int(num))

            mlp.fit(train_X, train_y)

            print()
            
            preds = mlp.predict(train_X)


            print('Results on training set:')
            print(classification_report(train_y, preds, zero_division = 1))
            print('Micro averaged F1 Score: ', f1_score(train_y, preds, average='micro'), '\n')

            print('-'*80, '\n')

            preds = mlp.predict(test_X)

            print('Results on cross-validation set:')
            print(classification_report(test_y, preds, zero_division = 1))
            print('Micro averaged F1 Score: ', f1_score(test_y, preds, average='micro'), '\n')

            last_classifier = mlp

            winsound.Beep(frequency, duration)

            print('='*80, '\n')

################################################################################################################################################################

        elif inp == 'gbm':

            print('-- SkLearn GBM Classifier --\n')

            learning_rate = input("Define learning rate (default = 0.1): ")
            if learning_rate == '':
                learning_rate = 0.1
            print()

            n_estimators = input("Define number of estimators (default = 100): ")
            if n_estimators == '':
                n_estimators = 100
            print()

            subsample = input("Define subsample percentage (default = 100 %): ")
            if subsample == '' or float(subsample) > 100:
                subsample = 100
            print()

            min_samples_split = input("Define minimum number of samples to split node (default = 2): ")
            if min_samples_split == '':
                min_samples_split = 2
            print()

            min_samples_leaf = input("Define minimum number of samples to make node a leaf (default = 1): ")
            if min_samples_leaf == '':
                min_samples_leaf = 1
            print()

            max_depth = input("Define maximum depth of individual estimators (default = 3): ")
            if max_depth == '':
                max_depth = 3
            print()

            print("Do you want cross-validation for early-stopping?")
            n_iter_no_change = input("(y/n): ")
            if n_iter_no_change == 'y':
                n_iter_no_change = 15
            else:
                n_iter_no_change = None
            print("\n-- Training GBM --\n")

            gbm = GradientBoostingClassifier(
                learning_rate = float(learning_rate), n_estimators = int(n_estimators), subsample = float(subsample)/100,
                min_samples_split = int(min_samples_split), min_samples_leaf = int(min_samples_leaf), max_depth = int(max_depth),
                verbose = True, n_iter_no_change = n_iter_no_change)

            gbm.fit(train_X, train_y)

            preds = gbm.predict(train_X)

            print('Results on training set:')
            print(classification_report(train_y, preds, zero_division = 1))
            print('Micro averaged F1 Score: ', f1_score(train_y, preds, average='micro'), '\n')

            print('-'*80, '\n')

            preds = gbm.predict(test_X)

            print('Results on cross-validation set:')
            print(classification_report(test_y, preds, zero_division = 1))
            print('Micro averaged F1 Score: ', f1_score(test_y, preds, average='micro'), '\n')

            last_classifier = gbm

            winsound.Beep(frequency, duration)

            print('='*80, '\n')

################################################################################################################################################################

        elif inp == 'lgbm':

            print('-- SkLearn LightGBM API --\n')

            max_depth = input('Define max depth of tree (default = -1): ')
            if max_depth == '':
                max_depth = -1
            print()

            min_data_in_leaf = input('Define min data in leaf (default = 20): ')
            if min_data_in_leaf == '':
                min_data_in_leaf = 20
            print()

            feature_fraction = input('Define feature fraction for random subsampling (default = 1): ')
            if feature_fraction == '' or not (0 < float(feature_fraction) <= 1):
                feature_fraction = 1
            print()

            bagging_freq = 0
            bagging_fraction = input('Define bagging fraction for random sampling (default = 1): ')
            if bagging_fraction == '' or not (0 < float(bagging_fraction) <= 1):
                bagging_fraction = 1
            print()
            if float(bagging_fraction) < 1:
                bagging_freq = input('Define frequency for bagging (default = 0): ')
                if bagging_freq == '':
                    bagging_freq = 0
                print()

            alpha = input('Define regularization alpha (default = 0): ')
            if alpha == '':
                alpha = 0
            print()

            lamb = input('Define regularization lambda (default = 0): ')
            if lamb == '':
                lamb = 0
            print()

            min_gain_to_split = input('Define min gain to split node (default = 0): ')
            if min_gain_to_split == '':
                min_gain_to_split = 0
            print()

            objective = input('Define objective (default = softmax)\n\nOptions: \n- num_class\n- softmax\n- ovr\n\n>> ').lower()
            if objective == '':
                objective = 'softmax'
            print()

            num_boost_round = input('Define number of iterations (default = 100): ')
            if num_boost_round == '':
                num_boost_round = 100
            print()

            l_rate = input('Define learning rate (default = 0.1): ')
            if l_rate == '':
                l_rate = 0.1
            print()

            num_leaves = input('Define max number of leaves in a tree (default = 31): ')
            if num_leaves == '':
                num_leaves = 31
            print()

            max_bin = input('Define max number of bins (default = 255): ')
            if max_bin == '':
                max_bin = 255
            print()

            min_sum_hessian_in_leaf = input('Define min hessian in leaf (default = 0.001): ')
            if min_sum_hessian_in_leaf == '':
                min_sum_hessian_in_leaf = 0.001
            print()

            print('Compensate for unbalanced dataset?')
            is_unbalance = input('(y/n): ').lower()
            if is_unbalance == 'y':
                is_unbalance = True
            else:
                is_unbalance = False
            print()

            print('-- Training Light GBM --\n')
            
            gbm = lgb.LGBMClassifier(max_depth = int(max_depth),
                min_data_in_leaf = int(min_data_in_leaf),
                feature_fraction = float(feature_fraction),
                bagging_fraction = float(bagging_fraction),
                bagging_freq = int(bagging_freq),
                lambda_l1 = float(alpha),
                lambda_l2 = float(lamb),
                min_gain_to_split = float(min_gain_to_split),
                objective = objective,
                num_boost_round = int(num_boost_round),
                learning_rate = float(l_rate),
                num_leaves = int(num_leaves),
                gpu_use_dp = True,
                num_threads = 2,
                num_class = 3,
                is_unbalance = is_unbalance,
                verbosity = 10,
                max_bin = int(max_bin),
                min_sum_hessian_in_leaf = float(min_sum_hessian_in_leaf),
                )
            
            warnings.filterwarnings("ignore")
            gbm.fit(train_X, train_y)
            warnings.filterwarnings("default")

            preds = gbm.predict(train_X)

            print('Results on training set:')
            print(classification_report(train_y, preds, zero_division = 1))
            print('Micro averaged F1 Score: ', f1_score(train_y, preds, average='micro'), '\n')

            print('-'*80, '\n')

            preds = gbm.predict(test_X)

            print('Results on cross-validation set:')
            print(classification_report(test_y, preds, zero_division = 1))
            print('Micro averaged F1 Score: ', f1_score(test_y, preds, average='micro'), '\n')

            last_classifier = gbm

            winsound.Beep(frequency, duration)

            print('='*80, '\n')

################################################################################################################################################################

        elif inp == 'gridsearch':

            print('-- Random GridSearchCV on LightGBM classifier --\n')

            iters = input('Define number of iterations (default = 10): ')
            if iters == '':
                iters = 10
            print()

            cv = input('Define number of CV (default = 2): ')
            if cv == '':
                cv = 2
            print()
            
            param_distributions = {
                'max_depth': [-1,10,30,60,100,200],
                'min_data_in_leaf': [4,8,16,32,64,128],
                'feature_fraction': [0.1,0.25,0.5,0.75,1],
                'bagging_fraction': [0.1,0.25,0.5,0.75,1],
                'bagging_freq': [0,2,8,32,128],
                'lambda_l1': [0,0.1,1,2,4,175,512,1000],
                'lambda_l2': [0,0.1,1,2,4,175,512,1000],
                'min_gain_to_split': [0,0.01,0.03,0.1,0.3,0.5,0.9,1],
                'num_boost_round': [100, 500, 1000, 2500, 5000, 10000, 15000, 20000],
                'learning_rate': [0.5, 0.25, 0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001],
                'num_leaves': [10, 31, 62, 124, 200, 500, 750, 1000],
                'objective': ['softmax'],
                'gpu_use_dp': [True],
                'num_threads': [1],
                'num_class': [3],
                'max_bin': [128, 256, 512, 1024, 2048, 3000, 4000, 5000, 6000]
                }

            print('-- Finding best parameters --\n')

            rSearch = RandomizedSearchCV(estimator = lgb.LGBMClassifier(), param_distributions = param_distributions, scoring = 'f1_micro', n_jobs = 2,
                                         cv = int(cv), verbose = 10, n_iter = int(iters))

            warnings.filterwarnings("ignore")
            rSearch.fit(train_X, train_y)
            warnings.filterwarnings("default")

            print()

            print('Best parameters:')
            print(rSearch.best_params_, '\n')

            print('Score:')
            print(rSearch.best_score_, '\n')
            
            winsound.Beep(frequency, duration)

            print('='*80, '\n')

################################################################################################################################################################

        elif inp == 'hyperopt':
            
            print('-- Bayesian Optimization on LightGBM classifier (with Hyperopt) --\n')

            iters = input('Define number of evaluations (default = 50): ')
            if iters == '':
                iters = 50
            print()
            
            def objective_fun(space):

                print()

                warnings.filterwarnings('ignore')
                model = lgb.LGBMClassifier(**space)
                accuracy = cross_val_score(model, X, y, cv = 3, scoring = 'f1_micro').mean()
                warnings.filterwarnings('default')

                return {'loss': - accuracy, 'status': STATUS_OK}

            param_space = {
                'max_depth': 0,
                'min_data_in_leaf': scope.int(hp.quniform('min_data_in_leaf', 1, 5000, 1)),
                'feature_fraction': hp.uniform('feature_fraction', 0, 1),
                'bagging_fraction': hp.uniform('bagging_fraction', 0, 1),
                'bagging_freq': scope.int(hp.quniform('bagging_freq', 0, 100 ,1)),
                'lambda_l1': hp.uniform('lambda_l1', 0, 10000),
                'lambda_l2': hp.uniform('lambda_l2', 0, 10000),
                'min_gain_to_split': hp.uniform('min_gain_to_split', 0, 11),
                'num_boost_round': scope.int(hp.quniform('num_boost_round', 100, 20000, 1)),
                'learning_rate': hp.uniform('learning_rate', 0.000001, 1),
                'num_leaves': scope.int(hp.quniform('num_leaves', 2, 2000, 1)),
                'objective': 'softmax',
                'gpu_use_dp': True,
                'num_threads': 2,
                'num_class': 3,
                'max_bin': scope.int(hp.quniform('max_bin', 32, 4096, 1)),
                'min_sum_hessian_in_leaf': hp.uniform('min_sum_hessian_in_leaf', 0, 5)
                }

            param_init_trials = {
                'max_depth': 0,
                'min_data_in_leaf': 40,
                'feature_fraction': 0.5,
                'bagging_fraction': 0.9,
                'bagging_freq': 1,
                'lambda_l1': 0,
                'lambda_l2': 10,
                'min_gain_to_split': 0,
                'num_boost_round': 10000,
                'learning_rate': 0.1,
                'num_leaves': 31,
                'objective': 'softmax',
                'gpu_use_dp': True,
                'num_threads': 2,
                'num_class': 3,
                'max_bin': 256,
                'min_sum_hessian_in_leaf': 0.1
                }

            trials = generate_trials_to_calculate([param_init_trials])
            best = fmin(fn = objective_fun, space = param_space,
                        algo = tpe.suggest, max_evals = int(iters),
                        trials = trials)

            print()
            print(best, '\n')
            input()

            


################################################################################################################################################################

        elif last_classifier != None:

            # Decides if wants to submit
            print('Want to submit?')
            inp = input('(y/n): ').lower()
            print()
            if inp == 'y':

                # Decides if wants to fit classifier again for the entirety of the dataset
                print('Want to fit for entire dataset?')
                inp = input('(y/n): ').lower()
                print()
                if inp == 'y':
                    print('-- Training the last classifier --\n')
                    warnings.filterwarnings("ignore")
                    last_classifier.fit(X, y)
                    warnings.filterwarnings("default")
                    print()
                submit(last_classifier)
                return
            else:
                pass
Example #16
def main():

    # Imports the dataset and labels and turns them into numpy arrays.
    X = pd.read_csv('train_values.csv', index_col = 0).to_numpy()
    y = np.array(pd.read_csv('train_labels.csv', index_col = 0).to_numpy().T[0])
    
    # Preprocesses data (one hot or ordinal encoding)
    X = preProcess(X, 'OneHotEncoding')

    # Performs PCA on the dataset for better visualization
    print("Do you want to visualize dataset with PCA?\n")
    pca_inp = input('(y/n): ').lower()

    if pca_inp == 'y':

        size = int(input("Define amount of samples to visualize with PCA: "))
        
        pca = PCA(n_components = 2)
        pca.fit(X)
        
        PCX = pca.transform(X)

        plt.scatter(PCX[:size,0], PCX[:size,1], c = y[:size])
        plt.show()
        print()
    else:
        print()

    # Option to use entire dataset or a reduced set with a more balanced amount of each label.
    print("Use reduced set?\n")
    full = input('(y/n): ')
    print()

    # Create training set and test set
    if full == 'y':
                
        X1 = [X[i] for i in range(len(X)) if y[i] == 1]
        X2 = [X[i] for i in range(len(X)) if y[i] == 2]
        X3 = [X[i] for i in range(len(X)) if y[i] == 3]

        y1 = [y[i] for i in range(len(y)) if y[i] == 1]
        y2 = [y[i] for i in range(len(y)) if y[i] == 2]
        y3 = [y[i] for i in range(len(y)) if y[i] == 3]

        size = min(len(X1),len(X2),len(X3))

        Xp = np.concatenate((X1[:size], X2[:size], X3[:size]))
        yp = np.concatenate((y1[:size], y2[:size], y3[:size]))

        train_X, test_X, train_y, test_y = train_test_split(
            Xp, yp, test_size = 0.2)
        
    else:
      
        train_X, test_X, train_y, test_y = train_test_split(
            X, y, test_size = 0.2, random_state=42)


    # Initializes last_classifier variable.
    last_classifier = None

    # 'Front-end'
    while True:
    
        print("Choose the training model: ")
        print(' - LGBM\n - GridSearch\n - Hyperopt\n - Evolve\n')
        inp = input('>> ').lower()
        print()

################################################################################################################################################################

        if inp == 'lgbm':

            print('-- SkLearn LightGBM API --\n')

            max_depth = input('Define max depth of tree (default = -1): ')
            if max_depth == '':
                max_depth = -1
            print()

            min_data_in_leaf = input('Define min data in leaf (default = 20): ')
            if min_data_in_leaf == '':
                min_data_in_leaf = 20
            print()

            feature_fraction = input('Define feature fraction for random subsampling (default = 1): ')
            if feature_fraction == '' or not (0 < float(feature_fraction) <= 1):
                feature_fraction = 1
            print()

            bagging_freq = 0
            bagging_fraction = input('Define bagging fraction for random sampling (default = 1): ')
            if bagging_fraction == '' or not (0 < float(bagging_fraction) <= 1):
                bagging_fraction = 1
            print()
            if float(bagging_fraction) < 1:
                bagging_freq = input('Define frequency for bagging (default = 0): ')
                if bagging_freq == '':
                    bagging_freq = 0
                print()

            alpha = input('Define regularization alpha (default = 0): ')
            if alpha == '':
                alpha = 0
            print()

            lamb = input('Define regularization lambda (default = 0): ')
            if lamb == '':
                lamb = 0
            print()

            min_gain_to_split = input('Define min gain to split node (default = 0): ')
            if min_gain_to_split == '':
                min_gain_to_split = 0
            print()

            objective = input('Define objective (default = softmax)\n\nOptions: \n- num_class\n- softmax\n- ovr\n\n>> ').lower()
            if objective == '':
                objective = 'softmax'
            print()

            num_boost_round = input('Define number of iterations (default = 100): ')
            if num_boost_round == '':
                num_boost_round = 100
            print()

            l_rate = input('Define learning rate (default = 0.1): ')
            if l_rate == '':
                l_rate = 0.1
            print()

            num_leaves = input('Define max number of leaves in a tree (default = 31): ')
            if num_leaves == '':
                num_leaves = 31
            print()

            max_bin = input('Define max number of bins (default = 255): ')
            if max_bin == '':
                max_bin = 255
            print()

            min_sum_hessian_in_leaf = input('Define min hessian in leaf (default = 0.001): ')
            if min_sum_hessian_in_leaf == '':
                min_sum_hessian_in_leaf = 0.001
            print()

            print('Compensate for unbalanced dataset?')
            is_unbalance = input('(y/n): ').lower()
            if is_unbalance == 'y':
                is_unbalance = True
            else:
                is_unbalance = False
            print()

            print('-- Training Light GBM --\n')
            
            gbm = lgb.LGBMClassifier(max_depth = int(max_depth),
                min_data_in_leaf = int(min_data_in_leaf),
                feature_fraction = float(feature_fraction),
                bagging_fraction = float(bagging_fraction),
                bagging_freq = int(bagging_freq),
                lambda_l1 = float(alpha),
                lambda_l2 = float(lamb),
                min_gain_to_split = float(min_gain_to_split),
                objective = objective,
                num_boost_round = int(num_boost_round),
                learning_rate = float(l_rate),
                num_leaves = int(num_leaves),
                gpu_use_dp = True,
                num_threads = 2,
                num_class = 3,
                is_unbalance = is_unbalance,
                verbosity = 10,
                max_bin = int(max_bin),
                min_sum_hessian_in_leaf = float(min_sum_hessian_in_leaf),
                )
            
            warnings.filterwarnings("ignore")
            gbm.fit(train_X, train_y)
            warnings.filterwarnings("default")

            preds = gbm.predict(train_X)

            print('Results on training set:')
            print(classification_report(train_y, preds, zero_division = 1))
            print('Micro averaged F1 Score: ', f1_score(train_y, preds, average='micro'), '\n')

            print('-'*80, '\n')

            preds = gbm.predict(test_X)

            print('Results on cross-validation set:')
            print(classification_report(test_y, preds, zero_division = 1))
            print('Micro averaged F1 Score: ', f1_score(test_y, preds, average='micro'), '\n')

            last_classifier = gbm

            winsound.Beep(frequency, duration)

            print('='*80, '\n')

################################################################################################################################################################

        elif inp == 'gridsearch':

            print('-- Random GridSearchCV on LightGBM classifier --\n')

            iters = input('Define number of iterations (default = 10): ')
            if iters == '':
                iters = 10
            print()

            cv = input('Define number of CV (default = 2): ')
            if cv == '':
                cv = 2
            print()
            
            param_distributions = {
                'max_depth': [-1,10,30,60,100,200],
                'min_data_in_leaf': [4,8,16,32,64,128],
                'feature_fraction': [0.1,0.25,0.5,0.75,1],
                'bagging_fraction': [0.1,0.25,0.5,0.75,1],
                'bagging_freq': [0,2,8,32,128,256],
                'lambda_l1': [0,0.1,1,2,4,175,512,1000],
                'lambda_l2': [0,0.1,1,2,4,175,512,1000],
                'min_gain_to_split': [0,0.01,0.03,0.1,0.3,0.5,0.9,1],
                'num_boost_round': [100, 500, 1000, 2500, 5000, 10000, 15000, 20000],
                'learning_rate': [0.5, 0.25, 0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001],
                'num_leaves': [10, 31, 62, 124, 200, 500, 750, 1000],
                'objective': ['softmax'],
                'gpu_use_dp': [True],
                'num_threads': [1],
                'num_class': [3],
                'max_bin': [128, 256, 512, 1024, 2048, 3000, 4000, 5000, 6000]
                }

            print('-- Finding best parameters --\n')

            rSearch = RandomizedSearchCV(estimator = lgb.LGBMClassifier(), param_distributions = param_distributions, scoring = 'f1_micro', n_jobs = 2,
                                         cv = int(cv), verbose = 10, n_iter = int(iters))

            warnings.filterwarnings("ignore")
            rSearch.fit(train_X, train_y)
            warnings.filterwarnings("default")

            print()

            print('Best parameters:')
            print(rSearch.best_params_, '\n')

            print('Score:')
            print(rSearch.best_score_, '\n')
            
            winsound.Beep(frequency, duration)

            print('='*80, '\n')

################################################################################################################################################################

        elif inp == 'hyperopt':
            
            print('-- Bayesian Optimization on LightGBM classifier (with Hyperopt) --\n')

            iters = input('Define number of evaluations (default = 50): ')
            if iters == '':
                iters = 50
            print()
            
            def objective_fun(space):

                print()

                warnings.filterwarnings('ignore')
                model = lgb.LGBMClassifier(**space)
                accuracy = cross_val_score(model, X, y, cv = 3, scoring = 'f1_micro').mean()
                warnings.filterwarnings('default')

                return {'loss': - accuracy, 'status': STATUS_OK}

            param_space = {
                'max_depth': 0,
                'min_data_in_leaf': scope.int(hp.quniform('min_data_in_leaf', 1, 5000, 1)),
                'feature_fraction': hp.uniform('feature_fraction', 0, 1),
                'bagging_fraction': hp.uniform('bagging_fraction', 0, 1),
                'bagging_freq': scope.int(hp.quniform('bagging_freq', 0, 1000 ,1)),
                'lambda_l1': hp.uniform('lambda_l1', 0, 10000),
                'lambda_l2': hp.uniform('lambda_l2', 0, 10000),
                'min_gain_to_split': hp.uniform('min_gain_to_split', 0, 1),
                'num_boost_round': scope.int(hp.quniform('num_boost_round', 100, 20000, 1)),
                'learning_rate': hp.uniform('learning_rate', 0.000001, 1),
                'num_leaves': scope.int(hp.quniform('num_leaves', 2, 2000, 1)),
                'objective': 'softmax',
                'gpu_use_dp': True,
                'num_threads': 2,
                'num_class': 3,
                'max_bin': scope.int(hp.quniform('max_bin', 32, 4096, 1)),
                'min_sum_hessian_in_leaf': hp.uniform('min_sum_hessian_in_leaf', 0, 5)
                }

            param_init_trials = {
                'max_depth': 0,
                'min_data_in_leaf': 40,
                'feature_fraction': 0.5,
                'bagging_fraction': 0.9,
                'bagging_freq': 1,
                'lambda_l1': 0,
                'lambda_l2': 10,
                'min_gain_to_split': 0,
                'num_boost_round': 10000,
                'learning_rate': 0.1,
                'num_leaves': 31,
                'objective': 'softmax',
                'gpu_use_dp': True,
                'num_threads': 2,
                'num_class': 3,
                'max_bin': 256,
                'min_sum_hessian_in_leaf': 0.1
                }

            trials = generate_trials_to_calculate([param_init_trials])
            best = fmin(fn = objective_fun, space = param_space,
                        algo = tpe.suggest, max_evals = int(iters),
                        trials = trials)

            print()
            print(best, '\n')
            input()

        elif inp == 'evolve':

            print('-- Evolutionary optimization on LightGBM --\n')

            iters = input('Define number of generations (default = 10): ')
            if iters == '':
                iters = 10
            print()

            cv = input('Define number of cross-validation sets (default = 2): ')
            if cv == '':
                cv = 2
            print()

            c_size = input('Define size of change on each iteration (default = 1): ')
            if c_size == '':
                c_size = 1
            print()

            pop_size = input('Define size of population (default = 10): ')
            if pop_size == '':
                pop_size = 10

            param_space = {
                'max_depth': [-1],
                'min_data_in_leaf': np.linspace(0, 5000, 5002, dtype = int),
                'feature_fraction': np.linspace(0, 1, 1000),
                'bagging_fraction': np.linspace(0, 1, 1000),
                'bagging_freq': np.linspace(0, 100 ,102, dtype = int),
                'lambda_l1': np.linspace(0, 10000, 100000),
                'lambda_l2': np.linspace(0, 10000, 100000),
                'min_gain_to_split': np.linspace(0, 0.9, 100),
                'num_boost_round': np.linspace(100, 20000, 19902, dtype = int),
                'learning_rate': np.linspace(0.0001, 1, 10000),
                'num_leaves': np.linspace(2, 2000, 2000, dtype = int),
                'objective': ['softmax'],
                'gpu_use_dp': [True],
                'num_threads': [2],
                'num_class': [3],
                'max_bin': np.linspace(32, 4096, 4066, dtype = int),
                'min_sum_hessian_in_leaf': np.linspace(0, 5, 100)
                }

            common_ancestor = {
                'max_depth': -1,
                'min_data_in_leaf': 20,
                'feature_fraction': 0.5,
                'bagging_fraction': 0.9,
                'bagging_freq': 1,
                'lambda_l1': 0.,
                'lambda_l2': 10.,
                'min_gain_to_split': 0.,
                'num_boost_round': 2000,
                'learning_rate': 0.1,
                'num_leaves': 31,
                'objective': 'softmax',
                'gpu_use_dp': True,
                'num_threads': 2,
                'num_class': 3,
                'max_bin': 256,
                'min_sum_hessian_in_leaf': 0.1
                }

            evolveSelect(X, y, param_space, int(iters), int(pop_size), float(c_size), int(cv), common_ancestor)
                

################################################################################################################################################################

        elif last_classifier != None:

            # Decides if wants to submit
            print('Want to submit?')
            inp = input('(y/n): ').lower()
            print()
            if inp == 'y':

                # Decides if wants to fit classifier again for the entirety of the dataset
                print('Want to fit for entire dataset?')
                inp = input('(y/n): ').lower()
                print()
                if inp == 'y':
                    print('-- Training the last classifier --\n')
                    warnings.filterwarnings("ignore")
                    last_classifier.fit(X, y)
                    warnings.filterwarnings("default")
                    print()
                submit(last_classifier)
                return
            else:
                pass
Example #17
def bayesian_parameter_optimisation(tree,
                                    toolbox,
                                    max_evals_without_progress=10):
    """
    Optimises the parameters in tree with bayesian optimisation.
    Returns a copy of the tree with the updated hyperparameters.
    :param tree:
    :return:
    """
    hyperparameters, hyperparameter_indices, default_values = _get_hyperparameters_from_tree(
        tree)

    if not hyperparameters:
        # Can't optimise a tree with no tunable args, so just return a copy of the original tree
        return toolbox.clone(tree)

    print("Original tree", tree)

    # Start the search at the existing values rather than randomly
    trials = generate_trials_to_calculate([default_values])

    # Each time we do bayesian optimisation we should use a new random seed to prevent overfitting
    # to a particular split
    seed = random.randint(0, 1000)

    stopping_criteria_met = False
    optimised_params = space_eval(hyperparameters, default_values)

    n_iters_without_progress = 0
    best_loss = inf

    # Run one iteration of bayesian optimisation until the stopping criteria are met (timeout or no improvement)
    while not stopping_criteria_met:
        try:
            # A single iteration of bayesian optimisation
            best = fmin(fn=partial(_objective_function, tree, toolbox,
                                   hyperparameter_indices, seed),
                        space=hyperparameters,
                        algo=tpe.suggest,
                        max_evals=len(trials) + 1,
                        trials=trials,
                        show_progressbar=False)
            optimised_params = space_eval(hyperparameters, best)

            # Check if progress was made
            current_loss = trials.losses()[-1]

            if current_loss >= best_loss:
                n_iters_without_progress += 1
            else:
                n_iters_without_progress = 0

            best_loss = min(current_loss, best_loss)

        except TimeoutError:
            # Ran out of time while optimising. Break out of loop and return best we have
            break

        stopping_criteria_met = n_iters_without_progress >= max_evals_without_progress

    tree = _fill_with_hyperparameters(tree, toolbox, hyperparameter_indices,
                                      optimised_params)
    print("Optimised tree", tree)
    return tree
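The max_evals=len(trials) + 1 idiom above is what makes each fmin call run exactly one new evaluation while reusing the accumulated Trials object. A stripped-down sketch of just that pattern, with a toy objective and space standing in for the tree-optimisation code:

from hyperopt import Trials, fmin, hp, tpe

trials = Trials()
for _ in range(5):
    best = fmin(
        fn=lambda x: x ** 2,              # toy objective
        space=hp.uniform("x", -3.0, 3.0),
        algo=tpe.suggest,
        max_evals=len(trials) + 1,        # exactly one new evaluation per call
        trials=trials,
        show_progressbar=False,
    )
print(len(trials), best)                  # 5 completed trials and the best point found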
Example #18
def fmin(
    fn,
    space,
    algo,
    max_evals,
    early_stop_round_mode_fun=None,
    early_stop_round=None,
    trials=None,
    rstate=None,
    allow_trials_fmin=False,
    pass_expr_memo_ctrl=None,
    catch_eval_exceptions=False,
    verbose=0,
    return_argmin=True,
    points_to_evaluate=None,
    max_queue_len=1,
    show_progressbar=True,
):
    """Minimize a function over a hyperparameter space.

    More realistically: *explore* a function over a hyperparameter space
    according to a given algorithm, allowing up to a certain number of
    function evaluations.  As points are explored, they are accumulated in
    `trials`.


    Parameters
    ----------

    fn : callable (trial point -> loss)
        This function will be called with a value generated from `space`
        as the first and possibly only argument.  It can return either
        a scalar-valued loss, or a dictionary.  A returned dictionary must
        contain a 'status' key with a value from `STATUS_STRINGS`, must
        contain a 'loss' key if the status is `STATUS_OK`. Particular
        optimization algorithms may look for other keys as well.  An
        optional sub-dictionary associated with an 'attachments' key will
        be removed by fmin; its contents will be available via
        `trials.trial_attachments`. The rest (usually all) of the returned
        dictionary will be stored and available later as some 'result'
        sub-dictionary within `trials.trials`.

    space : hyperopt.pyll.Apply node
        The set of possible arguments to `fn` is the set of objects
        that could be created with non-zero probability by drawing randomly
        from this stochastic program involving hp_<xxx> nodes
        (see `hyperopt.hp` and `hyperopt.pyll_utils`).

    algo : search algorithm
        This object, such as `hyperopt.rand.suggest` and
        `hyperopt.tpe.suggest` provides logic for sequential search of the
        hyperparameter space.

    max_evals : int
        Allow up to this many function evaluations before returning.

    trials : None or base.Trials (or subclass)
        Storage for completed, ongoing, and scheduled evaluation points.  If
        None, then a temporary `base.Trials` instance will be created.  If
        a trials object, then that trials object will be affected by
        side-effect of this call.

    rstate : numpy.RandomState, default numpy.random or `$HYPEROPT_FMIN_SEED`
        Each call to `algo` requires a seed value, which should be different
        on each call. This object is used to draw these seeds via `randint`.
        The default rstate is
        `numpy.random.RandomState(int(env['HYPEROPT_FMIN_SEED']))`
        if the `HYPEROPT_FMIN_SEED` environment variable is set to a non-empty
        string, otherwise np.random is used in whatever state it is in.

    verbose : int
        Print out some information to stdout during search.

    allow_trials_fmin : bool, default False
        If True and the `trials` object provides its own `fmin` method, that
        method is used to run the search instead of the logic below.

    pass_expr_memo_ctrl : bool, default False
        If set to True, `fn` will be called in a different more low-level
        way: it will receive raw hyperparameters, a partially-populated
        `memo`, and a Ctrl object for communication with this Trials
        object.

    return_argmin : bool, default True
        If set to True, return `trials.argmin`. If set to False, return the
        best point evaluated through `space_eval` (or None if no trial
        succeeded), which can be useful for example if it is expected that
        `len(trials)` may be zero after fmin, and therefore `trials.argmin`
        would be undefined.

    points_to_evaluate : list, default None
        Only works if trials=None. If points_to_evaluate is None, the
        trials are evaluated normally. If a list of dicts is passed, the
        given points are evaluated before optimisation starts, so the overall
        number of optimisation steps is len(points_to_evaluate) + max_evals.
        Each element of this list must be a dictionary with variable
        names as keys and variable values as dict values. An example
        points_to_evaluate value is [{'x': 0.0, 'y': 0.0}, {'x': 1.0, 'y': 2.0}]

    max_queue_len : integer, default 1
        Sets the queue length generated in the dictionary or trials. Increasing this
        value can slightly speed up parallel simulations, which sometimes lag
        on suggesting a new trial.

    show_progressbar : bool, default True
        Show a progressbar.

    Returns
    -------

    argmin : dictionary
        If return_argmin is True, returns `trials.argmin`, which is a dictionary.  Otherwise
        this function returns the result of `hyperopt.space_eval(space, trials.argmin)` if there
        were successful trials. This object shares the same structure as the space passed.
        If there were no successful trials, it returns None.
    """
    if rstate is None:
        env_rseed = os.environ.get('HYPEROPT_FMIN_SEED', '')
        if env_rseed:
            rstate = np.random.RandomState(int(env_rseed))
        else:
            rstate = np.random.RandomState()

    if allow_trials_fmin and hasattr(trials, 'fmin'):
        return trials.fmin(
            fn,
            space,
            algo=algo,
            max_evals=max_evals,
            max_queue_len=max_queue_len,
            rstate=rstate,
            pass_expr_memo_ctrl=pass_expr_memo_ctrl,
            verbose=verbose,
            catch_eval_exceptions=catch_eval_exceptions,
            return_argmin=return_argmin,
            show_progressbar=show_progressbar,
        )

    if trials is None:
        if points_to_evaluate is None:
            trials = base.Trials()
        else:
            assert type(points_to_evaluate) == list
            trials = generate_trials_to_calculate(points_to_evaluate)

    domain = base.Domain(fn, space, pass_expr_memo_ctrl=pass_expr_memo_ctrl)

    if early_stop_round is not None:
        # Fall back to a constant mode function when no custom one is supplied.
        if early_stop_round_mode_fun is None:
            early_stop_round_mode_fun = lambda x: 1

    rval = FMinIter(algo,
                    domain,
                    trials,
                    max_evals=max_evals,
                    early_stop_round_mode_fun=early_stop_round_mode_fun,
                    early_stop_round=early_stop_round,
                    rstate=rstate,
                    verbose=verbose,
                    max_queue_len=max_queue_len,
                    show_progressbar=show_progressbar)
    rval.catch_eval_exceptions = catch_eval_exceptions
    rval.exhaust()
    if return_argmin:
        if len(trials.trials) == 0:
            raise Exception(
                "There are no evaluation tasks, cannot return argmin of task losses."
            )
        return trials.argmin
    elif len(trials) > 0:
        # Only if there are some successful trial runs, return the best point in the evaluation space
        return space_eval(space, trials.argmin)
    else:
        return None
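
A minimal usage sketch of the fmin variant defined above, assuming it is in scope; the objective and search space are made up for illustration, and early_stop_round / early_stop_round_mode_fun are arguments specific to this variant (not stock hyperopt), with semantics determined by the FMinIter they are forwarded to:

import numpy as np
from hyperopt import STATUS_OK, hp, tpe

def objective(params):
    # Hypothetical quadratic objective returning the dictionary form described in the docstring.
    loss = (params['x'] - 1.0) ** 2 + (params['y'] + 2.0) ** 2
    return {'loss': loss, 'status': STATUS_OK}

space = {'x': hp.uniform('x', -5, 5), 'y': hp.uniform('y', -5, 5)}

best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            early_stop_round=10,                        # assumed: early-stopping window in rounds
            points_to_evaluate=[{'x': 0.0, 'y': 0.0}],  # evaluated before TPE suggests anything
            rstate=np.random.RandomState(42),
            show_progressbar=False)
print(best)  # trials.argmin, since return_argmin defaults to True
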
Example #19
0
def opt_method(data, initializers, resdir, max_evals):

    dataset = data.dataset_name
    __location__ = os.path.realpath(os.path.join(os.getcwd(),
                                                 os.path.dirname(__file__)))
    datapath = data.data_path
    init_endmembers = data.init_endmembers

    n_band, n_end = init_endmembers.shape

    def objective_func(data, hyperpars):
        data.load_data(normalize=True, shuffle=False)

        activation = LeakyReLU(0.2)

        unmixer = Autoencoder(n_end=n_end, data=my_data, activation=activation,
                              optimizer=hyperpars['optimizer'], l2=hyperpars['l2'], l1=hyperpars['l1'], plot_every_n=0)

        unmixer.create_model(SAD)
        my_data.make_patches(1, num_patches=hyperpars['num_patches'], use_orig=True)
        history = unmixer.fit(epochs=100, batch_size=hyperpars['batch_size'])

        endmembers = unmixer.get_endmembers().transpose()
        abundances = unmixer.get_abundances()
        Y = np.transpose(data.orig_data)
        GT = np.transpose(data.GT)
        sad, idx_org, idx_hat, sad_k_m, s0 = calc_SAD_2(GT, endmembers)
        MSE = mse(Y, endmembers, np.transpose(abundances))
        abundances = abundances.reshape(data.n_rows, data.n_cols, endmembers.shape[1]).transpose((1, 0, 2))
        resdict = {'endmembers': endmembers,
                   'abundances': abundances,
                   'loss': history.history['loss'],
                   'SAD': sad,
                   'MSE': MSE}

        del unmixer
        K.clear_session()

        return {'loss': sad, 'status': STATUS_OK, 'attachments': resdict}


    space = {
        'optimizer': {'class_name': 'RMSprop', 'config': {'lr': hp.qloguniform('ACCESS_' + dataset + '_lr', -16, -1, 1e-7)}},
        'l1': hp.qloguniform('ACCESS_' + dataset + '_l1', -16, 2, 1e-7),
        'l2': hp.qloguniform('ACCESS_' + dataset + '_l2', -16, 2, 1e-7),
        'num_patches': scope.int(hp.quniform('ACCESS_' + dataset + '_num_patches', 8, 8192, 1)),
        'batch_size': scope.int(hp.quniform('ACCESS_' + dataset + '_batch_size', 1, 50, 1)),
    }

    my_data = HSI(datapath)

    trials = generate_trials_to_calculate([{
        'ACCESS_' + dataset + '_lr': 0.001,
        'ACCESS_' + dataset + '_l1': 0,
        'ACCESS_' + dataset + '_l2': 0,
        'ACCESS_' + dataset + '_num_patches': 1028,
        'ACCESS_' + dataset + '_batch_size': 32
    }])

    pars = fmin(lambda x: objective_func(my_data, x),
                space=space,
                trials=trials,
                algo=tpe.suggest,
                max_evals=max_evals,
                rstate=np.random.RandomState(random_seed))

    improvements = reduce(improvement_only, trials.losses(), [])

    save_config(resdir, dataset, pars, trials.average_best_error())

    return improvements, pars, trials
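
opt_method folds the loss history through improvement_only, which is not shown in this listing; a hypothetical reducer of that name could simply carry the running best loss forward, producing a monotonically non-increasing curve:

from functools import reduce

def improvement_only(acc, loss):
    # Hypothetical implementation: skip unevaluated trials and keep the best loss so far.
    if loss is None:
        return acc
    if not acc or loss < acc[-1]:
        return acc + [loss]
    return acc + [acc[-1]]

print(reduce(improvement_only, [0.9, 0.7, 0.8, None, 0.5], []))  # [0.9, 0.7, 0.7, 0.5]
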
Example #20
0
    def runOptim(self, budget, b, initData=None, initResult=None):
        if initData is not None and initResult is not None:
            Xinit = initData[:]
            Yinit = initResult[:]
        else:
            # set a random number
            np.random.seed(self.trial_num)
            # Xinit: points in BO format and Pinit: points in TPE format
            # Yinit: max/auc values and Finit: min/loss values
            Xinit, Pinit, Yinit, Finit = self.initialize_all_methods()

        # get variable names
        bounds_keys = list(self.bounds.keys())
        # compute besty of initial points
        besty = np.max(Yinit)
        # compute bestx of initial points
        besty_idx = np.argmax(Yinit)
        bestp = Pinit[besty_idx]
        # get selected arm for categorical variable (the first parameter)
        first_para_name = bounds_keys[0]
        bestarm = bestp[first_para_name]
        print("n_init: {}, bestarm: {}, bestx: {}, besty: {}".format(len(Finit), bestarm, bestp, round(besty, 4)))

        # store the result for this trial (becoming one col of matrix best_vals)
        result_list = []
        # store the selected arms of initial points
        arm_list = []
        for b_ele_idx in range(b):
            # store selected arms in all iterations for all trials
            self.arm_recommendations.append(bestarm)
            arm_list.append(bestarm)
        # store the bestx and besty of initial points
        result_list.append([0, arm_list, bestp, besty])
        # use initial points for TPE
        tpe_algorithm = partial(tpe.suggest, n_startup_jobs=len(Finit))
        # create trials with initial points
        trials = generate_trials_to_calculate(Pinit, Finit)

        # store best point and best function value so far
        bestx_sofar = []
        besty_sofar = []
        if b > 1:
            budget = int(budget / b) + 1
        for t in range(1, budget):
            print("iteration: {}".format(t))
            # store <batch_size> arms selected in this iteration
            arm_list = []
            # store suggested data points in batch
            x_batch = np.zeros((b, self.n_dim))  # batch_size x dim of a data point
            y_batch = np.zeros(b) # store max function values
            f_batch = np.zeros(b) # store min function values
            # in an iteration, suggest a batch of points
            # only after selecting all points in the batch, we can compute their function values
            for b_ele_idx in range(b):
                # run TPE to suggest the next point which is stored in trials
                best_params = fmin(self.f, self.bounds, algo=tpe_algorithm,
                                   max_evals=len(trials) + 1, trials=trials)
                # get best_x and best_y so far
                bestx_sofar.append(best_params)
                # max/auc of objective function
                best_result = trials.best_trial["result"]["loss"]
                if self.f_type == "func":
                    best_result = -1.0 * best_result
                elif self.f_type == "class":
                    best_result = 1.0 - best_result
                besty_sofar.append(best_result)
                # get selected arm for categorical variable (the first parameter)
                first_para_name = bounds_keys[0]
                arm = int(trials.vals[first_para_name][-1])
                # store selected arms in all iterations for all trials
                self.arm_recommendations.append(arm)
                arm_list.append(arm)
                # get other variables
                x_next = []
                for d in range(1, self.n_dim):
                    para_name = bounds_keys[d]
                    x_next.append(trials.vals[para_name][-1])
                x_batch[b_ele_idx, :] = [arm] + x_next
                # get function value of the next point (indeed, we don't know this function value)
                y_next = trials.results[-1]["loss"]
                f_batch[b_ele_idx] = y_next  # store min function value
                if self.f_type == "func":
                    y_next = -1.0 * y_next
                elif self.f_type == "class":
                    y_next = 1.0 - y_next
                y_batch[b_ele_idx] = y_next  # store max function value
                print("arm_next: {}, x_next: {}, y_next: {}".format(arm, np.around(x_next, 4), round(y_next, 4)))
                if b > 1:
                    # reset trials to suggest the next batch element
                    trials = generate_trials_to_calculate(Pinit, Finit)
            # end batch

            if b > 1:
                # update the data with suggested points in batch
                for ele_idx, x in enumerate(x_batch):
                    point = {bounds_keys[idx]: val for idx, val in enumerate(x)}
                    point[bounds_keys[0]] = int(point[bounds_keys[0]])
                    Pinit.append(point)
                    Finit.append(f_batch[ele_idx])
                # create trials with new batch elements
                trials = generate_trials_to_calculate(Pinit, Finit)

            # instead of computing function values of batch elements,
            # we already have them in y_batch

            for b_ele_idx in range(b):
                # get the best function value till now
                end = (t - 1) * b + (b_ele_idx + 1)
                besty = max(besty_sofar[:end])
                bestx = bestx_sofar[np.argmax(besty_sofar[:end])]
                # get selected arm for categorical variable (the first parameter)
                first_para_name = bounds_keys[0]
                bestarm = bestx[first_para_name]
                print("bestarm: {}, bestx: {}, besty: {}".format(bestarm, bestx, round(besty, 4)))
                # store the results of this iteration
                result_list.append([t, arm_list, bestx, besty])
        if b > 1:
            result_list = result_list[:-1]
        print("Finished ", self.method, " for trial: ", self.trial_num)
        # store the result for all iterations in this trial
        df = pd.DataFrame(result_list, columns=["iter", "arm_list", "best_input", "best_value"])

        return df
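
runOptim reads each newly suggested point back out of trials.vals and its loss out of trials.results rather than relying on the fmin return value. A small sketch of that access pattern against stock hyperopt, with a made-up two-variable space in which the first variable plays the role of the categorical arm:

from hyperopt import Trials, fmin, hp, tpe

space = {'arm': hp.randint('arm', 3),        # categorical "arm", as in the first bound above
         'x': hp.uniform('x', -1.0, 1.0)}

trials = Trials()
fmin(fn=lambda p: p['x'] ** 2,
     space=space,
     algo=tpe.suggest,
     max_evals=len(trials) + 1,              # suggest and evaluate exactly one point
     trials=trials,
     show_progressbar=False)

# Raw, label-indexed values of the latest suggestion and its recorded loss.
arm = int(trials.vals['arm'][-1])
x_next = trials.vals['x'][-1]
loss = trials.results[-1]['loss']
print(arm, x_next, loss)
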