예제 #1
0
    def _run_search(self, evaluate_candidates):
        """Run a random warm-up phase followed by surrogate-guided search.

        The iteration budgets stored on the instance are adjusted in place
        before searching: ``_n_initial_points`` is reduced by the number of
        entries in ``_X_init``, and ``_n_iter`` is reduced by the remaining
        (non-negative) warm-up count and then floor-divided by
        ``acq_kwargs['n_pick']``.

        Parameters
        ----------
        evaluate_candidates : callable
            Called with an iterable of candidates; its return value is
            passed to ``_save_cv_results`` and ``_Xy_observations``.

        Notes
        -----
        When the warm-up loop does not run, the first surrogate fit is fed
        the initial empty ``results`` dict.
        """
        latest = {}

        # Re-budget: points already supplied count against the warm-up,
        # and the warm-up counts against the total number of iterations.
        self._n_initial_points -= len(self._X_init)
        warmup_cost = max(0, self._n_initial_points)
        self._n_iter = (self._n_iter - warmup_cost) // self.acq_kwargs['n_pick']

        # Phase 1: one random draw per warm-up step.
        for _warmup_step in range(self._n_initial_points):
            random_draw = ParameterSampler(self.param_distributions,
                                           1,
                                           random_state=self.random_state)
            latest = evaluate_candidates(random_draw)
            self._save_cv_results(latest, self.file)

        # Phase 2: refit the surrogate on everything observed so far and let
        # the acquirer propose the next candidates.
        for _bo_step in range(self._n_iter):
            observed_X, observed_y = self._Xy_observations(latest)
            surrogate = self._gp_fit(observed_X, observed_y)
            acquirer = ParameterAcquirer(
                surrogate=surrogate,
                param_distributions=self.param_distributions,
                acq_kwargs=self.acq_kwargs,
                random_state=self.random_state)
            latest = evaluate_candidates(acquirer)
            self._save_cv_results(latest, self.file)
예제 #2
0
    def _run_search(self, evaluate_candidates):
        """Evaluate ``_n_iter`` single random draws, saving after each one.

        Parameters
        ----------
        evaluate_candidates : callable
            Called with a one-element ``ParameterSampler``; its return value
            is handed to ``_save_cv_results`` together with ``self.file``.
        """
        for _step in range(self._n_iter):
            single_draw = ParameterSampler(self.param_distributions,
                                           1,
                                           random_state=self.random_state)
            outcome = evaluate_candidates(single_draw)
            # Persist after every evaluation so partial progress is kept.
            self._save_cv_results(outcome, self.file)
예제 #3
0
    def __iter__(self):
        """Yield the candidates that ``pick_func`` selects from a random pool.

        A pool of ``acq_kwargs['n_candidates']`` parameter settings is drawn
        with ``ParameterSampler``, converted with ``_params_to_2darray`` and
        passed to ``pick_func`` along with the surrogate and ``acq_kwargs``.
        The indices it returns are used to look up the yielded settings.
        """
        pool = list(ParameterSampler(self.param_distributions,
                                     self.acq_kwargs['n_candidates'],
                                     self.random_state))
        pool_matrix = _params_to_2darray(pool)

        chosen = self.pick_func(self.surrogate, pool_matrix, self.acq_kwargs)

        for position in chosen:
            yield pool[position]
예제 #4
0
    def fit(self, X, y=None, groups=None):
        """Fit the estimator on randomly drawn parameter settings.

        Builds a ``ParameterSampler`` from ``param_distributions``,
        ``n_iter`` and ``random_state`` and delegates to ``_fit``.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset
            into train/test set.

        Returns
        -------
        Whatever ``self._fit`` returns.
        """
        return self._fit(
            X, y, groups,
            ParameterSampler(self.param_distributions,
                             self.n_iter,
                             random_state=self.random_state))
예제 #5
0
    def _random_search(self, X, y, n_iter, seed=None):
        """Evaluate up to ``n_iter`` randomly sampled settings.

        Settings are drawn from ``self.decoded_params`` with
        ``ParameterSampler``, which is given no ``random_state`` here and so
        relies on the global NumPy RNG; that is why the global state is
        seeded (when ``seed`` is given) and restored afterwards.

        Parameters
        ----------
        X, y
            Data forwarded unchanged to ``self.optimizer.evaluate``.
        n_iter : int
            Number of settings to sample.
        seed : int, optional
            If not None, seed for the global NumPy RNG for the duration of
            this call.

        Notes
        -----
        The loop stops early once ``self._over_time()`` is true. The global
        RNG state and ``optimizer.max_eval_time`` are restored even if an
        evaluation raises.
        """
        # Snapshot the global RNG so this call leaves it as it found it.
        state = np.random.get_state()

        try:
            if seed is not None:
                np.random.seed(seed)

            say("Randomized search with {} iterations".format(n_iter),
                self.verbose,
                style="title")
            samples = list(ParameterSampler(self.decoded_params, n_iter))

            # Iterate the samples themselves rather than range(n_iter), so a
            # sampler that yields fewer settings cannot cause an IndexError.
            for i, setting in enumerate(tqdm(samples, ascii=True, leave=True)):

                # Stop loop if we are out of time
                if self._over_time():
                    break

                # Near the end of the run budget, cap the per-evaluation
                # limit at the time that is actually left.
                self.optimizer.max_eval_time = int(
                    min(self.max_eval_time, self._get_remaining_time()))

                say("Iteration {}/{}.".format(i + 1, n_iter),
                    self.verbose,
                    style="subtitle")
                self.optimizer.evaluate(setting, X, y)

                # Manually add a maximize time of 0, since we don't use the
                # maximize method
                self.optimizer.maximize_times.append(0)
        finally:
            # Restore random state
            np.random.set_state(state)

            # Restore max_eval_time
            self.optimizer.max_eval_time = self.max_eval_time
예제 #6
0
    def fit(self, X, y, groups=None):
        """Run the successive-halving search and optionally refit the winner.

        Sets ``scorer_``, ``cv_results_``, ``best_index_`` and ``n_splits_``
        on the instance, plus ``best_estimator_`` when ``self.refit`` is
        truthy.

        Parameters
        ----------
        X, y
            Training data; ``y`` may be None, in which case the refit is
            called with ``X`` only.
        groups : optional
            Forwarded to the cross-validation splitter and to
            ``_successive_halving``.

        Returns
        -------
        self
        """
        arm_count = self.eta**(self.num_steps - 1)
        # Only used for the verbose summary below.
        arm_sampler = ParameterSampler(self.param_distributions,
                                       arm_count,
                                       random_state=self.random_state)

        splitter = check_cv(self.cv, y,
                            classifier=is_classifier(self.estimator))
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        X, y, groups = indexable(X, y, groups)
        fold_count = splitter.get_n_splits(X, y, groups)
        if self.verbose > 0 and isinstance(arm_sampler, Sized):
            candidate_count = len(arm_sampler)
            summary = ("Fitting {0} folds for each of {1} candidates, totalling"
                       " {2} fits")
            print(summary.format(fold_count, candidate_count,
                                 candidate_count * fold_count))

        template = clone(self.estimator)

        last_bracket = self.num_steps - 1
        results, best_index, best_parameters = self._successive_halving(
            X, y, groups, splitter, self.eta, last_bracket, last_bracket)

        self.cv_results_ = results
        self.best_index_ = best_index
        self.n_splits_ = fold_count

        if not self.refit:
            return self

        # Refit the best configuration on the entire dataset; clone first to
        # work around broken estimators.
        winner = clone(template).set_params(**best_parameters)
        if y is None:
            winner.fit(X, **self.fit_params)
        else:
            winner.fit(X, y, **self.fit_params)
        self.best_estimator_ = winner
        return self
예제 #7
0
    def _run_search(self, evaluate_candidates):
        """Run hyperband: several brackets of successive halving.

        For each bracket ``s`` (from ``s_max`` down to 0) a fresh set of
        configurations is sampled, then repeatedly evaluated with a growing
        value of ``self.resource_param`` while only the best-ranked
        configurations are kept for the next iteration.

        Parameters
        ----------
        evaluate_candidates : callable
            Called with a list of parameter dicts. Its return value is
            indexed with ``'rank_test_<metric>'`` and ``'params'``.

        Raises
        ------
        ValueError
            If ``self.skip_last`` is greater than ``s_max``.
        """
        self._validate_input()

        # Index of the most aggressive bracket:
        # floor(log(max_iter / min_iter) / log(eta)).
        s_max = int(np.floor(np.log(self.max_iter / self.min_iter) / np.log(self.eta)))
        # Budget term used below to size each bracket.
        B = (s_max + 1) * self.max_iter

        # Column suffix of the rank used to order configurations.
        refit_metric = self.refit if self.multimetric_ else 'score'
        # One shared random state so successive brackets draw different
        # configurations.
        random_state = check_random_state(self.random_state)

        if self.skip_last > s_max:
            raise ValueError('skip_last is higher than the total number of rounds')

        for round_index, s in enumerate(reversed(range(s_max + 1))):
            # initial number of configurations in this bracket
            n = int(np.ceil(int(B / self.max_iter / (s + 1)) * np.power(self.eta, s)))

            # initial number of iterations per config
            r = self.max_iter / np.power(self.eta, s)
            configurations = list(ParameterSampler(param_distributions=self.param_distributions,
                                                   n_iter=n,
                                                   random_state=random_state))

            if self.verbose > 0:
                print('Starting bracket {0} (out of {1}) of hyperband'
                      .format(round_index + 1, s_max + 1))

            # The last ``skip_last`` halving iterations of the bracket are
            # not run.
            for i in range((s + 1) - self.skip_last):

                n_configs = np.floor(n / np.power(self.eta, i))  # n_i
                n_iterations = int(r * np.power(self.eta, i))  # r_i
                n_to_keep = int(np.floor(n_configs / self.eta))

                if self.verbose > 0:
                    msg = ('Starting successive halving iteration {0} out of'
                           ' {1}. Fitting {2} configurations, with'
                           ' resource_param {3} set to {4}')

                    if n_to_keep > 0:
                        msg += ', and keeping the best {5} configurations.'

                    # The sixth argument is ignored by str.format when the
                    # '{5}' placeholder was not appended.
                    msg = msg.format(i + 1, s + 1, len(configurations),
                                     self.resource_param, n_iterations,
                                     n_to_keep)
                    print(msg)

                # Set the cost parameter for every configuration
                # (on a deep copy, so ``configurations`` itself is not
                # modified here).
                parameters = copy.deepcopy(configurations)
                for configuration in parameters:
                    configuration[self.resource_param] = n_iterations

                results = evaluate_candidates(parameters)

                if n_to_keep > 0:
                    # Order the returned params by ascending rank and keep
                    # the first n_to_keep for the next iteration.
                    top_configurations = [x for _, x in sorted(zip(results['rank_test_%s' % refit_metric],
                                                                   results['params']),
                                                               key=lambda x: x[0])]

                    configurations = top_configurations[:n_to_keep]

            # NOTE(review): this message is printed once per bracket and is
            # not gated on self.verbose - confirm that is intended.
            if self.skip_last > 0:
                print('Skipping the last {0} successive halving iterations'
                      .format(self.skip_last))
예제 #8
0
 def _get_param_iterator(self):
     """Build the ParameterSampler for this search.

     The sampler is created from ``param_distributions``, ``n_iter`` and
     ``random_state`` as currently set on the instance.
     """
     sampler = ParameterSampler(self.param_distributions,
                                self.n_iter,
                                random_state=self.random_state)
     return sampler
예제 #9
0
 def _run_search(self, evaluate_candidates):
     """Evaluate ``n_iter`` candidates sampled from ``param_distributions``.

     All candidates are handed to ``evaluate_candidates`` in a single call;
     its return value is not used.
     """
     candidates = ParameterSampler(self.param_distributions,
                                   self.n_iter,
                                   random_state=self.random_state)
     evaluate_candidates(candidates)
예제 #10
0
    def _successive_halving(self,
                            X,
                            y,
                            groups,
                            cv,
                            eta,
                            hyperband_s,
                            hyperband_smax=None):
        """Run one successive-halving bracket on growing subsamples of the data.

        Parameters
        ----------
        X, y, groups
            Full dataset; each iteration evaluates on a subsample drawn
            without replacement. ``groups`` may be None.
        cv
            Forwarded unchanged to ``_do_iteration``.
        eta : number
            Halving rate; the subsample size at iteration ``i`` is
            ``len(X) * eta ** -(hyperband_s - i)``.
        hyperband_s : int
            Bracket index; the bracket runs ``hyperband_s + 1`` iterations.
        hyperband_smax : int, optional
            Largest bracket index; sets ``B = hyperband_smax + 1``. When
            None, ``B = hyperband_s``.
            NOTE(review): the two cases differ by one - confirm the None
            fallback is intended.

        Returns
        -------
        results : dict
            Per-key concatenation of every iteration's results.
        best_index : int
            Best index of the last iteration, offset by the number of arms
            already recorded in ``results``.
        best_parameters
            Best parameters reported by the last iteration.
        """
        import numbers

        results = dict()
        best_index = None
        best_parameters = None

        hyperband_B = hyperband_smax + 1 if hyperband_smax is not None else hyperband_s
        hyperband_n = math.ceil(hyperband_B * eta**hyperband_s /
                                (hyperband_s + 1))
        print('- bracket %d; B = %d, n = %d' %
              (hyperband_s, hyperband_B, hyperband_n))

        # Offset integer seeds by the bracket index so each bracket samples
        # different arms. None or a RandomState instance cannot be offset
        # arithmetically, so it is passed through as-is.
        if isinstance(self.random_state, numbers.Integral):
            sampler_seed = self.random_state + hyperband_s
        else:
            sampler_seed = self.random_state
        parameter_iterable = ParameterSampler(self.param_distributions,
                                              hyperband_n,
                                              random_state=sampler_seed)

        for hyperband_i in range(hyperband_s + 1):
            sample_size = int(len(X) * (eta**-(hyperband_s - hyperband_i)))

            # Number of arms already recorded, used to offset this
            # iteration's best index into the concatenated results.
            arms_pulled = 0
            if 'mean_test_score' in results:
                arms_pulled = len(results['mean_test_score'])

            # Both branches pass random_state so subsampling is reproducible
            # whether or not groups are supplied.
            if groups is not None:
                X_resampled, y_resampled, groups_resampled = resample(
                    X,
                    y,
                    groups,
                    n_samples=sample_size,
                    replace=False,
                    random_state=self.random_state)
            else:
                X_resampled, y_resampled = resample(
                    X,
                    y,
                    n_samples=sample_size,
                    replace=False,
                    random_state=self.random_state)
                groups_resampled = None

            print('-- iteration %d sample size %d arms %d' %
                  (hyperband_i, sample_size, len(parameter_iterable)))
            res = self._do_iteration(X_resampled, y_resampled,
                                     groups_resampled, sample_size,
                                     parameter_iterable, cv, eta)
            # parameter_iterable is replaced by what _do_iteration hands
            # back and feeds the next iteration.
            (results_iteration, parameter_iterable,
             best_index_iteration, best_parameters_iteration) = res

            # TODO: This assumes we always take the index from the highest bracket.
            best_index = arms_pulled + best_index_iteration
            best_parameters = best_parameters_iteration

            for key, values in results_iteration.items():
                if key not in results:
                    results[key] = values
                else:
                    results[key] = np.append(results[key], values)

        return results, best_index, best_parameters
예제 #11
0
import pandas as pd
import json


from sklearn.svm import SVC
import scipy
import glob


# Toy, linearly separable two-class dataset used to exercise the search.
X = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
y = [0, 0, 0, 1, 1, 1]
groups = None

# Number of parameter settings to sample.
n_iter = 4

# Search space handed to the sampler; kept under its own name so the
# sampler below does not overwrite it.
param_distributions = {'kernel': ['linear'], 'C': [0.5e8, 1e8], 'coef0': [0.5, 1],
                       'FeatSel_Variance': ['True']}

# Use n_iter rather than a repeated literal so the two cannot drift apart.
parameter_iterable = ParameterSampler(param_distributions, n_iter)


def fit(X, y, groups, parameter_iterable):
    """Actual fitting,  performing the search over parameters."""
    estimator = SVC(class_weight='balanced', probability=True)
    cv = 2
    scoring = 'f1_weighted'
    verbose = True
    fit_params = None
    return_train_score = True
    error_score = 'raise'

    estimator = estimator
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer_ = check_scoring(estimator, scoring=scoring)