def _run_search(self, evaluate_candidates): results = {} # adjust number of iterations n_init = len(self._X_init) self._n_initial_points -= n_init self._n_iter -= max(0, self._n_initial_points) self._n_iter = self._n_iter // self.acq_kwargs['n_pick'] #! # Randomly sample initial points for _ in range(self._n_initial_points): results = evaluate_candidates( ParameterSampler(self.param_distributions, 1, random_state=self.random_state)) self._save_cv_results(results, self.file) # Bayesian optimization for _ in range(self._n_iter): X_obs, y_obs = self._Xy_observations(results) gp = self._gp_fit(X_obs, y_obs) results = evaluate_candidates( ParameterAcquirer(surrogate=gp, param_distributions=self.param_distributions, acq_kwargs=self.acq_kwargs, random_state=self.random_state)) self._save_cv_results(results, self.file)
def _run_search(self, evaluate_candidates):
    """Evaluate one randomly drawn parameter setting per iteration.

    Each iteration draws a single candidate from ``param_distributions``,
    evaluates it, and persists the accumulated cv_results to ``self.file``.
    """
    iteration = 0
    while iteration < self._n_iter:
        sampler = ParameterSampler(self.param_distributions, 1,
                                   random_state=self.random_state)
        cv_results = evaluate_candidates(sampler)
        self._save_cv_results(cv_results, self.file)
        iteration += 1
def __iter__(self):
    """Yield the candidate parameter dicts chosen by the picker function.

    Draws ``n_candidates`` settings at random, converts them to a 2-D
    design matrix, asks ``self.pick_func`` (using the surrogate model)
    which row indices to keep, and yields those candidates in order.
    """
    candidate_list = list(
        ParameterSampler(self.param_distributions,
                         self.acq_kwargs['n_candidates'],
                         self.random_state))
    candidate_matrix = _params_to_2darray(candidate_list)
    chosen_indices = self.pick_func(self.surrogate, candidate_matrix,
                                    self.acq_kwargs)
    for position in chosen_indices:
        yield candidate_list[position]
def fit(self, X, y=None, groups=None):
    """Run fit on the estimator with randomly drawn parameters.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning.

    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset
        into train/test set.
    """
    # Draw n_iter settings up front and delegate the actual search to _fit.
    param_iterator = ParameterSampler(self.param_distributions,
                                      self.n_iter,
                                      random_state=self.random_state)
    return self._fit(X, y, groups, param_iterator)
def _random_search(self, X, y, n_iter, seed=None):
    """Evaluate ``n_iter`` randomly sampled parameter settings.

    Temporarily seeds NumPy's global RNG (when ``seed`` is given) so the
    sampling is reproducible, and restores both the RNG state and the
    optimizer's ``max_eval_time`` budget afterwards — now also when an
    evaluation raises, via try/finally (the original leaked both on error).

    Parameters
    ----------
    X, y : array-like
        Training data forwarded to ``self.optimizer.evaluate``.
    n_iter : int
        Number of random settings to draw and evaluate.
    seed : int, optional
        Seed for NumPy's global RNG; the prior state is restored on exit.
    """
    # Store random state so the global RNG is untouched from the caller's view
    state = np.random.get_state()
    if seed is not None:
        np.random.seed(seed)

    say("Randomized search with {} iterations".format(n_iter),
        self.verbose, style="title")

    # list(...) instead of a pass-through comprehension
    samples = list(ParameterSampler(self.decoded_params, n_iter))

    try:
        for i, setting in enumerate(tqdm(samples, ascii=True, leave=True)):
            # Stop loop if we are out of time
            if self._over_time():
                break

            # If we get close to the max_run_time, we set max_eval_time to
            # the remaining time
            self.optimizer.max_eval_time = int(
                min([self.max_eval_time, self._get_remaining_time()]))

            # Evaluate sample
            say("Iteration {}/{}.".format(i + 1, n_iter),
                self.verbose, style="subtitle")
            self.optimizer.evaluate(setting, X, y)

            # Manually add a maximize time of 0, since we don't use the
            # maximize method
            self.optimizer.maximize_times.append(0)
    finally:
        # Restore random state and max_eval_time even if evaluate() raised
        np.random.set_state(state)
        self.optimizer.max_eval_time = self.max_eval_time
def fit(self, X, y, groups=None):
    """Actual fitting, performing the search over parameters."""
    # Number of arms for the highest bracket: eta^(num_steps - 1).
    num_arms = self.eta**(self.num_steps - 1)
    # NOTE(review): this sampler is only consulted for the verbose fit-count
    # below; it is not passed to _successive_halving, which presumably draws
    # its own samples — verify this is intentional.
    parameter_iterable = ParameterSampler(self.param_distributions,
                                          num_arms,
                                          random_state=self.random_state)
    estimator = self.estimator
    cv = check_cv(self.cv, y, classifier=is_classifier(estimator))
    self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)
    X, y, groups = indexable(X, y, groups)
    n_splits = cv.get_n_splits(X, y, groups)
    if self.verbose > 0 and isinstance(parameter_iterable, Sized):
        n_candidates = len(parameter_iterable)
        print("Fitting {0} folds for each of {1} candidates, totalling"
              " {2} fits".format(n_splits, n_candidates,
                                 n_candidates * n_splits))
    base_estimator = clone(self.estimator)
    # Run successive halving starting at the top bracket (num_steps - 1).
    results, best_index, best_parameters = self._successive_halving(
        X, y, groups, cv, self.eta, self.num_steps - 1, self.num_steps - 1)
    self.cv_results_ = results
    self.best_index_ = best_index
    self.n_splits_ = n_splits
    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(
            **best_parameters)
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self
def _run_search(self, evaluate_candidates):
    """Run the hyperband search: one successive-halving run per bracket.

    For each bracket ``s`` (from most to least exploratory), samples ``n``
    configurations, then repeatedly evaluates them with an increasing
    ``resource_param`` budget, keeping only the top ``1/eta`` fraction
    after each halving iteration.
    """
    self._validate_input()
    # Total number of brackets - 1; derived from how many times min_iter
    # fits into max_iter by factors of eta.
    s_max = int(np.floor(np.log(self.max_iter / self.min_iter)
                         / np.log(self.eta)))
    # Total resource budget per bracket.
    B = (s_max + 1) * self.max_iter
    refit_metric = self.refit if self.multimetric_ else 'score'
    random_state = check_random_state(self.random_state)
    if self.skip_last > s_max:
        raise ValueError('skip_last is higher than the total'
                         ' number of rounds')
    # Iterate brackets from the most exploratory (s = s_max) downwards.
    for round_index, s in enumerate(reversed(range(s_max + 1))):
        # initial number of iterations per config
        n = int(np.ceil(int(B / self.max_iter / (s + 1))
                        * np.power(self.eta, s)))
        # Initial resource budget (r_0) for this bracket.
        r = self.max_iter / np.power(self.eta, s)
        configurations = list(
            ParameterSampler(param_distributions=self.param_distributions,
                             n_iter=n,
                             random_state=random_state))
        if self.verbose > 0:
            print('Starting bracket {0} (out of {1}) of hyperband'
                  .format(round_index + 1, s_max + 1))
        # Successive halving iterations within this bracket.
        for i in range((s + 1) - self.skip_last):
            n_configs = np.floor(n / np.power(self.eta, i))  # n_i
            n_iterations = int(r * np.power(self.eta, i))  # r_i
            n_to_keep = int(np.floor(n_configs / self.eta))
            if self.verbose > 0:
                msg = ('Starting successive halving iteration {0} out of'
                       ' {1}. Fitting {2} configurations, with'
                       ' resource_param {3} set to {4}')
                if n_to_keep > 0:
                    msg += ', and keeping the best {5} configurations.'
                msg = msg.format(i + 1, s + 1, len(configurations),
                                 self.resource_param, n_iterations,
                                 n_to_keep)
                print(msg)
            # Set the cost parameter for every configuration
            # (deep copy so the kept configurations are not mutated in place)
            parameters = copy.deepcopy(configurations)
            for configuration in parameters:
                configuration[self.resource_param] = n_iterations
            results = evaluate_candidates(parameters)
            if n_to_keep > 0:
                # Keep the configurations ranked best on the refit metric.
                top_configurations = [
                    x for _, x in sorted(
                        zip(results['rank_test_%s' % refit_metric],
                            results['params']),
                        key=lambda x: x[0])]
                configurations = top_configurations[:n_to_keep]
        # NOTE(review): printed unconditionally (not gated on self.verbose),
        # once per bracket — confirm this placement is intended.
        if self.skip_last > 0:
            print('Skipping the last {0} successive halving iterations'
                  .format(self.skip_last))
def _get_param_iterator(self):
    """Return a ParameterSampler over the configured distributions."""
    sampler = ParameterSampler(self.param_distributions,
                               self.n_iter,
                               random_state=self.random_state)
    return sampler
def _run_search(self, evaluate_candidates):
    """Search n_iter candidates from param_distributions"""
    candidate_source = ParameterSampler(self.param_distributions,
                                        self.n_iter,
                                        random_state=self.random_state)
    evaluate_candidates(candidate_source)
def _successive_halving(self, X, y, groups, cv, eta, hyperband_s,
                        hyperband_smax=None):
    """Run one successive-halving bracket over subsampled training data.

    Returns a tuple ``(results, best_index, best_parameters)`` where
    ``results`` accumulates the per-iteration cv_results arrays.
    """
    results = dict()
    best_index = None
    hyperband_B = (hyperband_smax + 1 if hyperband_smax is not None
                   else hyperband_s)
    # NOTE(review): bare debug print left in — consider gating on verbosity.
    print(hyperband_B, eta, hyperband_s, (hyperband_s + 1))
    # Initial number of arms (configurations) for this bracket.
    hyperband_n = math.ceil(hyperband_B * eta**hyperband_s
                            / (hyperband_s + 1))
    print('- bracket %d; B = %d, n = %d'
          % (hyperband_s, hyperband_B, hyperband_n))
    # NOTE(review): self.random_state + hyperband_s assumes random_state is
    # an int (fails for None or a RandomState instance) — TODO confirm.
    parameter_iterable = ParameterSampler(
        self.param_distributions, hyperband_n,
        random_state=self.random_state + hyperband_s)
    for hyperband_i in range(0, hyperband_s + 1):
        # Sample size grows by a factor of eta each halving iteration.
        sample_size = int(len(X) * (eta**-(hyperband_s - hyperband_i)))
        # Offset of this iteration's rows within the accumulated results.
        arms_pulled = 0
        if 'mean_test_score' in results:
            arms_pulled = len(results['mean_test_score'])
        if groups is not None:
            X_resampled, y_resampled, groups_resampled = resample(
                X, y, groups, n_samples=sample_size, replace=False,
                random_state=self.random_state)
        else:
            X_resampled, y_resampled = resample(
                X, y, n_samples=sample_size, replace=False)
            groups_resampled = None
        print('-- iteration %d sample size %d arms %d'
              % (hyperband_i, sample_size, len(parameter_iterable)))
        # _do_iteration returns the surviving parameter iterable for the
        # next halving round along with this round's results.
        res = self._do_iteration(X_resampled, y_resampled,
                                 groups_resampled, sample_size,
                                 parameter_iterable, cv, eta)
        results_iteration, parameter_iterable, best_index_iteration, \
            best_parameters_iteration = res
        # TODO: This assumes we always take the index from the highest
        # bracket.
        best_index = arms_pulled + best_index_iteration
        best_parameters = best_parameters_iteration
        # Append this iteration's result arrays onto the accumulated ones.
        for key, values in results_iteration.items():
            if key not in results:
                results[key] = values
            else:
                results[key] = np.append(results[key], values)
    return results, best_index, best_parameters
import pandas as pd import json from sklearn.svm import SVC import scipy import glob X = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] y = [0, 0, 0, 1, 1, 1] groups = None parameter_iterable = {'kernel': ['linear'], 'C': [0.5e8, 1e8], 'coef0': [0.5, 1], 'FeatSel_Variance': ['True']} n_iter = 4 parameter_iterable = ParameterSampler(parameter_iterable, 4) def fit(X, y, groups, parameter_iterable): """Actual fitting, performing the search over parameters.""" estimator = SVC(class_weight='balanced', probability=True) cv = 2 scoring = 'f1_weighted' verbose = True fit_params = None return_train_score = True error_score = 'raise' estimator = estimator cv = check_cv(cv, y, classifier=is_classifier(estimator)) scorer_ = check_scoring(estimator, scoring=scoring)