def optimize(self, X: csr_matrix, y: numpy.ndarray) -> Tuple[float, Mapping[str, Any]]:
    """
    Conduct a hyper-parameter search to find the best base model given the data.

    :param X: Sparse feature matrix.
    :param y: Labels numpy array.
    :return: Best base model score and parameters.
    """
    cost_function = use_named_args(_dimensions)(partial(self._cost, X=X, y=y))

    def _minimize() -> OptimizeResult:
        return gp_minimize(cost_function,
                           _dimensions,
                           n_calls=self.n_iter,
                           random_state=self.random_state,
                           verbose=True)

    if not logs_are_structured:
        # Fool the check in joblib - everything still works without it.
        # This trick allows running parallel bscv.fit() calls.
        from unittest.mock import patch
        with patch("threading._MainThread", Thread):
            self._log.debug("patched joblib")
            res = _minimize()
    else:
        res = _minimize()

    best_score = -res.fun
    best_params = {dim.name: x for x, dim in zip(res.x, _dimensions)}
    return best_score, best_params
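# --- Hedged sketch (not part of the snippet above): `_dimensions` is referenced but
# not defined here. For `use_named_args(_dimensions)` and the `best_params` dict to
# work, it must be a list of skopt dimensions that each carry a `name`. The names
# and ranges below are illustrative assumptions only.
from skopt.space import Integer, Real

_dimensions = [
    Real(1e-4, 1e-1, prior="log-uniform", name="learning_rate"),
    Integer(50, 500, name="n_estimators"),
    Real(0.1, 1.0, name="subsample"),
]

# `use_named_args(_dimensions)` converts a positional point such as [0.01, 200, 0.8]
# into keyword arguments learning_rate=0.01, n_estimators=200, subsample=0.8,
# which is why every dimension needs a name.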
def main(args):
    # The aggregated output from previous trials.
    output_name = os.path.join(args.output_dir, 'params_scores.yaml')
    if tf.gfile.Exists(output_name):
        x0, y0 = load_previous_trials(output_name)
    else:
        # No previous trials; create a file to record scores.
        x0 = None
        y0 = None
        with tf.gfile.GFile(output_name, 'w') as f:
            f.write('')

    subprocess_env = {
        'PROJECT_ID': args.project_id,
        'LOCATION': args.location,
        'TPU_NAME': args.tpu_name,
        # OUTPUT_DIR holds results from the whole optimization job (`args.n_calls` trials).
        'OUTPUT_DIR': args.output_dir,
        'OUTPUT_NAME': output_name,
        # MODEL_DIR is cleared at the start of each trial.
        'MODEL_DIR': os.path.join(args.output_dir, 'model_dir')
    }

    # Create the objective function with runtime arguments.
    profile_tpu = make_profile_tpu(subprocess_env)
    profile_tpu = use_named_args(space)(profile_tpu)
    gp_minimize(profile_tpu,
                space,
                n_calls=args.n_calls,
                n_random_starts=5,
                x0=x0,
                y0=y0)
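# --- Hedged sketch (assumption, not the original helper): `load_previous_trials` is
# not defined in this snippet. Whatever it does internally, it only needs to return
# previously evaluated points and scores in the shapes gp_minimize expects for
# x0/y0: a list of parameter lists and a list of floats. The YAML layout used here
# is hypothetical.
import yaml

def load_previous_trials(path):
    with tf.gfile.GFile(path, 'r') as f:
        records = yaml.safe_load(f) or []
    # Each record is assumed to look like {'params': [...], 'score': float}.
    x0 = [r['params'] for r in records]
    y0 = [r['score'] for r in records]
    return (x0, y0) if x0 else (None, None)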
def parameter_search(objective, space, n_calls, n_random_starts=3, acq_optimizer="auto", n_jobs=4):
    """
    :param Callable[[Any], scalar] objective: The objective function that we're trying to optimize.
    :param dict[str, Dimension] space: Mapping from parameter name to its skopt search dimension.
    :param int n_calls: Total number of evaluations of the objective.
    :param int n_random_starts: Number of random evaluations before the surrogate model is used.
    :param str acq_optimizer: Method used to minimize the acquisition function.
    :param int n_jobs: Number of parallel jobs used by the optimizer.
    :return: Generator yielding per-iteration info (parameter names and the points visited so far).
    """
    # TODO: Finish building this
    # Soft requirements are imported in here.
    from skopt import gp_minimize
    from skopt.utils import use_named_args

    for k, var in space.items():
        var.name = k
    space = list(space.values())
    objective = use_named_args(space)(objective)

    iterator = Iteratorize(
        func=lambda callback: gp_minimize(
            objective,
            dimensions=space,
            n_calls=n_calls,
            n_random_starts=n_random_starts,
            random_state=1234,
            n_jobs=n_jobs,
            verbose=False,
            callback=callback,
            acq_optimizer=acq_optimizer,
        ),
    )
    for iter_info in iterator:
        yield iter_info
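# --- Hedged sketch (assumption): `Iteratorize` is not defined in this snippet. A
# common way to implement this kind of wrapper is to run the callback-driven
# function in a worker thread and forward every callback payload through a queue,
# so the caller can consume intermediate gp_minimize results as a generator.
import queue
import threading

_SENTINEL = object()

class Iteratorize:
    """Turn a function that reports progress via a callback into an iterable."""

    def __init__(self, func):
        self._func = func
        self._queue = queue.Queue()

    def __iter__(self):
        def _callback(result):
            # gp_minimize invokes the callback after every evaluation with the
            # intermediate OptimizeResult.
            self._queue.put(result)

        def _run():
            try:
                self._func(_callback)
            finally:
                self._queue.put(_SENTINEL)

        threading.Thread(target=_run, daemon=True).start()
        while True:
            item = self._queue.get()
            if item is _SENTINEL:
                return
            yield item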
def parse_config(params_config):
    """Build a `use_named_args` decorator from a hyper-parameter config dict."""
    bopt_space = _transform_hparams_dict(params_config)
    decorator = use_named_args(bopt_space)
    return decorator
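# --- Hedged usage sketch (assumption): the decorator returned by `parse_config`
# wraps an objective so gp_minimize can call it with a positional point while the
# objective still receives named hyper-parameters. `params_config`, `build_model`
# and `evaluate` below are illustrative placeholders, not names from the project.
decorator = parse_config(params_config)

@decorator
def objective(**hparams):
    model = build_model(**hparams)   # hypothetical model factory
    return -evaluate(model)          # gp_minimize minimizes, so negate a score

result = gp_minimize(objective, _transform_hparams_dict(params_config), n_calls=30)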
def train(self):
    """Main training function."""
    self.timing.start()
    dimensions = self._create_dimensions()
    hyperparameters = self._create_hyprparameters_domain()
    with tf.summary.create_file_writer(
            str(Path.cwd().joinpath("output", "logs", "hparam_tuning"))).as_default():
        hp.hparams_config(
            hparams=hyperparameters,
            metrics=[hp.Metric("accuracy", display_name="Accuracy")],
        )

    (
        network_settings,
        train_settings,
        preprocess_settings,
    ) = parseConfigsFile(["network", "train", "preprocess"])

    BATCH_SIZE = train_settings[
        "batch_size"] * self.strategy.num_replicas_in_sync

    (
        synthetic_train,
        synthetic_test,
        synthetic_dataset_len,
        synthetic_num_classes,
    ) = self._get_datasets(BATCH_SIZE)

    srfr_model, discriminator_model = self._instantiate_models(
        synthetic_num_classes, network_settings, preprocess_settings)

    train_model_sr_only_use_case = TrainModelSrOnlyUseCase(
        self.strategy,
        TimingLogger(),
        self.logger,
        BATCH_SIZE,
        synthetic_dataset_len,
    )

    _training = partial(
        self._fitness_function,
        train_model_use_case=train_model_sr_only_use_case,
        srfr_model=srfr_model,
        discriminator_model=discriminator_model,
        batch_size=BATCH_SIZE,
        synthetic_train=synthetic_train,
        synthetic_test=synthetic_test,
        num_classes=synthetic_num_classes,
        train_settings=train_settings,
        hparams=hyperparameters,
    )
    _train = use_named_args(dimensions=dimensions)(_training)

    initial_parameters = [0.0002, 0.9, 1.0, 0.005, 0.01]

    search_result = gp_minimize(
        func=_train,
        dimensions=dimensions,
        acq_func="EI",
        n_calls=20,
        x0=initial_parameters,
    )
    self.logger.info(f"Best hyperparameters: {search_result.x}")
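# --- Hedged sketch (assumption): `_create_dimensions` is defined elsewhere in the
# project. For `x0=initial_parameters` to be accepted it must return exactly five
# named dimensions, in the same order as the initial point. The names and bounds
# below are illustrative guesses, not the project's actual search space.
from skopt.space import Real

def _create_dimensions():
    return [
        Real(1e-5, 1e-2, prior="log-uniform", name="learning_rate"),
        Real(0.5, 0.99, name="beta_1"),
        Real(0.1, 2.0, name="perceptual_weight"),
        Real(1e-4, 1e-1, name="generator_weight"),
        Real(1e-3, 1e-1, name="l2_regularization"),
    ]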
def main():
    """Main training function."""
    timing = TimingLogger()
    timing.start()
    strategy = tf.distribute.MirroredStrategy()
    # strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    dimensions = _create_dimensions()
    hyperparameters = _create_hyprparameters_domain()
    with tf.summary.create_file_writer(
            str(Path.cwd().joinpath("output", "logs", "hparam_tuning"))).as_default():
        hp.hparams_config(
            hparams=hyperparameters,
            metrics=[hp.Metric("accuracy", display_name="Accuracy")],
        )

    (
        network_settings,
        train_settings,
        preprocess_settings,
    ) = parseConfigsFile(["network", "train", "preprocess"])

    BATCH_SIZE = train_settings["batch_size"] * strategy.num_replicas_in_sync

    (
        synthetic_train,
        synthetic_test,
        synthetic_dataset_len,
        synthetic_num_classes,
    ) = _get_datasets(BATCH_SIZE, strategy)

    srfr_model, discriminator_model = _instantiate_models(
        strategy, synthetic_num_classes, network_settings, preprocess_settings)

    train_model_use_case = TrainModelJointLearnUseCase(
        strategy,
        TimingLogger(),
        LOGGER,
        BATCH_SIZE,
        synthetic_dataset_len,
    )

    _training = partial(
        _instantiate_training,
        strategy=strategy,
        train_model_use_case=train_model_use_case,
        srfr_model=srfr_model,
        discriminator_model=discriminator_model,
        batch_size=BATCH_SIZE,
        synthetic_train=synthetic_train,
        synthetic_test=synthetic_test,
        num_classes=synthetic_num_classes,
        train_settings=train_settings,
        hparams=hyperparameters,
    )
    _train = use_named_args(dimensions=dimensions)(_training)

    search_result = gp_minimize(func=_train,
                                dimensions=dimensions,
                                acq_func="EI",
                                n_calls=20)

    LOGGER.info(f"Best hyperparameters: {search_result.x}")