def test_create_study_command_with_skip_if_exists():
    # type: () -> None
    """Check that `--skip-if-exists` reuses an already existing study.

    Creating a study under an existing name must fail without the flag and
    must succeed, resolving to the same study id, with the flag.
    """
    with StorageConfigSupplier(TEST_CONFIG_TEMPLATE) as (storage_url, config_path):
        storage = RDBStorage(storage_url)

        # First creation: the command prints the name of the created study.
        create_cmd = [
            'optuna', 'create-study',
            '--storage', storage_url,
            '--study-name', 'test_study',
        ]
        created_name = str(subprocess.check_output(create_cmd).decode().strip())

        # The printed name must round-trip through the storage.
        original_id = storage.get_study_id_from_name(created_name)
        assert storage.get_study_name_from_id(original_id) == created_name

        # Same name again, without `--skip-if-exists`: the command must fail.
        duplicate_cmd = [
            'optuna', 'create-study',
            '--storage', storage_url,
            '--study-name', created_name,
        ]
        with pytest.raises(subprocess.CalledProcessError):
            subprocess.check_output(duplicate_cmd)

        # Same name again, with `--skip-if-exists`: the command must succeed.
        reuse_cmd = duplicate_cmd + ['--skip-if-exists']
        reused_name = str(subprocess.check_output(reuse_cmd).decode().strip())

        # The existing study instance is reused.
        assert original_id == storage.get_study_id_from_name(reused_name)
def test_create_study_command_with_direction():
    # type: () -> None
    """Check that `--direction` is stored for valid values and rejected otherwise."""
    with StorageConfigSupplier(TEST_CONFIG_TEMPLATE) as (storage_url, config_path):
        storage = RDBStorage(storage_url)
        direction_cmd = ['optuna', 'create-study', '--storage', storage_url, '--direction']

        # Each accepted CLI value paired with the direction the storage must report.
        accepted = (
            ('minimize', optuna.structs.StudyDirection.MINIMIZE),
            ('maximize', optuna.structs.StudyDirection.MAXIMIZE),
        )
        for cli_value, expected_direction in accepted:
            printed = subprocess.check_output(direction_cmd + [cli_value])
            created_name = str(printed.decode().strip())
            created_id = storage.get_study_id_from_name(created_name)
            assert storage.get_study_direction(created_id) == expected_direction

        # --direction should be either 'minimize' or 'maximize'.
        with pytest.raises(subprocess.CalledProcessError):
            subprocess.check_call(direction_cmd + ['test'])
def test_study_set_user_attr_command(options):
    # type: (List[str]) -> None
    """Check that `optuna study set-user-attr` writes attributes to the storage.

    `options` lists which of 'storage' / 'config' are requested; the matching
    flag and value are handed to `_add_option` together with that membership.
    """
    with StorageConfigSupplier(TEST_CONFIG_TEMPLATE) as (storage_url, config_path):
        storage = RDBStorage(storage_url)

        # Create study.
        target_name = storage.get_study_name_from_id(storage.create_new_study())

        set_attr_cmd = ['optuna', 'study', 'set-user-attr', '--study', target_name]
        optional_args = (
            ('--storage', storage_url, 'storage'),
            ('--config', config_path, 'config'),
        )
        for flag, flag_value, option_key in optional_args:
            set_attr_cmd = _add_option(set_attr_cmd, flag, flag_value, option_key in options)

        example_attrs = {'architecture': 'ResNet', 'baselen_score': '0.002'}
        for attr_key, attr_value in example_attrs.items():
            subprocess.check_call(set_attr_cmd + ['--key', attr_key, '--value', attr_value])

        # Attrs should be stored in storage.
        stored_attrs = storage.get_study_user_attrs(
            storage.get_study_id_from_name(target_name))
        assert len(stored_attrs) == 2
        matches = [
            stored_attrs[attr_key] == attr_value
            for attr_key, attr_value in example_attrs.items()
        ]
        assert all(matches)
def test_create_study_command_with_study_name():
    # type: () -> None
    """Check that `optuna create-study --study-name` stores the study by name."""
    with StorageConfigSupplier(TEST_CONFIG_TEMPLATE) as (storage_url, config_path):
        storage = RDBStorage(storage_url)

        # Create study with name; the command prints the resulting study name.
        create_cmd = [
            'optuna', 'create-study',
            '--storage', storage_url,
            '--study-name', 'test_study',
        ]
        printed = subprocess.check_output(create_cmd)
        created_name = str(printed.decode().strip())

        # Check if the printed name is stored in the storage.
        created_id = storage.get_study_id_from_name(created_name)
        assert storage.get_study_name_from_id(created_id) == created_name
def test_create_study_command(options):
    # type: (List[str]) -> None
    """Check that `optuna create-study` prints an auto-generated, stored name.

    The command is executed twice (once with `check_call`, once with
    `check_output`); the name captured from the second run is expected to map
    to study id 2, presumably because the first run already created a study.
    """
    with StorageConfigSupplier(TEST_CONFIG_TEMPLATE) as (storage_url, config_path):
        storage = RDBStorage(storage_url)

        # Create study.
        create_cmd = ['optuna', 'create-study']
        optional_args = (
            ('--storage', storage_url, 'storage'),
            ('--config', config_path, 'config'),
        )
        for flag, flag_value, option_key in optional_args:
            create_cmd = _add_option(create_cmd, flag, flag_value, option_key in options)
        subprocess.check_call(create_cmd)

        # Command output should be in name string format (no-name + UUID).
        printed = subprocess.check_output(create_cmd)
        generated_name = str(printed.decode().strip())
        uuid_name_pattern = (
            r'^no-name-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$'
        )
        assert re.match(uuid_name_pattern, generated_name) is not None

        # The generated name should be stored in storage.
        assert storage.get_study_id_from_name(generated_name) == 2
def autopilot(
    X: InteractionMatrix,
    evaluator: Evaluator,
    n_trials: int = 20,
    memory_budget: int = 4000,  # 4GB
    timeout_overall: Optional[int] = None,
    timeout_singlestep: Optional[int] = None,
    algorithms: List[str] = DEFAULT_SEARCHNAMES,
    random_seed: Optional[int] = None,
    logger: Optional[Logger] = None,
    callback: Optional[Callable[[int, pd.DataFrame], None]] = None,
    storage: Optional[RDBStorage] = None,
    study_name: Optional[str] = None,
    task_resource_provider: Type[TaskBackend] = MultiProcessingBackend,
) -> Tuple[Type[BaseRecommender], Dict[str, Any], pd.DataFrame]:
    r"""Given an interaction matrix and an evaluator, search for the best algorithm and its parameters
    (roughly) within the time & space constraints. You can specify how each search step will be executed.

    Args:
        X:
            Input interaction matrix.
        evaluator:
            Evaluator to measure the performance of the recommenders.
        n_trials:
            The maximal number of trials. Defaults to 20.
        memory_budget:
            Optimizers will try search parameters so that memory usage (in megabyte) will not exceed this value.
            An algorithm will not be searched if it inevitably violates this bound.
            Note that this value is quite rough one and will not be respected strictly.
        timeout_overall:
            If set, the total execution time of the trials will not exceed this value (roughly).
            A trial that is still running when the remaining budget is used up is
            terminated and handled like a single-step timeout.
        timeout_singlestep:
            If set, a single trial (recommender and a set of its parameter) will not run for more than the value (in seconds).
            Such a trial is considered to have produced a score value of 0
            (unless it already reported intermediate values),
            and optuna will avoid suggesting such values (if everything works fine).
            Defaults to `None`.
        algorithms:
            A list of algorithm names to be tried.
            Defaults to `["RP3beta", "IALS", "DenseSLIM", "AsymmetricCosineKNN", "SLIM"]`.
        random_seed:
            The random seed that controls the suggestion behavior.
            Defaults to `None`.
        logger:
            The logger to be used. If `None`, irspack's default logger will be used.
            Defaults to None.
        callback:
            If not `None`, called at the end of every single trial with the following arguments

                1. The current trial's number.
                2. A `pd.DataFrame` that holds history of trial execution.

            Defaults to `None`.
        storage:
            An instance of `optuna.storages.RDBStorage`. If `None`, a temporary
            SQLite database file is created in the current working directory and
            removed again before this function exits. Defaults to `None`.
        study_name:
            If `storage` argument is given, you have to pass study_name argument.
        task_resource_provider:
            Specifies how each search step is executed. Defaults to `MultiProcessingBackend`.

    Raises:
        ValueError:
            If `storage` is given but `study_name` is not specified.
        RuntimeError:
            If no recommender algorithms are available within given memory budget.
        RuntimeError:
            If no trials have been completed within given timeout.
            NOTE(review): this function does not raise that error itself; it relies on
            `study.best_trial` failing when there is no completed trial. Confirm the
            exception type optuna actually raises in that case.

    Returns:

        * The best algorithm's recommender class.
        * The best parameters.
        * The dataframe containing the history of trials.
    """
    if storage is not None and study_name is None:
        raise ValueError('"study_name" must be specified if "storage" is given.')
    RNS = np.random.RandomState(random_seed)

    # Keep only the optimizers whose search range can be fitted into the memory budget.
    suggest_overwrites: Dict[str, List[Suggestion]] = {}
    optimizer_names: List[str] = []
    for rec_name in algorithms:
        optimizer_class_name = rec_name + "Optimizer"
        optimizer_class = get_optimizer_class(optimizer_class_name)
        try:
            suggest_overwrites[
                optimizer_class_name
            ] = optimizer_class.tune_range_given_memory_budget(X, memory_budget)
            optimizer_names.append(optimizer_class_name)
        except LowMemoryError:
            continue

    if not optimizer_names:
        raise RuntimeError("No available algorithm with given memory.")

    if logger is None:
        logger = get_default_logger()

    logger.info("Trying the following algorithms: %s", optimizer_names)

    # Only used (and only cleaned up) when the caller did not supply a storage.
    optional_db_path = Path(f".autopilot-{uuid1()}.db")
    try:
        storage_: RDBStorage
        if storage is None:
            storage_ = RDBStorage(
                url=f"sqlite:///{optional_db_path.name}",
            )
        else:
            storage_ = storage

        if study_name is None:
            study_name_ = f"autopilot-{uuid1()}"
        else:
            study_name_ = study_name

        start = time.time()
        study = optuna.create_study(
            storage=storage_, study_name=study_name_, load_if_exists=True
        )
        study_id = storage_.get_study_id_from_name(study_name_)

        for _ in range(n_trials):
            task_start = time.time()
            elapsed_at_start = task_start - start

            timeout_for_this_process: Optional[int] = None
            if timeout_overall is None:
                timeout_for_this_process = timeout_singlestep
            else:
                remaining_time = int(timeout_overall - elapsed_at_start)
                if remaining_time <= 0:
                    break
                # Always cap the trial by the remaining overall budget; otherwise a
                # trial started without `timeout_singlestep` would be joined with no
                # timeout at all and could overrun `timeout_overall` arbitrarily.
                timeout_for_this_process = remaining_time
                if timeout_singlestep is not None:
                    timeout_for_this_process = min(remaining_time, timeout_singlestep)

            task = task_resource_provider(
                X,
                evaluator,
                optimizer_names,
                suggest_overwrites,
                storage_.url,
                study_name_,
                RNS.randint(0, np.iinfo(np.int32).max, dtype=np.int32),
                logger,
            )
            task.start()
            trial_number = task.receive_trial_number()
            task.join(timeout=timeout_for_this_process)

            # NOTE(review): `exit_code is None` is taken to mean that the task was
            # still running when `join` returned, i.e. the trial timed out.
            if task.exit_code is None:
                task.terminate()
                try:
                    logger.info(f"Trial {trial_number} timeout.")
                    storage_.read_trials_from_remote_storage(study_id)
                    trial_id = storage_.get_trial_id_from_study_id_trial_number(
                        study_id, trial_number
                    )
                    trial_this = storage_.get_trial(trial_id)
                    # The first element after sorting with `_sort_intermediate` is
                    # the one recorded as the trial's final value below.
                    intermediate_values = sorted(
                        list(trial_this.intermediate_values.items()),
                        key=_sort_intermediate,
                    )
                    if intermediate_values:
                        # Though terminated, it resulted in some values.
                        # Regard it as a COMPLETE trial.
                        storage_.set_trial_values(
                            trial_id,
                            [intermediate_values[0][1]],
                        )
                        storage_.set_trial_user_attr(
                            trial_id, "max_epoch", intermediate_values[0][0] + 1
                        )
                    else:
                        # Penalize such a time-consuming trial
                        storage_.set_trial_values(trial_id, [0.0])
                    storage_.set_trial_state(trial_id, TrialState.COMPLETE)
                except RuntimeError:  # pragma: no cover
                    # Best effort: a failure to record the timed-out trial must
                    # not abort the whole search.
                    pass  # pragma: no cover

            if callback is not None:
                callback(trial_number, study_to_dataframe(study))

            now = time.time()
            elapsed = now - start
            if timeout_overall is not None:
                if elapsed > timeout_overall:
                    break

        # Merge the suggested parameters with the user attributes that are
        # themselves valid parameter names.
        best_params_with_prefix = dict(
            **study.best_trial.params,
            **{
                key: val
                for key, val in study.best_trial.user_attrs.items()
                if is_valid_param_name(key)
            },
        )
        # Drop the leading "<prefix>." from each key.
        best_params = {
            re.sub(r"^([^\.]*\.)", "", key): value
            for key, value in best_params_with_prefix.items()
        }
        optimizer_name: str = best_params.pop("optimizer_name")
        result_df = study_to_dataframe(study)
        recommender_class = get_optimizer_class(optimizer_name).recommender_class
        return (recommender_class, best_params, result_df)
    finally:
        # Remove the temporary database on success and on failure alike, so an
        # exception does not leave a stray `.autopilot-*.db` file behind.
        if storage is None and optional_db_path.exists():
            optional_db_path.unlink()