Example #1
0
def test_create_study_command_with_skip_if_exists():
    # type: () -> None
    """Check how `optuna create-study` treats an already existing study name.

    Without `--skip-if-exists` the command must exit with an error; with the
    flag it must succeed and resolve to the study that was created first.
    """

    with StorageConfigSupplier(TEST_CONFIG_TEMPLATE) as (storage_url,
                                                         _config_path):
        storage = RDBStorage(storage_url)
        named_create_args = [
            'optuna', 'create-study', '--storage', storage_url, '--study-name'
        ]

        # First creation: the command prints the name of the new study.
        raw_output = subprocess.check_output(named_create_args + ['test_study'])
        created_name = str(raw_output.decode().strip())

        # The printed name must round-trip through the storage.
        original_id = storage.get_study_id_from_name(created_name)
        assert storage.get_study_name_from_id(original_id) == created_name

        # Re-creating the same name without `--skip-if-exists` must fail.
        duplicate_args = named_create_args + [created_name]
        with pytest.raises(subprocess.CalledProcessError):
            subprocess.check_output(duplicate_args)

        # With `--skip-if-exists` the command succeeds ...
        raw_output = subprocess.check_output(
            duplicate_args + ['--skip-if-exists'])
        reused_name = str(raw_output.decode().strip())

        # ... and the existing study instance is reused.
        reused_id = storage.get_study_id_from_name(reused_name)
        assert original_id == reused_id
Example #2
0
def test_create_study_command_with_direction():
    # type: () -> None
    """Check that `optuna create-study --direction` stores the requested direction.

    `minimize` and `maximize` must be persisted as the matching
    `StudyDirection` member; any other value must make the command fail.
    """

    with StorageConfigSupplier(TEST_CONFIG_TEMPLATE) as (storage_url,
                                                         _config_path):
        storage = RDBStorage(storage_url)
        direction_args = [
            'optuna', 'create-study', '--storage', storage_url, '--direction'
        ]

        # Accepted CLI values paired with the direction expected in storage.
        accepted = (
            ('minimize', optuna.structs.StudyDirection.MINIMIZE),
            ('maximize', optuna.structs.StudyDirection.MAXIMIZE),
        )
        for cli_value, expected_direction in accepted:
            raw_output = subprocess.check_output(direction_args + [cli_value])
            study_name = str(raw_output.decode().strip())
            study_id = storage.get_study_id_from_name(study_name)
            stored_direction = storage.get_study_direction(study_id)
            assert stored_direction == expected_direction

        # --direction should be either 'minimize' or 'maximize'.
        with pytest.raises(subprocess.CalledProcessError):
            subprocess.check_call(direction_args + ['test'])
Example #3
0
def test_study_set_user_attr_command(options):
    # type: (List[str]) -> None
    """Check that `optuna study set-user-attr` persists attributes on a study.

    `options` selects which of the `--storage` / `--config` flags are passed
    to the command (via `_add_option`).
    """

    with StorageConfigSupplier(TEST_CONFIG_TEMPLATE) as (storage_url,
                                                         config_path):
        storage = RDBStorage(storage_url)

        # Create study.
        created_id = storage.create_new_study()
        study_name = storage.get_study_name_from_id(created_id)

        command_prefix = _add_option(
            ['optuna', 'study', 'set-user-attr', '--study', study_name],
            '--storage', storage_url, 'storage' in options)
        command_prefix = _add_option(command_prefix, '--config', config_path,
                                     'config' in options)

        example_attrs = {'architecture': 'ResNet', 'baselen_score': '0.002'}
        # One CLI invocation per attribute.
        for attr_key in example_attrs:
            attr_args = ['--key', attr_key, '--value', example_attrs[attr_key]]
            subprocess.check_call(command_prefix + attr_args)

        # Attrs should be stored in storage.
        study_id = storage.get_study_id_from_name(study_name)
        stored_attrs = storage.get_study_user_attrs(study_id)
        assert len(stored_attrs) == 2
        for attr_key, expected_value in example_attrs.items():
            assert stored_attrs[attr_key] == expected_value
Example #4
0
def test_create_study_command_with_study_name():
    # type: () -> None
    """Check that `optuna create-study --study-name` stores the study under the printed name."""

    with StorageConfigSupplier(TEST_CONFIG_TEMPLATE) as (storage_url, _config_path):
        storage = RDBStorage(storage_url)

        # Create study with name; the command prints the resulting study name.
        create_args = ['optuna', 'create-study', '--storage', storage_url]
        create_args += ['--study-name', 'test_study']
        raw_output = subprocess.check_output(create_args)
        printed_name = str(raw_output.decode().strip())

        # The printed name must map to an id which maps back to the same name.
        stored_id = storage.get_study_id_from_name(printed_name)
        stored_name = storage.get_study_name_from_id(stored_id)
        assert stored_name == printed_name
Example #5
0
def test_create_study_command(options):
    # type: (List[str]) -> None
    """Check the output and storage effect of a bare `optuna create-study`.

    `options` selects which of the `--storage` / `--config` flags are passed
    to the command (via `_add_option`). The command is run twice; the name
    printed by the second run is validated and is expected to map to study
    id 2.
    """

    with StorageConfigSupplier(TEST_CONFIG_TEMPLATE) as (storage_url, config_path):
        storage = RDBStorage(storage_url)

        # Assemble the CLI invocation.
        create_args = _add_option(
            ['optuna', 'create-study'], '--storage', storage_url, 'storage' in options)
        create_args = _add_option(create_args, '--config', config_path, 'config' in options)

        # First run: only the exit status is checked.
        subprocess.check_call(create_args)

        # Second run: the output should be in name string format (no-name + UUID).
        raw_output = subprocess.check_output(create_args)
        printed_name = str(raw_output.decode().strip())
        expected_format = (
            r'^no-name-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$'
        )
        assert re.match(expected_format, printed_name) is not None

        # The printed name should be stored in storage.
        stored_id = storage.get_study_id_from_name(printed_name)
        assert stored_id == 2
Example #6
0
def autopilot(
    X: InteractionMatrix,
    evaluator: Evaluator,
    n_trials: int = 20,
    memory_budget: int = 4000,  # 4GB
    timeout_overall: Optional[int] = None,
    timeout_singlestep: Optional[int] = None,
    algorithms: List[str] = DEFAULT_SEARCHNAMES,
    random_seed: Optional[int] = None,
    logger: Optional[Logger] = None,
    callback: Optional[Callable[[int, pd.DataFrame], None]] = None,
    storage: Optional[RDBStorage] = None,
    study_name: Optional[str] = None,
    task_resource_provider: Type[TaskBackend] = MultiProcessingBackend,
) -> Tuple[Type[BaseRecommender], Dict[str, Any], pd.DataFrame]:

    r"""Given an interaction matrix and an evaluator, search for the best algorithm and its parameters
    (roughly) within the time & space constraints. You can specify how each search step will be executed.

    Args:
        X:
            Input interaction matrix.
        evaluator:
            Evaluator to measure the performance of the recommenders.
        n_trials: The maximal number of trials. Defaults to 20.
        memory_budget:
            Optimizers will try search parameters so that memory usage (in megabyte) will not exceed this value.
            An algorithm will not be searched if it inevitably violates this bound.
            Note that this value is quite rough one and will not be respected strictly.
        timeout_overall:
            If set, the total execution time of the trials will not exceed this value (roughly).
            Every trial is given at most the time remaining in this budget,
            regardless of whether `timeout_singlestep` is set.
        timeout_singlestep:
            If set, a single trial (recommender and a set of its parameter) will not run for more than the value (in seconds).
            Such a trial is considered to have produced  a score value of 0,
            and optuna will avoid suggesting such values (if everything works fine).
            Defaults to `None`.
        algorithms:
            A list of algorithm names to be tried.
            Defaults to `["RP3beta", "IALS", "DenseSLIM", "AsymmetricCosineKNN", "SLIM"]`.
        random_seed:
            The random seed that controls the suggestion behavior.
            Defaults to `None`.
        logger:
            The logger to be used. If `None`, irspack's default logger will be used.
            Defaults to None.
        callback:
            If not `None`, called at the end of every single trial with the following arguments

                1. The current trial's number.
                2. A `pd.DataFrame` that holds history of trial execution.

            Defaults to `None`.
        storage:
            An instance of `optuna.storages.RDBStorage`. Defaults to `None`,
            in which case a temporary SQLite database file is created in the
            current directory and removed before this function exits.
        study_name:
            If `storage` argument is given, you have to pass study_name
            argument.
        task_resource_provider:
            Specifies how each search step is executed. Defaults to `MultiProcessingBackend`.
    Raises:
        ValueError:
            If `storage` is given but `study_name` is not specified.
        RuntimeError:
            If no recommender algorithms are available within given memory budget.
        RuntimeError:
            If no trials have been completed within given timeout.
            NOTE(review): this case is not raised explicitly here; it is
            presumably propagated from `study.best_trial` -- confirm the
            exception type.


    Returns:

        * The best algorithm's recommender class.
        * The best parameters.
        * The dataframe containing the history of trials.

    """
    if storage is not None and study_name is None:
        raise ValueError('"study_name" must be specified if "storage" is given.')
    RNS = np.random.RandomState(random_seed)

    # Keep only the optimizers whose search range can be fitted into the
    # memory budget; the others signal this with LowMemoryError.
    suggest_overwrites: Dict[str, List[Suggestion]] = {}
    optimizer_names: List[str] = []
    for rec_name in algorithms:
        optimizer_class_name = rec_name + "Optimizer"
        optimizer_class = get_optimizer_class(optimizer_class_name)
        try:
            tuned_range = optimizer_class.tune_range_given_memory_budget(
                X, memory_budget
            )
        except LowMemoryError:
            continue
        suggest_overwrites[optimizer_class_name] = tuned_range
        optimizer_names.append(optimizer_class_name)

    if not optimizer_names:
        raise RuntimeError("No available algorithm with given memory.")

    if logger is None:
        logger = get_default_logger()

    logger.info("Trying the following algorithms: %s", optimizer_names)

    # Fall back to a throw-away SQLite database when the caller supplies no storage.
    optional_db_path = Path(f".autopilot-{uuid1()}.db")
    storage_: RDBStorage
    if storage is None:
        storage_ = RDBStorage(
            url=f"sqlite:///{optional_db_path.name}",
        )
    else:
        storage_ = storage

    try:
        if study_name is None:
            study_name_ = f"autopilot-{uuid1()}"
        else:
            study_name_ = study_name
        start = time.time()
        study = optuna.create_study(
            storage=storage_, study_name=study_name_, load_if_exists=True
        )
        study_id = storage_.get_study_id_from_name(study_name_)

        for _ in range(n_trials):

            task_start = time.time()
            elapsed_at_start = task_start - start

            timeout_for_this_process: Optional[int]
            if timeout_overall is None:
                timeout_for_this_process = timeout_singlestep
            else:
                remaining_time = int(timeout_overall - elapsed_at_start)
                if remaining_time <= 0:
                    break

                # Always bound the trial by the remaining overall budget;
                # otherwise a trial without a per-step timeout would be
                # joined without any limit and could overrun timeout_overall.
                if timeout_singlestep is None:
                    timeout_for_this_process = remaining_time
                else:
                    timeout_for_this_process = min(
                        remaining_time, timeout_singlestep
                    )

            task = task_resource_provider(
                X,
                evaluator,
                optimizer_names,
                suggest_overwrites,
                storage_.url,
                study_name_,
                RNS.randint(0, np.iinfo(np.int32).max, dtype=np.int32),
                logger,
            )

            task.start()
            trial_number = task.receive_trial_number()
            task.join(timeout=timeout_for_this_process)

            # No exit code after join(): the task is presumably still running,
            # i.e. it hit the timeout. Stop it and record a result on its behalf.
            if task.exit_code is None:
                task.terminate()
                try:
                    logger.info("Trial %s timeout.", trial_number)
                    storage_.read_trials_from_remote_storage(study_id)
                    trial_id = storage_.get_trial_id_from_study_id_trial_number(
                        study_id, trial_number
                    )
                    trial_this = storage_.get_trial(trial_id)
                    intermediate_values = sorted(
                        trial_this.intermediate_values.items(),
                        key=_sort_intermediate,
                    )

                    if intermediate_values:
                        # Though terminated, it resulted in some values.
                        # Regard it as a COMPLETE trial, using the first entry
                        # in `_sort_intermediate` order as its result.
                        storage_.set_trial_values(
                            trial_id,
                            [intermediate_values[0][1]],
                        )
                        storage_.set_trial_user_attr(
                            trial_id, "max_epoch", intermediate_values[0][0] + 1
                        )
                    else:
                        # Penalize such a time-consuming trial
                        storage_.set_trial_values(trial_id, [0.0])
                    storage_.set_trial_state(trial_id, TrialState.COMPLETE)
                except RuntimeError:  # pragma: no cover
                    # Deliberate best-effort: presumably the storage refuses
                    # the update when the trial has already finished -- TODO confirm.
                    pass  # pragma: no cover

            if callback is not None:
                callback(trial_number, study_to_dataframe(study))

            now = time.time()
            elapsed = now - start
            if timeout_overall is not None:
                if elapsed > timeout_overall:
                    break

        # Merge the best trial's params with those user attrs that are valid
        # parameter names, then strip the leading "<prefix>." from each key.
        best_trial = study.best_trial
        best_params_with_prefix = dict(
            **best_trial.params,
            **{
                key: val
                for key, val in best_trial.user_attrs.items()
                if is_valid_param_name(key)
            },
        )
        best_params = {
            re.sub(r"^([^\.]*\.)", "", key): value
            for key, value in best_params_with_prefix.items()
        }
        optimizer_name: str = best_params.pop("optimizer_name")
        result_df = study_to_dataframe(study)
        recommender_class = get_optimizer_class(optimizer_name).recommender_class

        return (recommender_class, best_params, result_df)
    finally:
        # Remove the temporary database on every exit path, not only on
        # success, so a failed search does not leave a stray file behind.
        if storage is None and optional_db_path.exists():
            optional_db_path.unlink()