Ejemplo n.º 1
0
def _get_optimization_history_plot(
    studies: List[Study],
    target: Optional[Callable[[FrozenTrial], float]],
    target_name: str,
) -> "Axes":
    """Draw the optimization history of the given studies on a new axes.

    Args:
        studies: Studies to plot.
        target: Optional callable forwarded unchanged to
            ``_get_optimization_histories``.
        target_name: Label of the y-axis; also forwarded to
            ``_get_optimization_histories``.

    Returns:
        The axes holding the plot. When ``studies`` is empty, or no study has a
        completed trial, the axes only carries the title and the axis labels.
    """

    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.

    _, ax = plt.subplots()
    ax.set_title("Optimization History Plot")
    ax.set_xlabel("#Trials")
    ax.set_ylabel(target_name)

    if not studies:
        _logger.warning("There are no studies.")
        return ax

    # Only the existence of a completed trial matters here, so stop at the
    # first hit instead of materializing every completed trial of every study.
    has_complete_trial = any(
        trial.state == TrialState.COMPLETE
        for study in studies
        for trial in study.get_trials(deepcopy=False)
    )
    if not has_complete_trial:
        _logger.warning("Study instance does not contain trials.")
        return ax

    ax = _get_optimization_histories(studies, target, target_name, ax)
    # Anchor the legend just outside the top-right corner of the plot area.
    plt.legend(bbox_to_anchor=(1.05, 1.0), loc="upper left")
    return ax
Ejemplo n.º 2
0
def _get_intermediate_plot(info: _IntermediatePlotInfo) -> "Axes":
    """Plot one line per trial from its sorted intermediate values.

    Args:
        info: Container whose ``trial_infos`` entries each expose
            ``trial_number`` and ``sorted_intermediate_values`` (pairs of
            step and value).

    Returns:
        The axes holding one line per entry of ``info.trial_infos``. A legend
        is attached only when there are at least two entries.
    """

    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    _, ax = plt.subplots(tight_layout=True)
    ax.set_title("Intermediate Values Plot")
    ax.set_xlabel("Step")
    ax.set_ylabel("Intermediate Value")
    cmap = plt.get_cmap("tab20")  # Use tab20 colormap for multiple line plots.

    trial_infos = info.trial_infos

    for index, trial_info in enumerate(trial_infos):
        steps = tuple(step for step, _ in trial_info.sorted_intermediate_values)
        values = tuple(value for _, value in trial_info.sorted_intermediate_values)
        ax.plot(
            steps,
            values,
            color=cmap(index),
            alpha=0.7,
            label=f"Trial{trial_info.trial_number}",
        )

    # A single line needs no legend.
    if len(trial_infos) < 2:
        return ax

    ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
    return ax
Ejemplo n.º 3
0
def _get_edf_plot(
    studies: List[Study],
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Plot the empirical distribution function (EDF) of each study.

    Args:
        studies: Studies to compare; one curve is drawn per study.
        target: Callable mapping a trial to the plotted value. When ``None``,
            the trial's ``value`` is used.
        target_name: Label of the x-axis.

    Returns:
        The axes holding the curves. When ``studies`` is empty, or no study
        yields any value, the axes only carries the title, labels and y-limits.
    """

    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    _, ax = plt.subplots()
    ax.set_title("Empirical Distribution Function Plot")
    ax.set_xlabel(target_name)
    ax.set_ylabel("Cumulative Probability")
    ax.set_ylim(0, 1)
    cmap = plt.get_cmap("tab20")  # Use tab20 colormap for multiple line plots.

    # Prepare data for plotting.
    if not studies:
        _logger.warning("There are no studies.")
        return ax

    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target

    all_values: List[np.ndarray] = []
    for study in studies:
        trials = _filter_nonfinite(
            study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)),
            target=target,
        )
        all_values.append(np.array([target(trial) for trial in trials]))

    if all(len(values) == 0 for values in all_values):
        _logger.warning("There are no complete trials.")
        return ax

    # The x grid spans the values of all studies so the curves are comparable.
    pooled_values = np.concatenate(all_values)
    x_values = np.linspace(np.min(pooled_values), np.max(pooled_values), 100)

    # Draw multiple line plots.
    for i, (values, study) in enumerate(zip(all_values, studies)):
        if values.size == 0:
            # No values means no distribution: 0/0 would produce the same NaN
            # curve but also emit a RuntimeWarning, so build it explicitly.
            y_values = np.full(x_values.shape, np.nan)
        else:
            # Fraction of this study's values that are <= each grid point.
            y_values = np.sum(values[:, np.newaxis] <= x_values, axis=0) / values.size
        ax.plot(x_values, y_values, color=cmap(i), alpha=0.7, label=study.study_name)

    if len(studies) >= 2:
        ax.legend()

    return ax
Ejemplo n.º 4
0
def _get_edf_plot(
    studies: List[Study],
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Plot the empirical distribution function (EDF) of each study.

    Args:
        studies: Studies to compare; one curve is drawn per study.
        target: Callable mapping a trial to the plotted value. When ``None``,
            the trial's ``value`` is used.
        target_name: Label of the x-axis.

    Returns:
        The axes holding the curves. When ``studies`` is empty, or no study
        has a completed trial, the axes only carries the title, labels and
        y-limits.
    """

    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    _, ax = plt.subplots()
    ax.set_title("Empirical Distribution Function Plot")
    ax.set_xlabel(target_name)
    ax.set_ylabel("Cumulative Probability")
    ax.set_ylim(0, 1)
    cmap = plt.get_cmap("tab20")  # Use tab20 colormap for multiple line plots.

    # Prepare data for plotting.
    if not studies:
        _logger.warning("There are no studies.")
        return ax

    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target

    # Evaluate `target` exactly once per completed trial and read each study's
    # trials exactly once, so the x range and the curves share one snapshot.
    values_per_study = [
        [
            target(trial)
            for trial in study.get_trials(deepcopy=False)
            if trial.state == TrialState.COMPLETE
        ]
        for study in studies
    ]
    pooled_values = [value for values in values_per_study for value in values]

    if not pooled_values:
        _logger.warning("There are no complete trials.")
        return ax

    x_values = np.linspace(min(pooled_values), max(pooled_values), 100)

    # Draw multiple line plots.
    for i, (study, study_values) in enumerate(zip(studies, values_per_study)):
        values = np.asarray(study_values)
        if values.size == 0:
            # No values means no distribution: 0/0 would produce the same NaN
            # curve but also emit a RuntimeWarning, so build it explicitly.
            y_values = np.full(x_values.shape, np.nan)
        else:
            # Fraction of this study's values that are <= each grid point.
            y_values = np.sum(values[:, np.newaxis] <= x_values, axis=0) / values.size

        ax.plot(x_values, y_values, color=cmap(i), alpha=0.7, label=study.study_name)

    return ax
Ejemplo n.º 5
0
def _get_pareto_front_2d(info: _ParetoFrontInfo) -> "Axes":
    """Scatter the infeasible, non-best and best trials of a 2D Pareto front.

    Args:
        info: Container providing ``target_names``, ``axis_order`` and the
            three ``*_trials_with_values`` sequences of (trial, values) pairs.

    Returns:
        The axes holding up to three scatter groups. Empty groups are skipped.
    """
    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    _, ax = plt.subplots()
    ax.set_title("Pareto-front Plot")
    cmap = plt.get_cmap("tab10")  # Use tab10 colormap for similar outputs to plotly.

    ax.set_xlabel(info.target_names[info.axis_order[0]])
    ax.set_ylabel(info.target_names[info.axis_order[1]])

    def _scatter_group(trials_with_values, color, label):  # type: ignore[no-untyped-def]
        # Project each trial's values onto the two displayed objectives.
        ax.scatter(
            x=[values[info.axis_order[0]] for _, values in trials_with_values],
            y=[values[info.axis_order[1]] for _, values in trials_with_values],
            color=color,
            label=label,
        )

    # Ordinary trials are only called "Feasible" when infeasible ones are shown too.
    trial_label: str = "Trial"
    if len(info.infeasible_trials_with_values) > 0:
        _scatter_group(info.infeasible_trials_with_values, "#cccccc", "Infeasible Trial")
        trial_label = "Feasible Trial"

    if len(info.non_best_trials_with_values) > 0:
        _scatter_group(info.non_best_trials_with_values, cmap(0), trial_label)

    if len(info.best_trials_with_values) > 0:
        _scatter_group(info.best_trials_with_values, cmap(3), "Best Trial")

    show_legend = info.non_best_trials_with_values is not None and ax.has_data()
    if show_legend:
        ax.legend()

    return ax
Ejemplo n.º 6
0
def _get_optimization_history_plot(
    study: Study,
    target: Optional[Callable[[FrozenTrial], float]],
    target_name: str,
) -> "Axes":
    """Draw the optimization history of a single study on a new axes.

    Args:
        study: Study whose completed trials are plotted.
        target: Callable mapping a trial to the plotted value. When ``None``,
            the trial's ``value`` is plotted together with a best-value line.
        target_name: Label of the y-axis and of the scatter series.

    Returns:
        The axes holding the plot; it carries no data when the study has no
        completed trial.
    """

    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    _, ax = plt.subplots()
    ax.set_title("Optimization History Plot")
    ax.set_xlabel("#Trials")
    ax.set_ylabel(target_name)
    cmap = plt.get_cmap("tab10")  # Use tab10 colormap for similar outputs to plotly.

    # Prepare data for plotting.
    completed = [t for t in study.trials if t.state == TrialState.COMPLETE]

    if not completed:
        _logger.warning("Study instance does not contain trials.")
        return ax

    trial_numbers = [t.number for t in completed]

    # With a custom target only the scatter is drawn (no best-value line, no legend).
    if target is not None:
        ax.scatter(
            x=trial_numbers,
            y=[target(t) for t in completed],
            color=cmap(0),
            alpha=1,
            label=target_name,
        )
        return ax

    # Draw a scatter plot and a line plot.
    objective_values = [cast(float, t.value) for t in completed]
    if study.direction == StudyDirection.MINIMIZE:
        running_best = np.minimum.accumulate
    else:
        running_best = np.maximum.accumulate
    best_values = running_best(objective_values)

    ax.scatter(
        x=trial_numbers,
        y=objective_values,
        color=cmap(0),
        alpha=1,
        label=target_name,
    )
    ax.plot(
        trial_numbers,
        best_values,
        marker="o",
        color=cmap(3),
        alpha=0.5,
        label="Best Value",
    )
    ax.legend()

    return ax
Ejemplo n.º 7
0
def _get_optimization_history_plot(
    info_list: List[_OptimizationHistoryInfo],
    target_name: str,
) -> "Axes":
    """Draw precomputed optimization histories on a new axes.

    Args:
        info_list: One entry per plotted series; each entry unpacks into
            ``(trial_numbers, values_info, best_values_info)``.
        target_name: Label of the y-axis.

    Returns:
        The axes holding a scatter per entry, plus error bars, a best-value
        line and a shaded band when the entry provides them.
    """

    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    _, ax = plt.subplots()
    ax.set_title("Optimization History Plot")
    ax.set_xlabel("Trial")
    ax.set_ylabel(target_name)
    cmap = plt.get_cmap("tab10")  # Use tab10 colormap for similar outputs to plotly.

    # A single series keeps the classic blue/red pair; several series take
    # consecutive colormap entries (even index: values, odd index: best values).
    single_series = len(info_list) == 1

    for i, (trial_numbers, values_info, best_values_info) in enumerate(info_list):
        if values_info.stds is not None:
            ax.errorbar(
                x=trial_numbers,
                y=values_info.values,
                yerr=values_info.stds,
                capsize=5,
                fmt="o",
                color="tab:blue",
            )
        ax.scatter(
            x=trial_numbers,
            y=values_info.values,
            color=cmap(0) if single_series else cmap(2 * i),
            alpha=1,
            label=values_info.label_name,
        )

        if best_values_info is None:
            continue

        ax.plot(
            trial_numbers,
            best_values_info.values,
            marker="o",
            color=cmap(3) if single_series else cmap(2 * i + 1),
            alpha=0.5,
            label=best_values_info.label_name,
        )
        if best_values_info.stds is not None:
            # Shade one standard deviation around the best-value line.
            best = np.array(best_values_info.values)
            spread = np.array(best_values_info.stds)
            ax.fill_between(
                x=trial_numbers,
                y1=best - spread,
                y2=best + spread,
                color="tab:red",
                alpha=0.4,
            )

    # Build the legend once, after every series exists, anchored just outside
    # the top-right corner of the plot area.
    ax.legend(bbox_to_anchor=(1.05, 1.0), loc="upper left")
    return ax
def _get_optimization_history_plot(
    study: Study,
    target: Optional[Callable[[FrozenTrial], float]],
    target_name: str,
) -> "Axes":
    """Draw the optimization history of a single study on a new axes.

    Args:
        study: Study whose completed trials are plotted.
        target: Callable mapping a trial to the plotted value. When ``None``,
            the trial's ``value`` is plotted together with a best-value line.
        target_name: Label of the y-axis and of the scatter series.

    Returns:
        The axes holding the plot; it carries no data when the study has no
        completed trial.
    """

    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    _, ax = plt.subplots()
    ax.set_title("Optimization History Plot")
    ax.set_xlabel("#Trials")
    ax.set_ylabel(target_name)
    cmap = plt.get_cmap("tab10")  # Use tab10 colormap for similar outputs to plotly.

    # Prepare data for plotting.
    trials = [t for t in study.trials if t.state == TrialState.COMPLETE]

    if len(trials) == 0:
        _logger.warning("Study instance does not contain trials.")
        return ax

    trial_numbers = [t.number for t in trials]

    # Draw a scatter plot and a line plot.
    if target is None:
        # The study direction and the trials' own values are only needed for
        # the best-value line, so they are not touched when a custom target
        # is supplied.
        minimize = study.direction == StudyDirection.MINIMIZE
        pick_best = min if minimize else max
        running_best = float("inf") if minimize else -float("inf")
        best_values = []
        for trial in trials:
            trial_value = trial.value
            assert trial_value is not None  # For mypy
            running_best = pick_best(running_best, trial_value)
            best_values.append(running_best)

        ax.scatter(
            x=trial_numbers,
            y=[t.value for t in trials],
            color=cmap(0),
            alpha=1,
            label=target_name,
        )
        ax.plot(
            trial_numbers,
            best_values,
            marker="o",
            color=cmap(3),
            alpha=0.5,
            label="Best Value",
        )
    else:
        ax.scatter(
            x=trial_numbers,
            y=[target(t) for t in trials],
            color=cmap(0),
            alpha=1,
            label=target_name,
        )
    ax.legend()

    return ax
Ejemplo n.º 9
0
def _get_param_importance_plot(
    study: Study,
    evaluator: Optional[BaseImportanceEvaluator] = None,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw hyperparameter importances as labelled horizontal bars.

    Args:
        study: Study to evaluate.
        evaluator: Forwarded to ``optuna.importance.get_param_importances``.
        params: Forwarded to ``optuna.importance.get_param_importances``.
        target: Forwarded to ``optuna.importance.get_param_importances``.
        target_name: Name used in the x-axis label.

    Returns:
        The axes holding the bars; it carries no data when the study has no
        completed trial.
    """

    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    fig, ax = plt.subplots()
    ax.set_title("Hyperparameter Importances")
    ax.set_xlabel(f"Importance for {target_name}")
    ax.set_ylabel("Hyperparameter")

    # Prepare data for plotting.
    # Importances cannot be evaluated without completed trials.
    # Return an empty figure for consistency with other visualization functions.
    if not any(trial.state == TrialState.COMPLETE for trial in study.trials):
        _logger.warning("Study instance does not contain completed trials.")
        return ax

    importances = optuna.importance.get_param_importances(
        study, evaluator=evaluator, params=params, target=target
    )

    # Reverse the returned order so its first entry ends up as the top bar.
    importances = OrderedDict(reversed(list(importances.items())))
    importance_values = list(importances.values())
    param_names = list(importances.keys())
    pos = np.arange(len(param_names))

    # Draw horizontal bars.
    # `plt.get_cmap` is used because `matplotlib.cm.get_cmap` is deprecated
    # and no longer available in recent Matplotlib releases.
    ax.barh(
        pos,
        importance_values,
        align="center",
        color=plt.get_cmap("tab20c")(0),
        tick_label=param_names,
    )

    renderer = fig.canvas.get_renderer()
    for idx, val in enumerate(importance_values):
        label = f" {val:.2f}" if val >= 0.01 else " <0.01"
        text = ax.text(val, idx, label, va="center")

        # Sometimes horizontal axis needs to be re-scaled
        # to avoid text going over plot area.
        bbox = text.get_window_extent(renderer)
        bbox = bbox.transformed(ax.transData.inverted())
        _, plot_xmax = ax.get_xlim()
        bbox_xmax = bbox.xmax

        if bbox_xmax > plot_xmax:
            ax.set_xlim(xmax=AXES_PADDING_RATIO * bbox_xmax)

    return ax
Ejemplo n.º 10
0
def _get_contour_plot(info: _ContourInfo) -> "Axes":
    """Draw a contour plot, or a grid of contour plots, from precomputed info.

    Args:
        info: Container providing ``sorted_params``, ``sub_plot_infos``,
            ``reverse_scale`` and ``target_name``.

    Returns:
        A single empty axes for fewer than two parameters, a single axes for
        exactly two parameters, otherwise the square grid of axes returned by
        ``plt.subplots``.
    """

    sorted_params = info.sorted_params
    sub_plot_infos = info.sub_plot_infos
    reverse_scale = info.reverse_scale
    target_name = info.target_name

    n_params = len(sorted_params)
    if n_params <= 1:
        # Nothing to contour against; hand back an empty axes.
        _, empty_ax = plt.subplots()
        return empty_ax

    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.

    if n_params == 2:
        # Set up the graph style.
        fig, axs = plt.subplots()
        axs.set_title("Contour Plot")
        cmap = _set_cmap(reverse_scale)

        contour = _generate_contour_subplot(sub_plot_infos[0][0], axs, cmap)
        if isinstance(contour, ContourSet):
            colorbar = fig.colorbar(contour)
            colorbar.set_label(target_name)
        return axs

    # Set up the graph style.
    fig, axs = plt.subplots(n_params, n_params)
    fig.suptitle("Contour Plot")
    cmap = _set_cmap(reverse_scale)

    # Prepare data and draw contour plots, column by column.
    contour_sets = []
    for col in range(n_params):
        for row in range(n_params):
            contour = _generate_contour_subplot(sub_plot_infos[row][col], axs[row, col], cmap)
            if isinstance(contour, ContourSet):
                contour_sets.append(contour)

    # The first contour set produced feeds the shared colorbar.
    if contour_sets:
        colorbar = fig.colorbar(contour_sets[0], ax=axs)
        colorbar.set_label(target_name)

    return axs
Ejemplo n.º 11
0
def _get_slice_plot(info: _SlicePlotInfo) -> "Axes":
    """Draw one scatter subplot per entry of ``info.subplots``.

    Args:
        info: Container providing ``subplots`` and ``target_name``.

    Returns:
        An empty axes when there are no subplots, a single axes for one
        subplot, otherwise the row of axes returned by ``plt.subplots``.
    """

    n_subplots = len(info.subplots)
    if n_subplots == 0:
        _, empty_ax = plt.subplots()
        return empty_ax

    # Set up the graph style.
    cmap = plt.get_cmap("Blues")
    padding_ratio = 0.05
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.

    if n_subplots == 1:
        # Set up the graph style.
        fig, axs = plt.subplots()
        axs.set_title("Slice Plot")

        # Draw a scatter plot.
        sc = _generate_slice_subplot(
            info.subplots[0], axs, cmap, padding_ratio, info.target_name
        )
    else:
        # Set up the graph style.
        # Ensure that each subplot has a minimum width without relying on auto-sizing.
        default_figsize = matplotlib.rcParams["figure.figsize"]
        subplot_width = default_figsize[0] / 2
        fig_height = default_figsize[1]
        fig, axs = plt.subplots(
            1,
            n_subplots,
            sharey=True,
            figsize=(subplot_width * n_subplots, fig_height),
        )
        fig.suptitle("Slice Plot")

        # Draw scatter plots; the last one drawn feeds the colorbar below.
        for position, subplot in enumerate(info.subplots):
            sc = _generate_slice_subplot(
                subplot, axs[position], cmap, padding_ratio, info.target_name
            )

    colorbar = fig.colorbar(sc, ax=axs)
    colorbar.set_label("Trial")

    return axs
Ejemplo n.º 12
0
def _get_param_importance_plot(
    study: Study,
    evaluator: Optional[BaseImportanceEvaluator] = None,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw hyperparameter importances as horizontal bars with a legend.

    Args:
        study: Study to evaluate; also passed to ``_get_color`` for each bar.
        evaluator: Forwarded to ``optuna.importance.get_param_importances``.
        params: Forwarded to ``optuna.importance.get_param_importances``.
        target: Forwarded to ``optuna.importance.get_param_importances``.
        target_name: Name used in the x-axis label.

    Returns:
        The axes holding the bars; it carries no data when the study has no
        completed trial.
    """

    # Set up the graph style.
    # The style sheet must be selected before the axes is created, otherwise
    # the new axes is built with the previously active settings.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    _, ax = plt.subplots()
    ax.set_title("Hyperparameter Importances")
    ax.set_xlabel(f"Importance for {target_name}")
    ax.set_ylabel("Hyperparameter")

    # Prepare data for plotting.
    # Importances cannot be evaluated without completed trials.
    # Return an empty figure for consistency with other visualization functions.
    if not any(trial.state == TrialState.COMPLETE for trial in study.trials):
        _logger.warning("Study instance does not contain completed trials.")
        return ax

    importances = optuna.importance.get_param_importances(
        study, evaluator=evaluator, params=params, target=target
    )

    # Reverse the returned order so its first entry ends up as the top bar.
    importances = OrderedDict(reversed(list(importances.items())))
    importance_values = list(importances.values())
    param_names = list(importances.keys())
    pos = np.arange(len(param_names))

    # Draw horizontal bars.
    ax.barh(
        pos,
        importance_values,
        align="center",
        color=[_get_color(param_name, study) for param_name in param_names],
        tick_label=param_names,
    )
    ax.legend(handles=_legend_elements, title="Distributions", loc="lower right")
    return ax
Ejemplo n.º 13
0
def _get_intermediate_plot(study: Study) -> "Axes":
    """Plot the intermediate values of a study's trials, one line per trial.

    Args:
        study: Study whose pruned, complete and running trials are considered.

    Returns:
        The axes holding one line per considered trial that has intermediate
        values; it carries no data when there is no such trial.
    """

    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    _, ax = plt.subplots(tight_layout=True)
    ax.set_title("Intermediate Values Plot")
    ax.set_xlabel("Step")
    ax.set_ylabel("Intermediate Value")
    cmap = plt.get_cmap("tab20")  # Use tab20 colormap for multiple line plots.

    # Prepare data for plotting.
    plotted_states = [TrialState.PRUNED, TrialState.COMPLETE, TrialState.RUNNING]
    trials = [t for t in study.trials if t.state in plotted_states]

    if not trials:
        _logger.warning("Study instance does not contain trials.")
        return ax

    # Draw multiple line plots.
    drew_any_line = False
    for index, trial in enumerate(trials):
        if not trial.intermediate_values:
            continue
        points = sorted(trial.intermediate_values.items())
        ax.plot(
            tuple(step for step, _ in points),
            tuple(value for _, value in points),
            color=cmap(index),
            alpha=0.7,
            label=f"Trial{trial.number}",
        )
        drew_any_line = True

    if not drew_any_line:
        _logger.warning(
            "You need to set up the pruning feature to utilize `plot_intermediate_values()`"
        )
        return ax

    # The legend is gated on the number of considered trials, not on the
    # number of lines actually drawn.
    if len(trials) >= 2:
        ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)

    return ax
Ejemplo n.º 14
0
def _get_importances_plot(info: _ImportancesInfo) -> "Axes":
    """Draw precomputed hyperparameter importances as labelled horizontal bars.

    Args:
        info: Container providing ``target_name``, ``param_names``,
            ``importance_values`` and ``importance_labels``.

    Returns:
        The axes holding the bars; it carries no data when
        ``info.importance_values`` is empty.
    """
    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    fig, ax = plt.subplots()
    ax.set_title("Hyperparameter Importances")
    ax.set_xlabel(f"Importance for {info.target_name}")
    ax.set_ylabel("Hyperparameter")

    param_names = info.param_names
    pos = np.arange(len(param_names))
    importance_values = info.importance_values

    if len(importance_values) == 0:
        return ax

    # Draw horizontal bars.
    # `plt.get_cmap` is used because `matplotlib.cm.get_cmap` is deprecated
    # and no longer available in recent Matplotlib releases.
    ax.barh(
        pos,
        importance_values,
        align="center",
        color=plt.get_cmap("tab20c")(0),
        tick_label=param_names,
    )

    renderer = fig.canvas.get_renderer()
    for idx, (val, label) in enumerate(zip(importance_values, info.importance_labels)):
        text = ax.text(val, idx, label, va="center")

        # Sometimes horizontal axis needs to be re-scaled
        # to avoid text going over plot area.
        bbox = text.get_window_extent(renderer)
        bbox = bbox.transformed(ax.transData.inverted())
        _, plot_xmax = ax.get_xlim()
        bbox_xmax = bbox.xmax

        if bbox_xmax > plot_xmax:
            ax.set_xlim(xmax=AXES_PADDING_RATIO * bbox_xmax)

    return ax
Ejemplo n.º 15
0
def _get_pareto_front_2d(
    study: Study,
    target_names: Optional[List[str]],
    include_dominated_trials: bool = False,
    axis_order: Optional[List[int]] = None,
) -> "Axes":
    """Scatter the Pareto front of a two-objective study on a new axes.

    Args:
        study: Study whose ``best_trials`` are plotted.
        target_names: Two axis names; defaults to ``"Objective 0"`` and
            ``"Objective 1"`` when ``None``.
        include_dominated_trials: Whether to also plot the trials returned by
            ``_get_non_pareto_front_trials`` and to show a legend.
        axis_order: Permutation of ``[0, 1]`` selecting which objective goes on
            which axis; defaults to ``[0, 1]``.

    Returns:
        The axes holding the scatter plots.

    Raises:
        ValueError: If ``target_names`` does not have exactly two entries, or
            ``axis_order`` is not a permutation of ``[0, 1]``.
    """

    # Validate the arguments first so that an invalid call does not leave an
    # empty figure behind.
    if target_names is None:
        target_names = ["Objective 0", "Objective 1"]
    elif len(target_names) != 2:
        raise ValueError("The length of `target_names` is supposed to be 2.")

    if axis_order is None:
        axis_order = list(range(2))
    else:
        if len(axis_order) != 2:
            raise ValueError(
                f"Size of `axis_order` {axis_order}. Expect: 2, Actual: {len(axis_order)}."
            )
        if len(set(axis_order)) != 2:
            raise ValueError(f"Elements of given `axis_order` {axis_order} are not unique!")
        if max(axis_order) > 1:
            raise ValueError(
                f"Given `axis_order` {axis_order} contains invalid index {max(axis_order)} "
                "higher than 1."
            )
        if min(axis_order) < 0:
            raise ValueError(
                f"Given `axis_order` {axis_order} contains invalid index {min(axis_order)} "
                "lower than 0."
            )
    x_index, y_index = axis_order[0], axis_order[1]

    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    _, ax = plt.subplots()
    ax.set_title("Pareto-front Plot")
    cmap = plt.get_cmap("tab10")  # Use tab10 colormap for similar outputs to plotly.

    # Prepare data for plotting.
    # Read `best_trials` once: every later step works on this snapshot, and the
    # returned list is never mutated.
    best_trials = study.best_trials
    if len(best_trials) == 0:
        _logger.warning("Your study does not have any completed trials.")

    dominated_trials: List[FrozenTrial] = []
    if include_dominated_trials:
        dominated_trials = list(_get_non_pareto_front_trials(study, best_trials))

    ax.set_xlabel(target_names[x_index])
    ax.set_ylabel(target_names[y_index])

    if len(dominated_trials) != 0:
        ax.scatter(
            x=[t.values[x_index] for t in dominated_trials],
            y=[t.values[y_index] for t in dominated_trials],
            color=cmap(0),
            label="Trial",
        )
    if len(best_trials) != 0:
        ax.scatter(
            x=[t.values[x_index] for t in best_trials],
            y=[t.values[y_index] for t in best_trials],
            color=cmap(3),
            label="Best Trial",
        )

    if include_dominated_trials and ax.has_data():
        ax.legend()

    return ax
Ejemplo n.º 16
0
def _get_contour_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw a contour plot, or a grid of contour plots, for a study.

    Args:
        study: Study whose completed trials are plotted.
        params: Names of the parameters to plot. When ``None``, every parameter
            found in the plotted trials is used. Repeated names are ignored.
        target: Passed to ``_filter_nonfinite``, ``_set_cmap`` and
            ``_generate_contour_subplot``.
        target_name: Label of the colorbar.

    Returns:
        A single empty axes when there is no plottable trial or fewer than two
        distinct parameters, a single axes for exactly two parameters,
        otherwise the square grid of axes returned by ``plt.subplots``.

    Raises:
        ValueError: If two or more names are given in ``params`` and one of
            them does not appear in the plotted trials.
    """

    # Calculate basic numbers for plotting.
    trials = _filter_nonfinite(
        study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)), target=target
    )

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        _, ax = plt.subplots()
        return ax

    all_params = {p_name for t in trials for p_name in t.params.keys()}

    if params is None:
        requested_params: Optional[List[str]] = None
        sorted_params = sorted(all_params)
    else:
        if len(params) > 1:
            for input_p_name in params:
                if input_p_name not in all_params:
                    raise ValueError(
                        "Parameter {} does not exist in your study.".format(input_p_name)
                    )
        # Drop repeated names while keeping the caller's order, so a duplicate
        # can neither shrink the grid unexpectedly nor pair a parameter with itself.
        requested_params = list(dict.fromkeys(params))
        sorted_params = sorted(requested_params)
    n_params = len(sorted_params)

    # A contour needs two distinct parameters; without them return an empty
    # axes instead of failing later on the subplot grid.
    if n_params <= 1:
        _logger.warning("The length of params must be greater than 1.")
        _, ax = plt.subplots()
        return ax

    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    if n_params == 2:
        # Set up the graph style.
        fig, axs = plt.subplots()
        axs.set_title("Contour Plot")
        cmap = _set_cmap(study, target)
        contour_point_num = 100

        # Prepare data and draw contour plots.
        # Explicitly requested parameters keep the caller's x/y order.
        if requested_params is not None:
            x_param, y_param = requested_params
        else:
            x_param, y_param = sorted_params
        cs = _generate_contour_subplot(
            trials, x_param, y_param, axs, cmap, contour_point_num, target
        )
        if isinstance(cs, ContourSet):
            axcb = fig.colorbar(cs)
            axcb.set_label(target_name)
    else:
        # Set up the graph style.
        fig, axs = plt.subplots(n_params, n_params)
        fig.suptitle("Contour Plot")
        cmap = _set_cmap(study, target)
        contour_point_num = 100

        # Prepare data and draw contour plots.
        cs_list = []
        for x_i, x_param in enumerate(sorted_params):
            for y_i, y_param in enumerate(sorted_params):
                ax = axs[y_i, x_i]
                cs = _generate_contour_subplot(
                    trials, x_param, y_param, ax, cmap, contour_point_num, target
                )
                if isinstance(cs, ContourSet):
                    cs_list.append(cs)
        # The first contour set produced feeds the shared colorbar.
        if cs_list:
            axcb = fig.colorbar(cs_list[0], ax=axs)
            axcb.set_label(target_name)

    return axs
Ejemplo n.º 17
0
def _get_parallel_coordinate_plot(info: _ParallelCoordinateInfo) -> "Axes":
    """Draw a parallel coordinate plot from precomputed dimension info.

    Each trial becomes one polyline that starts at its objective value and
    visits every parameter dimension in turn. Parameter values are rescaled
    into the objective's range so that all vertices can be drawn on the
    primary axes; one twin y-axis per parameter shows that parameter's own
    scale.

    Args:
        info: Container providing ``reverse_scale``, ``target_name``,
            ``dim_objective`` and ``dims_params``.

    Returns:
        The primary axes. It carries only the title when there are no
        parameter dimensions or no objective values.
    """

    reversescale = info.reverse_scale
    target_name = info.target_name

    # Set up the graph style.
    fig, ax = plt.subplots()
    cmap = plt.get_cmap("Blues_r" if reversescale else "Blues")
    ax.set_title("Parallel Coordinate Plot")
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)

    # Prepare data for plotting.
    if len(info.dims_params) == 0 or len(info.dim_objective.values) == 0:
        return ax

    obj_min = info.dim_objective.range[0]
    obj_max = info.dim_objective.range[1]
    obj_w = obj_max - obj_min
    # One list of y-coordinates per trial, seeded with the objective value.
    dims_obj_base = [[o] for o in info.dim_objective.values]
    for dim in info.dims_params:
        p_min = dim.range[0]
        p_max = dim.range[1]
        p_w = p_max - p_min

        if p_w == 0.0:
            # A zero-width range cannot be rescaled; put every point at the
            # middle of the objective range instead.
            center = obj_w / 2 + obj_min
            for i in range(len(dim.values)):
                dims_obj_base[i].append(center)
        else:
            # Map the parameter range linearly onto the objective range.
            for i, v in enumerate(dim.values):
                dims_obj_base[i].append((v - p_min) / p_w * obj_w + obj_min)

    # Draw multiple line plots and axes.
    # Ref: https://stackoverflow.com/a/50029441
    n_params = len(info.dims_params)
    ax.set_xlim(0, n_params)
    ax.set_ylim(info.dim_objective.range[0], info.dim_objective.range[1])
    # x positions 0..n_params: the objective at 0, then one per parameter.
    xs = [range(n_params + 1) for _ in range(len(dims_obj_base))]
    segments = [np.column_stack([x, y]) for x, y in zip(xs, dims_obj_base)]
    lc = LineCollection(segments, cmap=cmap)
    # Color each polyline by its objective value.
    lc.set_array(np.asarray(info.dim_objective.values))
    axcb = fig.colorbar(lc, pad=0.1)
    axcb.set_label(target_name)
    var_names = [info.dim_objective.label
                 ] + [dim.label for dim in info.dims_params]
    # NOTE(review): `plt.xticks` acts on pyplot's current axes, which is
    # presumably still `ax` at this point - confirm before reordering.
    plt.xticks(range(n_params + 1), var_names, rotation=330)

    # Add one twin y-axis per parameter, showing that parameter's own scale.
    for i, dim in enumerate(info.dims_params):
        ax2 = ax.twinx()
        if dim.is_log:
            # NOTE(review): presumably `dim.range` holds base-10 exponents for
            # log dimensions - confirm against the code that builds `info`.
            ax2.set_ylim(np.power(10, dim.range[0]),
                         np.power(10, dim.range[1]))
            ax2.set_yscale("log")
        else:
            ax2.set_ylim(dim.range[0], dim.range[1])
        ax2.spines["top"].set_visible(False)
        ax2.spines["bottom"].set_visible(False)
        ax2.xaxis.set_visible(False)
        # Place the spine at this parameter's x position (axes fraction).
        ax2.spines["right"].set_position(("axes", (i + 1) / n_params))
        if dim.is_cat:
            ax2.set_yticks(dim.tickvals)
            ax2.set_yticklabels(dim.ticktext)

    ax.add_collection(lc)

    return ax
Ejemplo n.º 18
0
def plot_edf(
    study: Union[Study, Sequence[Study]],
    *,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw the empirical distribution function (EDF) of objective values with Matplotlib.

    Only complete trials contribute to the EDF.

    .. seealso::
        :func:`optuna.visualization.plot_edf` shows an example; this function
        can be used as a drop-in replacement there.

    .. note::

        The generated legend can be restyled as described in
        `matplotlib.pyplot.legend
        <https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.legend.html>`_.

    Example:

        The snippet below plots the EDFs of three studies.

        .. plot::

            import math

            import optuna


            def ackley(x, y):
                a = 20 * math.exp(-0.2 * math.sqrt(0.5 * (x ** 2 + y ** 2)))
                b = math.exp(0.5 * (math.cos(2 * math.pi * x) + math.cos(2 * math.pi * y)))
                return -a - b + math.e + 20


            def objective(trial, low, high):
                x = trial.suggest_float("x", low, high)
                y = trial.suggest_float("y", low, high)
                return ackley(x, y)


            sampler = optuna.samplers.RandomSampler(seed=10)

            # Widest search space.
            study0 = optuna.create_study(study_name="x=[0,5), y=[0,5)", sampler=sampler)
            study0.optimize(lambda t: objective(t, 0, 5), n_trials=500)

            # Narrower search space.
            study1 = optuna.create_study(study_name="x=[0,4), y=[0,4)", sampler=sampler)
            study1.optimize(lambda t: objective(t, 0, 4), n_trials=500)

            # Narrowest search space but it doesn't include the global optimum point.
            study2 = optuna.create_study(study_name="x=[1,3), y=[1,3)", sampler=sampler)
            study2.optimize(lambda t: objective(t, 1, 3), n_trials=500)

            optuna.visualization.matplotlib.plot_edf([study0, study1, study2])

    Args:
        study:
            A :class:`~optuna.study.Study` object, or a sequence of studies
            whose EDFs should be compared in one figure.
        target:
            A function returning the value to display for a trial. If it is
            :obj:`None` and ``study`` is a single-objective study, the
            objective values are plotted.

            .. note::
                This argument must be given for multi-objective studies.
        target_name:
            Name of the target, used as the x-axis label.

    Returns:
        A :class:`matplotlib.axes.Axes` object.
    """

    _imports.check()

    # The ggplot style sheet keeps the output visually close to the plotly
    # backend.
    plt.style.use("ggplot")
    _, ax = plt.subplots()
    ax.set_title("Empirical Distribution Function Plot")
    ax.set_xlabel(target_name)
    ax.set_ylabel("Cumulative Probability")
    ax.set_ylim(0, 1)
    # tab20 provides distinct colors when several studies are drawn.
    palette = plt.get_cmap("tab20")

    info = _get_edf_info(study, target, target_name)
    edf_lines = info.lines
    n_lines = len(edf_lines)

    # Nothing to draw: hand back the titled but empty axes.
    if n_lines == 0:
        return ax

    for color_index, (study_name, y_values) in enumerate(edf_lines):
        ax.plot(
            info.x_values,
            y_values,
            color=palette(color_index),
            alpha=0.7,
            label=study_name,
        )

    # A legend is only useful when there is more than one curve.
    if n_lines >= 2:
        ax.legend()

    return ax
Ejemplo n.º 19
0
def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw a parallel coordinate plot of the completed trials of ``study``.

    The first axis shows the target value and each following axis shows one
    parameter. Parameter values are rescaled onto the target range so that a
    single collection of line segments can be drawn on the base axes, while a
    twin axis per parameter carries that parameter's own scale. Log-scale
    parameters are positioned by ``log10`` of their value; categorical
    parameters are positioned by the index of their first appearance.

    Args:
        study:
            Study whose ``COMPLETE`` trials are plotted.
        params:
            Names of the parameters to draw. ``None`` draws every parameter
            found in the completed trials.
        target:
            Function mapping a trial to the plotted value. When ``None`` the
            trial's ``value`` is used and the colormap is reversed only for
            minimization studies; with a custom target it is always reversed.
        target_name:
            Label of the target axis and of the colorbar.

    Returns:
        The base axes of the figure. If there is no completed trial, the
        axes are returned with only the title set.

    Raises:
        ValueError:
            If a name in ``params`` is not a parameter of any completed trial.
    """

    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target
        reversescale = study.direction == StudyDirection.MINIMIZE
    else:
        reversescale = True

    # Set up the graph style.
    fig, ax = plt.subplots()
    cmap = plt.get_cmap("Blues_r" if reversescale else "Blues")
    ax.set_title("Parallel Coordinate Plot")
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)

    # Prepare data for plotting.
    trials = [
        trial for trial in study.trials if trial.state == TrialState.COMPLETE
    ]

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        return ax

    all_params = {p_name for t in trials for p_name in t.params.keys()}
    if params is not None:
        for input_p_name in params:
            if input_p_name not in all_params:
                raise ValueError(
                    "Parameter {} does not exist in your study.".format(
                        input_p_name))
        all_params = set(params)
    sorted_params = sorted(all_params)
    n_params = len(sorted_params)

    obj_org = [target(t) for t in trials]
    obj_min = min(obj_org)
    obj_max = max(obj_org)
    obj_w = obj_max - obj_min
    # One polyline per trial; each starts at the trial's target value.
    dims_obj_base = [[o] for o in obj_org]

    cat_param_names = []
    cat_param_values = []
    cat_param_ticks = []
    param_values = []
    var_names = [target_name]
    for p_name in sorted_params:
        # Trials that lack the parameter contribute NaN.
        values = [t.params.get(p_name, np.nan) for t in trials]

        if _is_log_scale(trials, p_name):
            # Lines are positioned in log space; the twin axis keeps the raw
            # values and is switched to a log scale below.
            values_for_lc = [math.log10(v) for v in values]
        elif _is_categorical(trials, p_name):
            # Map each category to the index of its first appearance.
            vocab = defaultdict(
                lambda: len(vocab))  # type: DefaultDict[str, int]
            values = [vocab[v] for v in values]
            cat_param_names.append(p_name)
            vocab_item_sorted = sorted(vocab.items(), key=lambda x: x[1])
            cat_param_values.append([v[0] for v in vocab_item_sorted])
            cat_param_ticks.append([v[1] for v in vocab_item_sorted])
            values_for_lc = values
        else:
            values_for_lc = values

        p_min = min(values_for_lc)
        p_max = max(values_for_lc)
        p_w = p_max - p_min

        if p_w == 0:
            # A parameter with a single distinct value has zero width, so the
            # rescaling below would divide by zero. Place its points at the
            # middle of the target range instead.
            center = obj_w / 2 + obj_min
            for line in dims_obj_base:
                line.append(center)
        else:
            for line, v in zip(dims_obj_base, values_for_lc):
                line.append((v - p_min) / p_w * obj_w + obj_min)

        # Long parameter names are truncated to keep tick labels readable.
        var_names.append(
            p_name if len(p_name) < 20 else "{}...".format(p_name[:17]))
        param_values.append(values)

    # Draw multiple line plots and axes.
    # Ref: https://stackoverflow.com/a/50029441
    ax.set_xlim(0, n_params)
    ax.set_ylim(obj_min, obj_max)
    xs = [range(n_params + 1) for _ in range(len(dims_obj_base))]
    segments = [np.column_stack([x, y]) for x, y in zip(xs, dims_obj_base)]
    lc = LineCollection(segments, cmap=cmap)
    # Color every line by its own target value: exactly one entry per segment,
    # without a padding entry that would stretch the color scale towards 0.
    lc.set_array(np.asarray(obj_org))
    axcb = fig.colorbar(lc, pad=0.1)
    axcb.set_label(target_name)
    plt.xticks(range(n_params + 1), var_names, rotation=330)

    for i, p_name in enumerate(sorted_params):
        # Each parameter gets its own y-axis, shifted to its x position.
        ax2 = ax.twinx()
        ax2.set_ylim(min(param_values[i]), max(param_values[i]))
        if _is_log_scale(trials, p_name):
            ax2.set_yscale("log")
        ax2.spines["top"].set_visible(False)
        ax2.spines["bottom"].set_visible(False)
        ax2.get_xaxis().set_visible(False)
        ax2.plot([1] * len(param_values[i]), param_values[i], visible=False)
        ax2.spines["right"].set_position(("axes", (i + 1) / n_params))
        if p_name in cat_param_names:
            idx = cat_param_names.index(p_name)
            tick_pos = cat_param_ticks[idx]
            tick_labels = cat_param_values[idx]
            ax2.set_yticks(tick_pos)
            ax2.set_yticklabels(tick_labels)

    ax.add_collection(lc)

    return ax
Ejemplo n.º 20
0
def _get_slice_plot(study: Study,
                    params: Optional[List[str]] = None) -> "Axes":
    """Build the Matplotlib slice plot for ``study``.

    One scatter subplot is drawn per parameter from the study's ``COMPLETE``
    trials, and a colorbar labelled ``#Trials`` is attached to the figure.

    Args:
        study:
            Study whose completed trials are plotted.
        params:
            Names of the parameters to plot. ``None`` plots every parameter
            found in the completed trials.

    Returns:
        Whatever ``plt.subplots`` produced for the axes: the result of
        ``plt.subplots()`` for a single parameter, or of
        ``plt.subplots(1, n_params, ...)`` otherwise. When there is no
        completed trial, a fresh empty axes is returned after a warning.

    Raises:
        ValueError:
            If a name in ``params`` is not a parameter of any completed trial.
    """

    # Only completed trials are plotted.
    completed = [t for t in study.trials if t.state == TrialState.COMPLETE]

    if not completed:
        _logger.warning("Your study does not have any completed trials.")
        _, empty_ax = plt.subplots()
        return empty_ax

    known_names = {name for t in completed for name in t.params}
    if params is not None:
        for requested in params:
            if requested not in known_names:
                raise ValueError(
                    "Parameter {} does not exist in your study.".format(
                        requested))
        plotted_names = sorted(set(params))
    else:
        plotted_names = sorted(known_names)

    n_params = len(plotted_names)

    # Style settings shared by every subplot.
    cmap = plt.get_cmap("Blues")
    padding_ratio = 0.05
    # The ggplot style sheet keeps the output close to the plotly backend.
    plt.style.use("ggplot")

    obj_values = [t.value for t in completed]

    if n_params == 1:
        fig, axs = plt.subplots()
        axs.set_title("Slice Plot")
        panels = [axs]
    else:
        # Give each subplot half of the default figure width so the panels do
        # not depend on automatic sizing.
        default_size = matplotlib.rcParams["figure.figsize"]
        half_width = default_size[0] / 2
        full_height = default_size[1]
        fig, axs = plt.subplots(
            1,
            n_params,
            sharey=True,
            figsize=(half_width * n_params, full_height),
        )
        fig.suptitle("Slice Plot")
        panels = axs

    # Draw one scatter plot per parameter; the last one feeds the colorbar.
    for name, panel in zip(plotted_names, panels):
        sc = _generate_slice_subplot(
            completed,
            name,
            panel,
            cmap,
            padding_ratio,
            obj_values  # type: ignore
        )

    axcb = fig.colorbar(sc, ax=axs)
    axcb.set_label("#Trials")

    return axs
Ejemplo n.º 21
0
def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw a parallel coordinate plot of the completed trials of ``study``.

    The leftmost axis carries the target value and every other axis carries
    one parameter. Trials reported by ``_get_skipped_trial_numbers`` are left
    out. Parameter values are rescaled onto the target range so all polylines
    live on the base axes; a twin axis per parameter shows the parameter's own
    scale.

    Args:
        study:
            Study whose ``COMPLETE`` trials are plotted.
        params:
            Names of the parameters to draw. ``None`` draws every parameter
            found in the completed trials.
        target:
            Function mapping a trial to the plotted value. When ``None`` the
            trial's ``value`` is used and the colormap is reversed only for
            minimization studies; with a custom target it is always reversed.
        target_name:
            Label of the target axis and of the colorbar.

    Returns:
        The base axes. They carry only the title when there is no completed
        trial or when every completed trial is skipped.

    Raises:
        ValueError:
            If a name in ``params`` is not a parameter of any completed trial.
    """

    reversescale = True
    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target
        reversescale = study.direction == StudyDirection.MINIMIZE

    # Set up the graph style.
    fig, ax = plt.subplots()
    cmap = plt.get_cmap("Blues_r" if reversescale else "Blues")
    ax.set_title("Parallel Coordinate Plot")
    for side in ("top", "bottom"):
        ax.spines[side].set_visible(False)

    # Prepare data for plotting.
    trials = [t for t in study.trials if t.state == TrialState.COMPLETE]
    if not trials:
        _logger.warning("Your study does not have any completed trials.")
        return ax

    all_params = {name for t in trials for name in t.params}
    if params is not None:
        for requested in params:
            if requested not in all_params:
                raise ValueError(
                    "Parameter {} does not exist in your study.".format(
                        requested))
        all_params = set(params)
    sorted_params = sorted(all_params)
    n_axes = len(sorted_params)

    skipped_trial_numbers = _get_skipped_trial_numbers(trials, sorted_params)
    kept_trials = [
        t for t in trials if t.number not in skipped_trial_numbers
    ]

    obj_org = [target(t) for t in kept_trials]
    if not obj_org:
        _logger.warning(
            "Your study has only completed trials with missing parameters.")
        return ax

    obj_min, obj_max = min(obj_org), max(obj_org)
    obj_w = obj_max - obj_min
    # One polyline per kept trial; each starts at the trial's target value.
    dims_obj_base = [[o] for o in obj_org]

    # Parameter name -> (tick positions, tick labels) for categorical axes.
    categorical_ticks = {}
    param_values = []
    var_names = [target_name]
    numeric_cat_params_indices: List[int] = []

    for param_index, p_name in enumerate(sorted_params):
        values = [t.params[p_name] for t in kept_trials]

        if _is_categorical(trials, p_name):
            # Each unseen category receives the next free integer index.
            vocab = defaultdict(
                lambda: len(vocab))  # type: DefaultDict[str, int]

            if _is_numerical(trials, p_name):
                # Register numeric categories in sorted order so that their
                # indices follow the numeric ordering.
                for v in sorted(values):
                    _ = vocab[v]
                numeric_cat_params_indices.append(param_index)

            values = [vocab[v] for v in values]

            by_index = sorted(vocab.items(), key=lambda item: item[1])
            categorical_ticks[p_name] = (
                [index for _, index in by_index],
                [category for category, _ in by_index],
            )

        # Log-scale parameters are positioned by log10 of their value.
        if _is_log_scale(trials, p_name):
            values_for_lc = [np.log10(v) for v in values]
        else:
            values_for_lc = values

        p_min = min(values_for_lc)
        p_max = max(values_for_lc)
        p_w = p_max - p_min

        if p_w == 0.0:
            # Zero-width parameter: put every point at the middle of the
            # target range instead of dividing by zero.
            center = obj_w / 2 + obj_min
            rescaled = [center for _ in values]
        else:
            rescaled = [
                (v - p_min) / p_w * obj_w + obj_min for v in values_for_lc
            ]
        for polyline, y in zip(dims_obj_base, rescaled):
            polyline.append(y)

        # Long parameter names are truncated to keep tick labels readable.
        if len(p_name) < 20:
            var_names.append(p_name)
        else:
            var_names.append("{}...".format(p_name[:17]))
        param_values.append(values)

    if numeric_cat_params_indices:
        # np.lexsort consumes its sort keys from back to front, so the keys
        # are passed in reversed order.
        sort_keys = [
            param_values[index] for index in numeric_cat_params_indices
        ]
        sorted_idx = np.lexsort(sort_keys[::-1])
        # Since the values are mapped to other categories by the index,
        # every value list is reordered by the sorted index of numeric params.
        param_values = [list(np.array(v)[sorted_idx]) for v in param_values]

    # Draw multiple line plots and axes.
    # Ref: https://stackoverflow.com/a/50029441
    x_positions = range(n_axes + 1)
    ax.set_xlim(0, n_axes)
    ax.set_ylim(obj_min, obj_max)
    segments = [
        np.column_stack([x_positions, ys]) for ys in dims_obj_base
    ]
    lc = LineCollection(segments, cmap=cmap)
    lc.set_array(np.asarray(obj_org))
    axcb = fig.colorbar(lc, pad=0.1)
    axcb.set_label(target_name)
    plt.xticks(x_positions, var_names, rotation=330)

    for position, (p_name, axis_values) in enumerate(
            zip(sorted_params, param_values), start=1):
        # Each parameter gets its own y-axis, shifted to its x position.
        ax2 = ax.twinx()
        ax2.set_ylim(min(axis_values), max(axis_values))
        if _is_log_scale(trials, p_name):
            ax2.set_yscale("log")
        ax2.spines["top"].set_visible(False)
        ax2.spines["bottom"].set_visible(False)
        ax2.xaxis.set_visible(False)
        ax2.plot([1] * len(axis_values), axis_values, visible=False)
        ax2.spines["right"].set_position(("axes", position / n_axes))
        if p_name in categorical_ticks:
            tick_pos, tick_labels = categorical_ticks[p_name]
            ax2.set_yticks(tick_pos)
            ax2.set_yticklabels(tick_labels)

    ax.add_collection(lc)

    return ax