Beispiel #1
0
def _get_contour_info(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> _ContourInfo:
    """Collect the data needed to draw a contour plot of ``study``.

    Args:
        study: Study whose ``COMPLETE`` trials are inspected.
        params: Names of the parameters to plot. When ``None``, every
            parameter found in the filtered trials is used.
        target: Optional callable forwarded unchanged to the filtering,
            sub-plot and reverse-scale helpers.
        target_name: Label stored in the returned info.

    Returns:
        A ``_ContourInfo`` carrying the sorted parameter names, the grid of
        sub-plot infos, the reverse-scale flag and ``target_name``. With
        exactly two parameters the grid is a single cell (x = first sorted
        name, y = second); otherwise it has one row per y parameter and one
        column per x parameter, diagonal included.

    Raises:
        ValueError: If ``params`` names a parameter that none of the filtered
            trials has. The check is skipped when no trial is left.
    """
    _check_plot_args(study, target, target_name)

    completed_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
    trials = _filter_nonfinite(completed_trials, target=target)

    known_params = {name for trial in trials for name in trial.params.keys()}

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        sorted_params = []
    elif params is not None:
        # Too few parameters only triggers a warning here; processing continues.
        if len(params) <= 1:
            _logger.warning("The length of params must be greater than 1.")

        for requested in params:
            if requested not in known_params:
                raise ValueError(
                    "Parameter {} does not exist in your study.".format(requested)
                )
        # Duplicated names are collapsed before sorting.
        sorted_params = sorted(set(params))
    else:
        sorted_params = sorted(known_params)

    sub_plot_infos: List[List[_SubContourInfo]]
    if len(sorted_params) == 2:
        x_param, y_param = sorted_params
        sub_plot_infos = [[_get_contour_subplot_info(trials, x_param, y_param, target)]]
    else:
        # Outer level walks the y parameters (rows), inner level the x parameters (columns).
        sub_plot_infos = [
            [
                _get_contour_subplot_info(trials, x_param, y_param, target)
                for x_param in sorted_params
            ]
            for y_param in sorted_params
        ]

    return _ContourInfo(
        sorted_params=sorted_params,
        sub_plot_infos=sub_plot_infos,
        reverse_scale=_is_reverse_scale(study, target),
        target_name=target_name,
    )
Beispiel #2
0
def test_generate_contour_plot_for_few_observations() -> None:
    """Check that no contour or scatter coordinates are produced for sparse data.

    The study is created with ``less_than_two=True`` and the sub-plot is
    generated once per axis order of ``param_a`` / ``param_b``.
    """
    study = prepare_study_with_trials(less_than_two=True)
    trials = study.trials
    reverse_scale = _is_reverse_scale(study, target=None)

    axis_orders = (
        ("param_a", "param_b"),  # `x_axis` has one observation.
        ("param_b", "param_a"),  # `y_axis` has one observation.
    )
    for x_param, y_param in axis_orders:
        contour, scatter = _generate_contour_subplot(trials, x_param, y_param, reverse_scale)
        assert contour.x is None and contour.y is None
        assert scatter.x is None and scatter.y is None
Beispiel #3
0
def _get_contour_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "go.Figure":
    """Build the Plotly contour figure for ``study``.

    Args:
        study: Study whose ``COMPLETE`` trials are plotted.
        params: Names of the parameters to plot. When ``None``, every
            parameter found in the filtered trials is used.
        target: Optional callable forwarded unchanged to the filtering,
            reverse-scale and sub-plot helpers.
        target_name: Label forwarded to the sub-plot helper.

    Returns:
        A figure titled "Contour Plot". It carries no traces (and a warning is
        logged) when no trial is left after filtering or when ``params`` has
        at most one entry. With exactly two parameters a single contour
        sub-plot is drawn; otherwise an n-by-n grid with shared axes is
        produced, whose diagonal cells hold an empty scatter trace.

    Raises:
        ValueError: If ``params`` names a parameter that none of the filtered
            trials has.
    """
    layout = go.Layout(title="Contour Plot")

    completed_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
    trials = _filter_nonfinite(completed_trials, target=target)

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        return go.Figure(data=[], layout=layout)

    known_params = {name for trial in trials for name in trial.params.keys()}
    if params is None:
        sorted_params = sorted(known_params)
    elif len(params) <= 1:
        _logger.warning("The length of params must be greater than 1.")
        return go.Figure(data=[], layout=layout)
    else:
        for requested in params:
            if requested not in known_params:
                raise ValueError(
                    "Parameter {} does not exist in your study.".format(requested)
                )
        sorted_params = sorted(set(params))

    # Each axis range is widened by 5% of its span on both ends.
    padding_ratio = 0.05
    param_values_range = {}
    for p_name in sorted_params:
        values = _get_param_values(trials, p_name)
        lower = min(values)
        upper = max(values)

        if _is_log_scale(trials, p_name):
            # Pad in log10 space, then map the bounds back to the linear domain.
            log_upper = math.log10(upper)
            log_lower = math.log10(lower)
            padding = (log_upper - log_lower) * padding_ratio
            lower = math.pow(10, log_lower - padding)
            upper = math.pow(10, log_upper + padding)
        elif _is_numerical(trials, p_name):
            padding = (upper - lower) * padding_ratio
            lower, upper = lower - padding, upper + padding
        elif version.parse(plotly.__version__) >= version.parse("4.12.0"):
            # Plotly>=4.12.0 draws contours using the indices of categorical variables instead
            # of raw values, so the range is derived from the number of distinct categories.
            # See https://github.com/optuna/optuna/issues/1967.
            span = len(set(values)) - 1
            padding = span * padding_ratio
            lower, upper = -padding, span + padding

        param_values_range[p_name] = (lower, upper)

    reverse_scale = _is_reverse_scale(study, target)

    if len(sorted_params) == 2:
        x_param, y_param = sorted_params
        sub_plots = _generate_contour_subplot(
            trials, x_param, y_param, reverse_scale, param_values_range, target, target_name
        )
        figure = go.Figure(data=sub_plots, layout=layout)
        figure.update_xaxes(title_text=x_param, range=param_values_range[x_param])
        figure.update_yaxes(title_text=y_param, range=param_values_range[y_param])

        if not _is_numerical(trials, x_param):
            figure.update_xaxes(type="category")
        if not _is_numerical(trials, y_param):
            figure.update_yaxes(type="category")

        # Log axes take their range in log10 units.
        if _is_log_scale(trials, x_param):
            figure.update_xaxes(
                range=list(map(math.log10, param_values_range[x_param])), type="log"
            )
        if _is_log_scale(trials, y_param):
            figure.update_yaxes(
                range=list(map(math.log10, param_values_range[y_param])), type="log"
            )
        return figure

    n_params = len(sorted_params)
    figure = make_subplots(rows=n_params, cols=n_params, shared_xaxes=True, shared_yaxes=True)
    figure.update_layout(layout)

    # Only the first contour trace displays the color bar.
    colorbar_pending = True
    # Grid positions are 1-based: x parameters select the column, y parameters the row.
    for col, x_param in enumerate(sorted_params, start=1):
        for row, y_param in enumerate(sorted_params, start=1):
            if x_param == y_param:
                figure.add_trace(go.Scatter(), row=row, col=col)
            else:
                sub_plots = _generate_contour_subplot(
                    trials,
                    x_param,
                    y_param,
                    reverse_scale,
                    param_values_range,
                    target,
                    target_name,
                )
                contour = sub_plots[0]
                scatter = sub_plots[1]
                contour.update(showscale=colorbar_pending)  # showscale's default is True
                colorbar_pending = False
                figure.add_trace(contour, row=row, col=col)
                figure.add_trace(scatter, row=row, col=col)

            figure.update_xaxes(range=param_values_range[x_param], row=row, col=col)
            figure.update_yaxes(range=param_values_range[y_param], row=row, col=col)

            if not _is_numerical(trials, x_param):
                figure.update_xaxes(type="category", row=row, col=col)
            if not _is_numerical(trials, y_param):
                figure.update_yaxes(type="category", row=row, col=col)

            if _is_log_scale(trials, x_param):
                figure.update_xaxes(
                    range=list(map(math.log10, param_values_range[x_param])),
                    type="log",
                    row=row,
                    col=col,
                )
            if _is_log_scale(trials, y_param):
                figure.update_yaxes(
                    range=list(map(math.log10, param_values_range[y_param])),
                    type="log",
                    row=row,
                    col=col,
                )

            # Axis titles appear only on the first column and the last row.
            if col == 1:
                figure.update_yaxes(title_text=y_param, row=row, col=col)
            if row == n_params:
                figure.update_xaxes(title_text=x_param, row=row, col=col)

    return figure
def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "go.Figure":
    """Build the Plotly parallel-coordinate figure for ``study``.

    Args:
        study: Study whose ``COMPLETE`` trials are plotted.
        params: Names of the parameters to plot. When ``None``, every
            parameter found in the filtered trials is used.
        target: Callable giving the value shown on the first axis and used for
            line coloring. When ``None``, ``trial.value`` is used.
        target_name: Label of the first axis and title of the color bar.

    Returns:
        A figure titled "Parallel Coordinate Plot" holding one ``Parcoords``
        trace. It carries no traces (and a warning is logged) when no trial is
        left after filtering or when every remaining trial is skipped.

    Raises:
        ValueError: If ``params`` names a parameter that none of the filtered
            trials has.
    """
    layout = go.Layout(title="Parallel Coordinate Plot")
    reverse_scale = _is_reverse_scale(study, target)

    completed_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
    trials = _filter_nonfinite(completed_trials, target=target)

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        return go.Figure(data=[], layout=layout)

    known_params = {name for trial in trials for name in trial.params.keys()}
    if params is None:
        sorted_params = sorted(known_params)
    else:
        for requested in params:
            if requested not in known_params:
                raise ValueError("Parameter {} does not exist in your study.".format(requested))
        sorted_params = sorted(set(params))

    if target is None:

        def _objective_value(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _objective_value

    skipped_numbers = _get_skipped_trial_numbers(trials, sorted_params)
    plotted_trials = [t for t in trials if t.number not in skipped_numbers]

    objectives = tuple(target(t) for t in plotted_trials)

    if len(objectives) == 0:
        _logger.warning("Your study has only completed trials with missing parameters.")
        return go.Figure(data=[], layout=layout)

    # The objective always occupies the first dimension; parameters follow from index 1.
    dims: List[Dict[str, Any]] = [
        {
            "label": target_name,
            "values": objectives,
            "range": (min(objectives), max(objectives)),
        }
    ]

    numeric_cat_params_indices: List[int] = []
    for dim_index, p_name in enumerate(sorted_params, start=1):
        values = [t.params[p_name] for t in plotted_trials if p_name in t.params]

        # Long names are cut to 17 characters plus an ellipsis.
        label = p_name if len(p_name) < 20 else "{}...".format(p_name[:17])

        tickvals = None
        ticktext = None
        if _is_log_scale(trials, p_name):
            # Log-scaled parameters are plotted on their log10 values.
            values = [math.log10(v) for v in values]
            min_value = min(values)
            max_value = max(values)
            # Ticks at the integer exponents inside the range, plus both end points.
            tickvals = list(range(math.ceil(min_value), math.ceil(max_value)))
            if min_value not in tickvals:
                tickvals.insert(0, min_value)
            if max_value not in tickvals:
                tickvals.append(max_value)
            ticktext = ["{:.3g}".format(math.pow(10, x)) for x in tickvals]
        elif _is_categorical(trials, p_name):
            # Each unseen category receives the next free integer index on first lookup.
            vocab: DefaultDict[str, int] = defaultdict(lambda: len(vocab))

            if _is_numerical(trials, p_name):
                # Register the categories in ascending order so indices follow numeric order.
                _ = [vocab[v] for v in sorted(values)]
                values = [vocab[v] for v in values]
                ticktext = sorted(vocab)
                numeric_cat_params_indices.append(dim_index)
            else:
                values = [vocab[v] for v in values]
                ticktext = sorted(vocab, key=vocab.__getitem__)
            tickvals = list(range(len(vocab)))

        dim: Dict[str, Any] = {
            "label": label,
            "values": tuple(values),
            "range": (min(values), max(values)),
        }
        if tickvals is not None:
            dim["tickvals"] = tickvals
            dim["ticktext"] = ticktext

        dims.append(dim)

    if numeric_cat_params_indices:
        # np.lexsort treats the last key as the primary one, so the key list is reversed to
        # make the first numeric categorical parameter the primary sort key.
        sort_keys = [dims[index]["values"] for index in numeric_cat_params_indices]
        idx = np.lexsort(sort_keys[::-1])
        for dim in dims:
            # Apply the same permutation to every dimension so each line keeps its values.
            dim["values"] = tuple(np.array(dim["values"])[idx])

    parcoords = go.Parcoords(
        dimensions=dims,
        labelangle=30,
        labelside="bottom",
        line={
            "color": dims[0]["values"],
            "colorscale": COLOR_SCALE,
            "colorbar": {"title": target_name},
            "showscale": True,
            "reversescale": reverse_scale,
        },
    )

    return go.Figure(data=[parcoords], layout=layout)
Beispiel #5
0
def _get_parallel_coordinate_info(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> _ParallelCoordinateInfo:
    """Collect the data needed to draw a parallel-coordinate plot of ``study``.

    Args:
        study: Study whose ``COMPLETE`` trials are inspected.
        params: Names of the parameters to include. When ``None``, every
            parameter found in the filtered trials is used.
        target: Callable giving the objective value of a trial. When ``None``,
            ``trial.value`` is used.
        target_name: Label of the objective dimension; also stored in the
            returned info.

    Returns:
        A ``_ParallelCoordinateInfo`` with the objective dimension, one
        dimension per parameter, the reverse-scale flag and ``target_name``.
        The parameter list is empty (and a warning is logged) when no trial is
        left after filtering or when every remaining trial is skipped.

    Raises:
        ValueError: If ``params`` names a parameter that none of the filtered
            trials has. This check runs before the empty-study check.
    """
    reverse_scale = _is_reverse_scale(study, target)

    completed_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
    trials = _filter_nonfinite(completed_trials, target=target)

    known_params = {name for trial in trials for name in trial.params.keys()}
    if params is None:
        sorted_params = sorted(known_params)
    else:
        for requested in params:
            if requested not in known_params:
                raise ValueError(
                    "Parameter {} does not exist in your study.".format(requested)
                )
        sorted_params = sorted(set(params))

    if target is None:

        def _objective_value(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _objective_value

    skipped_trial_numbers = _get_skipped_trial_numbers(trials, sorted_params)
    plotted_trials = [t for t in trials if t.number not in skipped_trial_numbers]

    objectives = tuple(target(t) for t in plotted_trials)
    if len(objectives) > 0:
        objective_range = (min(objectives), max(objectives))
    else:
        # The value of (0, 0) is a dummy range. It is ignored when we plot.
        objective_range = (0, 0)
    dim_objective = _DimensionInfo(
        label=target_name,
        values=objectives,
        range=objective_range,
        is_log=False,
        is_cat=False,
        tickvals=[],
        ticktext=[],
    )

    # Both degenerate cases return the objective dimension without any parameter dimension.
    empty_reason = None
    if len(trials) == 0:
        empty_reason = "Your study does not have any completed trials."
    elif len(objectives) == 0:
        empty_reason = "Your study has only completed trials with missing parameters."
    if empty_reason is not None:
        _logger.warning(empty_reason)
        return _ParallelCoordinateInfo(
            dim_objective=dim_objective,
            dims_params=[],
            reverse_scale=reverse_scale,
            target_name=target_name,
        )

    # Indices start at 1 because the objective dimension is placed in front before sorting.
    numeric_cat_params_indices: List[int] = []
    dims = []
    for dim_index, p_name in enumerate(sorted_params, start=1):
        values = [t.params[p_name] for t in plotted_trials if p_name in t.params]

        is_log = False
        is_cat = False
        tickvals = []
        ticktext = []
        if _is_log_scale(trials, p_name):
            is_log = True
            # Log-scaled parameters are stored as their log10 values.
            values = [math.log10(v) for v in values]
            min_value = min(values)
            max_value = max(values)
            # Ticks at the integer exponents inside the range, plus both end points.
            tickvals = list(range(math.ceil(min_value), math.ceil(max_value)))
            if min_value not in tickvals:
                tickvals.insert(0, min_value)
            if max_value not in tickvals:
                tickvals.append(max_value)
            ticktext = ["{:.3g}".format(math.pow(10, x)) for x in tickvals]
        elif _is_categorical(trials, p_name):
            is_cat = True
            # Each unseen category receives the next free integer index on first lookup.
            vocab: DefaultDict[str, int] = defaultdict(lambda: len(vocab))

            if _is_numerical(trials, p_name):
                # Register the categories in ascending order so indices follow numeric order.
                _ = [vocab[v] for v in sorted(values)]
                values = [vocab[v] for v in values]
                ticktext = sorted(vocab)
                numeric_cat_params_indices.append(dim_index)
            else:
                values = [vocab[v] for v in values]
                ticktext = sorted(vocab, key=vocab.__getitem__)
            tickvals = list(range(len(vocab)))

        dims.append(
            _DimensionInfo(
                label=_truncate_label(p_name),
                values=tuple(values),
                range=(min(values), max(values)),
                is_log=is_log,
                is_cat=is_cat,
                tickvals=tickvals,
                ticktext=ticktext,
            )
        )

    if numeric_cat_params_indices:
        all_dims = [dim_objective] + dims
        # np.lexsort treats the last key as the primary one, so the key list is reversed to
        # make the first numeric categorical parameter the primary sort key.
        sort_keys = [all_dims[index].values for index in numeric_cat_params_indices]
        idx = np.lexsort(sort_keys[::-1])
        # Apply the same permutation to every dimension so each line keeps its values.
        reordered = [
            _DimensionInfo(
                label=dim.label,
                values=tuple(np.array(dim.values)[idx]),
                range=dim.range,
                is_log=dim.is_log,
                is_cat=dim.is_cat,
                tickvals=dim.tickvals,
                ticktext=dim.ticktext,
            )
            for dim in all_dims
        ]
        dim_objective = reordered[0]
        dims = reordered[1:]

    return _ParallelCoordinateInfo(
        dim_objective=dim_objective,
        dims_params=dims,
        reverse_scale=reverse_scale,
        target_name=target_name,
    )