def _get_contour_info(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> _ContourInfo:

    _check_plot_args(study, target, target_name)

    trials = _filter_nonfinite(
        study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)), target=target
    )

    all_params = {p_name for t in trials for p_name in t.params.keys()}

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        sorted_params = []
    elif params is None:
        sorted_params = sorted(all_params)
    else:
        if len(params) <= 1:
            _logger.warning("The length of params must be greater than 1.")

        for input_p_name in params:
            if input_p_name not in all_params:
                raise ValueError("Parameter {} does not exist in your study.".format(input_p_name))
        sorted_params = sorted(set(params))

    sub_plot_infos: List[List[_SubContourInfo]]
    if len(sorted_params) == 2:
        x_param = sorted_params[0]
        y_param = sorted_params[1]
        sub_plot_info = _get_contour_subplot_info(trials, x_param, y_param, target)
        sub_plot_infos = [[sub_plot_info]]
    else:
        sub_plot_infos = []
        for i, y_param in enumerate(sorted_params):
            sub_plot_infos.append([])
            for x_param in sorted_params:
                sub_plot_info = _get_contour_subplot_info(trials, x_param, y_param, target)
                sub_plot_infos[i].append(sub_plot_info)

    reverse_scale = _is_reverse_scale(study, target)

    return _ContourInfo(
        sorted_params=sorted_params,
        sub_plot_infos=sub_plot_infos,
        reverse_scale=reverse_scale,
        target_name=target_name,
    )
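# Hedged usage sketch, not part of the original module: the helper name `_example_contour_info_usage`
# is hypothetical, and the objective/study setup is illustrative only. It assumes the function above
# lives in a module alongside `_ContourInfo` and shows how the returned info object is typically read
# (its fields follow the return statement above).
def _example_contour_info_usage() -> None:
    import optuna

    def objective(trial: "optuna.Trial") -> float:
        x = trial.suggest_float("x", -10, 10)
        y = trial.suggest_float("y", -10, 10)
        return x**2 + y

    study = optuna.create_study()
    study.optimize(objective, n_trials=20)

    info = _get_contour_info(study, params=["x", "y"])
    # sorted_params determines the subplot grid; sub_plot_infos holds per-parameter-pair data.
    print(info.sorted_params, len(info.sub_plot_infos), info.reverse_scale)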
def test_generate_contour_plot_for_few_observations() -> None:
    study = prepare_study_with_trials(less_than_two=True)
    trials = study.trials
    reverse_scale = _is_reverse_scale(study, target=None)

    # `x_axis` has one observation.
    params = ["param_a", "param_b"]
    contour, scatter = _generate_contour_subplot(trials, params[0], params[1], reverse_scale)
    assert contour.x is None and contour.y is None and scatter.x is None and scatter.y is None

    # `y_axis` has one observation.
    params = ["param_b", "param_a"]
    contour, scatter = _generate_contour_subplot(trials, params[0], params[1], reverse_scale)
    assert contour.x is None and contour.y is None and scatter.x is None and scatter.y is None
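# Hedged illustration, an assumption rather than the real fixture: `prepare_study_with_trials` is a
# test helper whose internals are not shown here. A study in which "param_a" appears in fewer than
# two completed trials might be built roughly like this (assumes Optuna >= 3.0 for FloatDistribution);
# such an axis cannot be contoured, which is what the test above asserts.
def _example_study_with_single_observation() -> "optuna.Study":
    import optuna

    study = optuna.create_study()
    # Only this trial defines "param_a", so that axis has a single observation.
    study.add_trial(
        optuna.trial.create_trial(
            params={"param_a": 1.0, "param_b": 2.0},
            distributions={
                "param_a": optuna.distributions.FloatDistribution(0.0, 3.0),
                "param_b": optuna.distributions.FloatDistribution(0.0, 3.0),
            },
            value=0.5,
        )
    )
    study.add_trial(
        optuna.trial.create_trial(
            params={"param_b": 0.0},
            distributions={"param_b": optuna.distributions.FloatDistribution(0.0, 3.0)},
            value=1.0,
        )
    )
    return study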
def _get_contour_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "go.Figure":

    layout = go.Layout(title="Contour Plot")

    trials = _filter_nonfinite(
        study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)), target=target
    )

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        return go.Figure(data=[], layout=layout)

    all_params = {p_name for t in trials for p_name in t.params.keys()}

    if params is None:
        sorted_params = sorted(all_params)
    elif len(params) <= 1:
        _logger.warning("The length of params must be greater than 1.")
        return go.Figure(data=[], layout=layout)
    else:
        for input_p_name in params:
            if input_p_name not in all_params:
                raise ValueError("Parameter {} does not exist in your study.".format(input_p_name))
        sorted_params = sorted(set(params))

    padding_ratio = 0.05
    param_values_range = {}
    for p_name in sorted_params:
        values = _get_param_values(trials, p_name)

        min_value = min(values)
        max_value = max(values)

        if _is_log_scale(trials, p_name):
            padding = (math.log10(max_value) - math.log10(min_value)) * padding_ratio
            min_value = math.pow(10, math.log10(min_value) - padding)
            max_value = math.pow(10, math.log10(max_value) + padding)
        elif _is_numerical(trials, p_name):
            padding = (max_value - min_value) * padding_ratio
            min_value = min_value - padding
            max_value = max_value + padding
        else:
            # Plotly>=4.12.0 draws contours using the indices of categorical variables instead of
            # raw values and the range should be updated based on the cardinality of categorical
            # variables. See https://github.com/optuna/optuna/issues/1967.
            if version.parse(plotly.__version__) >= version.parse("4.12.0"):
                span = len(set(values)) - 1
                padding = span * padding_ratio
                min_value = -padding
                max_value = span + padding

        param_values_range[p_name] = (min_value, max_value)

    reverse_scale = _is_reverse_scale(study, target)

    if len(sorted_params) == 2:
        x_param = sorted_params[0]
        y_param = sorted_params[1]
        sub_plots = _generate_contour_subplot(
            trials, x_param, y_param, reverse_scale, param_values_range, target, target_name
        )
        figure = go.Figure(data=sub_plots, layout=layout)
        figure.update_xaxes(title_text=x_param, range=param_values_range[x_param])
        figure.update_yaxes(title_text=y_param, range=param_values_range[y_param])

        if not _is_numerical(trials, x_param):
            figure.update_xaxes(type="category")
        if not _is_numerical(trials, y_param):
            figure.update_yaxes(type="category")

        if _is_log_scale(trials, x_param):
            log_range = [math.log10(p) for p in param_values_range[x_param]]
            figure.update_xaxes(range=log_range, type="log")
        if _is_log_scale(trials, y_param):
            log_range = [math.log10(p) for p in param_values_range[y_param]]
            figure.update_yaxes(range=log_range, type="log")
    else:
        figure = make_subplots(
            rows=len(sorted_params), cols=len(sorted_params), shared_xaxes=True, shared_yaxes=True
        )
        figure.update_layout(layout)
        showscale = True  # showscale option only needs to be specified once.
        for x_i, x_param in enumerate(sorted_params):
            for y_i, y_param in enumerate(sorted_params):
                if x_param == y_param:
                    figure.add_trace(go.Scatter(), row=y_i + 1, col=x_i + 1)
                else:
                    sub_plots = _generate_contour_subplot(
                        trials,
                        x_param,
                        y_param,
                        reverse_scale,
                        param_values_range,
                        target,
                        target_name,
                    )
                    contour = sub_plots[0]
                    scatter = sub_plots[1]
                    contour.update(showscale=showscale)  # showscale's default is True.
                    if showscale:
                        showscale = False
                    figure.add_trace(contour, row=y_i + 1, col=x_i + 1)
                    figure.add_trace(scatter, row=y_i + 1, col=x_i + 1)

                figure.update_xaxes(range=param_values_range[x_param], row=y_i + 1, col=x_i + 1)
                figure.update_yaxes(range=param_values_range[y_param], row=y_i + 1, col=x_i + 1)

                if not _is_numerical(trials, x_param):
                    figure.update_xaxes(type="category", row=y_i + 1, col=x_i + 1)
                if not _is_numerical(trials, y_param):
                    figure.update_yaxes(type="category", row=y_i + 1, col=x_i + 1)

                if _is_log_scale(trials, x_param):
                    log_range = [math.log10(p) for p in param_values_range[x_param]]
                    figure.update_xaxes(range=log_range, type="log", row=y_i + 1, col=x_i + 1)
                if _is_log_scale(trials, y_param):
                    log_range = [math.log10(p) for p in param_values_range[y_param]]
                    figure.update_yaxes(range=log_range, type="log", row=y_i + 1, col=x_i + 1)

                if x_i == 0:
                    figure.update_yaxes(title_text=y_param, row=y_i + 1, col=x_i + 1)
                if y_i == len(sorted_params) - 1:
                    figure.update_xaxes(title_text=x_param, row=y_i + 1, col=x_i + 1)

    return figure
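# Hedged usage sketch, not part of the original module: this private helper appears to back the
# public optuna.visualization.plot_contour API, so the public entry point is the usual way to obtain
# the figure built above. The helper name and objective below are illustrative assumptions; the
# log-scaled and categorical parameters exercise the log/category axis branches.
def _example_plot_contour_usage() -> None:
    import optuna

    def objective(trial: "optuna.Trial") -> float:
        x = trial.suggest_float("x", 1e-5, 1e2, log=True)  # log-scaled axis
        c = trial.suggest_categorical("c", ["relu", "tanh"])  # categorical axis
        return x * (1.0 if c == "relu" else 2.0)

    study = optuna.create_study()
    study.optimize(objective, n_trials=30)

    fig = optuna.visualization.plot_contour(study, params=["x", "c"])
    fig.show()  # renders the go.Figure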
def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "go.Figure":

    layout = go.Layout(title="Parallel Coordinate Plot")
    reverse_scale = _is_reverse_scale(study, target)

    trials = _filter_nonfinite(
        study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)), target=target
    )

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        return go.Figure(data=[], layout=layout)

    all_params = {p_name for t in trials for p_name in t.params.keys()}

    if params is not None:
        for input_p_name in params:
            if input_p_name not in all_params:
                raise ValueError("Parameter {} does not exist in your study.".format(input_p_name))
        all_params = set(params)
    sorted_params = sorted(all_params)

    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target

    skipped_trial_numbers = _get_skipped_trial_numbers(trials, sorted_params)

    objectives = tuple([target(t) for t in trials if t.number not in skipped_trial_numbers])

    if len(objectives) == 0:
        _logger.warning("Your study has only completed trials with missing parameters.")
        return go.Figure(data=[], layout=layout)

    dims: List[Dict[str, Any]] = [
        {
            "label": target_name,
            "values": objectives,
            "range": (min(objectives), max(objectives)),
        }
    ]

    numeric_cat_params_indices: List[int] = []
    for dim_index, p_name in enumerate(sorted_params, start=1):
        values = []
        for t in trials:
            if t.number in skipped_trial_numbers:
                continue
            if p_name in t.params:
                values.append(t.params[p_name])

        if _is_log_scale(trials, p_name):
            values = [math.log10(v) for v in values]
            min_value = min(values)
            max_value = max(values)
            tickvals = list(range(math.ceil(min_value), math.ceil(max_value)))
            if min_value not in tickvals:
                tickvals = [min_value] + tickvals
            if max_value not in tickvals:
                tickvals = tickvals + [max_value]
            dim = {
                "label": p_name if len(p_name) < 20 else "{}...".format(p_name[:17]),
                "values": tuple(values),
                "range": (min_value, max_value),
                "tickvals": tickvals,
                "ticktext": ["{:.3g}".format(math.pow(10, x)) for x in tickvals],
            }
        elif _is_categorical(trials, p_name):
            vocab: DefaultDict[str, int] = defaultdict(lambda: len(vocab))
            if _is_numerical(trials, p_name):
                _ = [vocab[v] for v in sorted(values)]
                values = [vocab[v] for v in values]
                ticktext = list(sorted(vocab.keys()))
                numeric_cat_params_indices.append(dim_index)
            else:
                values = [vocab[v] for v in values]
                ticktext = list(sorted(vocab.keys(), key=lambda x: vocab[x]))
            dim = {
                "label": p_name if len(p_name) < 20 else "{}...".format(p_name[:17]),
                "values": tuple(values),
                "range": (min(values), max(values)),
                "tickvals": list(range(len(vocab))),
                "ticktext": ticktext,
            }
        else:
            dim = {
                "label": p_name if len(p_name) < 20 else "{}...".format(p_name[:17]),
                "values": tuple(values),
                "range": (min(values), max(values)),
            }

        dims.append(dim)

    if numeric_cat_params_indices:
        # np.lexsort consumes the sort keys in order from back to front,
        # so the parameter values have to be passed in reversed order.
        idx = np.lexsort([dims[index]["values"] for index in numeric_cat_params_indices][::-1])
        for dim in dims:
            # Since the values are mapped to other categories by the index,
            # the index will be swapped according to the sorted index of numeric params.
            dim.update({"values": tuple(np.array(dim["values"])[idx])})

    traces = [
        go.Parcoords(
            dimensions=dims,
            labelangle=30,
            labelside="bottom",
            line={
                "color": dims[0]["values"],
                "colorscale": COLOR_SCALE,
                "colorbar": {"title": target_name},
                "showscale": True,
                "reversescale": reverse_scale,
            },
        )
    ]

    figure = go.Figure(data=traces, layout=layout)

    return figure
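# Hedged usage sketch, not part of the original module: the public
# optuna.visualization.plot_parallel_coordinate entry point is assumed to delegate to the helper
# above. The helper name and objective are illustrative; `target_name` only relabels the colorbar.
def _example_plot_parallel_coordinate_usage() -> None:
    import optuna

    def objective(trial: "optuna.Trial") -> float:
        lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
        layers = trial.suggest_int("layers", 1, 5)
        return lr * layers

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=25)

    fig = optuna.visualization.plot_parallel_coordinate(
        study, params=["lr", "layers"], target_name="Loss"
    )
    fig.show()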
def _get_parallel_coordinate_info(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> _ParallelCoordinateInfo:

    reverse_scale = _is_reverse_scale(study, target)

    trials = _filter_nonfinite(
        study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)), target=target
    )

    all_params = {p_name for t in trials for p_name in t.params.keys()}

    if params is not None:
        for input_p_name in params:
            if input_p_name not in all_params:
                raise ValueError("Parameter {} does not exist in your study.".format(input_p_name))
        all_params = set(params)
    sorted_params = sorted(all_params)

    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target

    skipped_trial_numbers = _get_skipped_trial_numbers(trials, sorted_params)

    objectives = tuple([target(t) for t in trials if t.number not in skipped_trial_numbers])
    # The value (0, 0) is a dummy range; it is ignored when we plot.
    objective_range = (min(objectives), max(objectives)) if len(objectives) > 0 else (0, 0)
    dim_objective = _DimensionInfo(
        label=target_name,
        values=objectives,
        range=objective_range,
        is_log=False,
        is_cat=False,
        tickvals=[],
        ticktext=[],
    )

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        return _ParallelCoordinateInfo(
            dim_objective=dim_objective,
            dims_params=[],
            reverse_scale=reverse_scale,
            target_name=target_name,
        )

    if len(objectives) == 0:
        _logger.warning("Your study has only completed trials with missing parameters.")
        return _ParallelCoordinateInfo(
            dim_objective=dim_objective,
            dims_params=[],
            reverse_scale=reverse_scale,
            target_name=target_name,
        )

    numeric_cat_params_indices: List[int] = []
    dims = []
    for dim_index, p_name in enumerate(sorted_params, start=1):
        values = []
        for t in trials:
            if t.number in skipped_trial_numbers:
                continue
            if p_name in t.params:
                values.append(t.params[p_name])

        if _is_log_scale(trials, p_name):
            values = [math.log10(v) for v in values]
            min_value = min(values)
            max_value = max(values)
            tickvals = list(range(math.ceil(min_value), math.ceil(max_value)))
            if min_value not in tickvals:
                tickvals = [min_value] + tickvals
            if max_value not in tickvals:
                tickvals = tickvals + [max_value]
            dim = _DimensionInfo(
                label=_truncate_label(p_name),
                values=tuple(values),
                range=(min_value, max_value),
                is_log=True,
                is_cat=False,
                tickvals=tickvals,
                ticktext=["{:.3g}".format(math.pow(10, x)) for x in tickvals],
            )
        elif _is_categorical(trials, p_name):
            vocab: DefaultDict[str, int] = defaultdict(lambda: len(vocab))
            if _is_numerical(trials, p_name):
                _ = [vocab[v] for v in sorted(values)]
                values = [vocab[v] for v in values]
                ticktext = list(sorted(vocab.keys()))
                numeric_cat_params_indices.append(dim_index)
            else:
                values = [vocab[v] for v in values]
                ticktext = list(sorted(vocab.keys(), key=lambda x: vocab[x]))
            dim = _DimensionInfo(
                label=_truncate_label(p_name),
                values=tuple(values),
                range=(min(values), max(values)),
                is_log=False,
                is_cat=True,
                tickvals=list(range(len(vocab))),
                ticktext=ticktext,
            )
        else:
            dim = _DimensionInfo(
                label=_truncate_label(p_name),
                values=tuple(values),
                range=(min(values), max(values)),
                is_log=False,
                is_cat=False,
                tickvals=[],
                ticktext=[],
            )

        dims.append(dim)

    if numeric_cat_params_indices:
        dims.insert(0, dim_objective)
        # np.lexsort consumes the sort keys in order from back to front,
        # so the parameter values have to be passed in reversed order.
        idx = np.lexsort([dims[index].values for index in numeric_cat_params_indices][::-1])
        updated_dims = []
        for dim in dims:
            # Since the values are mapped to other categories by the index,
            # the index will be swapped according to the sorted index of numeric params.
            updated_dims.append(
                _DimensionInfo(
                    label=dim.label,
                    values=tuple(np.array(dim.values)[idx]),
                    range=dim.range,
                    is_log=dim.is_log,
                    is_cat=dim.is_cat,
                    tickvals=dim.tickvals,
                    ticktext=dim.ticktext,
                )
            )
        dim_objective = updated_dims[0]
        dims = updated_dims[1:]

    return _ParallelCoordinateInfo(
        dim_objective=dim_objective,
        dims_params=dims,
        reverse_scale=reverse_scale,
        target_name=target_name,
    )
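# Hedged sketch, not part of the original module: _get_parallel_coordinate_info separates data
# preparation from rendering, so a different rendering backend could reuse the same
# _ParallelCoordinateInfo. The field names follow the return statement above; the helper name and
# study setup below are illustrative assumptions.
def _example_parallel_coordinate_info_usage() -> None:
    import optuna

    def objective(trial: "optuna.Trial") -> float:
        a = trial.suggest_float("a", 0.0, 1.0)
        b = trial.suggest_categorical("b", ["x", "y", "z"])
        return a + {"x": 0.0, "y": 1.0, "z": 2.0}[b]

    study = optuna.create_study()
    study.optimize(objective, n_trials=15)

    info = _get_parallel_coordinate_info(study)
    # One dimension per parameter plus the objective dimension; each carries its own range/ticks.
    print(info.dim_objective.label, [d.label for d in info.dims_params])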