def _get_intermediate_plot(info: _IntermediatePlotInfo) -> "Axes":
    """Render the intermediate values of every trial in ``info`` as line plots.

    Args:
        info: Plot data. Each entry of ``info.trial_infos`` provides a
            ``trial_number`` and ``sorted_intermediate_values``, an iterable
            of ``(step, value)`` pairs.

    Returns:
        The Matplotlib axes the lines were drawn on.
    """
    # The ggplot style sheet keeps the look close to the plotly backend.
    plt.style.use("ggplot")
    _, ax = plt.subplots(tight_layout=True)
    ax.set_title("Intermediate Values Plot")
    ax.set_xlabel("Step")
    ax.set_ylabel("Intermediate Value")
    # tab20 is used because many lines may share the same axes.
    palette = plt.get_cmap("tab20")

    trial_infos = info.trial_infos
    for index, trial_info in enumerate(trial_infos):
        steps = tuple(step for step, _ in trial_info.sorted_intermediate_values)
        values = tuple(value for _, value in trial_info.sorted_intermediate_values)
        ax.plot(
            steps,
            values,
            color=palette(index),
            alpha=0.7,
            label=f"Trial{trial_info.trial_number}",
        )

    # A legend is only drawn when there is more than one trial to tell apart.
    if len(trial_infos) > 1:
        ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
    return ax
def _get_edf_plot(
    studies: List[Study],
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw the empirical distribution function of each study's target values.

    Args:
        studies: Studies to compare; one line is drawn per study.
        target: Maps a trial to the plotted value. When ``None`` the trial's
            ``value`` attribute is used.
        target_name: Label for the x axis.

    Returns:
        The Matplotlib axes. They are returned without any line when
        ``studies`` is empty or no study yields a value.
    """
    # The ggplot style sheet keeps the look close to the plotly backend.
    plt.style.use("ggplot")
    _, ax = plt.subplots()
    ax.set_title("Empirical Distribution Function Plot")
    ax.set_xlabel(target_name)
    ax.set_ylabel("Cumulative Probability")
    ax.set_ylim(0, 1)
    # tab20 is used because several studies may share the same axes.
    palette = plt.get_cmap("tab20")

    if not studies:
        _logger.warning("There are no studies.")
        return ax

    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target

    # One array of target values per study, in the same order as ``studies``.
    all_values: List[np.ndarray] = []
    for study in studies:
        complete_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
        usable_trials = _filter_nonfinite(complete_trials, target=target)
        all_values.append(np.array([target(trial) for trial in usable_trials]))

    if not any(len(study_values) for study_values in all_values):
        _logger.warning("There are no complete trials.")
        return ax

    # All studies are evaluated on one shared grid spanning the global value range.
    lower_bound = np.min(np.concatenate(all_values))
    upper_bound = np.max(np.concatenate(all_values))
    grid = np.linspace(lower_bound, upper_bound, 100)

    for index, (study_values, study) in enumerate(zip(all_values, studies)):
        # Fraction of this study's values that are <= each grid point.
        at_or_below = study_values[:, np.newaxis] <= grid
        cumulative = np.sum(at_or_below, axis=0) / study_values.size
        ax.plot(grid, cumulative, color=palette(index), alpha=0.7, label=study.study_name)

    if len(studies) > 1:
        ax.legend()
    return ax
def _get_pareto_front_2d(info: _ParetoFrontInfo) -> "Axes":
    """Scatter infeasible, non-best and best trials on a 2D Pareto-front plot.

    Args:
        info: Plot data. ``axis_order`` selects which objective goes on the
            x and y axes, ``target_names`` supplies the axis labels, and the
            three ``*_trials_with_values`` attributes hold ``(trial, values)``
            pairs.

    Returns:
        The Matplotlib axes the points were drawn on.
    """
    # The ggplot style sheet and tab10 colors keep the look close to plotly.
    plt.style.use("ggplot")
    _, ax = plt.subplots()
    ax.set_title("Pareto-front Plot")
    palette = plt.get_cmap("tab10")

    ax.set_xlabel(info.target_names[info.axis_order[0]])
    ax.set_ylabel(info.target_names[info.axis_order[1]])

    def _scatter_group(trials_with_values, color, label):  # type: ignore
        # Only the objective values of each (trial, values) pair are plotted.
        ax.scatter(
            x=[values[info.axis_order[0]] for _, values in trials_with_values],
            y=[values[info.axis_order[1]] for _, values in trials_with_values],
            color=color,
            label=label,
        )

    # Ordinary trials are called "Feasible Trial" only when infeasible ones are shown too.
    has_infeasible = len(info.infeasible_trials_with_values) > 0
    if has_infeasible:
        _scatter_group(info.infeasible_trials_with_values, "#cccccc", "Infeasible Trial")

    if len(info.non_best_trials_with_values) > 0:
        trial_label: str = "Feasible Trial" if has_infeasible else "Trial"
        _scatter_group(info.non_best_trials_with_values, palette(0), trial_label)

    if len(info.best_trials_with_values) > 0:
        _scatter_group(info.best_trials_with_values, palette(3), "Best Trial")

    if info.non_best_trials_with_values is not None and ax.has_data():
        ax.legend()
    return ax
def _get_edf_plot(
    studies: List[Study],
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw the empirical distribution function of each study's complete trials.

    Args:
        studies: Studies to compare; one line is drawn per study.
        target: Maps a trial to the plotted value. When ``None`` the trial's
            ``value`` attribute is used.
        target_name: Label for the x axis.

    Returns:
        The Matplotlib axes. They are returned without any line when
        ``studies`` is empty or no study has a complete trial.
    """
    # The ggplot style sheet keeps the look close to the plotly backend.
    plt.style.use("ggplot")
    _, ax = plt.subplots()
    ax.set_title("Empirical Distribution Function Plot")
    ax.set_xlabel(target_name)
    ax.set_ylabel("Cumulative Probability")
    ax.set_ylim(0, 1)
    # tab20 is used because several studies may share the same axes.
    palette = plt.get_cmap("tab20")

    if not studies:
        _logger.warning("There are no studies.")
        return ax

    # Complete trials of every study, flattened in study order.
    all_trials = [
        trial
        for study in studies
        for trial in study.get_trials(deepcopy=False)
        if trial.state == TrialState.COMPLETE
    ]
    if not all_trials:
        _logger.warning("There are no complete trials.")
        return ax

    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target

    # All studies are evaluated on one shared grid spanning the global value range.
    lower_bound = min(target(trial) for trial in all_trials)
    upper_bound = max(target(trial) for trial in all_trials)
    grid = np.linspace(lower_bound, upper_bound, 100)

    for index, study in enumerate(studies):
        complete_trials = [
            trial
            for trial in study.get_trials(deepcopy=False)
            if trial.state == TrialState.COMPLETE
        ]
        study_values = np.asarray([target(trial) for trial in complete_trials])
        # Fraction of this study's values that are <= each grid point.
        at_or_below = study_values[:, np.newaxis] <= grid
        cumulative = np.sum(at_or_below, axis=0) / study_values.size
        ax.plot(grid, cumulative, color=palette(index), alpha=0.7, label=study.study_name)

    return ax
def _get_optimization_history_plot(
    study: Study,
    target: Optional[Callable[[FrozenTrial], float]],
    target_name: str,
) -> "Axes":
    """Plot the value of each complete trial against its trial number.

    Args:
        study: Study whose complete trials are plotted.
        target: Maps a trial to the plotted value. When ``None`` the trial's
            ``value`` is plotted together with a running "Best Value" line.
        target_name: Label for the y axis and for the scatter series.

    Returns:
        The Matplotlib axes. They are returned empty when the study has no
        complete trial.
    """
    # The ggplot style sheet and tab10 colors keep the look close to plotly.
    plt.style.use("ggplot")
    _, ax = plt.subplots()
    ax.set_title("Optimization History Plot")
    ax.set_xlabel("#Trials")
    ax.set_ylabel(target_name)
    palette = plt.get_cmap("tab10")

    complete_trials = [t for t in study.trials if t.state == TrialState.COMPLETE]
    if not complete_trials:
        _logger.warning("Study instance does not contain trials.")
        return ax

    if target is not None:
        # Custom target: scatter only, no best-value line and no legend.
        ax.scatter(
            x=[t.number for t in complete_trials],
            y=[target(t) for t in complete_trials],
            color=palette(0),
            alpha=1,
            label=target_name,
        )
        return ax

    # The running best is a cumulative minimum or maximum, depending on the direction.
    if study.direction == StudyDirection.MINIMIZE:
        accumulate = np.minimum.accumulate
    else:
        accumulate = np.maximum.accumulate
    best_values = accumulate([cast(float, t.value) for t in complete_trials])

    ax.scatter(
        x=[t.number for t in complete_trials],
        y=[t.value for t in complete_trials],
        color=palette(0),
        alpha=1,
        label=target_name,
    )
    ax.plot(
        [t.number for t in complete_trials],
        best_values,
        marker="o",
        color=palette(3),
        alpha=0.5,
        label="Best Value",
    )
    ax.legend()
    return ax
def _get_optimization_history_plot(
    info_list: List[_OptimizationHistoryInfo],
    target_name: str,
) -> "Axes":
    """Plot the optimization history described by each entry of ``info_list``.

    Every entry unpacks to ``(trial_numbers, values_info, best_values_info)``.
    The values are drawn as a scatter series (with error bars when ``stds`` is
    available) and the best values, when present, as a line with an optional
    standard-deviation band.

    Args:
        info_list: One history description per plotted series.
        target_name: Label for the y axis.

    Returns:
        The Matplotlib axes the history was drawn on.
    """
    # The ggplot style sheet and tab10 colors keep the look close to plotly.
    plt.style.use("ggplot")
    _, ax = plt.subplots()
    ax.set_title("Optimization History Plot")
    ax.set_xlabel("Trial")
    ax.set_ylabel(target_name)
    cmap = plt.get_cmap("tab10")

    # A single series keeps the classic colors; several series get one
    # color pair (even/odd index) each.
    single_series = len(info_list) == 1

    for i, (trial_numbers, values_info, best_values_info) in enumerate(info_list):
        if values_info.stds is not None:
            # Draw on ``ax`` explicitly instead of relying on pyplot's
            # implicit "current axes".
            ax.errorbar(
                x=trial_numbers,
                y=values_info.values,
                yerr=values_info.stds,
                capsize=5,
                fmt="o",
                color="tab:blue",
            )
        ax.scatter(
            x=trial_numbers,
            y=values_info.values,
            color=cmap(0) if single_series else cmap(2 * i),
            alpha=1,
            label=values_info.label_name,
        )

        if best_values_info is not None:
            ax.plot(
                trial_numbers,
                best_values_info.values,
                marker="o",
                color=cmap(3) if single_series else cmap(2 * i + 1),
                alpha=0.5,
                label=best_values_info.label_name,
            )
            if best_values_info.stds is not None:
                best = np.array(best_values_info.values)
                spread = np.array(best_values_info.stds)
                ax.fill_between(
                    x=trial_numbers,
                    y1=best - spread,
                    y2=best + spread,
                    color="tab:red",
                    alpha=0.4,
                )

    # One legend is built after all series exist; a legend created inside the
    # loop would be replaced by this one anyway.
    ax.legend(bbox_to_anchor=(1.05, 1.0), loc="upper left")
    return ax
def _get_optimization_history_plot(
    study: Study,
    target: Optional[Callable[[FrozenTrial], float]],
    target_name: str,
) -> "Axes":
    """Plot the value of each complete trial against its trial number.

    Args:
        study: Study whose complete trials are plotted.
        target: Maps a trial to the plotted value. When ``None`` the trial's
            ``value`` is plotted together with a running "Best Value" line.
        target_name: Label for the y axis and for the scatter series.

    Returns:
        The Matplotlib axes. They are returned empty when the study has no
        complete trial.
    """
    # The ggplot style sheet and tab10 colors keep the look close to plotly.
    plt.style.use("ggplot")
    _, ax = plt.subplots()
    ax.set_title("Optimization History Plot")
    ax.set_xlabel("#Trials")
    ax.set_ylabel(target_name)
    cmap = plt.get_cmap("tab10")

    # Prepare data for plotting.
    trials = [t for t in study.trials if t.state == TrialState.COMPLETE]
    if len(trials) == 0:
        _logger.warning("Study instance does not contain trials.")
        return ax

    trial_numbers = [t.number for t in trials]

    if target is None:
        # The running best is only needed (and only well defined) for the
        # default objective, so ``study.direction`` and ``trial.value`` are
        # not touched when a custom ``target`` is supplied.
        minimize = study.direction == StudyDirection.MINIMIZE
        comp = min if minimize else max
        running_best = float("inf") if minimize else -float("inf")
        best_values = []
        for trial in trials:
            trial_value = trial.value
            assert trial_value is not None  # For mypy
            running_best = comp(running_best, trial_value)
            best_values.append(running_best)

        ax.scatter(
            x=trial_numbers,
            y=[t.value for t in trials],
            color=cmap(0),
            alpha=1,
            label=target_name,
        )
        ax.plot(
            trial_numbers,
            best_values,
            marker="o",
            color=cmap(3),
            alpha=0.5,
            label="Best Value",
        )
    else:
        ax.scatter(
            x=trial_numbers,
            y=[target(t) for t in trials],
            color=cmap(0),
            alpha=1,
            label=target_name,
        )
    ax.legend()
    return ax
def _get_pareto_front_3d(info: _ParetoFrontInfo) -> "Axes":
    """Scatter non-best and best trials on a 3D Pareto-front plot.

    Args:
        info: Plot data. ``axis_order`` selects which objective goes on the
            x, y and z axes, ``target_names`` supplies the axis labels, and
            the ``*_trials_with_values`` attributes hold ``(trial, values)``
            pairs.

    Returns:
        The 3D Matplotlib axes the points were drawn on.
    """
    # The ggplot style sheet and tab10 colors keep the look close to plotly.
    plt.style.use("ggplot")
    fig = plt.figure()
    ax = fig.add_subplot(projection="3d")
    ax.set_title("Pareto-front Plot")
    palette = plt.get_cmap("tab10")

    ax.set_xlabel(info.target_names[info.axis_order[0]])
    ax.set_ylabel(info.target_names[info.axis_order[1]])
    ax.set_zlabel(info.target_names[info.axis_order[2]])

    def _scatter_group(trials_with_values, color, label):  # type: ignore
        # Only the objective values of each (trial, values) pair are plotted.
        ax.scatter(
            xs=[values[info.axis_order[0]] for _, values in trials_with_values],
            ys=[values[info.axis_order[1]] for _, values in trials_with_values],
            zs=[values[info.axis_order[2]] for _, values in trials_with_values],
            color=color,
            label=label,
        )

    non_best = info.non_best_trials_with_values
    if non_best is not None and len(non_best) > 0:
        _scatter_group(non_best, palette(0), "Trial")

    best = info.best_trials_with_values
    if best is not None and len(best) > 0:
        _scatter_group(best, palette(3), "Best Trial")

    if info.non_best_trials_with_values is not None and ax.has_data():
        ax.legend()
    return ax
def _get_optimization_histories(
    studies: List[Study],
    target: Optional[Callable[[FrozenTrial], float]],
    target_name: str,
    ax: "Axes",
) -> "Axes":
    """Draw the optimization history of every study onto ``ax``.

    Args:
        studies: Studies to plot; each contributes its complete trials.
        target: Maps a trial to the plotted value. When ``None`` the trial's
            ``value`` is plotted together with a running best-value line and
            a legend is drawn.
        target_name: Label of the scatter series.
        ax: Axes to draw on.

    Returns:
        The same ``ax`` that was passed in.
    """
    cmap = plt.get_cmap("tab10")  # Use tab10 colormap for similar outputs to plotly.

    # With a single study the classic colors and plain labels are used; with
    # several studies each one gets its own color pair and a suffixed label.
    single_study = len(studies) == 1

    # Draw a scatter plot and a line plot.
    for i, study in enumerate(studies):
        trials = study.get_trials(states=(TrialState.COMPLETE,))
        trial_numbers = [t.number for t in trials]
        scatter_label = target_name if single_study else f"{target_name} of {study.study_name}"

        if target is None:
            if study.direction == StudyDirection.MINIMIZE:
                best_values = np.minimum.accumulate([cast(float, t.value) for t in trials])
            else:
                best_values = np.maximum.accumulate([cast(float, t.value) for t in trials])
            ax.scatter(
                x=trial_numbers,
                y=[t.value for t in trials],
                color=cmap(0) if single_study else cmap(2 * i),
                alpha=1,
                label=scatter_label,
            )
            ax.plot(
                trial_numbers,
                best_values,
                marker="o",
                color=cmap(3) if single_study else cmap(2 * i + 1),
                alpha=0.5,
                label="Best Value" if single_study else f"Best Values of {study.study_name}",
            )
            ax.legend()
        else:
            ax.scatter(
                x=trial_numbers,
                y=[target(t) for t in trials],
                # The original compared against 0 here, which can never be
                # true inside the loop; the single-study test is the intent.
                color=cmap(0) if single_study else cmap(2 * i),
                alpha=1,
                label=scatter_label,
            )
    return ax
def _get_intermediate_plot(study: Study) -> "Axes":
    """Plot the intermediate values reported by the trials of ``study``.

    Pruned, complete and running trials are considered; each one that has
    intermediate values becomes one line.

    Args:
        study: Study whose trials are plotted.

    Returns:
        The Matplotlib axes. They are returned without any line when no
        eligible trial exists or none reported intermediate values.
    """
    # The ggplot style sheet keeps the look close to the plotly backend.
    plt.style.use("ggplot")
    _, ax = plt.subplots(tight_layout=True)
    ax.set_title("Intermediate Values Plot")
    ax.set_xlabel("Step")
    ax.set_ylabel("Intermediate Value")
    # tab20 is used because many lines may share the same axes.
    palette = plt.get_cmap("tab20")

    plotted_states = [TrialState.PRUNED, TrialState.COMPLETE, TrialState.RUNNING]
    candidates = [trial for trial in study.trials if trial.state in plotted_states]
    if not candidates:
        _logger.warning("Study instance does not contain trials.")
        return ax

    drew_any_line = False
    for index, trial in enumerate(candidates):
        if not trial.intermediate_values:
            continue
        # Sort by step so the line runs from left to right.
        ordered = sorted(trial.intermediate_values.items())
        ax.plot(
            tuple(step for step, _ in ordered),
            tuple(value for _, value in ordered),
            color=palette(index),
            alpha=0.7,
            label=f"Trial{trial.number}",
        )
        drew_any_line = True

    if not drew_any_line:
        _logger.warning(
            "You need to set up the pruning feature to utilize `plot_intermediate_values()`"
        )
        return ax

    if len(candidates) > 1:
        ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
    return ax
def _get_slice_plot(info: _SlicePlotInfo) -> "Axes":
    """Draw one slice scatter plot per entry of ``info.subplots``.

    Args:
        info: Plot data providing ``subplots`` and ``target_name``.

    Returns:
        A single axes object when there is at most one subplot, otherwise
        the array of axes created by ``plt.subplots``.
    """
    n_subplots = len(info.subplots)
    if n_subplots == 0:
        _, empty_ax = plt.subplots()
        return empty_ax

    colormap = plt.get_cmap("Blues")
    padding_ratio = 0.05
    # The ggplot style sheet keeps the look close to the plotly backend.
    plt.style.use("ggplot")

    if n_subplots == 1:
        fig, axs = plt.subplots()
        axs.set_title("Slice Plot")
        sc = _generate_slice_subplot(
            info.subplots[0], axs, colormap, padding_ratio, info.target_name
        )
    else:
        # Give every subplot half the default figure width so panels do not
        # shrink as their number grows.
        default_width, default_height = (
            matplotlib.rcParams["figure.figsize"][0],
            matplotlib.rcParams["figure.figsize"][1],
        )
        width_per_subplot = default_width / 2
        fig, axs = plt.subplots(
            1,
            n_subplots,
            sharey=True,
            figsize=(width_per_subplot * n_subplots, default_height),
        )
        fig.suptitle("Slice Plot")
        for position, subplot in enumerate(info.subplots):
            sc = _generate_slice_subplot(
                subplot, axs[position], colormap, padding_ratio, info.target_name
            )

    # The colorbar is built from the last scatter that was drawn.
    colorbar = fig.colorbar(sc, ax=axs)
    colorbar.set_label("Trial")
    return axs
def _get_slice_plot(study: Study, params: Optional[List[str]] = None) -> "Axes":
    """Draw one slice scatter plot per parameter of the study's complete trials.

    Args:
        study: Study whose complete trials are plotted.
        params: Parameter names to plot. When ``None`` every parameter seen
            in the complete trials is plotted.

    Returns:
        A single axes object when one parameter is plotted (or no complete
        trial exists), otherwise the array of axes created by ``plt.subplots``.

    Raises:
        ValueError: If a name in ``params`` is not a parameter of any
            complete trial.
    """
    complete_trials = [t for t in study.trials if t.state == TrialState.COMPLETE]
    if not complete_trials:
        _logger.warning("Your study does not have any completed trials.")
        _, empty_ax = plt.subplots()
        return empty_ax

    known_params = {name for t in complete_trials for name in t.params.keys()}
    if params is not None:
        for requested in params:
            if requested not in known_params:
                raise ValueError(f"Parameter {requested} does not exist in your study.")
        # Duplicates in the request are collapsed.
        sorted_params = sorted(set(params))
    else:
        sorted_params = sorted(known_params)
    n_params = len(sorted_params)

    colormap = plt.get_cmap("Blues")
    padding_ratio = 0.05
    # The ggplot style sheet keeps the look close to the plotly backend.
    plt.style.use("ggplot")

    obj_values = [t.value for t in complete_trials]

    if n_params == 1:
        fig, axs = plt.subplots()
        axs.set_title("Slice Plot")
        sc = _generate_slice_subplot(
            complete_trials, sorted_params[0], axs, colormap, padding_ratio, obj_values  # type: ignore
        )
    else:
        # Give every subplot half the default figure width so panels do not
        # shrink as their number grows.
        width_per_subplot = matplotlib.rcParams["figure.figsize"][0] / 2
        figure_height = matplotlib.rcParams["figure.figsize"][1]
        fig, axs = plt.subplots(
            1, n_params, sharey=True, figsize=(width_per_subplot * n_params, figure_height)
        )
        fig.suptitle("Slice Plot")
        for position, param_name in enumerate(sorted_params):
            sc = _generate_slice_subplot(
                complete_trials, param_name, axs[position], colormap, padding_ratio, obj_values  # type: ignore
            )

    # The colorbar is built from the last scatter that was drawn.
    colorbar = fig.colorbar(sc, ax=axs)
    colorbar.set_label("#Trials")
    return axs
def _get_parallel_coordinate_plot(info: _ParallelCoordinateInfo) -> "Axes":
    """Draw a parallel-coordinate plot from precomputed dimension data.

    Every parameter dimension is rescaled linearly from its own ``range``
    onto the objective's ``range`` so that all polylines share one y axis;
    a twin axis per parameter then shows the real scale.

    Args:
        info: Plot data providing ``dim_objective``, ``dims_params``,
            ``reverse_scale`` and ``target_name``.

    Returns:
        The primary Matplotlib axes. They are returned without lines when
        there is no parameter dimension or no objective value.
    """
    objective = info.dim_objective
    param_dims = info.dims_params

    fig, ax = plt.subplots()
    colormap = plt.get_cmap("Blues_r" if info.reverse_scale else "Blues")
    ax.set_title("Parallel Coordinate Plot")
    for side in ("top", "bottom"):
        ax.spines[side].set_visible(False)

    if len(param_dims) == 0 or len(objective.values) == 0:
        return ax

    obj_low, obj_high = objective.range[0], objective.range[1]
    obj_width = obj_high - obj_low

    # One polyline per objective value: objective first, then each parameter.
    polylines = [[value] for value in objective.values]
    for dim in param_dims:
        dim_low = dim.range[0]
        dim_width = dim.range[1] - dim_low
        if dim_width == 0.0:
            # A degenerate range cannot be rescaled; pin it to the middle.
            midpoint = obj_width / 2 + obj_low
            for index in range(len(dim.values)):
                polylines[index].append(midpoint)
        else:
            for index, raw in enumerate(dim.values):
                polylines[index].append((raw - dim_low) / dim_width * obj_width + obj_low)

    # Draw multiple line plots and axes.
    # Ref: https://stackoverflow.com/a/50029441
    n_params = len(param_dims)
    x_positions = range(n_params + 1)
    ax.set_xlim(0, n_params)
    ax.set_ylim(obj_low, obj_high)
    segments = [np.column_stack([x_positions, line]) for line in polylines]
    lc = LineCollection(segments, cmap=colormap)
    # Color each polyline by its objective value.
    lc.set_array(np.asarray(objective.values))
    colorbar = fig.colorbar(lc, pad=0.1)
    colorbar.set_label(info.target_name)

    tick_labels = [objective.label]
    tick_labels.extend(dim.label for dim in param_dims)
    plt.xticks(x_positions, tick_labels, rotation=330)

    for position, dim in enumerate(param_dims):
        twin = ax.twinx()
        if not dim.is_log:
            twin.set_ylim(dim.range[0], dim.range[1])
        else:
            # The range is exponentiated with base 10 before a log scale is applied.
            twin.set_ylim(np.power(10, dim.range[0]), np.power(10, dim.range[1]))
            twin.set_yscale("log")
        for side in ("top", "bottom"):
            twin.spines[side].set_visible(False)
        twin.xaxis.set_visible(False)
        # Place this parameter's y axis at its column.
        twin.spines["right"].set_position(("axes", (position + 1) / n_params))
        if dim.is_cat:
            twin.set_yticks(dim.tickvals)
            twin.set_yticklabels(dim.ticktext)

    ax.add_collection(lc)
    return ax
def _get_pareto_front_2d(
    study: Study,
    target_names: Optional[List[str]],
    include_dominated_trials: bool = False,
    axis_order: Optional[List[int]] = None,
) -> "Axes":
    """Scatter the Pareto-front trials of a two-objective study.

    Args:
        study: Study whose ``best_trials`` form the Pareto front.
        target_names: Two axis labels, indexed through ``axis_order``.
            Defaults to ``["Objective 0", "Objective 1"]`` when ``None``.
        include_dominated_trials: Also plot the trials returned by
            ``_get_non_pareto_front_trials`` and draw a legend.
        axis_order: Permutation of ``[0, 1]`` choosing which objective goes
            on the x and y axes. Defaults to ``[0, 1]``.

    Returns:
        The Matplotlib axes the points were drawn on.

    Raises:
        ValueError: If ``target_names`` does not have two entries or
            ``axis_order`` is not a permutation of ``[0, 1]``.
    """
    # Set up the graph style.
    plt.style.use("ggplot")  # Use ggplot style sheet for similar outputs to plotly.
    _, ax = plt.subplots()
    ax.set_title("Pareto-front Plot")
    cmap = plt.get_cmap("tab10")  # Use tab10 colormap for similar outputs to plotly.

    if target_names is None:
        target_names = ["Objective 0", "Objective 1"]
    elif len(target_names) != 2:
        raise ValueError("The length of `target_names` is supposed to be 2.")

    # Prepare data for plotting. ``study.best_trials`` is read exactly once:
    # repeated reads are wasteful, and the returned list is left untouched
    # instead of being extended in place.
    best_trials = study.best_trials
    if len(best_trials) == 0:
        _logger.warning("Your study does not have any completed trials.")

    if include_dominated_trials:
        non_best_trials = list(_get_non_pareto_front_trials(study, best_trials))
    else:
        non_best_trials = []

    if axis_order is None:
        axis_order = list(range(2))
    else:
        if len(axis_order) != 2:
            raise ValueError(
                f"Size of `axis_order` {axis_order}. Expect: 2, Actual: {len(axis_order)}."
            )
        if len(set(axis_order)) != 2:
            raise ValueError(f"Elements of given `axis_order` {axis_order} are not unique!")
        if max(axis_order) > 1:
            raise ValueError(
                f"Given `axis_order` {axis_order} contains invalid index {max(axis_order)} "
                "higher than 1."
            )
        if min(axis_order) < 0:
            raise ValueError(
                f"Given `axis_order` {axis_order} contains invalid index {min(axis_order)} "
                "lower than 0."
            )

    x_index, y_index = axis_order[0], axis_order[1]
    ax.set_xlabel(target_names[x_index])
    ax.set_ylabel(target_names[y_index])

    if len(non_best_trials) != 0:
        ax.scatter(
            x=[t.values[x_index] for t in non_best_trials],
            y=[t.values[y_index] for t in non_best_trials],
            color=cmap(0),
            label="Trial",
        )
    if len(best_trials):
        ax.scatter(
            x=[t.values[x_index] for t in best_trials],
            y=[t.values[y_index] for t in best_trials],
            color=cmap(3),
            label="Best Trial",
        )

    # The legend is only needed when two kinds of points may be shown.
    if include_dominated_trials and ax.has_data():
        ax.legend()

    return ax
def _set_cmap(study: Study, target: Optional[Callable[[FrozenTrial], float]]) -> "Colormap":
    """Pick the blue colormap matching the study direction.

    The reversed map ``"Blues_r"`` is used only when no custom ``target`` is
    given and the study maximizes; every other case uses ``"Blues"``.
    ``study.direction`` is not read when ``target`` is supplied.

    Args:
        study: Study whose direction decides the colormap.
        target: Optional custom target function.

    Returns:
        The selected Matplotlib colormap.
    """
    if target is None and study.direction == StudyDirection.MAXIMIZE:
        colormap_name = "Blues_r"
    else:
        colormap_name = "Blues"
    return plt.get_cmap(colormap_name)
def plot_edf(
    study: Union[Study, Sequence[Study]],
    *,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Plot the objective value EDF (empirical distribution function) of a study with Matplotlib.

    Note that only the complete trials are considered when plotting the EDF.

    .. seealso::
        Please refer to :func:`optuna.visualization.plot_edf` for an example;
        this function can be used in its place.

    .. note::

        Please refer to `matplotlib.pyplot.legend
        <https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.legend.html>`_
        to adjust the style of the generated legend.

    Example:

        The following code snippet shows how to plot EDF.

        .. plot::

            import math

            import optuna


            def ackley(x, y):
                a = 20 * math.exp(-0.2 * math.sqrt(0.5 * (x ** 2 + y ** 2)))
                b = math.exp(0.5 * (math.cos(2 * math.pi * x) + math.cos(2 * math.pi * y)))
                return -a - b + math.e + 20


            def objective(trial, low, high):
                x = trial.suggest_float("x", low, high)
                y = trial.suggest_float("y", low, high)
                return ackley(x, y)


            sampler = optuna.samplers.RandomSampler(seed=10)

            # Widest search space.
            study0 = optuna.create_study(study_name="x=[0,5), y=[0,5)", sampler=sampler)
            study0.optimize(lambda t: objective(t, 0, 5), n_trials=500)

            # Narrower search space.
            study1 = optuna.create_study(study_name="x=[0,4), y=[0,4)", sampler=sampler)
            study1.optimize(lambda t: objective(t, 0, 4), n_trials=500)

            # Narrowest search space but it doesn't include the global optimum point.
            study2 = optuna.create_study(study_name="x=[1,3), y=[1,3)", sampler=sampler)
            study2.optimize(lambda t: objective(t, 1, 3), n_trials=500)

            optuna.visualization.matplotlib.plot_edf([study0, study1, study2])

    Args:
        study:
            A target :class:`~optuna.study.Study` object.
            You can pass multiple studies if you want to compare those EDFs.
        target:
            A function to specify the value to display. If it is :obj:`None` and ``study`` is being
            used for single-objective optimization, the objective values are plotted.

            .. note::
                Specify this argument if ``study`` is being used for multi-objective optimization.
        target_name:
            Target's name to display on the axis label.

    Returns:
        A :class:`matplotlib.axes.Axes` object.
    """

    _imports.check()

    # The ggplot style sheet keeps the look close to the plotly backend.
    plt.style.use("ggplot")
    _, ax = plt.subplots()
    ax.set_title("Empirical Distribution Function Plot")
    ax.set_xlabel(target_name)
    ax.set_ylabel("Cumulative Probability")
    ax.set_ylim(0, 1)
    # tab20 is used because several studies may share the same axes.
    palette = plt.get_cmap("tab20")

    info = _get_edf_info(study, target, target_name)
    lines = info.lines
    n_lines = len(lines)
    if n_lines == 0:
        return ax

    for index, (study_name, cumulative) in enumerate(lines):
        ax.plot(info.x_values, cumulative, color=palette(index), alpha=0.7, label=study_name)

    # A legend is only useful when there is more than one line to tell apart.
    if n_lines > 1:
        ax.legend()

    return ax
def _set_cmap(study: Study) -> "Colormap":
    """Return ``"Blues_r"`` for a minimizing study and ``"Blues"`` otherwise.

    Args:
        study: Study whose direction decides the colormap.

    Returns:
        The selected Matplotlib colormap.
    """
    if study.direction == StudyDirection.MINIMIZE:
        return plt.get_cmap("Blues_r")
    return plt.get_cmap("Blues")
def _set_cmap(reverse_scale: bool) -> "Colormap":
    """Return ``"Blues"`` when ``reverse_scale`` is truthy, else ``"Blues_r"``.

    Args:
        reverse_scale: Flag selecting between the two blue colormaps.

    Returns:
        The selected Matplotlib colormap.
    """
    colormap_name = "Blues" if reverse_scale else "Blues_r"
    return plt.get_cmap(colormap_name)
def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw a parallel-coordinate plot of the study's complete trials.

    Every parameter is rescaled linearly onto the objective's value range so
    that all polylines share one y axis; a twin axis per parameter then shows
    the real scale. Log-scale parameters are rescaled in log10 space and
    categorical parameters are mapped to integer codes in order of first
    appearance.

    Args:
        study: Study whose complete trials are plotted.
        params: Parameter names to plot. When ``None`` every parameter seen
            in the complete trials is plotted.
        target: Maps a trial to the plotted value. When ``None`` the trial's
            ``value`` is used and the colormap follows the study direction.
        target_name: Label of the objective axis and of the colorbar.

    Returns:
        The primary Matplotlib axes. They are returned without lines when
        the study has no complete trial.

    Raises:
        ValueError: If a name in ``params`` is not a parameter of any
            complete trial.
    """
    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target
        reversescale = study.direction == StudyDirection.MINIMIZE
    else:
        reversescale = True

    # Set up the graph style.
    fig, ax = plt.subplots()
    cmap = plt.get_cmap("Blues_r" if reversescale else "Blues")
    ax.set_title("Parallel Coordinate Plot")
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)

    # Prepare data for plotting.
    trials = [trial for trial in study.trials if trial.state == TrialState.COMPLETE]

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        return ax

    all_params = {p_name for t in trials for p_name in t.params.keys()}
    if params is not None:
        for input_p_name in params:
            if input_p_name not in all_params:
                raise ValueError("Parameter {} does not exist in your study.".format(input_p_name))
        all_params = set(params)
    sorted_params = sorted(all_params)

    obj_org = [target(t) for t in trials]
    obj_min = min(obj_org)
    obj_max = max(obj_org)
    obj_w = obj_max - obj_min
    # One polyline per trial: objective first, then each parameter.
    dims_obj_base = [[o] for o in obj_org]

    cat_param_names = []
    cat_param_values = []
    cat_param_ticks = []
    param_values = []
    var_names = [target_name]
    for p_name in sorted_params:
        # Trials that lack the parameter contribute NaN.
        values = [t.params[p_name] if p_name in t.params else np.nan for t in trials]

        if _is_log_scale(trials, p_name):
            values_for_lc = [math.log10(v) for v in values]
        elif _is_categorical(trials, p_name):
            # Each unseen category is assigned the next integer code.
            vocab = defaultdict(lambda: len(vocab))  # type: DefaultDict[str, int]
            values = [vocab[v] for v in values]
            cat_param_names.append(p_name)
            vocab_item_sorted = sorted(vocab.items(), key=lambda x: x[1])
            cat_param_values.append([v[0] for v in vocab_item_sorted])
            cat_param_ticks.append([v[1] for v in vocab_item_sorted])
            values_for_lc = values
        else:
            values_for_lc = values

        p_min = min(values_for_lc)
        p_max = max(values_for_lc)
        p_w = p_max - p_min

        if p_w == 0.0:
            # A parameter with a single distinct value cannot be rescaled
            # (it would divide by zero); pin it to the middle of the axis.
            center = obj_w / 2 + obj_min
            for line in dims_obj_base:
                line.append(center)
        else:
            for i, v in enumerate(values_for_lc):
                dims_obj_base[i].append((v - p_min) / p_w * obj_w + obj_min)

        var_names.append(p_name if len(p_name) < 20 else "{}...".format(p_name[:17]))
        param_values.append(values)

    # Draw multiple line plots and axes.
    # Ref: https://stackoverflow.com/a/50029441
    n_axes = len(sorted_params) + 1
    ax.set_xlim(0, len(sorted_params))
    ax.set_ylim(obj_min, obj_max)
    xs = [range(n_axes) for _ in range(len(dims_obj_base))]
    segments = [np.column_stack([x, y]) for x, y in zip(xs, dims_obj_base)]
    lc = LineCollection(segments, cmap=cmap)
    # Color each polyline by its own objective value. No extra sentinel
    # value is appended, so the color range covers exactly the data.
    lc.set_array(np.asarray(obj_org))
    axcb = fig.colorbar(lc, pad=0.1)
    axcb.set_label(target_name)
    plt.xticks(range(n_axes), var_names, rotation=330)

    for i, p_name in enumerate(sorted_params):
        ax2 = ax.twinx()
        ax2.set_ylim(min(param_values[i]), max(param_values[i]))
        if _is_log_scale(trials, p_name):
            ax2.set_yscale("log")
        ax2.spines["top"].set_visible(False)
        ax2.spines["bottom"].set_visible(False)
        ax2.get_xaxis().set_visible(False)
        ax2.plot([1] * len(param_values[i]), param_values[i], visible=False)
        # Place this parameter's y axis at its column.
        ax2.spines["right"].set_position(("axes", (i + 1) / len(sorted_params)))
        if p_name in cat_param_names:
            idx = cat_param_names.index(p_name)
            tick_pos = cat_param_ticks[idx]
            tick_labels = cat_param_values[idx]
            ax2.set_yticks(tick_pos)
            ax2.set_yticklabels(tick_labels)

    ax.add_collection(lc)

    return ax
def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw a parallel-coordinate plot of the study's complete trials.

    Every parameter is rescaled linearly onto the objective's value range so
    that all polylines share one y axis; a twin axis per parameter then shows
    the real scale. Trials whose numbers are returned by
    ``_get_skipped_trial_numbers`` are left out.

    Args:
        study: Study whose complete trials are plotted.
        params: Parameter names to plot. When ``None`` every parameter seen
            in the complete trials is plotted.
        target: Maps a trial to the plotted value. When ``None`` the trial's
            ``value`` is used and the colormap follows the study direction.
        target_name: Label of the objective axis and of the colorbar.

    Returns:
        The primary Matplotlib axes. They are returned without lines when
        the study has no complete trial or every complete trial is skipped.

    Raises:
        ValueError: If a name in ``params`` is not a parameter of any
            complete trial.
    """
    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target
        reversescale = study.direction == StudyDirection.MINIMIZE
    else:
        reversescale = True

    # Set up the graph style.
    fig, ax = plt.subplots()
    cmap = plt.get_cmap("Blues_r" if reversescale else "Blues")
    ax.set_title("Parallel Coordinate Plot")
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)

    # Prepare data for plotting.
    trials = [
        trial for trial in study.trials if trial.state == TrialState.COMPLETE
    ]

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        return ax

    all_params = {p_name for t in trials for p_name in t.params.keys()}
    if params is not None:
        for input_p_name in params:
            if input_p_name not in all_params:
                raise ValueError(
                    "Parameter {} does not exist in your study.".format(
                        input_p_name))
        all_params = set(params)
    sorted_params = sorted(all_params)

    # NOTE(review): presumably these are trials missing one of the plotted
    # parameters (see the warning below) -- confirm against the helper.
    skipped_trial_numbers = _get_skipped_trial_numbers(trials, sorted_params)

    obj_org = [
        target(t) for t in trials if t.number not in skipped_trial_numbers
    ]

    if len(obj_org) == 0:
        _logger.warning(
            "Your study has only completed trials with missing parameters.")
        return ax

    obj_min = min(obj_org)
    obj_max = max(obj_org)
    obj_w = obj_max - obj_min
    # One polyline per kept trial: objective first, then each parameter.
    dims_obj_base = [[o] for o in obj_org]

    cat_param_names = []
    cat_param_values = []
    cat_param_ticks = []
    param_values = []
    var_names = [target_name]
    # Positions (in ``sorted_params``) of categorical parameters that are numerical.
    numeric_cat_params_indices: List[int] = []

    for param_index, p_name in enumerate(sorted_params):
        values = [
            t.params[p_name] for t in trials
            if t.number not in skipped_trial_numbers
        ]

        if _is_categorical(trials, p_name):
            # Each unseen category is assigned the next integer code.
            vocab = defaultdict(
                lambda: len(vocab))  # type: DefaultDict[str, int]

            if _is_numerical(trials, p_name):
                # Touch the categories in sorted order first so that their
                # integer codes follow the numeric order of the choices.
                _ = [vocab[v] for v in sorted(values)]
                numeric_cat_params_indices.append(param_index)

            # From here on ``values`` holds integer codes, not raw categories.
            values = [vocab[v] for v in values]

            cat_param_names.append(p_name)
            vocab_item_sorted = sorted(vocab.items(), key=lambda x: x[1])
            cat_param_values.append([v[0] for v in vocab_item_sorted])
            cat_param_ticks.append([v[1] for v in vocab_item_sorted])

        # Log-scale parameters are rescaled in log10 space; the raw values
        # are still kept in ``param_values`` for the twin axis.
        if _is_log_scale(trials, p_name):
            values_for_lc = [np.log10(v) for v in values]
        else:
            values_for_lc = values

        p_min = min(values_for_lc)
        p_max = max(values_for_lc)
        p_w = p_max - p_min

        if p_w == 0.0:
            # A parameter with a single distinct value cannot be rescaled;
            # pin it to the middle of the objective range.
            center = obj_w / 2 + obj_min
            for i in range(len(values)):
                dims_obj_base[i].append(center)
        else:
            for i, v in enumerate(values_for_lc):
                dims_obj_base[i].append((v - p_min) / p_w * obj_w + obj_min)

        # Long parameter names are truncated to 17 characters plus "...".
        var_names.append(
            p_name if len(p_name) < 20 else "{}...".format(p_name[:17]))
        param_values.append(values)

    if numeric_cat_params_indices:
        # np.lexsort treats the LAST key as the primary sort key, so the key
        # list is reversed to make the first numeric categorical parameter
        # the primary one.
        sorted_idx = np.lexsort([
            param_values[index] for index in numeric_cat_params_indices
        ][::-1])
        # Reorder every parameter's values with the same index so that the
        # entries of all parameters stay aligned.
        param_values = [list(np.array(v)[sorted_idx]) for v in param_values]

    # Draw multiple line plots and axes.
    # Ref: https://stackoverflow.com/a/50029441
    ax.set_xlim(0, len(sorted_params))
    ax.set_ylim(obj_min, obj_max)
    xs = [range(len(sorted_params) + 1) for _ in range(len(dims_obj_base))]
    segments = [np.column_stack([x, y]) for x, y in zip(xs, dims_obj_base)]
    lc = LineCollection(segments, cmap=cmap)
    # Color each polyline by its objective value.
    lc.set_array(np.asarray(obj_org))
    axcb = fig.colorbar(lc, pad=0.1)
    axcb.set_label(target_name)
    plt.xticks(range(len(sorted_params) + 1), var_names, rotation=330)

    for i, p_name in enumerate(sorted_params):
        ax2 = ax.twinx()
        ax2.set_ylim(min(param_values[i]), max(param_values[i]))
        if _is_log_scale(trials, p_name):
            ax2.set_yscale("log")
        ax2.spines["top"].set_visible(False)
        ax2.spines["bottom"].set_visible(False)
        ax2.xaxis.set_visible(False)
        # Invisible plot of the raw values on the twin axis.
        ax2.plot([1] * len(param_values[i]), param_values[i], visible=False)
        # Place this parameter's y axis at its column.
        ax2.spines["right"].set_position(
            ("axes", (i + 1) / len(sorted_params)))
        if p_name in cat_param_names:
            # Label the integer codes with the original category values.
            idx = cat_param_names.index(p_name)
            tick_pos = cat_param_ticks[idx]
            tick_labels = cat_param_values[idx]
            ax2.set_yticks(tick_pos)
            ax2.set_yticklabels(tick_labels)

    ax.add_collection(lc)

    return ax