def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw a parallel coordinate plot of a study's completed trials with Matplotlib.

    The first vertical axis shows the target value; every selected parameter
    gets one further axis. Each plotted trial is one poly-line whose colour
    encodes its target value.

    Args:
        study: Study whose trials in state ``TrialState.COMPLETE`` are plotted.
        params: Names of the parameters to plot. ``None`` selects every
            parameter that appears in at least one completed trial.
        target: Function mapping a trial to the plotted value. When ``None``,
            ``trial.value`` is used and the colour map direction follows
            ``study.direction``; otherwise the reversed colour map is used.
        target_name: Label of the first axis and of the colour bar.

    Returns:
        The Axes holding the plot. When there is nothing to plot (no completed
        trials, or every completed trial is skipped) a warning is logged and
        the styled but otherwise empty Axes is returned.

    Raises:
        ValueError: If a name in ``params`` is not a parameter of any
            completed trial.
    """
    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target
        reversescale = study.direction == StudyDirection.MINIMIZE
    else:
        reversescale = True

    # Set up the graph style.
    fig, ax = plt.subplots()
    cmap = plt.get_cmap("Blues_r" if reversescale else "Blues")
    ax.set_title("Parallel Coordinate Plot")
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)

    # Prepare data for plotting.
    trials = [trial for trial in study.trials if trial.state == TrialState.COMPLETE]

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        return ax

    all_params = {p_name for t in trials for p_name in t.params.keys()}
    if params is not None:
        for input_p_name in params:
            if input_p_name not in all_params:
                raise ValueError("Parameter {} does not exist in your study.".format(input_p_name))
        all_params = set(params)
    sorted_params = sorted(all_params)

    skipped_trial_numbers = _get_skipped_trial_numbers(trials, sorted_params)

    obj_org = [target(t) for t in trials if t.number not in skipped_trial_numbers]

    if len(obj_org) == 0:
        _logger.warning("Your study has only completed trials with missing parameters.")
        return ax

    obj_min = min(obj_org)
    obj_max = max(obj_org)
    obj_w = obj_max - obj_min
    # One row per plotted trial: [target, param_1, param_2, ...], where every
    # parameter is rescaled into the target's range so that all columns can be
    # drawn against the single primary y-axis.
    dims_obj_base = [[o] for o in obj_org]

    cat_param_names = []
    cat_param_values = []
    cat_param_ticks = []
    param_values = []
    var_names = [target_name]
    numeric_cat_params_indices: List[int] = []

    for param_index, p_name in enumerate(sorted_params):
        values = [t.params[p_name] for t in trials if t.number not in skipped_trial_numbers]

        if _is_categorical(trials, p_name):
            # Each unseen category gets the next free integer code.
            vocab = defaultdict(lambda: len(vocab))  # type: DefaultDict[str, int]

            if _is_numerical(trials, p_name):
                # Register the categories in sorted order first so that the
                # integer codes follow the numeric order of the choices.
                _ = [vocab[v] for v in sorted(values)]
                numeric_cat_params_indices.append(param_index)

            values = [vocab[v] for v in values]

            cat_param_names.append(p_name)
            vocab_item_sorted = sorted(vocab.items(), key=lambda x: x[1])
            cat_param_values.append([v[0] for v in vocab_item_sorted])
            cat_param_ticks.append([v[1] for v in vocab_item_sorted])

        if _is_log_scale(trials, p_name):
            values_for_lc = [np.log10(v) for v in values]
        else:
            values_for_lc = values

        p_min = min(values_for_lc)
        p_max = max(values_for_lc)
        p_w = p_max - p_min

        if p_w == 0.0:
            # A constant parameter cannot be rescaled (division by zero), so
            # every line passes through the middle of the axis instead.
            center = obj_w / 2 + obj_min
            for row in dims_obj_base:
                row.append(center)
        else:
            for i, v in enumerate(values_for_lc):
                dims_obj_base[i].append((v - p_min) / p_w * obj_w + obj_min)

        var_names.append(p_name if len(p_name) < 20 else "{}...".format(p_name[:17]))
        param_values.append(values)

    if numeric_cat_params_indices:
        # np.lexsort uses the LAST key as the primary sort key, so the keys
        # are passed in reversed order.
        sorted_idx = np.lexsort(
            [param_values[index] for index in numeric_cat_params_indices][::-1]
        )
        # Only ``param_values`` (used below for the per-parameter axes) is
        # reordered; the line segments were already stored in
        # ``dims_obj_base`` above.
        param_values = [list(np.array(v)[sorted_idx]) for v in param_values]

    # Draw multiple line plots and axes.
    # Ref: https://stackoverflow.com/a/50029441
    ax.set_xlim(0, len(sorted_params))
    ax.set_ylim(obj_min, obj_max)
    xs = [range(len(sorted_params) + 1) for _ in range(len(dims_obj_base))]
    segments = [np.column_stack([x, y]) for x, y in zip(xs, dims_obj_base)]
    lc = LineCollection(segments, cmap=cmap)
    lc.set_array(np.asarray(obj_org))
    # ``lc`` is not attached to any Axes yet, so the Axes to take space from is
    # named explicitly: recent Matplotlib releases raise instead of silently
    # falling back to the current Axes.
    axcb = fig.colorbar(lc, pad=0.1, ax=ax)
    axcb.set_label(target_name)
    plt.xticks(range(len(sorted_params) + 1), var_names, rotation=330)

    for i, p_name in enumerate(sorted_params):
        # One twin axis per parameter, carrying that parameter's real scale.
        ax2 = ax.twinx()
        ax2.set_ylim(min(param_values[i]), max(param_values[i]))
        if _is_log_scale(trials, p_name):
            ax2.set_yscale("log")
        ax2.spines["top"].set_visible(False)
        ax2.spines["bottom"].set_visible(False)
        ax2.xaxis.set_visible(False)
        # Invisible plot of the raw values on the twin axis.
        ax2.plot([1] * len(param_values[i]), param_values[i], visible=False)
        ax2.spines["right"].set_position(("axes", (i + 1) / len(sorted_params)))
        if p_name in cat_param_names:
            idx = cat_param_names.index(p_name)
            tick_pos = cat_param_ticks[idx]
            tick_labels = cat_param_values[idx]
            ax2.set_yticks(tick_pos)
            ax2.set_yticklabels(tick_labels)

    ax.add_collection(lc)

    return ax
def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "go.Figure":
    """Build a Plotly parallel coordinate figure from a study's completed trials.

    The first dimension is the target value; one further dimension is added
    per selected parameter, in sorted name order.

    Args:
        study: Study whose completed trials (after ``_filter_nonfinite``) are
            plotted.
        params: Names of the parameters to plot. ``None`` selects every
            parameter that appears in at least one of those trials.
        target: Function mapping a trial to the plotted value. ``None`` means
            ``trial.value``.
        target_name: Label of the first dimension and title of the colour bar.

    Returns:
        A figure with a single ``go.Parcoords`` trace, or a figure without
        data (after logging a warning) when there is nothing to plot.

    Raises:
        ValueError: If a name in ``params`` is not a parameter of any of the
            considered trials.
    """
    layout = go.Layout(title="Parallel Coordinate Plot")
    reverse_scale = _is_reverse_scale(study, target)

    trials = _filter_nonfinite(
        study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)), target=target
    )
    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        return go.Figure(data=[], layout=layout)

    known_params = {name for trial in trials for name in trial.params}
    if params is None:
        sorted_params = sorted(known_params)
    else:
        for input_p_name in params:
            if input_p_name not in known_params:
                raise ValueError("Parameter {} does not exist in your study.".format(input_p_name))
        sorted_params = sorted(set(params))

    def _trial_value(t: FrozenTrial) -> float:
        return cast(float, t.value)

    evaluate = _trial_value if target is None else target

    skipped_trial_ids = _get_skipped_trial_numbers(trials, sorted_params)
    plotted_trials = [t for t in trials if t.number not in skipped_trial_ids]
    objectives = tuple(evaluate(t) for t in plotted_trials)

    if not objectives:
        _logger.warning("Your study has only completed trials with missing parameters.")
        return go.Figure(data=[], layout=layout)

    dims: List[Dict[str, Any]] = [
        {
            "label": target_name,
            "values": objectives,
            "range": (min(objectives), max(objectives)),
        }
    ]

    # Positions (within ``dims``) of categorical parameters whose choices are
    # numbers; they drive the reordering further below.
    numeric_cat_params_indices: List[int] = []

    for dim_index, p_name in enumerate(sorted_params, start=1):
        values = [t.params[p_name] for t in plotted_trials if p_name in t.params]
        # Tick settings exist only for log-scaled and categorical parameters.
        tick_settings: Dict[str, Any] = {}

        if _is_log_scale(trials, p_name):
            values = [math.log10(v) for v in values]
            low, high = min(values), max(values)
            # Ticks at the integer exponents inside the range, plus both ends.
            tickvals: List[Any] = list(range(math.ceil(low), math.ceil(high)))
            if low not in tickvals:
                tickvals.insert(0, low)
            if high not in tickvals:
                tickvals.append(high)
            tick_settings = {
                "tickvals": tickvals,
                "ticktext": ["{:.3g}".format(math.pow(10, x)) for x in tickvals],
            }
        elif _is_categorical(trials, p_name):
            numeric_choices = _is_numerical(trials, p_name)
            # Numeric choices are coded in sorted order, other choices in
            # order of first appearance.
            codes: Dict[Any, int] = {}
            for choice in sorted(values) if numeric_choices else values:
                codes.setdefault(choice, len(codes))
            values = [codes[v] for v in values]

            if numeric_choices:
                ticktext = sorted(codes)
                numeric_cat_params_indices.append(dim_index)
            else:
                ticktext = sorted(codes, key=codes.__getitem__)
            tick_settings = {
                "tickvals": list(range(len(codes))),
                "ticktext": ticktext,
            }

        dim: Dict[str, Any] = {
            "label": p_name if len(p_name) < 20 else "{}...".format(p_name[:17]),
            "values": tuple(values),
            "range": (min(values), max(values)),
        }
        dim.update(tick_settings)
        dims.append(dim)

    if numeric_cat_params_indices:
        # np.lexsort treats the LAST key as the primary one, hence the
        # reversed iteration.
        sort_keys = [dims[index]["values"] for index in reversed(numeric_cat_params_indices)]
        order = np.lexsort(sort_keys)
        # Apply the same permutation to every dimension, objective included,
        # so that each trial's values stay aligned.
        for dim in dims:
            dim["values"] = tuple(np.array(dim["values"])[order])

    line_style = {
        "color": dims[0]["values"],
        "colorscale": COLOR_SCALE,
        "colorbar": {"title": target_name},
        "showscale": True,
        "reversescale": reverse_scale,
    }
    parcoords = go.Parcoords(
        dimensions=dims,
        labelangle=30,
        labelside="bottom",
        line=line_style,
    )

    return go.Figure(data=[parcoords], layout=layout)
def _get_parallel_coordinate_info(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> _ParallelCoordinateInfo:
    """Collect the data needed to draw a parallel coordinate plot.

    Args:
        study: Study whose completed trials (after ``_filter_nonfinite``) are
            used.
        params: Names of the parameters to include. ``None`` selects every
            parameter that appears in at least one of those trials.
        target: Function mapping a trial to the plotted value. ``None`` means
            ``trial.value``.
        target_name: Label of the objective dimension.

    Returns:
        A ``_ParallelCoordinateInfo`` with the objective dimension and one
        dimension per selected parameter in sorted name order. When there is
        nothing to plot, a warning is logged and ``dims_params`` is empty.

    Raises:
        ValueError: If a name in ``params`` is not a parameter of any of the
            considered trials. This check runs before the emptiness checks.
    """
    reverse_scale = _is_reverse_scale(study, target)

    trials = _filter_nonfinite(
        study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)), target=target
    )

    known_params = {name for trial in trials for name in trial.params}
    if params is None:
        sorted_params = sorted(known_params)
    else:
        for input_p_name in params:
            if input_p_name not in known_params:
                raise ValueError("Parameter {} does not exist in your study.".format(input_p_name))
        sorted_params = sorted(set(params))

    def _trial_value(t: FrozenTrial) -> float:
        return cast(float, t.value)

    evaluate = _trial_value if target is None else target

    skipped_trial_numbers = _get_skipped_trial_numbers(trials, sorted_params)
    plotted_trials = [t for t in trials if t.number not in skipped_trial_numbers]
    objectives = tuple(evaluate(t) for t in plotted_trials)

    if objectives:
        objective_range = (min(objectives), max(objectives))
    else:
        # Dummy range; it is ignored when plotting.
        objective_range = (0, 0)

    dim_objective = _DimensionInfo(
        label=target_name,
        values=objectives,
        range=objective_range,
        is_log=False,
        is_cat=False,
        tickvals=[],
        ticktext=[],
    )

    # Both "nothing to plot" situations give the same result and differ only
    # in the warning that is logged.
    nothing_to_plot = None
    if len(trials) == 0:
        nothing_to_plot = "Your study does not have any completed trials."
    elif not objectives:
        nothing_to_plot = "Your study has only completed trials with missing parameters."
    if nothing_to_plot is not None:
        _logger.warning(nothing_to_plot)
        return _ParallelCoordinateInfo(
            dim_objective=dim_objective,
            dims_params=[],
            reverse_scale=reverse_scale,
            target_name=target_name,
        )

    # 1-based positions of categorical parameters whose choices are numbers.
    numeric_cat_params_indices: List[int] = []
    dims = []

    for dim_index, p_name in enumerate(sorted_params, start=1):
        values = [t.params[p_name] for t in plotted_trials if p_name in t.params]
        is_log = False
        is_cat = False
        tickvals = []
        ticktext = []

        if _is_log_scale(trials, p_name):
            is_log = True
            values = [math.log10(v) for v in values]
            low, high = min(values), max(values)
            # Ticks at the integer exponents inside the range, plus both ends.
            tickvals = list(range(math.ceil(low), math.ceil(high)))
            if low not in tickvals:
                tickvals = [low, *tickvals]
            if high not in tickvals:
                tickvals = [*tickvals, high]
            ticktext = ["{:.3g}".format(math.pow(10, x)) for x in tickvals]
        elif _is_categorical(trials, p_name):
            is_cat = True
            # Every unseen choice receives the next free integer code.
            vocab: DefaultDict[str, int] = defaultdict(lambda: len(vocab))
            numeric_choices = _is_numerical(trials, p_name)
            if numeric_choices:
                # Register the choices in sorted order first so the codes
                # follow their numeric order.
                for choice in sorted(values):
                    _ = vocab[choice]
                numeric_cat_params_indices.append(dim_index)
            values = [vocab[v] for v in values]
            if numeric_choices:
                ticktext = sorted(vocab)
            else:
                ticktext = sorted(vocab, key=vocab.__getitem__)
            tickvals = list(range(len(vocab)))

        dims.append(
            _DimensionInfo(
                label=_truncate_label(p_name),
                values=tuple(values),
                range=(min(values), max(values)),
                is_log=is_log,
                is_cat=is_cat,
                tickvals=tickvals,
                ticktext=ticktext,
            )
        )

    if numeric_cat_params_indices:
        # np.lexsort treats the LAST key as the primary one, hence the
        # reversed iteration. The recorded indices are 1-based, so subtract
        # one to address ``dims``.
        order = np.lexsort(
            [dims[index - 1].values for index in reversed(numeric_cat_params_indices)]
        )

        def _reordered(dim: _DimensionInfo) -> _DimensionInfo:
            # Same dimension with its values permuted by ``order``.
            return _DimensionInfo(
                label=dim.label,
                values=tuple(np.array(dim.values)[order]),
                range=dim.range,
                is_log=dim.is_log,
                is_cat=dim.is_cat,
                tickvals=dim.tickvals,
                ticktext=dim.ticktext,
            )

        # The objective is permuted together with the parameters so that each
        # trial's values stay aligned.
        dim_objective = _reordered(dim_objective)
        dims = [_reordered(dim) for dim in dims]

    return _ParallelCoordinateInfo(
        dim_objective=dim_objective,
        dims_params=dims,
        reverse_scale=reverse_scale,
        target_name=target_name,
    )