def _generate_slice_subplot(
    trials: List[FrozenTrial],
    param: str,
    ax: "Axes",
    cmap: "Colormap",
    padding_ratio: float,
    obj_values: List[Union[int, float]],
    target_name: str,
) -> "PathCollection":
    """Scatter one parameter against the objective values on ``ax``.

    Args:
        trials: Trials to plot; paired positionally with ``obj_values``.
        param: Name of the parameter shown on the x axis.
        ax: Axes that receives the labels, scale, limits and scatter plot.
        cmap: Colormap used to colour the points by trial number.
        padding_ratio: Forwarded to ``_calc_lim_with_padding`` when the
            x-axis limits are computed.
        obj_values: Objective value of each entry in ``trials``.
        target_name: Label of the y axis.

    Returns:
        The object returned by ``ax.scatter``.
    """
    # Only trials that actually contain ``param`` contribute a point.
    sampled = [
        (trial.params[param], objective, trial.number)
        for trial, objective in zip(trials, obj_values)
        if param in trial.params
    ]
    xs = [point[0] for point in sampled]
    ys = [point[1] for point in sampled]
    numbers = [point[2] for point in sampled]

    ax.set(xlabel=param, ylabel=target_name)

    # Pick the x-axis scale: log, categorical (values shown as strings),
    # or the default (``None``) for everything else.
    axis_scale = None
    if _is_log_scale(trials, param):
        ax.set_xscale("log")
        axis_scale = "log"
    elif not _is_numerical(trials, param):
        xs = [str(x) for x in xs]
        axis_scale = "categorical"

    limits = _calc_lim_with_padding(xs, padding_ratio, axis_scale)
    ax.set_xlim(limits[0], limits[1])

    scatter = ax.scatter(xs, ys, c=numbers, cmap=cmap, edgecolors="grey")
    ax.label_outer()
    return scatter
def _calculate_griddata(
    trials: List[FrozenTrial],
    x_param: str,
    x_indices: List[Union[str, int, float]],
    y_param: str,
    y_indices: List[Union[str, int, float]],
    contour_point_num: int,
    target: Optional[Callable[[FrozenTrial], float]],
) -> Tuple[
    np.ndarray,
    np.ndarray,
    np.ndarray,
    List[Union[int, float]],
    List[Union[int, float]],
    List[Union[int, float]],
    List[Union[int, float]],
    List[int],
    List[str],
    List[int],
    List[str],
    int,
    int,
]:
    """Build the grid coordinates and interpolated z map for a contour plot.

    Args:
        trials: Trials providing the parameter and target values.
        x_param: Name of the parameter on the x axis.
        x_indices: Candidate x values; those not observed are added as dummy
            points when only one distinct x value was observed.
        y_param: Name of the parameter on the y axis.
        y_indices: Same as ``x_indices`` but for the y axis.
        contour_point_num: Number of grid coordinates per axis.
        target: Callable giving the z value of a trial; ``trial.value`` is
            used when it is ``None``.

    Returns:
        A 13-tuple ``(xi, yi, zi, x_values, y_values, [x_min, x_max],
        [y_min, y_max], cat_param_pos_x, cat_param_labels_x, cat_param_pos_y,
        cat_param_labels_y, n_x_dummy, n_y_dummy)``. When no trial contains
        both parameters, the arrays and all lists are empty and both counts
        are ``0``.

    Raises:
        ValueError: If a z value is neither an ``int`` nor a ``float``.
    """

    def _padded_grid(lower, upper, log_scale):
        # Widen [lower, upper] by AXES_PADDING_RATIO of its width (measured in
        # log10 space for log-scaled axes) and return the padded bounds along
        # with ``contour_point_num`` evenly spaced coordinates between them.
        if log_scale:
            pad = (np.log10(upper) - np.log10(lower)) * AXES_PADDING_RATIO
            lower = np.power(10, np.log10(lower) - pad)
            upper = np.power(10, np.log10(upper) + pad)
            grid = np.logspace(np.log10(lower), np.log10(upper), contour_point_num)
        else:
            pad = (upper - lower) * AXES_PADDING_RATIO
            lower -= pad
            upper += pad
            grid = np.linspace(lower, upper, contour_point_num)
        return lower, upper, grid

    # Extract values for the x, y and z axes from each trial. The "span" lists
    # collect every observed value of a parameter (used for the axis range),
    # whereas xs/ys/zs only hold trials that contain both parameters.
    xs = []
    ys = []
    zs = []
    x_span = []
    y_span = []
    for trial in trials:
        has_x = x_param in trial.params
        if has_x:
            x_span.append(trial.params[x_param])
        has_y = y_param in trial.params
        if has_y:
            y_span.append(trial.params[y_param])
        if not (has_x and has_y):
            continue

        xs.append(trial.params[x_param])
        ys.append(trial.params[y_param])

        raw = trial.value if target is None else target(trial)
        if isinstance(raw, int):
            zs.append(float(raw))
        elif isinstance(raw, float):
            zs.append(raw)
        else:
            raise ValueError(
                "Trial{} has COMPLETE state, but its target value is non-numeric.".format(
                    trial.number
                )
            )

    # Nothing to plot when no trial has both parameters.
    if not xs or not ys:
        return (
            np.array([]),
            np.array([]),
            np.array([]),
            xs,
            ys,
            [],
            [],
            [],
            [],
            [],
            [],
            0,
            0,
        )

    # Add dummy values for the grid data calculation when a parameter has a
    # single unique value; the other two axes are repeated to keep the three
    # lists the same length.
    x_dummy = []
    y_dummy = []
    if len(set(xs)) == 1:
        x_dummy = [candidate for candidate in x_indices if candidate not in xs]
        xs = xs + x_dummy * len(xs)
        ys = ys * (1 + len(x_dummy))
        zs = zs * (1 + len(x_dummy))
    if len(set(ys)) == 1:
        y_dummy = [candidate for candidate in y_indices if candidate not in ys]
        ys = ys + y_dummy * len(ys)
        xs = xs * (1 + len(y_dummy))
        zs = zs * (1 + len(y_dummy))

    # Convert categorical values to int via their string representation.
    x_tick_labels: List[str] = []
    x_tick_pos: List[int] = []
    y_tick_labels: List[str] = []
    y_tick_pos: List[int] = []
    if not _is_numerical(trials, x_param):
        x_encoder = _LabelEncoder()
        x_span = x_encoder.fit_transform([str(v) for v in x_span])
        xs = x_encoder.transform([str(v) for v in xs])
        x_tick_labels = x_encoder.get_labels()
        x_tick_pos = x_encoder.get_indices()
    if not _is_numerical(trials, y_param):
        y_encoder = _LabelEncoder()
        y_span = y_encoder.fit_transform([str(v) for v in y_span])
        ys = y_encoder.transform([str(v) for v in ys])
        y_tick_labels = y_encoder.get_labels()
        y_tick_pos = y_encoder.get_indices()

    # Axis bounds come from every observed value of each parameter.
    x_lower = min(x_span)
    x_upper = max(x_span)
    y_lower = min(y_span)
    y_upper = max(y_span)

    # For x and y, create a 1-D array of evenly spaced coordinates on a linear
    # or log scale, padded on both ends.
    x_lower, x_upper, xi = _padded_grid(x_lower, x_upper, _is_log_scale(trials, x_param))
    y_lower, y_upper, yi = _padded_grid(y_lower, y_upper, _is_log_scale(trials, y_param))

    # Create an irregularly spaced map of trial values and interpolate it with
    # Plotly's interpolation formulation. Skipped when both axes show the same
    # parameter, in which case ``zi`` stays empty.
    zi: np.ndarray = np.array([])
    if x_param != y_param:
        zmap = _create_zmap(xs, ys, zs, xi, yi)
        zi = _interpolate_zmap(zmap, contour_point_num)

    return (
        xi,
        yi,
        zi,
        xs,
        ys,
        [x_lower, x_upper],
        [y_lower, y_upper],
        x_tick_pos,
        x_tick_labels,
        y_tick_pos,
        y_tick_labels,
        len(x_dummy),
        len(y_dummy),
    )
def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw a parallel coordinate plot of the completed trials of ``study``.

    Args:
        study: Study whose trials are plotted; only trials in the
            ``COMPLETE`` state are used.
        params: Names of the parameters to show. When ``None``, every
            parameter found in the completed trials is shown.
        target: Callable giving the value plotted on the first axis and used
            for the line colours. When ``None``, ``trial.value`` is used.
        target_name: Label of the first axis and of the colorbar.

    Returns:
        The main ``Axes`` of the figure. It is returned without any lines
        when there is no completed trial, or when no completed trial remains
        after removing the skipped ones.

    Raises:
        ValueError: If a name in ``params`` is not a parameter of any
            completed trial.
    """
    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target
        # With the default target, the colormap is reversed only for
        # minimization studies.
        reversescale = study.direction == StudyDirection.MINIMIZE
    else:
        # A custom target always uses the reversed colormap.
        reversescale = True

    # Set up the graph style.
    fig, ax = plt.subplots()
    cmap = plt.get_cmap("Blues_r" if reversescale else "Blues")
    ax.set_title("Parallel Coordinate Plot")
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)

    # Prepare data for plotting.
    trials = [
        trial for trial in study.trials if trial.state == TrialState.COMPLETE
    ]
    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        return ax

    all_params = {p_name for t in trials for p_name in t.params.keys()}
    if params is not None:
        for input_p_name in params:
            if input_p_name not in all_params:
                raise ValueError(
                    "Parameter {} does not exist in your study.".format(
                        input_p_name))
        all_params = set(params)
    sorted_params = sorted(all_params)

    # NOTE(review): presumably the numbers of trials that lack at least one of
    # ``sorted_params`` -- confirm against _get_skipped_trial_numbers.
    skipped_trial_numbers = _get_skipped_trial_numbers(trials, sorted_params)

    obj_org = [
        target(t) for t in trials if t.number not in skipped_trial_numbers
    ]
    if len(obj_org) == 0:
        _logger.warning(
            "Your study has only completed trials with missing parameters.")
        return ax

    obj_min = min(obj_org)
    obj_max = max(obj_org)
    obj_w = obj_max - obj_min
    # One polyline per plotted trial: it starts with the target value, and
    # each parameter appends one more y coordinate below.
    dims_obj_base = [[o] for o in obj_org]
    cat_param_names = []
    cat_param_values = []
    cat_param_ticks = []
    param_values = []
    var_names = [target_name]
    numeric_cat_params_indices: List[int] = []

    for param_index, p_name in enumerate(sorted_params):
        values = [
            t.params[p_name] for t in trials
            if t.number not in skipped_trial_numbers
        ]

        if _is_categorical(trials, p_name):
            # The factory reads ``vocab`` at call time, so every unseen key is
            # assigned the current size of the dict, i.e. the next free index.
            vocab = defaultdict(
                lambda: len(vocab))  # type: DefaultDict[str, int]

            if _is_numerical(trials, p_name):
                # Register the values in sorted order first so that the
                # assigned indices follow their numerical order.
                _ = [vocab[v] for v in sorted(values)]
                numeric_cat_params_indices.append(param_index)

            # Replace each categorical value by its integer index.
            values = [vocab[v] for v in values]

            cat_param_names.append(p_name)
            vocab_item_sorted = sorted(vocab.items(), key=lambda x: x[1])
            cat_param_values.append([v[0] for v in vocab_item_sorted])
            cat_param_ticks.append([v[1] for v in vocab_item_sorted])

        if _is_log_scale(trials, p_name):
            values_for_lc = [np.log10(v) for v in values]
        else:
            values_for_lc = values

        p_min = min(values_for_lc)
        p_max = max(values_for_lc)
        p_w = p_max - p_min

        if p_w == 0.0:
            # A constant parameter is drawn at the middle of the target range.
            center = obj_w / 2 + obj_min
            for i in range(len(values)):
                dims_obj_base[i].append(center)
        else:
            # Linearly rescale the parameter range onto [obj_min, obj_max] so
            # that all axes share the y range of the main Axes.
            for i, v in enumerate(values_for_lc):
                dims_obj_base[i].append((v - p_min) / p_w * obj_w + obj_min)

        # Names of 20 or more characters are cut to 17 characters plus "...".
        var_names.append(
            p_name if len(p_name) < 20 else "{}...".format(p_name[:17]))
        param_values.append(values)

    if numeric_cat_params_indices:
        # np.lexsort uses the last key as the primary sort key, so the list of
        # keys is reversed to give the first numeric categorical parameter the
        # highest priority.
        sorted_idx = np.lexsort([
            param_values[index] for index in numeric_cat_params_indices
        ][::-1])
        # Since the values are mapped to other categories by the index,
        # the index will be swapped according to the sorted index of numeric params.
        param_values = [list(np.array(v)[sorted_idx]) for v in param_values]

    # Draw multiple line plots and axes.
    # Ref: https://stackoverflow.com/a/50029441
    ax.set_xlim(0, len(sorted_params))
    ax.set_ylim(obj_min, obj_max)
    # Every polyline uses x = 0 for the target and x = 1..N for the parameters.
    xs = [range(len(sorted_params) + 1) for _ in range(len(dims_obj_base))]
    segments = [np.column_stack([x, y]) for x, y in zip(xs, dims_obj_base)]
    lc = LineCollection(segments, cmap=cmap)
    # Colour each line by its target value.
    lc.set_array(np.asarray(obj_org))
    axcb = fig.colorbar(lc, pad=0.1)
    axcb.set_label(target_name)
    plt.xticks(range(len(sorted_params) + 1), var_names, rotation=330)

    # Add one twin y axis per parameter, carrying that parameter's own scale.
    for i, p_name in enumerate(sorted_params):
        ax2 = ax.twinx()
        ax2.set_ylim(min(param_values[i]), max(param_values[i]))
        if _is_log_scale(trials, p_name):
            ax2.set_yscale("log")
        ax2.spines["top"].set_visible(False)
        ax2.spines["bottom"].set_visible(False)
        ax2.xaxis.set_visible(False)
        # The values are plotted invisibly on the twin axis.
        ax2.plot([1] * len(param_values[i]), param_values[i], visible=False)
        # Place the right spine at this parameter's x position, given as a
        # fraction of the Axes width.
        ax2.spines["right"].set_position(
            ("axes", (i + 1) / len(sorted_params)))
        if p_name in cat_param_names:
            # Categorical axes show the original category values as labels.
            idx = cat_param_names.index(p_name)
            tick_pos = cat_param_ticks[idx]
            tick_labels = cat_param_values[idx]
            ax2.set_yticks(tick_pos)
            ax2.set_yticklabels(tick_labels)

    ax.add_collection(lc)

    return ax
def _calculate_griddata(
    trials: List[FrozenTrial],
    x_param: str,
    x_indices: List[Union[str, int, float]],
    y_param: str,
    y_indices: List[Union[str, int, float]],
    contour_point_num: int,
    target: Optional[Callable[[FrozenTrial], float]],
) -> Tuple[
    np.ndarray,
    np.ndarray,
    np.ndarray,
    List[Union[int, float]],
    List[Union[int, float]],
    List[Union[int, float]],
    List[Union[int, float]],
    List[int],
    List[str],
    List[int],
    List[str],
    int,
    int,
]:
    """Build the grid coordinates and interpolated z values for a contour plot.

    Args:
        trials: Trials providing the parameter and target values.
        x_param: Name of the parameter on the x axis.
        x_indices: Candidate x values; those not observed are added as dummy
            points when only one distinct x value was observed.
        y_param: Name of the parameter on the y axis.
        y_indices: Same as ``x_indices`` but for the y axis.
        contour_point_num: Number of grid coordinates per axis.
        target: Callable giving the z value of a trial; ``trial.value`` is
            used when it is ``None``.

    Returns:
        A 13-tuple ``(xi, yi, zi, x_values, y_values, [x_min, x_max],
        [y_min, y_max], cat_param_pos_x, cat_param_labels_x, cat_param_pos_y,
        cat_param_labels_y, n_x_dummy, n_y_dummy)``. When no trial contains
        both parameters, the arrays and all lists are empty and both counts
        are ``0``. ``xi``, ``yi`` and ``zi`` are empty arrays when
        ``x_param == y_param``.

    Raises:
        ValueError: If a z value is neither an ``int`` nor a ``float``.
    """
    # Extract values for the x, y and z axes from each trial that contains
    # both parameters.
    xs = []
    ys = []
    zs = []
    for trial in trials:
        if not (x_param in trial.params and y_param in trial.params):
            continue

        xs.append(trial.params[x_param])
        ys.append(trial.params[y_param])

        raw = trial.value if target is None else target(trial)
        if isinstance(raw, int):
            zs.append(float(raw))
        elif isinstance(raw, float):
            zs.append(raw)
        else:
            raise ValueError(
                "Trial{} has COMPLETE state, but its target value is non-numeric.".format(
                    trial.number
                )
            )

    # Nothing to plot when no trial has both parameters.
    if not xs or not ys:
        return (
            np.array([]),
            np.array([]),
            np.array([]),
            xs,
            ys,
            [],
            [],
            [],
            [],
            [],
            [],
            0,
            0,
        )

    # Add dummy values for the grid data calculation when a parameter has a
    # single unique value; the other two axes are repeated to keep the three
    # lists the same length.
    x_dummy = []
    y_dummy = []
    if len(set(xs)) == 1:
        x_dummy = [candidate for candidate in x_indices if candidate not in xs]
        xs = xs + x_dummy * len(xs)
        ys = ys * (1 + len(x_dummy))
        zs = zs * (1 + len(x_dummy))
    if len(set(ys)) == 1:
        y_dummy = [candidate for candidate in y_indices if candidate not in ys]
        ys = ys + y_dummy * len(ys)
        xs = xs * (1 + len(y_dummy))
        zs = zs * (1 + len(y_dummy))

    # Convert categorical values to int via their string representation.
    x_tick_labels = []  # type: List[str]
    x_tick_pos = []  # type: List[int]
    y_tick_labels = []  # type: List[str]
    y_tick_pos = []  # type: List[int]
    if not _is_numerical(trials, x_param):
        xs, x_tick_labels, x_tick_pos = _convert_categorical2int([str(v) for v in xs])
    if not _is_numerical(trials, y_param):
        ys, y_tick_labels, y_tick_pos = _convert_categorical2int([str(v) for v in ys])

    # Axis bounds are taken from the (possibly padded and encoded) values.
    x_lower = min(xs)
    x_upper = max(xs)
    y_lower = min(ys)
    y_upper = max(ys)

    # For x and y, create a 1-D array of evenly spaced coordinates on a linear
    # or log scale. Everything stays empty when both axes show the same
    # parameter.
    xi = np.array([])
    yi = np.array([])
    zi = np.array([])
    if x_param != y_param:
        xi = (
            np.logspace(np.log10(x_lower), np.log10(x_upper), contour_point_num)
            if _is_log_scale(trials, x_param)
            else np.linspace(x_lower, x_upper, contour_point_num)
        )
        yi = (
            np.logspace(np.log10(y_lower), np.log10(y_upper), contour_point_num)
            if _is_log_scale(trials, y_param)
            else np.linspace(y_lower, y_upper, contour_point_num)
        )

        # Interpolate z-axis data on a grid with cubic interpolator.
        # TODO(ytknzw): Implement Plotly-like interpolation algorithm.
        sample_points = np.column_stack((xs, ys))
        zi = griddata(
            sample_points,
            zs,
            (xi[None, :], yi[:, None]),
            method="cubic",
        )

    return (
        xi,
        yi,
        zi,
        xs,
        ys,
        [x_lower, x_upper],
        [y_lower, y_upper],
        x_tick_pos,
        x_tick_labels,
        y_tick_pos,
        y_tick_labels,
        len(x_dummy),
        len(y_dummy),
    )