def plot_steps_to_result(ax,
                         results,
                         add_scaling=True,
                         scaling_label=None,
                         normalizing_batch_size=None):
  """Plots steps to result vs batch size.

  Args:
    ax: Instance of pyplot.axes.Axes on which to plot.
    results: DataFrame of measurements indexed by (batch_size, step) with one
      row per batch size. Or, a dictionary of such DataFrames.
    add_scaling: Whether to draw a line indicating "perfect scaling".
    scaling_label: The label in the results dictionary used to draw the
      "perfect scaling" line (provided add_scaling is True). If not specified,
      a separate line is drawn for each label.
    normalizing_batch_size: If specified, the steps to result curves are
      normalized for each label in the results dictionary by the number of
      steps at this batch size.
  """
  if isinstance(results, pd.DataFrame):
    results = {"": results}

  for label, df in results.items():
    batch_sizes = get_index_values(df, "batch_size")
    steps = get_index_values(df, "step")

    # Possibly normalize the steps.
    if normalizing_batch_size:
      normalizing_index = np.where(batch_sizes == normalizing_batch_size)[0]
      if len(normalizing_index) != 1:
        raise ValueError(
            "Expected one row with batch_size={}, but found {}".format(
                normalizing_batch_size, len(normalizing_index)))
      steps = steps.astype(float) / steps[normalizing_index]

    # Plot steps to result.
    ax.plot(batch_sizes, steps, "^-", label=label)

    # Possibly plot "perfect scaling".
    if add_scaling and (not scaling_label or label == scaling_label):
      if normalizing_batch_size:
        scale = steps[normalizing_index] * normalizing_batch_size
      else:
        scale = steps[0] * batch_sizes[0]
      linear_scaling = scale / batch_sizes
      ax.plot(batch_sizes, linear_scaling, "k--", label="_nolegend_")

  # Format the axes.
  ax.set_xlabel("Batch Size")
  if normalizing_batch_size:
    ylabel = "Steps / (Steps at B={})".format(normalizing_batch_size)
  else:
    ylabel = "Steps"
  ax.set_ylabel(ylabel)
  ax.set_xscale("log", base=2)
  ax.set_yscale("log", base=2)
  ax.grid(True)
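

# Illustrative usage sketch (not part of the original module). It builds a
# tiny synthetic steps-to-result table indexed by (batch_size, step), as the
# docstring above describes, and plots it. The batch sizes, step counts, and
# the "error_rate" column are invented; this assumes `get_index_values`
# (defined elsewhere in this module) returns the values of the named index
# level as an array, and imports matplotlib.pyplot locally in case it is not
# already imported at module level.
def _example_steps_to_result_plot():
  import matplotlib.pyplot as plt

  index = pd.MultiIndex.from_tuples(
      [(64, 8000), (128, 4100), (256, 2300), (512, 1600)],
      names=["batch_size", "step"])
  # Synthetic values for illustration only.
  df = pd.DataFrame({"error_rate": [0.25, 0.25, 0.25, 0.25]}, index=index)

  fig, ax = plt.subplots()
  plot_steps_to_result(
      ax, {"Synthetic workload": df}, normalizing_batch_size=64)
  ax.legend()
  fig.savefig("steps_to_result.png")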


def plot_best_measurements(ax, best_measurements, objective_col_name):
  """Plots the best objective value vs batch size.

  Args:
    ax: Instance of pyplot.axes.Axes on which to plot.
    best_measurements: DataFrame of measurements indexed by batch_size with one
      row per batch size. Or, a dictionary of such DataFrames.
    objective_col_name: Column name of the objective metric.
  """
  if isinstance(best_measurements, pd.DataFrame):
    best_measurements = {"": best_measurements}

  for label, df in best_measurements.items():
    batch_sizes = get_index_values(df, "batch_size")
    best_objective_values = df[objective_col_name]
    ax.plot(batch_sizes, best_objective_values, "^-", label=label)

  # Format the axes.
  ax.set_xlabel("Batch Size")
  ax.set_xscale("log", base=2)
  ax.grid(True)
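

# Illustrative usage sketch (not part of the original module): plots the best
# objective value per batch size from a synthetic table. The index values and
# the "error_rate" column name are invented for illustration.
def _example_best_measurements_plot():
  import matplotlib.pyplot as plt

  # One row per batch size, indexed by batch_size.
  df = pd.DataFrame(
      {"error_rate": [0.231, 0.229, 0.233, 0.240]},
      index=pd.Index([64, 128, 256, 512], name="batch_size"))

  fig, ax = plt.subplots()
  plot_best_measurements(
      ax, {"Synthetic workload": df}, objective_col_name="error_rate")
  ax.set_ylabel("Best Error Rate")
  ax.legend()
  fig.savefig("best_measurements.png")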


def plot_optimal_metaparameter_values(ax, parameter_to_plot, steps_to_result,
                                      workload_metadata):
  """Plots the values of the optimal metaparameters vs batch size.

  Args:
    ax: Instance of pyplot.axes.Axes on which to plot.
    parameter_to_plot: One of ["Learning Rate", "Momentum",
      "Effective Learning Rate"].
    steps_to_result: DataFrame of measurements indexed by
      (batch_size, trial_id, step) corresponding to the optimal measurements
      for each batch size.
    workload_metadata: A dict mapping each batch size to the metadata for the
      corresponding study.
  """
  # Get the parameters corresponding to the optimal measurements.
  batch_sizes = get_index_values(steps_to_result, "batch_size")
  trial_ids = get_index_values(steps_to_result, "trial_id")
  optimal_parameters = [
      workload_metadata[batch_size]["trials"][trial_id]["parameters"]
      for batch_size, trial_id in zip(batch_sizes, trial_ids)
  ]

  # Compute y-values for the parameter to plot.
  ylabel = parameter_to_plot
  plot_heuristics = True
  if parameter_to_plot == "Learning Rate":
    yvalues = np.array(
        [parameters["learning_rate"] for parameters in optimal_parameters])
  elif parameter_to_plot == "Momentum":
    yvalues = np.array(
        [parameters["momentum"] for parameters in optimal_parameters])
    plot_heuristics = False
  elif parameter_to_plot == "Effective Learning Rate":
    learning_rates = np.array(
        [parameters["learning_rate"] for parameters in optimal_parameters])
    momenta = np.array(
        [parameters["momentum"] for parameters in optimal_parameters])
    yvalues = learning_rates / (1 - momenta)
    ylabel = "Learning Rate / (1 - Momentum)"
  else:
    raise ValueError(
        "Unrecognized parameter_to_plot: {}".format(parameter_to_plot))

  # Plot the optimal parameter values vs batch size.
  ax.plot(batch_sizes, yvalues, "^-", label="Optimal " + parameter_to_plot)

  # Plot the "linear" and "square root" scaling heuristics for adjusting the
  # metaparameter values with increasing batch size.
  if plot_heuristics:
    linear_heuristic = [
        yvalues[0] * batch_size / batch_sizes[0] for batch_size in batch_sizes
    ]
    ax.plot(
        batch_sizes,
        linear_heuristic,
        linestyle="--",
        c="k",
        label="Linear Heuristic")
    sqrt_heuristic = [
        yvalues[0] * np.sqrt(batch_size / batch_sizes[0])
        for batch_size in batch_sizes
    ]
    ax.plot(
        batch_sizes,
        sqrt_heuristic,
        linestyle="-.",
        c="g",
        label="Square Root Heuristic")

  # Format the axes.
  ax.set_xlabel("Batch Size")
  ax.set_ylabel(ylabel)
  ax.set_xscale("log", base=2)
  ax.set_yscale("log", base=2)
  ax.grid(True)
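

# Illustrative usage sketch (not part of the original module): shows the data
# shapes this function expects. The batch sizes, trial ids, and metaparameter
# values below are invented; `workload_metadata` maps each batch size to a
# study-metadata dict whose "trials" entry is keyed by trial id.
def _example_optimal_metaparameter_plot():
  import matplotlib.pyplot as plt

  index = pd.MultiIndex.from_tuples(
      [(64, 3, 8000), (128, 7, 4100), (256, 2, 2300)],
      names=["batch_size", "trial_id", "step"])
  steps_to_result = pd.DataFrame(
      {"error_rate": [0.25, 0.25, 0.25]}, index=index)
  workload_metadata = {
      64: {"trials": {3: {"parameters": {"learning_rate": 0.1,
                                         "momentum": 0.9}}}},
      128: {"trials": {7: {"parameters": {"learning_rate": 0.2,
                                          "momentum": 0.9}}}},
      256: {"trials": {2: {"parameters": {"learning_rate": 0.4,
                                          "momentum": 0.95}}}},
  }

  fig, ax = plt.subplots()
  plot_optimal_metaparameter_values(ax, "Effective Learning Rate",
                                    steps_to_result, workload_metadata)
  ax.legend()
  fig.savefig("optimal_metaparameters.png")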


def plot_learning_rate_momentum_scatter(ax,
                                        objective_col_name,
                                        objective_goal,
                                        study_table,
                                        study_metadata,
                                        xlim,
                                        ylim,
                                        maximize=False):
  """Plots a categorized scatter plot of learning rate and (1 - momentum).

  Trials are categorized by those that reached the goal objective value, those
  that did not, and those that diverged during training.

  Args:
    ax: Instance of pyplot.axes.Axes on which to plot.
    objective_col_name: Column name of the objective metric.
    objective_goal: Threshold value of the objective metric indicating a
      successful trial.
    study_table: DataFrame of all measurements in the study indexed by
      (trial_id, step).
    study_metadata: A dict of study metadata.
    xlim: A pair (x_min, x_max) corresponding to the minimum and maximum
      learning rates to plot.
    ylim: A pair (y_min, y_max) corresponding to the minimum and maximum
      momentum values to plot.
    maximize: Whether the goal is to maximize (as opposed to minimize) the
      objective metric.
  """
  # Extract the parameters corresponding to each trial in 3 categories: those
  # that reached the goal objective value, those that did not, and those that
  # diverged during training.
  good_params = []
  bad_params = []
  infeasible_params = []
  comparator = operator.gt if maximize else operator.lt
  for trial_id, trial_metadata in study_metadata["trials"].items():
    params = trial_metadata["parameters"]
    if trial_metadata["status"] == "COMPLETE":
      measurements = study_table.loc[trial_id][objective_col_name]
      if np.any(comparator(measurements, objective_goal)):
        good_params.append(params)
      else:
        bad_params.append(params)
    elif trial_metadata["status"] == "INFEASIBLE":
      infeasible_params.append(params)
    else:
      raise ValueError(
          "Unexpected status: {}".format(trial_metadata["status"]))

  # Plot all good, bad, and infeasible parameter values.
  learning_rate, one_minus_momentum = _unpack_params(good_params)
  ax.scatter(
      learning_rate,
      one_minus_momentum,
      c="b",
      marker="o",
      alpha=1.0,
      s=40,
      label="Goal Achieved")
  learning_rate, one_minus_momentum = _unpack_params(bad_params)
  ax.scatter(
      learning_rate,
      one_minus_momentum,
      c="r",
      marker="^",
      alpha=0.7,
      s=40,
      label="Goal Not Achieved")
  learning_rate, one_minus_momentum = _unpack_params(infeasible_params)
  ax.scatter(
      learning_rate,
      one_minus_momentum,
      alpha=0.7,
      marker="x",
      c="k",
      s=25,
      label="Infeasible")

  # Format the axes.
  ax.set_xlabel("Learning Rate")
  ax.set_xscale("log")
  ax.set_xlim(xlim)
  ax.set_ylabel("1 - Momentum")
  ax.set_yscale("log")
  ax.set_ylim(ylim)

  # Plot contour lines of constant effective learning rate.
  grid_x = np.logspace(np.log10(xlim[0]), np.log10(xlim[1]), num=50)
  grid_y = np.logspace(np.log10(ylim[0]), np.log10(ylim[1]), num=50)
  grid_xx, grid_yy = np.meshgrid(grid_x, grid_y)
  grid_z = np.log10(grid_xx / grid_yy)
  ax.contour(grid_xx, grid_yy, grid_z, 10, colors="black", alpha=0.5)

  # Plot the best measurement as a yellow star.
  str_measurement = compute_steps_to_result(study_table, objective_col_name,
                                            objective_goal, maximize, None)
  if not str_measurement.empty:
    best_trial_id = get_index_values(str_measurement, "trial_id")[0]
    best_trial_params = study_metadata["trials"][best_trial_id]["parameters"]
    learning_rate, one_minus_momentum = _unpack_params([best_trial_params])
    ax.scatter(
        learning_rate,
        one_minus_momentum,
        marker="*",
        alpha=1.0,
        s=400,
        c="yellow")
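

# Illustrative usage sketch (not part of the original module): shows the
# expected shapes of `study_table` (indexed by (trial_id, step)) and
# `study_metadata`. All trial values below are invented, and the plot relies
# on the module helpers `_unpack_params` and `compute_steps_to_result`
# behaving as they do for real study tables.
def _example_learning_rate_momentum_scatter():
  import matplotlib.pyplot as plt

  index = pd.MultiIndex.from_tuples(
      [(1, 1000), (1, 2000), (2, 1000), (2, 2000)],
      names=["trial_id", "step"])
  study_table = pd.DataFrame(
      {"error_rate": [0.40, 0.24, 0.55, 0.50]}, index=index)
  study_metadata = {
      "trials": {
          1: {"status": "COMPLETE",
              "parameters": {"learning_rate": 0.1, "momentum": 0.9}},
          2: {"status": "COMPLETE",
              "parameters": {"learning_rate": 2.0, "momentum": 0.99}},
          3: {"status": "INFEASIBLE",
              "parameters": {"learning_rate": 8.0, "momentum": 0.999}},
      }
  }

  fig, ax = plt.subplots()
  plot_learning_rate_momentum_scatter(
      ax,
      objective_col_name="error_rate",
      objective_goal=0.25,
      study_table=study_table,
      study_metadata=study_metadata,
      xlim=(1e-2, 1e1),
      ylim=(1e-3, 1e0))
  ax.legend()
  fig.savefig("lr_momentum_scatter.png")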