def get_plot_instruction(plot_key, split_key=None, group_key=None,
                         filters=None, use_median=False, only_show_best=False,
                         only_show_best_final=False, gen_eps=False,
                         only_show_best_sofar=False, clip_plot_value=None,
                         plot_width=None, plot_height=None, filter_nan=False,
                         smooth_curve=False, custom_filter=None,
                         legend_post_processor=None, normalize_error=False,
                         custom_series_splitter=None):
    """Build plot HTML for one progress metric across experiments.

    Selects experiments from the module-level ``exps_data`` (optionally
    dropping NaN runs via ``check_nan``), splits them into figures by
    ``split_key``, groups curves within each figure by ``group_key`` (or
    by ``custom_series_splitter``), and renders each group's ``plot_key``
    progress column with ``make_plot`` / ``make_plot_eps``.

    Parameters (all read-only unless noted):
        plot_key: progress-CSV column to plot on the y axis.
        split_key / group_key: experiment-parameter names used to split
            figures / group curves. ``group_key`` is rebound to
            "exp_name" when falsy or equal to "exp_name".
        filters: dict of param -> value; each becomes a ``selector.where``.
        use_median: plot 25/50/75 percentiles instead of mean +/- std.
        only_show_best / only_show_best_final / only_show_best_sofar:
            search the cartesian product of distinct hyperparameters and
            keep only the best-scoring configuration per group.
        gen_eps: emit EPS files via ``make_plot_eps`` instead of HTML plots.
        clip_plot_value: if set, clip curves to [-v, v].
        smooth_curve: apply ``sliding_mean`` with a size-dependent window.
        custom_filter: predicate passed to ``selector.custom_filter``.
        legend_post_processor: callable mapping legend string -> string.
        normalize_error: divide stds by sqrt(#non-NaN) => standard error.
        custom_series_splitter: callable exp -> legend key; overrides
            group_key-based grouping entirely.

    Returns:
        All generated plot snippets joined with newlines (empty string
        when nothing was plotted).
    """
    print(plot_key, split_key, group_key, filters)
    if filter_nan:
        # Keep only experiments whose progress passes check_nan.
        nonnan_exps_data = list(filter(check_nan, exps_data))
        selector = core.Selector(nonnan_exps_data)
    else:
        selector = core.Selector(exps_data)
    if legend_post_processor is None:
        # Identity post-processor keeps legends unchanged.
        def default_legend_post_processor(x):
            return x
        legend_post_processor = default_legend_post_processor
    if filters is None:
        filters = dict()
    for k, v in filters.items():
        # Selector matching is string-based, hence str(v).
        selector = selector.where(k, str(v))
    if custom_filter is not None:
        selector = selector.custom_filter(custom_filter)
    # print selector._filters
    if split_key is not None:
        # Distinct values of split_key determine one sub-figure each.
        vs = [vs for k, vs in distinct_params if k == split_key][0]
        split_selectors = [selector.where(split_key, v) for v in vs]
        split_legends = list(map(str, vs))
    else:
        split_selectors = [selector]
        split_legends = ["Plot"]
    plots = []
    counter = 1
    for split_selector, split_legend in zip(split_selectors, split_legends):
        if custom_series_splitter is not None:
            # Bucket experiments by the key the splitter returns; each
            # bucket becomes one curve with the key as its legend.
            exps = split_selector.extract()
            splitted_dict = dict()
            for exp in exps:
                key = custom_series_splitter(exp)
                if key not in splitted_dict:
                    splitted_dict[key] = list()
                splitted_dict[key].append(exp)
            splitted = list(splitted_dict.items())
            group_selectors = [core.Selector(list(x[1])) for x in splitted]
            group_legends = [x[0] for x in splitted]
        else:
            if group_key and group_key != "exp_name":
                vs = [vs for k, vs in distinct_params if k == group_key][0]
                group_selectors = [
                    split_selector.where(group_key, v) for v in vs
                ]
                group_legends = [str(x) for x in vs]
            else:
                # Fall back to one curve per experiment name.
                group_key = "exp_name"
                vs = sorted(
                    [x.params["exp_name"] for x in split_selector.extract()])
                group_selectors = [
                    split_selector.where(group_key, v) for v in vs
                ]
                group_legends = [
                    summary_name(x.extract()[0], split_selector)
                    for x in group_selectors
                ]
        # group_selectors = [split_selector]
        # group_legends = [split_legend]
        to_plot = []
        for group_selector, group_legend in zip(group_selectors,
                                                group_legends):
            filtered_data = group_selector.extract()
            if filtered_data:
                if (only_show_best or only_show_best_final
                        or only_show_best_sofar):
                    # Group by seed and sort.
                    # -----------------------
                    filtered_params = core.extract_distinct_params(
                        filtered_data, l=0)  # noqa: E741
                    filtered_params2 = [p[1] for p in filtered_params]
                    filtered_params_k = [p[0] for p in filtered_params]
                    # Every combination of distinct hyperparameter values
                    # is scored; the highest mean "regret" wins.
                    product_space = list(itertools.product(*filtered_params2))
                    data_best_regret = None
                    best_regret = -np.inf
                    kv_string_best_regret = None
                    for idx, params in enumerate(product_space):
                        # NOTE: rebinds the outer `selector`; safe here
                        # because it is re-created each iteration.
                        selector = core.Selector(exps_data)
                        for k, v in zip(filtered_params_k, params):
                            selector = selector.where(k, str(v))
                        data = selector.extract()
                        if data:
                            progresses = [
                                exp.progress.get(plot_key,
                                                 np.array([np.nan]))
                                for exp in data
                            ]
                            # progresses = [
                            #     progress[:500] for progress in progresses
                            # ]
                            sizes = list(map(len, progresses))
                            # Pad shorter runs with NaN so nan-aware stats
                            # can reduce across runs of unequal length.
                            max_size = max(sizes)
                            progresses = [
                                np.concatenate(
                                    [ps,
                                     np.ones(max_size - len(ps)) * np.nan])
                                for ps in progresses
                            ]
                            if only_show_best_final:
                                # Score on the final value only.
                                progresses = np.asarray(progresses)[:, -1]
                            if only_show_best_sofar:
                                # Score on the best value seen so far.
                                progresses = np.max(np.asarray(progresses),
                                                    axis=1)
                            if use_median:
                                medians = np.nanmedian(progresses, axis=0)
                                regret = np.mean(medians)
                            else:
                                means = np.nanmean(progresses, axis=0)
                                regret = np.mean(means)
                            # Human-readable summary of the params that
                            # produced this score, tab-separated.
                            distinct_params_k = [
                                p[0] for p in distinct_params]
                            distinct_params_v = [
                                v for k, v in zip(filtered_params_k, params)
                                if k in distinct_params_k
                            ]
                            distinct_params_kv = [(k, v) for k, v in zip(
                                distinct_params_k, distinct_params_v)]
                            distinct_params_kv_string = str(
                                distinct_params_kv).replace('), ', ')\t')
                            print('{}\t{}\t{}'.format(
                                regret, len(progresses),
                                distinct_params_kv_string))
                            if regret > best_regret:
                                best_regret = regret
                                best_progress = progresses
                                data_best_regret = data
                                kv_string_best_regret = \
                                    distinct_params_kv_string
                    print(group_selector._filters)
                    print('best regret: {}'.format(best_regret))
                    # -----------------------
                    if best_regret != -np.inf:
                        # Re-extract the winning configuration's curves.
                        progresses = [
                            exp.progress.get(plot_key, np.array([np.nan]))
                            for exp in data_best_regret
                        ]
                        # progresses = \
                        #     [progress[:500] for progress in progresses]
                        sizes = list(map(len, progresses))
                        # more intelligent:
                        max_size = max(sizes)
                        progresses = [
                            np.concatenate(
                                [ps, np.ones(max_size - len(ps)) * np.nan])
                            for ps in progresses
                        ]
                        legend = '{} (mu: {:.3f}, std: {:.5f})'.format(
                            group_legend, best_regret, np.std(best_progress))
                        # Smoothing window scales with curve length,
                        # never below 1.
                        window_size = np.maximum(
                            int(np.round(max_size / float(1000))), 1)
                        if use_median:
                            percentile25 = np.nanpercentile(progresses,
                                                            q=25, axis=0)
                            percentile50 = np.nanpercentile(progresses,
                                                            q=50, axis=0)
                            percentile75 = np.nanpercentile(progresses,
                                                            q=75, axis=0)
                            if smooth_curve:
                                percentile25 = sliding_mean(
                                    percentile25, window=window_size)
                                percentile50 = sliding_mean(
                                    percentile50, window=window_size)
                                percentile75 = sliding_mean(
                                    percentile75, window=window_size)
                            if clip_plot_value is not None:
                                percentile25 = np.clip(percentile25,
                                                       -clip_plot_value,
                                                       clip_plot_value)
                                percentile50 = np.clip(percentile50,
                                                       -clip_plot_value,
                                                       clip_plot_value)
                                percentile75 = np.clip(percentile75,
                                                       -clip_plot_value,
                                                       clip_plot_value)
                            to_plot.append(
                                ext.AttrDict(
                                    percentile25=percentile25,
                                    percentile50=percentile50,
                                    percentile75=percentile75,
                                    legend=legend_post_processor(legend)))
                        else:
                            means = np.nanmean(progresses, axis=0)
                            stds = np.nanstd(progresses, axis=0)
                            if normalize_error:  # and len(progresses) > 0:
                                # Convert std to standard error using the
                                # per-step count of non-NaN runs.
                                stds /= np.sqrt(
                                    np.sum((1. - np.isnan(progresses)),
                                           axis=0))
                            if smooth_curve:
                                means = sliding_mean(means,
                                                     window=window_size)
                                stds = sliding_mean(stds, window=window_size)
                            if clip_plot_value is not None:
                                means = np.clip(means, -clip_plot_value,
                                                clip_plot_value)
                                stds = np.clip(stds, -clip_plot_value,
                                               clip_plot_value)
                            to_plot.append(
                                ext.AttrDict(
                                    means=means,
                                    stds=stds,
                                    legend=legend_post_processor(legend)))
                        # Footnote records which hyperparameters won;
                        # `data` is whatever the last product-space
                        # iteration extracted.
                        if to_plot and data:
                            to_plot[-1]["footnote"] = "%s; e.g. %s" % (
                                kv_string_best_regret, data[0].params.get(
                                    "exp_name", "NA"))
                        else:
                            to_plot[-1]["footnote"] = ""
                else:
                    # Plain path: aggregate all runs of this group.
                    progresses = [
                        exp.progress.get(plot_key, np.array([np.nan]))
                        for exp in filtered_data
                    ]
                    sizes = list(map(len, progresses))
                    # more intelligent:
                    max_size = max(sizes)
                    # NaN-pad to a rectangular array as above.
                    progresses = [
                        np.concatenate(
                            [ps, np.ones(max_size - len(ps)) * np.nan])
                        for ps in progresses
                    ]
                    window_size = np.maximum(
                        int(np.round(max_size / float(1000))), 1)
                    if use_median:
                        percentile25 = np.nanpercentile(progresses,
                                                        q=25, axis=0)
                        percentile50 = np.nanpercentile(progresses,
                                                        q=50, axis=0)
                        percentile75 = np.nanpercentile(progresses,
                                                        q=75, axis=0)
                        if smooth_curve:
                            percentile25 = sliding_mean(percentile25,
                                                        window=window_size)
                            percentile50 = sliding_mean(percentile50,
                                                        window=window_size)
                            percentile75 = sliding_mean(percentile75,
                                                        window=window_size)
                        if clip_plot_value is not None:
                            percentile25 = np.clip(percentile25,
                                                   -clip_plot_value,
                                                   clip_plot_value)
                            percentile50 = np.clip(percentile50,
                                                   -clip_plot_value,
                                                   clip_plot_value)
                            percentile75 = np.clip(percentile75,
                                                   -clip_plot_value,
                                                   clip_plot_value)
                        to_plot.append(
                            ext.AttrDict(
                                percentile25=percentile25,
                                percentile50=percentile50,
                                percentile75=percentile75,
                                legend=legend_post_processor(group_legend)))
                    else:
                        means = np.nanmean(progresses, axis=0)
                        stds = np.nanstd(progresses, axis=0)
                        # NOTE(review): unlike the best-config branch,
                        # this path never applies normalize_error —
                        # presumably intentional, but worth confirming.
                        if smooth_curve:
                            means = sliding_mean(means, window=window_size)
                            stds = sliding_mean(stds, window=window_size)
                        if clip_plot_value is not None:
                            means = np.clip(means, -clip_plot_value,
                                            clip_plot_value)
                            stds = np.clip(stds, -clip_plot_value,
                                           clip_plot_value)
                        to_plot.append(
                            ext.AttrDict(
                                means=means,
                                stds=stds,
                                legend=legend_post_processor(group_legend)))
        if to_plot and not gen_eps:
            fig_title = "%s: %s" % (split_key, split_legend)
            # plots.append("<h3>%s</h3>" % fig_title)
            plots.append(
                make_plot(to_plot, use_median=use_median, title=fig_title,
                          plot_width=plot_width, plot_height=plot_height))
        if gen_eps:
            make_plot_eps(to_plot, use_median=use_median, counter=counter)
        counter += 1
    return "\n".join(plots)
def get_plot_instruction(
        plot_keys,
        x_keys=None,
        split_keys=None,
        group_keys=None,
        best_filter_key=None,
        filters=None,
        exclusions=None,
        use_median=False,
        only_show_best=False,
        best_based_on_final=False,
        gen_eps=False,
        only_show_best_sofar=False,
        best_is_lowest=False,
        clip_plot_value=None,
        plot_width=None,
        plot_height=None,
        filter_nan=False,
        smooth_curve=False,
        custom_filter=None,
        legend_post_processor=None,
        normalize_error=False,
        make_bar_chart=False,
        value_i=-1,  # TODO: add option to set value_i
        custom_series_splitter=None,
):
    """Build plot (or bar-chart) HTML for several progress metrics at once.

    Multi-key variant: splits the module-level ``exps_data`` into figures
    by ``split_keys``, groups curves by ``group_keys`` (or
    ``custom_series_splitter``), and plots every column in ``plot_keys``
    — optionally against a custom x column given by ``x_keys`` (at most
    one, prepended to ``plot_keys``).

    Notable parameters:
        best_filter_key: when set (and not already used for split/group),
            each group keeps only the sub-selector with the best
            ``get_selector_score``; direction controlled by
            ``best_is_lowest``.
        exclusions: iterable of (key, value) pairs removed via
            ``selector.where_not``.
        only_show_best / only_show_best_sofar: per-group search over the
            cartesian product of distinct hyperparameters, keeping only
            the best configuration (min or max per ``best_is_lowest``).
        make_bar_chart: render with ``create_bar_chart`` (using
            ``value_i`` as the index of the value to chart) instead of
            ``make_plot``.

    Returns:
        All generated plot snippets joined with newlines.
    """
    if x_keys is None:
        x_keys = []
    if x_keys:
        # Only a single custom x axis is supported.
        assert len(x_keys) == 1
        if x_keys[0] is None:
            x_keys = []
    # The x column is plotted first so downstream code can find it.
    plot_keys = x_keys + plot_keys
    """
    A custom filter might look like
    "lambda exp: exp.flat_params['algo_params_base_kwargs.batch_size'] == 64"
    """
    if filter_nan:
        # Keep only experiments whose progress passes check_nan.
        nonnan_exps_data = list(filter(check_nan, exps_data))
        selector = core.Selector(nonnan_exps_data)
    else:
        selector = core.Selector(exps_data)
    if legend_post_processor is None:
        legend_post_processor = lambda x: x
    if filters is None:
        filters = dict()
    if exclusions is None:
        exclusions = []
    if split_keys is None:
        split_keys = []
    if group_keys is None:
        group_keys = []
    if plot_height is None:
        # One row of height per plotted key.
        plot_height = 300 * len(plot_keys)
    for k, v in filters.items():
        # Selector matching is string-based, hence str(v).
        selector = selector.where(k, str(v))
    for k, v in exclusions:
        selector = selector.where_not(k, str(v))
    if custom_filter is not None:
        selector = selector.custom_filter(custom_filter)
    if len(split_keys) > 0:
        split_selectors, split_titles = split_by_keys(
            selector, split_keys, distinct_params
        )
    else:
        split_selectors = [selector]
        split_titles = ["Plot"]
    plots = []
    counter = 1
    print("Plot_keys:", plot_keys)
    print("X keys:", x_keys)
    print("split_keys:", split_keys)
    print("group_keys:", group_keys)
    print("filters:", filters)
    print("exclusions:", exclusions)
    for split_selector, split_title in zip(split_selectors, split_titles):
        if custom_series_splitter is not None:
            # Bucket experiments by the key the splitter returns; each
            # bucket becomes one curve with the key as its legend.
            exps = split_selector.extract()
            splitted_dict = dict()
            for exp in exps:
                key = custom_series_splitter(exp)
                if key not in splitted_dict:
                    splitted_dict[key] = list()
                splitted_dict[key].append(exp)
            splitted = list(splitted_dict.items())
            group_selectors = [core.Selector(list(x[1])) for x in splitted]
            group_legends = [x[0] for x in splitted]
        else:
            if len(group_keys) > 0:
                group_selectors, group_legends = split_by_keys(
                    split_selector, group_keys, distinct_params
                )
            else:
                # Single group: the whole split plotted as one curve.
                group_selectors = [split_selector]
                group_legends = [split_title]
        # One inner list of plot dicts per plot_key.
        list_of_list_of_plot_dicts = []
        for plot_ind, plot_key in enumerate(plot_keys):
            to_plot = []
            for group_selector, group_legend in zip(group_selectors,
                                                    group_legends):
                filtered_data = group_selector.extract()
                if len(filtered_data) == 0:
                    continue
                if (best_filter_key
                        and best_filter_key not in group_keys
                        and best_filter_key not in split_keys):
                    # Within this group, keep only the best value of
                    # best_filter_key according to get_selector_score.
                    selectors = split_by_key(
                        group_selector, best_filter_key, distinct_params
                    )
                    scores = [
                        get_selector_score(plot_key, selector, use_median,
                                           best_based_on_final)
                        for selector in selectors
                    ]
                    if np.isfinite(scores).any():
                        if best_is_lowest:
                            best_idx = np.nanargmin(scores)
                        else:
                            best_idx = np.nanargmax(scores)
                        best_selector = selectors[best_idx]
                        filtered_data = best_selector.extract()
                        print("For split '{0}', group '{1}':".format(
                            split_title,
                            group_legend,
                        ))
                        print(" best '{0}': {1}".format(
                            best_filter_key,
                            dict(best_selector._filters)[best_filter_key]
                        ))
                if only_show_best or only_show_best_sofar:
                    # Group by seed and sort.
                    # -----------------------
                    filtered_params = core.extract_distinct_params(
                        filtered_data, l=0)
                    filtered_params2 = [p[1] for p in filtered_params]
                    filtered_params_k = [p[0] for p in filtered_params]
                    # Score every combination of distinct hyperparameter
                    # values; keep the best (min or max).
                    product_space = list(itertools.product(
                        *filtered_params2
                    ))
                    data_best_regret = None
                    best_regret = np.inf if best_is_lowest else -np.inf
                    kv_string_best_regret = None
                    for idx, params in enumerate(product_space):
                        # NOTE: rebinds the outer `selector`; safe here
                        # because it is re-created each iteration.
                        selector = core.Selector(exps_data)
                        for k, v in zip(filtered_params_k, params):
                            selector = selector.where(k, str(v))
                        data = selector.extract()
                        if len(data) > 0:
                            progresses = [
                                exp.progress.get(plot_key,
                                                 np.array([np.nan]))
                                for exp in data
                            ]
                            sizes = list(map(len, progresses))
                            # Pad shorter runs with NaN so nan-aware stats
                            # can reduce across runs of unequal length.
                            max_size = max(sizes)
                            progresses = [
                                np.concatenate(
                                    [ps,
                                     np.ones(max_size - len(ps)) * np.nan])
                                for ps in progresses]
                            if best_based_on_final:
                                # Score on the final value only.
                                progresses = np.asarray(progresses)[:, -1]
                            if only_show_best_sofar:
                                # Score on the best value seen so far.
                                if best_is_lowest:
                                    progresses = np.min(
                                        np.asarray(progresses), axis=1)
                                else:
                                    progresses = np.max(
                                        np.asarray(progresses), axis=1)
                            if use_median:
                                medians = np.nanmedian(progresses, axis=0)
                                regret = np.mean(medians)
                            else:
                                means = np.nanmean(progresses, axis=0)
                                regret = np.mean(means)
                            # Human-readable summary of the params that
                            # produced this score, tab-separated.
                            distinct_params_k = [
                                p[0] for p in distinct_params]
                            distinct_params_v = [
                                v for k, v in zip(filtered_params_k, params)
                                if k in distinct_params_k]
                            distinct_params_kv = [
                                (k, v)
                                for k, v in zip(distinct_params_k,
                                                distinct_params_v)]
                            distinct_params_kv_string = str(
                                distinct_params_kv).replace('), ', ')\t')
                            print(
                                '{}\t{}\t{}'.format(
                                    regret, len(progresses),
                                    distinct_params_kv_string))
                            if best_is_lowest:
                                change_regret = regret < best_regret
                            else:
                                change_regret = regret > best_regret
                            if change_regret:
                                best_regret = regret
                                best_progress = progresses
                                data_best_regret = data
                                kv_string_best_regret = \
                                    distinct_params_kv_string
                    print(group_selector._filters)
                    print('best regret: {}'.format(best_regret))
                    # -----------------------
                    if np.isfinite(best_regret):
                        # Re-extract the winning configuration's curves.
                        progresses = [
                            exp.progress.get(plot_key, np.array([np.nan]))
                            for exp in data_best_regret]
                        # progresses = [progress[:500] for progress in progresses ]
                        sizes = list(map(len, progresses))
                        # more intelligent:
                        max_size = max(sizes)
                        progresses = [
                            np.concatenate(
                                [ps, np.ones(max_size - len(ps)) * np.nan])
                            for ps in progresses]
                        legend = '{} (mu: {:.3f}, std: {:.5f})'.format(
                            group_legend, best_regret, np.std(best_progress))
                        # Smoothing window scales with curve length,
                        # never below 1.
                        window_size = np.maximum(
                            int(np.round(max_size / float(1000))), 1)
                        statistics = get_statistics(
                            progresses, use_median, normalize_error,
                        )
                        statistics = process_statistics(
                            statistics,
                            smooth_curve,
                            clip_plot_value,
                            window_size,
                        )
                        to_plot.append(
                            AttrDict(
                                legend=legend_post_processor(legend),
                                plot_key=plot_key,
                                **statistics
                            )
                        )
                        # Footnote records which hyperparameters won;
                        # `data` is whatever the last product-space
                        # iteration extracted.
                        if len(to_plot) > 0 and len(data) > 0:
                            to_plot[-1]["footnote"] = "%s; e.g. %s" % (
                                kv_string_best_regret,
                                data[0].params.get("exp_name", "NA"))
                        else:
                            to_plot[-1]["footnote"] = ""
                else:
                    # Plain path: aggregate all runs of this group.
                    progresses = [
                        exp.progress.get(plot_key, np.array([np.nan]))
                        for exp in filtered_data
                    ]
                    sizes = list(map(len, progresses))
                    # more intelligent:
                    max_size = max(sizes)
                    # NaN-pad to a rectangular array as above.
                    progresses = [
                        np.concatenate(
                            [ps, np.ones(max_size - len(ps)) * np.nan])
                        for ps in progresses]
                    # NOTE(review): divisor is 100 here but 1000 in the
                    # best-config branch above — looks inconsistent;
                    # confirm which window scaling is intended.
                    window_size = np.maximum(
                        int(np.round(max_size / float(100))), 1,
                    )
                    statistics = get_statistics(
                        progresses, use_median, normalize_error,
                    )
                    statistics = process_statistics(
                        statistics,
                        smooth_curve,
                        clip_plot_value,
                        window_size,
                    )
                    to_plot.append(
                        AttrDict(
                            legend=legend_post_processor(group_legend),
                            plot_key=plot_key,
                            # Flag set only for the x column (first key).
                            x_key=plot_key in x_keys and plot_ind == 0,
                            **statistics
                        )
                    )
            if len(to_plot) > 0:
                list_of_list_of_plot_dicts.append(to_plot)
        if len(list_of_list_of_plot_dicts) > 0 and not gen_eps:
            fig_title = split_title
            if make_bar_chart:
                plots.append(create_bar_chart(
                    list_of_list_of_plot_dicts,
                    use_median=use_median,
                    title=fig_title,
                    plot_width=plot_width,
                    plot_height=plot_height,
                    value_i=value_i,
                ))
            else:
                plots.append(make_plot(
                    list_of_list_of_plot_dicts,
                    use_median=use_median,
                    title=fig_title,
                    plot_width=plot_width,
                    plot_height=plot_height
                ))
        if gen_eps:
            # NOTE(review): uses `to_plot` from the last plot_key only —
            # presumably a leftover from the single-key version; confirm.
            make_plot_eps(to_plot, use_median=use_median, counter=counter)
        counter += 1
    return "\n".join(plots)
def get_plot_instruction(
        x_plot_key,
        plot_key,
        display_mode,
        split_key=None,
        group_key=None,
        filters=None,
):
    """Build plot HTML for ``plot_key`` against a selectable x column.

    Variant of the plotting frontend that supports a custom x axis:
    when ``x_plot_key`` is not the sentinel ``"(default)"``, every run's
    ``plot_key`` series is linearly interpolated onto the union of all
    observed x values so runs with different logging points can be
    aggregated.

    Parameters:
        x_plot_key: progress column for the x axis, or "(default)" to
            plot against the step index.
        plot_key: progress column for the y axis.
        display_mode: "mean_std" (mean +/- std), "mean_se" (mean +/-
            standard error), or "individual" (one line per run);
            anything else raises NotImplementedError.
        split_key: experiment parameter splitting runs into figures.
        group_key: experiment parameter grouping curves within a figure;
            falsy or "exp_name" groups one curve per experiment name.
        filters: dict of param -> value applied via ``selector.where``.

    Returns:
        All generated plot snippets joined with newlines.
    """
    # print(x_plot_key, plot_key, split_key, group_key, filters)
    # if x_plot_key != "(default)":
    #     group_key = None
    selector = core.Selector(exps_data)
    if filters is None:
        filters = dict()
    for k, v in filters.items():
        # Selector matching is string-based, hence str(v).
        selector = selector.where(k, str(v))
    if split_key is not None:
        # Distinct values of split_key determine one sub-figure each.
        vs = [vs for k, vs in distinct_params if k == split_key][0]
        split_selectors = [selector.where(split_key, v) for v in vs]
        split_legends = list(map(str, vs))
    else:
        split_selectors = [selector]
        split_legends = ["Plot"]
    plots = []
    counter = 1
    for split_selector, split_legend in zip(split_selectors, split_legends):
        # BUG FIX: was `group_key is not "exp_name"` — identity comparison
        # against a str literal (SyntaxWarning since Python 3.8, only works
        # by interning accident). Use value equality, matching the other
        # get_plot_instruction variants in this file.
        if group_key and group_key != "exp_name":
            vs = [vs for k, vs in distinct_params if k == group_key][0]
            group_selectors = [split_selector.where(group_key, v) for v in vs]
            group_legends = [str(x) for x in vs]
        else:
            # Fall back to one curve per experiment name.
            group_key = "exp_name"
            vs = sorted(
                [x.params["exp_name"] for x in split_selector.extract()])
            group_selectors = [split_selector.where(group_key, v) for v in vs]
            group_legends = [
                summary_name(x.extract()[0], split_selector)
                for x in group_selectors
            ]
        to_plot = []
        for group_selector, group_legend in zip(group_selectors,
                                                group_legends):
            filtered_data = group_selector.extract()
            if len(filtered_data) > 0:
                progresses = [
                    exp.progress.get(plot_key, np.array([np.nan]))
                    for exp in filtered_data
                ]
                sizes = list(map(len, progresses))
                # more intelligent:
                max_size = max(sizes)
                # NaN-pad shorter runs so nan-aware stats can reduce
                # across runs of unequal length.
                progresses = [
                    np.concatenate([ps, np.ones(max_size - len(ps)) * np.nan])
                    for ps in progresses
                ]
                if x_plot_key == "(default)":
                    xs = np.arange(max_size)
                else:
                    # first decide what the xs should be
                    # ideally, it should be the union of all observed
                    # x values. np.unique already returns sorted output,
                    # so no extra np.sort is needed.
                    all_xs = np.unique(
                        np.concatenate([
                            d.progress.get(x_plot_key, [])
                            for d in filtered_data
                        ]))
                    # Interpolate each run's y series onto the shared x
                    # grid; values past a run's end become NaN.
                    interp_progresses = []
                    for d in filtered_data:
                        if x_plot_key in d.progress:
                            assert plot_key in d.progress
                            interp_progresses.append(
                                np.interp(all_xs,
                                          d.progress[x_plot_key],
                                          d.progress[plot_key],
                                          right=np.nan))
                    progresses = interp_progresses
                    xs = all_xs
                if display_mode == "mean_std":
                    means = np.nanmean(progresses, axis=0)
                    stds = np.nanstd(progresses, axis=0)
                    to_plot.append(
                        AttrDict(
                            means=means,
                            stds=stds,
                            legend=group_legend,
                            xs=xs,
                            display_mode=display_mode,
                        ))
                elif display_mode == "mean_se":
                    means = np.nanmean(progresses, axis=0)
                    # Standard error: std over sqrt of the per-step count
                    # of non-NaN runs.
                    ses = np.nanstd(progresses, axis=0) / \
                        np.sqrt(np.sum(1 - np.isnan(progresses), axis=0))
                    to_plot.append(
                        AttrDict(
                            means=means,
                            ses=ses,
                            legend=group_legend,
                            xs=xs,
                            display_mode=display_mode,
                        ))
                elif display_mode == "individual":
                    to_plot.append(
                        AttrDict(
                            xs=xs,
                            ys=progresses,
                            legend=group_legend,
                            display_mode=display_mode,
                        ))
                else:
                    raise NotImplementedError
        if len(to_plot) > 0:
            fig_title = "%s: %s" % (split_key, split_legend)
            plots.append(make_plot(
                to_plot,
                title=fig_title,
            ))
        counter += 1
    return "\n".join(plots)