def load_exps_data(exp_folder_paths, disable_variant=False):
    """Load every experiment found beneath the given folders.

    Each directory yielded by ``os.walk`` over ``exp_folder_paths`` is
    treated as a candidate experiment directory. A candidate is kept when its
    ``progress.csv`` and a parameter file can be loaded; a candidate that
    raises ``IOError`` is reported with ``print`` and skipped.

    Args:
        exp_folder_paths: Iterable of root directories to walk.
        disable_variant: When true, parameters are read from ``params.json``
            only. Otherwise ``variant.json`` is tried first and
            ``params.json`` is the fallback on ``IOError``.

    Returns:
        A list of ``ext.AttrDict`` objects with the keys ``progress``,
        ``params`` and ``flat_params``.
    """
    candidate_dirs = []
    for root in exp_folder_paths:
        candidate_dirs.extend(entry[0] for entry in os.walk(root))

    loaded = []
    for directory in candidate_dirs:
        try:
            params_file = os.path.join(directory, "params.json")
            variant_file = os.path.join(directory, "variant.json")
            progress = load_progress(os.path.join(directory, "progress.csv"))
            if disable_variant:
                params = load_params(params_file)
            else:
                try:
                    params = load_params(variant_file)
                except IOError:
                    # No readable variant file: fall back to params.json.
                    params = load_params(params_file)
            record = ext.AttrDict(
                progress=progress,
                params=params,
                flat_params=flatten_dict(params))
            loaded.append(record)
        except IOError as err:
            # Best effort: directories without experiment files are skipped.
            print(err)
    return loaded
def _pad_progresses(progresses):
    """Right-pad every series with NaN up to the longest series' length.

    Returns:
        A ``(padded, max_size)`` tuple: the padded list of arrays and the
        common length they now share.
    """
    max_size = max(map(len, progresses))
    padded = [
        np.concatenate([ps, np.ones(max_size - len(ps)) * np.nan])
        for ps in progresses
    ]
    return padded, max_size


def _summarize_series(progresses, max_size, legend, use_median, smooth_curve,
                      clip_plot_value, normalize_error):
    """Reduce padded runs to a single plottable series.

    With ``use_median`` the series carries the 25th/50th/75th percentiles,
    otherwise the NaN-aware mean and standard deviation. Smoothing and
    clipping are then applied to every curve in the series.
    """
    # Smoothing window: max_size / 1000 rounded, but never below 1.
    window_size = np.maximum(int(np.round(max_size / float(1000))), 1)

    if use_median:
        curves = {
            "percentile25": np.nanpercentile(progresses, q=25, axis=0),
            "percentile50": np.nanpercentile(progresses, q=50, axis=0),
            "percentile75": np.nanpercentile(progresses, q=75, axis=0),
        }
    else:
        means = np.nanmean(progresses, axis=0)
        stds = np.nanstd(progresses, axis=0)
        if normalize_error:
            # Divide by sqrt(number of non-NaN runs at each step).
            stds /= np.sqrt(np.sum((1. - np.isnan(progresses)), axis=0))
        curves = {"means": means, "stds": stds}

    if smooth_curve:
        curves = {
            name: sliding_mean(curve, window=window_size)
            for name, curve in curves.items()
        }
    if clip_plot_value is not None:
        curves = {
            name: np.clip(curve, -clip_plot_value, clip_plot_value)
            for name, curve in curves.items()
        }
    curves["legend"] = legend
    return ext.AttrDict(**curves)


def _select_best_config(filtered_data, plot_key, use_median,
                        only_show_best_final, only_show_best_sofar):
    """Search parameter combinations for the one with the highest score.

    Every combination of the parameter values present in ``filtered_data`` is
    looked up in the module-level ``exps_data``; its score ("regret" in the
    printed output) is the mean over steps of the per-step median or mean.

    Returns:
        ``(best_regret, best_progress, data_best_regret,
        kv_string_best_regret)``. ``best_regret`` stays ``-np.inf`` (and the
        other entries ``None``) when no combination scored above it.
    """
    filtered_params = core.extract_distinct_params(
        filtered_data, l=0)  # noqa: E741
    filtered_params_k = [p[0] for p in filtered_params]
    filtered_params_v = [p[1] for p in filtered_params]
    # Hoisted: this does not change across combinations.
    distinct_params_k = [p[0] for p in distinct_params]

    best_regret = -np.inf
    best_progress = None
    data_best_regret = None
    kv_string_best_regret = None

    for params in itertools.product(*filtered_params_v):
        # A dedicated name keeps the caller's `selector` from being shadowed.
        candidate = core.Selector(exps_data)
        for k, v in zip(filtered_params_k, params):
            candidate = candidate.where(k, str(v))
        data = candidate.extract()
        if not data:
            continue

        progresses, _ = _pad_progresses([
            exp.progress.get(plot_key, np.array([np.nan])) for exp in data
        ])
        if only_show_best_final:
            progresses = np.asarray(progresses)[:, -1]
        if only_show_best_sofar:
            progresses = np.max(np.asarray(progresses), axis=1)

        if use_median:
            regret = np.mean(np.nanmedian(progresses, axis=0))
        else:
            regret = np.mean(np.nanmean(progresses, axis=0))

        # Pair each value with the key it actually belongs to. Zipping the
        # distinct key list against a separately filtered value list
        # mislabels values whenever a distinct key is absent here.
        distinct_params_kv = [
            (k, v) for k, v in zip(filtered_params_k, params)
            if k in distinct_params_k
        ]
        distinct_params_kv_string = str(distinct_params_kv).replace(
            '), ', ')\t')
        print('{}\t{}\t{}'.format(
            regret, len(progresses), distinct_params_kv_string))

        if regret > best_regret:
            best_regret = regret
            best_progress = progresses
            data_best_regret = data
            kv_string_best_regret = distinct_params_kv_string

    return best_regret, best_progress, data_best_regret, kv_string_best_regret


def _build_group_selectors(split_selector, group_key, custom_series_splitter):
    """Split one figure's experiments into per-curve selectors and legends."""
    if custom_series_splitter is not None:
        buckets = dict()
        for exp in split_selector.extract():
            buckets.setdefault(custom_series_splitter(exp), list()).append(exp)
        group_selectors = [
            core.Selector(list(members)) for members in buckets.values()
        ]
        return group_selectors, list(buckets)

    if group_key and group_key != "exp_name":
        vs = [vs for k, vs in distinct_params if k == group_key][0]
        group_selectors = [split_selector.where(group_key, v) for v in vs]
        return group_selectors, [str(x) for x in vs]

    # No usable group key: one curve per experiment name.
    vs = sorted([x.params["exp_name"] for x in split_selector.extract()])
    group_selectors = [split_selector.where("exp_name", v) for v in vs]
    group_legends = [
        summary_name(x.extract()[0], split_selector) for x in group_selectors
    ]
    return group_selectors, group_legends


def get_plot_instruction(plot_key, split_key=None, group_key=None, filters=None, use_median=False, only_show_best=False, only_show_best_final=False, gen_eps=False, only_show_best_sofar=False, clip_plot_value=None, plot_width=None, plot_height=None, filter_nan=False, smooth_curve=False, custom_filter=None, legend_post_processor=None, normalize_error=False, custom_series_splitter=None):
    """Build the plots for one progress key over the loaded experiments.

    Reads the module-level ``exps_data`` and ``distinct_params``. The
    experiments are filtered, split into figures by ``split_key``, and each
    figure is split into curves by ``group_key`` or
    ``custom_series_splitter``.

    Args:
        plot_key: Key looked up in each experiment's ``progress`` mapping. An
            experiment lacking it contributes a single NaN.
        split_key: If given, one figure is produced per value listed for this
            key in ``distinct_params``; otherwise a single figure is made.
        group_key: If given and not ``"exp_name"``, one curve per value listed
            for this key in ``distinct_params``; otherwise one curve per
            experiment ``exp_name``.
        filters: Optional mapping of parameter name to required value; values
            are stringified before being passed to ``selector.where``.
        use_median: Plot the 25th/50th/75th percentiles instead of mean/std.
        only_show_best: Plot only the best-scoring parameter combination of
            each group.
        only_show_best_final: As ``only_show_best``, scoring each run by its
            last step.
        gen_eps: Call ``make_plot_eps`` for each figure instead of collecting
            ``make_plot`` output.
        only_show_best_sofar: As ``only_show_best``, scoring each run by its
            maximum value.
        clip_plot_value: If not ``None``, curves are clipped to
            ``[-clip_plot_value, clip_plot_value]``.
        plot_width: Forwarded to ``make_plot``.
        plot_height: Forwarded to ``make_plot``.
        filter_nan: Pre-filter ``exps_data`` with ``check_nan``.
        smooth_curve: Smooth every curve with ``sliding_mean``.
        custom_filter: If not ``None``, forwarded to
            ``selector.custom_filter``.
        legend_post_processor: Callable applied to every legend; identity
            when ``None``.
        normalize_error: Divide the standard deviation by the square root of
            the number of non-NaN runs at each step (mean/std mode only).
        custom_series_splitter: Callable mapping an experiment to a series
            key; when given it takes precedence over ``group_key``.

    Returns:
        The ``make_plot`` results of all non-empty figures joined with
        newlines (an empty string when ``gen_eps`` is set).
    """
    print(plot_key, split_key, group_key, filters)

    if filter_nan:
        selector = core.Selector(list(filter(check_nan, exps_data)))
    else:
        selector = core.Selector(exps_data)

    if legend_post_processor is None:
        def default_legend_post_processor(x):
            return x
        legend_post_processor = default_legend_post_processor

    if filters is None:
        filters = dict()
    for k, v in filters.items():
        selector = selector.where(k, str(v))
    if custom_filter is not None:
        selector = selector.custom_filter(custom_filter)

    if split_key is not None:
        vs = [vs for k, vs in distinct_params if k == split_key][0]
        split_selectors = [selector.where(split_key, v) for v in vs]
        split_legends = list(map(str, vs))
    else:
        split_selectors = [selector]
        split_legends = ["Plot"]

    best_only = (only_show_best or only_show_best_final
                 or only_show_best_sofar)
    plots = []
    for counter, (split_selector, split_legend) in enumerate(
            zip(split_selectors, split_legends), start=1):
        group_selectors, group_legends = _build_group_selectors(
            split_selector, group_key, custom_series_splitter)

        to_plot = []
        for group_selector, group_legend in zip(group_selectors,
                                                group_legends):
            filtered_data = group_selector.extract()
            if not filtered_data:
                continue

            if best_only:
                (best_regret, best_progress, data_best_regret,
                 kv_string_best_regret) = _select_best_config(
                     filtered_data, plot_key, use_median,
                     only_show_best_final, only_show_best_sofar)
                print(group_selector._filters)
                print('best regret: {}'.format(best_regret))
                if best_regret == -np.inf:
                    # Nothing scored (no data or NaN-only scores).
                    continue

                progresses, max_size = _pad_progresses([
                    exp.progress.get(plot_key, np.array([np.nan]))
                    for exp in data_best_regret
                ])
                legend = '{} (mu: {:.3f}, std: {:.5f})'.format(
                    group_legend, best_regret, np.std(best_progress))
                series = _summarize_series(
                    progresses, max_size, legend_post_processor(legend),
                    use_median, smooth_curve, clip_plot_value,
                    normalize_error)
                # The example run is taken from the winning combination, not
                # from whichever combination the search happened to visit
                # last.
                series["footnote"] = "%s; e.g. %s" % (
                    kv_string_best_regret,
                    data_best_regret[0].params.get("exp_name", "NA"))
                to_plot.append(series)
            else:
                progresses, max_size = _pad_progresses([
                    exp.progress.get(plot_key, np.array([np.nan]))
                    for exp in filtered_data
                ])
                # normalize_error is honoured here as well, so the flag
                # behaves the same with and without the best-only options.
                to_plot.append(_summarize_series(
                    progresses, max_size, legend_post_processor(group_legend),
                    use_median, smooth_curve, clip_plot_value,
                    normalize_error))

        if to_plot and not gen_eps:
            fig_title = "%s: %s" % (split_key, split_legend)
            plots.append(
                make_plot(to_plot,
                          use_median=use_median,
                          title=fig_title,
                          plot_width=plot_width,
                          plot_height=plot_height))
        if gen_eps:
            make_plot_eps(to_plot, use_median=use_median, counter=counter)

    return "\n".join(plots)