def load_exps_data_numpy(exp_folder_paths, disable_variant=False, progress_filename="progress.csv"):
    """Recursively collect experiment records (numpy-backed progress) from folders.

    Walks every directory under each path in ``exp_folder_paths`` and, for each,
    loads progress data via ``load_progress_numpy`` plus parameters from
    ``variant.json`` (falling back to ``params.pkl``, or using it directly when
    ``disable_variant`` is True). Directories missing these files are skipped
    with the IOError printed.

    :param exp_folder_paths: iterable of root directories to walk.
    :param disable_variant: if True, always read params from ``params.pkl``.
    :param progress_filename: kept for interface compatibility.
        NOTE(review): currently unused — ``load_progress_numpy`` is called with
        the experiment path only; confirm whether it should receive this name.
    :return: list of ``ext.AttrDict`` with ``progress``, ``params`` and
        ``flat_params`` entries.
    """
    exps = []
    for exp_folder_path in exp_folder_paths:
        exps += [x[0] for x in os.walk(exp_folder_path)]
    exps_data = []
    for exp in exps:
        try:
            exp_path = exp
            params_json_path = os.path.join(exp_path, "params.pkl")
            variant_json_path = os.path.join(exp_path, "variant.json")
            progress = load_progress_numpy(exp_path)
            if disable_variant:
                params = core.load_params(params_json_path)
            else:
                try:
                    params = core.load_params(variant_json_path)
                except IOError:
                    params = core.load_params(params_json_path)
            exps_data.append(ext.AttrDict(
                progress=progress,
                params=params,
                flat_params=core.flatten_dict(params)))
        except IOError as e:
            print(e)
    return exps_data
def load_exps_data(exp_folder_paths, disable_variant=False, ignore_missing_keys=False):
    """Recursively collect experiment records and harmonize their param keys.

    Walks every directory under each path in ``exp_folder_paths`` (following
    symlinks), loading ``progress.csv`` and parameters from ``variant.json``
    (falling back to ``params.json``, or using it directly when
    ``disable_variant`` is True). Directories missing these files are skipped
    with the IOError printed.

    Unless ``ignore_missing_keys`` is True, any flat-param key present in some
    experiments but not others is filled in interactively: the user is prompted
    once per missing key and the answer is cast to the type observed elsewhere.

    :param exp_folder_paths: iterable of root directories to walk.
    :param disable_variant: if True, always read params from ``params.json``.
    :param ignore_missing_keys: if True, skip the interactive default prompt.
    :return: list of ``ext.AttrDict`` with ``progress``, ``params`` and
        ``flat_params`` entries.
    """
    exps = []
    for exp_folder_path in exp_folder_paths:
        print(exp_folder_path)
        exps += [x[0] for x in os.walk(exp_folder_path, followlinks=True)]
    exps_data = []
    for exp in exps:
        try:
            exp_path = exp
            params_json_path = os.path.join(exp_path, "params.json")
            variant_json_path = os.path.join(exp_path, "variant.json")
            progress_csv_path = os.path.join(exp_path, "progress.csv")
            progress = load_progress(progress_csv_path)
            if disable_variant:
                params = load_params(params_json_path)
            else:
                try:
                    params = load_params(variant_json_path)
                except IOError:
                    params = load_params(params_json_path)
            exps_data.append(ext.AttrDict(
                progress=progress,
                params=params,
                flat_params=flatten_dict(params)))
        except IOError as e:
            print(e)
    # A dictionary of all keys and the type of their values.
    all_keys = dict()
    for data in exps_data:
        for key in data.flat_params.keys():
            if key not in all_keys:
                all_keys[key] = type(data.flat_params[key])
    # If any experiment lacks some key, ask the user for a default value once.
    if not ignore_missing_keys:
        default_values = dict()
        for data in exps_data:
            for key in sorted(all_keys.keys()):
                if key not in data.flat_params:
                    if key not in default_values:
                        default = input("Please specify the default value of \033[93m %s \033[0m: " % (key))
                        try:
                            if all_keys[key].__name__ == 'NoneType':
                                default = None
                            elif all_keys[key].__name__ == 'bool':
                                # SECURITY NOTE: eval on interactive input is
                                # risky; kept for compatibility ("True"/"False"
                                # strings). Anything uneval-able becomes False.
                                try:
                                    default = eval(default)
                                except Exception:
                                    default = False
                            else:
                                default = all_keys[key](default)
                        except ValueError:
                            print("Warning: cannot cast %s to %s" % (default, all_keys[key]))
                        default_values[key] = default
                    data.flat_params[key] = default_values[key]
    return exps_data
def load_exps_data(exp_folder_path):
    """Collect experiment records from every directory under ``exp_folder_path``.

    Each directory is expected to contain ``params.json`` and ``progress.csv``;
    directories where either is missing are skipped, printing the IOError.

    :param exp_folder_path: root directory to walk.
    :return: list of ``ext.AttrDict`` with ``progress``, ``params`` and
        ``flat_params`` entries.
    """
    candidate_dirs = [entry[0] for entry in os.walk(exp_folder_path)]
    records = []
    for run_dir in candidate_dirs:
        try:
            run_progress = load_progress(os.path.join(run_dir, "progress.csv"))
            run_params = load_params(os.path.join(run_dir, "params.json"))
            record = ext.AttrDict(
                progress=run_progress,
                params=run_params,
                flat_params=flatten_dict(run_params),
            )
            records.append(record)
        except IOError as err:
            print(err)
    return records
def get_plot_instruction(plot_key,
                         split_key=None,
                         group_key=None,
                         filters=None,
                         use_median=False,
                         only_show_best=False,
                         only_show_best_final=False,
                         gen_eps=False,
                         only_show_best_sofar=False,
                         clip_plot_value=None,
                         plot_width=None,
                         plot_height=None,
                         filter_nan=False,
                         smooth_curve=False,
                         custom_filter=None,
                         legend_post_processor=None,
                         normalize_error=False,
                         custom_series_splitter=None):
    """Build HTML plot markup for ``plot_key`` over the global ``exps_data``.

    Experiments are optionally split into separate figures by ``split_key``
    and into separate series by ``group_key`` (or ``custom_series_splitter``).
    With any of the ``only_show_best*`` flags, each group is reduced to the
    hyperparameter combination with the best mean/median curve ("regret").

    FIX: the group-key comparison used ``is not "exp_name"`` (identity test
    against a string literal, a SyntaxWarning whose outcome depends on
    interning); it now uses ``!=``.

    :param plot_key: progress column to plot.
    :param split_key: param key producing one figure per distinct value.
    :param group_key: param key producing one series per distinct value;
        defaults to grouping by ``exp_name``.
    :param filters: dict of exact-match param filters.
    :param use_median: plot median + 25/75 percentiles instead of mean/std.
    :param only_show_best / only_show_best_final / only_show_best_sofar:
        reduce each group to its best param combination (by mean, final value,
        or running max respectively).
    :param gen_eps: also emit EPS files via ``make_plot_eps``.
    :param clip_plot_value: clip curves to ``[-v, v]`` when given.
    :param filter_nan: drop experiments failing ``check_nan`` first.
    :param smooth_curve: apply ``sliding_mean`` smoothing.
    :param normalize_error: divide stds by sqrt(#non-NaN seeds).
    :return: newline-joined HTML snippets, one figure per split value.
    """
    print(plot_key, split_key, group_key, filters)
    if filter_nan:
        nonnan_exps_data = list(filter(check_nan, exps_data))
        selector = core.Selector(nonnan_exps_data)
    else:
        selector = core.Selector(exps_data)
    if legend_post_processor is None:
        legend_post_processor = lambda x: x
    if filters is None:
        filters = dict()
    for k, v in filters.items():
        selector = selector.where(k, str(v))
    if custom_filter is not None:
        selector = selector.custom_filter(custom_filter)
    if split_key is not None:
        vs = [vs for k, vs in distinct_params if k == split_key][0]
        split_selectors = [selector.where(split_key, v) for v in vs]
        split_legends = list(map(str, vs))
    else:
        split_selectors = [selector]
        split_legends = ["Plot"]
    plots = []
    counter = 1
    for split_selector, split_legend in zip(split_selectors, split_legends):
        if custom_series_splitter is not None:
            # User-defined series assignment: bucket experiments by the
            # splitter's key, one selector per bucket.
            exps = split_selector.extract()
            splitted_dict = dict()
            for exp in exps:
                key = custom_series_splitter(exp)
                if key not in splitted_dict:
                    splitted_dict[key] = list()
                splitted_dict[key].append(exp)
            splitted = list(splitted_dict.items())
            group_selectors = [core.Selector(list(x[1])) for x in splitted]
            group_legends = [x[0] for x in splitted]
        else:
            # was: `group_key is not "exp_name"` (identity bug)
            if group_key and group_key != "exp_name":
                vs = [vs for k, vs in distinct_params if k == group_key][0]
                group_selectors = [
                    split_selector.where(group_key, v) for v in vs
                ]
                group_legends = [str(x) for x in vs]
            else:
                group_key = "exp_name"
                vs = sorted(
                    [x.params["exp_name"] for x in split_selector.extract()])
                group_selectors = [
                    split_selector.where(group_key, v) for v in vs
                ]
                group_legends = [
                    summary_name(x.extract()[0], split_selector)
                    for x in group_selectors
                ]
        to_plot = []
        for group_selector, group_legend in zip(group_selectors,
                                                group_legends):
            filtered_data = group_selector.extract()
            if len(filtered_data) > 0:
                if only_show_best or only_show_best_final or only_show_best_sofar:
                    # Search the cartesian product of this group's remaining
                    # distinct params for the best-performing combination.
                    filtered_params = core.extract_distinct_params(
                        filtered_data, l=0)
                    filtered_params2 = [p[1] for p in filtered_params]
                    filtered_params_k = [p[0] for p in filtered_params]
                    product_space = list(itertools.product(*filtered_params2))
                    data_best_regret = None
                    best_regret = -np.inf
                    kv_string_best_regret = None
                    for idx, params in enumerate(product_space):
                        selector = core.Selector(exps_data)
                        for k, v in zip(filtered_params_k, params):
                            selector = selector.where(k, str(v))
                        data = selector.extract()
                        if len(data) > 0:
                            progresses = [
                                exp.progress.get(plot_key,
                                                 np.array([np.nan]))
                                for exp in data
                            ]
                            sizes = list(map(len, progresses))
                            max_size = max(sizes)
                            # Pad shorter runs with NaN so curves align.
                            progresses = [
                                np.concatenate(
                                    [ps,
                                     np.ones(max_size - len(ps)) * np.nan])
                                for ps in progresses
                            ]
                            if only_show_best_final:
                                progresses = np.asarray(progresses)[:, -1]
                            if only_show_best_sofar:
                                progresses = np.max(
                                    np.asarray(progresses), axis=1)
                            if use_median:
                                medians = np.nanmedian(progresses, axis=0)
                                regret = np.mean(medians)
                            else:
                                means = np.nanmean(progresses, axis=0)
                                regret = np.mean(means)
                            distinct_params_k = [p[0] for p in distinct_params]
                            distinct_params_v = [
                                v for k, v in zip(filtered_params_k, params)
                                if k in distinct_params_k
                            ]
                            distinct_params_kv = [(k, v) for k, v in zip(
                                distinct_params_k, distinct_params_v)]
                            distinct_params_kv_string = str(
                                distinct_params_kv).replace('), ', ')\t')
                            print('{}\t{}\t{}'.format(
                                regret, len(progresses),
                                distinct_params_kv_string))
                            if regret > best_regret:
                                best_regret = regret
                                best_progress = progresses
                                data_best_regret = data
                                kv_string_best_regret = distinct_params_kv_string
                    print(group_selector._filters)
                    print('best regret: {}'.format(best_regret))
                    if best_regret != -np.inf:
                        progresses = [
                            exp.progress.get(plot_key, np.array([np.nan]))
                            for exp in data_best_regret
                        ]
                        sizes = list(map(len, progresses))
                        max_size = max(sizes)
                        progresses = [
                            np.concatenate(
                                [ps, np.ones(max_size - len(ps)) * np.nan])
                            for ps in progresses
                        ]
                        legend = '{} (mu: {:.3f}, std: {:.5f})'.format(
                            group_legend, best_regret, np.std(best_progress))
                        # Smoothing window scales with curve length (~1/1000).
                        window_size = np.maximum(
                            int(np.round(max_size / float(1000))), 1)
                        if use_median:
                            percentile25 = np.nanpercentile(
                                progresses, q=25, axis=0)
                            percentile50 = np.nanpercentile(
                                progresses, q=50, axis=0)
                            percentile75 = np.nanpercentile(
                                progresses, q=75, axis=0)
                            if smooth_curve:
                                percentile25 = sliding_mean(
                                    percentile25, window=window_size)
                                percentile50 = sliding_mean(
                                    percentile50, window=window_size)
                                percentile75 = sliding_mean(
                                    percentile75, window=window_size)
                            if clip_plot_value is not None:
                                percentile25 = np.clip(
                                    percentile25, -clip_plot_value,
                                    clip_plot_value)
                                percentile50 = np.clip(
                                    percentile50, -clip_plot_value,
                                    clip_plot_value)
                                percentile75 = np.clip(
                                    percentile75, -clip_plot_value,
                                    clip_plot_value)
                            to_plot.append(
                                ext.AttrDict(
                                    percentile25=percentile25,
                                    percentile50=percentile50,
                                    percentile75=percentile75,
                                    legend=legend_post_processor(legend)))
                        else:
                            means = np.nanmean(progresses, axis=0)
                            stds = np.nanstd(progresses, axis=0)
                            if normalize_error:
                                # Standard error: divide by sqrt of the
                                # number of non-NaN seeds at each step.
                                stds /= np.sqrt(
                                    np.sum((1. - np.isnan(progresses)),
                                           axis=0))
                            if smooth_curve:
                                means = sliding_mean(means,
                                                     window=window_size)
                                stds = sliding_mean(stds, window=window_size)
                            if clip_plot_value is not None:
                                means = np.clip(means, -clip_plot_value,
                                                clip_plot_value)
                                stds = np.clip(stds, -clip_plot_value,
                                               clip_plot_value)
                            to_plot.append(
                                ext.AttrDict(
                                    means=means,
                                    stds=stds,
                                    legend=legend_post_processor(legend)))
                        if len(to_plot) > 0 and len(data) > 0:
                            to_plot[-1]["footnote"] = "%s; e.g. %s" % (
                                kv_string_best_regret, data[0].params.get(
                                    "exp_name", "NA"))
                        else:
                            to_plot[-1]["footnote"] = ""
                else:
                    progresses = [
                        exp.progress.get(plot_key, np.array([np.nan]))
                        for exp in filtered_data
                    ]
                    sizes = list(map(len, progresses))
                    max_size = max(sizes)
                    progresses = [
                        np.concatenate(
                            [ps, np.ones(max_size - len(ps)) * np.nan])
                        for ps in progresses
                    ]
                    window_size = np.maximum(
                        int(np.round(max_size / float(1000))), 1)
                    if use_median:
                        percentile25 = np.nanpercentile(
                            progresses, q=25, axis=0)
                        percentile50 = np.nanpercentile(
                            progresses, q=50, axis=0)
                        percentile75 = np.nanpercentile(
                            progresses, q=75, axis=0)
                        if smooth_curve:
                            percentile25 = sliding_mean(
                                percentile25, window=window_size)
                            percentile50 = sliding_mean(
                                percentile50, window=window_size)
                            percentile75 = sliding_mean(
                                percentile75, window=window_size)
                        if clip_plot_value is not None:
                            percentile25 = np.clip(percentile25,
                                                   -clip_plot_value,
                                                   clip_plot_value)
                            percentile50 = np.clip(percentile50,
                                                   -clip_plot_value,
                                                   clip_plot_value)
                            percentile75 = np.clip(percentile75,
                                                   -clip_plot_value,
                                                   clip_plot_value)
                        to_plot.append(
                            ext.AttrDict(
                                percentile25=percentile25,
                                percentile50=percentile50,
                                percentile75=percentile75,
                                legend=legend_post_processor(group_legend)))
                    else:
                        means = np.nanmean(progresses, axis=0)
                        stds = np.nanstd(progresses, axis=0)
                        if smooth_curve:
                            means = sliding_mean(means, window=window_size)
                            stds = sliding_mean(stds, window=window_size)
                        if clip_plot_value is not None:
                            means = np.clip(means, -clip_plot_value,
                                            clip_plot_value)
                            stds = np.clip(stds, -clip_plot_value,
                                           clip_plot_value)
                        to_plot.append(
                            ext.AttrDict(
                                means=means,
                                stds=stds,
                                legend=legend_post_processor(group_legend)))
        if len(to_plot) > 0 and not gen_eps:
            fig_title = "%s: %s" % (split_key, split_legend)
            plots.append(
                make_plot(to_plot,
                          use_median=use_median,
                          title=fig_title,
                          plot_width=plot_width,
                          plot_height=plot_height))
        if gen_eps:
            make_plot_eps(to_plot, use_median=use_median, counter=counter)
        counter += 1
    return "\n".join(plots)
def plot(
        y_key,
        x_key=None,
        x_scale=1.,
        split_key=None,
        group_key=None,
        filters=None,
        custom_filter=None,
        clip_plot_value=None,
        plot_width=None,
        plot_height=None,
        filter_nan=False,
        smooth_curve=False,
        legend_post_processor=None,
        normalize_error=False,
        squeeze_nan=False,
        xlim=None,
        ylim=None,
        show_exp_count=False,
        sub_plots=False,
        sort_int_legend=False,
        same_legend=False,
        legend_title=None,
        legend_title_x=1.05,
        legend_title_y=1.01,
        plot_stds=True,
        font_size=12,
        legend_font_size=None,
        line_dash_list=None,
        n_rows=1,
        dtick=None,
):
    """Render interactive plotly figures of ``y_key`` over the global ``exps_data``.

    Experiments are split into one figure per distinct ``split_key`` value
    (or combined into a grid of subplots when ``sub_plots`` is True) and into
    one mean/std series per distinct ``group_key`` value (default: per
    ``exp_name``). Figures are displayed with ``po.iplot``; nothing is
    returned.

    FIX: the group-key comparison used ``is not "exp_name"`` (identity test
    against a string literal, a SyntaxWarning whose outcome depends on
    interning); it now uses ``!=``.

    NOTE(review): ``squeeze_nan`` and ``normalize_error`` are accepted but
    never read in this body — confirm whether they were meant to be wired up.

    :param y_key: progress column plotted on the y axis.
    :param x_key: progress column for the x axis; defaults to step index.
    :param x_scale: multiplier applied to every x value.
    :param sub_plots: lay all splits out in an ``n_rows``-row subplot grid.
    :param sort_int_legend: order series by integer value of their legend.
    :param same_legend: emit a NaN placeholder series for empty groups so
        legends stay aligned across figures.
    :param plot_stds: when False, error bands are zeroed out.
    """
    if legend_font_size is None:
        legend_font_size = font_size
    print(y_key, split_key, group_key, filters)
    if filter_nan:
        nonnan_exps_data = list(filter(check_nan, exps_data))
        selector = Selector(nonnan_exps_data)
    else:
        selector = Selector(exps_data)
    if legend_post_processor is None:
        legend_post_processor = lambda x: x
    if filters is None:
        filters = dict()
    for k, v in filters.items():
        selector = selector.where(k, str(v))
    if custom_filter is not None:
        selector = selector.custom_filter(custom_filter)
    if split_key is not None:
        vs = [vs for k, vs in distinct_params if k == split_key][0]
        split_selectors = [selector.where(split_key, v) for v in vs]
        split_legends = list(map(str, vs))
    else:
        split_selectors = [selector]
        split_legends = ["Plot"]
    plots = []
    counter = 1
    if sub_plots:
        # Ceiling division: enough columns to fit all splits in n_rows rows.
        n_per_row = -(-len(split_legends) // n_rows)
        fig = tools.make_subplots(rows=n_rows,
                                  cols=-(-len(split_legends) // n_rows),
                                  subplot_titles=split_legends)
    n_groups = None
    for split_selector, split_legend in zip(split_selectors, split_legends):
        print("split_legend: ", split_legend)
        # was: `group_key is not "exp_name"` (identity bug)
        if group_key and group_key != "exp_name":
            vs = [vs for k, vs in distinct_params if k == group_key][0]
            group_selectors = [split_selector.where(group_key, v) for v in vs]
            group_legends = [str(x) for x in vs]
        else:
            group_key = "exp_name"
            vs = sorted(
                [x.params["exp_name"] for x in split_selector.extract()])
            group_selectors = [split_selector.where(group_key, v) for v in vs]
            group_legends = [
                summary_name(x.extract()[0], split_selector)
                for x in group_selectors
            ]
        if sort_int_legend:
            # Reorder series numerically by their (integer) legend text.
            _, old_idxs = \
                zip(*sorted((int(g), i) for i, g in enumerate(group_legends)))
            group_selectors = [group_selectors[i] for i in old_idxs]
            group_legends = [group_legends[i] for i in old_idxs]
        to_plot = []
        for group_selector, group_legend in zip(group_selectors,
                                                group_legends):
            print("group_legend: ", group_legend)
            filtered_data = group_selector.extract()
            if show_exp_count:
                group_legend += " (%d)" % (len(filtered_data))
            print("len(filtered_data): ", len(filtered_data))
            if len(filtered_data) > 0:
                progresses = [
                    exp.progress.get(y_key, np.array([np.nan]))
                    for exp in filtered_data
                ]
                sizes = list(map(len, progresses))
                max_size = max(sizes)
                # Pad shorter runs with NaN so curves align.
                progresses = [
                    np.concatenate([ps,
                                    np.ones(max_size - len(ps)) * np.nan])
                    for ps in progresses
                ]
                # Smoothing window scales with curve length (~1/1000).
                window_size = np.maximum(int(np.round(max_size / float(1000))),
                                         1)
                means = np.nanmean(progresses, axis=0)
                stds = np.nanstd(progresses, axis=0)
                if not plot_stds:
                    stds[:] = 0
                if x_key is not None:
                    xs = [
                        exp.progress.get(x_key, np.array([np.nan]))
                        for exp in filtered_data
                    ]
                    if not all([len(xi) == len(xs[0]) for xi in xs]):
                        print(
                            "WARNING: different length xs within group, using longest"
                        )
                        lengths = [len(xi) for xi in xs]
                        max_i = np.argmax(lengths)
                        xs[0] = xs[max_i]
                    elif not all([all(xi == xs[0]) for xi in xs]):
                        print(
                            "WARNING: different xs within group, using one of them for all"
                        )
                    xs = xs[0]
                else:
                    xs = list(range(len(means)))
                xs = [x * x_scale for x in xs]
                if smooth_curve:
                    means = sliding_mean(means, window=window_size)
                    stds = sliding_mean(stds, window=window_size)
                print(len(xs), len(means))
                assert len(xs) == len(means)
                if clip_plot_value is not None:
                    means = np.clip(means, -clip_plot_value, clip_plot_value)
                    stds = np.clip(stds, -clip_plot_value, clip_plot_value)
                to_plot.append(
                    ext.AttrDict(xs=xs,
                                 means=means,
                                 stds=stds,
                                 legend=legend_post_processor(group_legend)))
            elif same_legend:
                # Placeholder series keeps legend entries consistent.
                to_plot.append(
                    ext.AttrDict(xs=np.array([np.nan]),
                                 means=np.array([np.nan]),
                                 stds=np.array([np.nan]),
                                 legend=legend_post_processor(group_legend)))
        if len(to_plot) > 0:
            if sub_plots:
                # Only the first subplot contributes legend entries.
                plot_data = make_plot_data(to_plot,
                                           showlegend=(counter == 1))
                this_row = ((counter - 1) // n_per_row) + 1
                this_col = ((counter - 1) % n_per_row) + 1
                print("counter, row, col: ", counter, this_row, this_col)
                for data in plot_data:
                    fig.append_trace(data, this_row, this_col)
            else:
                fig_title = "%s: %s" % (split_key, split_legend)
                fig = make_plot(
                    to_plot,
                    title=fig_title,
                    plot_width=plot_width,
                    plot_height=plot_height,
                    xlim=xlim,
                    ylim=ylim,
                    font_size=font_size,
                    legend_font_size=legend_font_size,
                    line_dash_list=line_dash_list,
                )
                if legend_title is not None:
                    legend_title_annot = go.Annotation(
                        x=legend_title_x,
                        y=legend_title_y,
                        align="right",
                        valign="top",
                        text=legend_title,
                        showarrow=False,
                        xref="paper",
                        yref="paper",
                        xanchor="middle",
                        yanchor="top",
                    )
                    fig['layout']["annotations"] += [legend_title_annot]
                po.iplot(fig)
        counter += 1
    if sub_plots:
        # Single combined figure: apply layout tweaks then display once.
        fig['layout'].update(height=plot_height, width=plot_width)
        if xlim is not None:
            for i in range(1, counter):
                fig['layout']['xaxis' + str(i)].update(range=[0, xlim])
        if dtick is not None:
            for i in range(1, counter):
                fig['layout']['xaxis' + str(i)].update(dtick=dtick)
        fig['layout']['font'].update(size=font_size)
        fig['layout']['legend']['font']['size'] = legend_font_size
        for subtitle in fig['layout']['annotations']:
            subtitle['font']['size'] = font_size
        if legend_title is not None:
            legend_title = go.Annotation(
                x=legend_title_x,
                y=legend_title_y,
                align="right",
                valign="top",
                text=legend_title,
                showarrow=False,
                xref="paper",
                yref="paper",
                xanchor="middle",
                yanchor="top",
            )
            fig['layout']["annotations"] += [legend_title]
        po.iplot(fig)
def get_plot_instruction(plot_key,
                         split_key=None,
                         group_key=None,
                         filters=None,
                         use_median=False,
                         only_show_best=False,
                         gen_eps=False,
                         clip_plot_value=None,
                         plot_width=None,
                         plot_height=None,
                         filter_nan=False):
    """Build HTML plot markup for ``plot_key`` over the global ``exps_data``.

    Simpler variant: splits into one figure per ``split_key`` value and one
    smoothed mean/std (or median/percentile) series per ``group_key`` value
    (default: per ``exp_name``); ``only_show_best`` reduces each group to its
    best hyperparameter combination.

    FIXES (Python 3 port): ``filters.iteritems()`` -> ``filters.items()``
    (``iteritems`` raises AttributeError on Python 3); ``map``/``filter``
    results wrapped in ``list(...)`` so they survive reuse, matching the
    sibling implementation; ``group_key is not "exp_name"`` (identity test
    against a string literal) -> ``!=``.

    :param plot_key: progress column to plot.
    :param split_key: param key producing one figure per distinct value.
    :param group_key: param key producing one series per distinct value.
    :param filters: dict of exact-match param filters.
    :param use_median: plot median + 25/75 percentiles instead of mean/std.
    :param only_show_best: keep only the best param combination per group.
    :param gen_eps: also emit EPS files via ``make_plot_eps``.
    :param clip_plot_value: clip curves to ``[-v, v]`` when given.
    :param filter_nan: drop experiments failing ``check_nan`` first.
    :return: newline-joined HTML snippets, one figure per split value.
    """
    print(plot_key, split_key, group_key, filters)
    if filter_nan:
        nonnan_exps_data = list(filter(check_nan, exps_data))
        selector = core.Selector(nonnan_exps_data)
    else:
        selector = core.Selector(exps_data)
    if filters is None:
        filters = dict()
    for k, v in filters.items():
        selector = selector.where(k, str(v))
    if split_key is not None:
        vs = [vs for k, vs in distinct_params if k == split_key][0]
        split_selectors = [selector.where(split_key, v) for v in vs]
        split_legends = list(map(str, vs))
    else:
        split_selectors = [selector]
        split_legends = ["Plot"]
    plots = []
    counter = 1
    for split_selector, split_legend in zip(split_selectors, split_legends):
        # was: `group_key is not "exp_name"` (identity bug)
        if group_key and group_key != "exp_name":
            vs = [vs for k, vs in distinct_params if k == group_key][0]
            group_selectors = [split_selector.where(group_key, v) for v in vs]
            group_legends = [str(x) for x in vs]
        else:
            group_key = "exp_name"
            vs = sorted(
                [x.params["exp_name"] for x in split_selector.extract()])
            group_selectors = [split_selector.where(group_key, v) for v in vs]
            group_legends = [
                summary_name(x.extract()[0], split_selector)
                for x in group_selectors
            ]
        to_plot = []
        for group_selector, group_legend in zip(group_selectors,
                                                group_legends):
            filtered_data = group_selector.extract()
            if len(filtered_data) > 0:
                if only_show_best:
                    # Search the cartesian product of this group's remaining
                    # distinct params for the best-performing combination.
                    filtered_params = core.extract_distinct_params(
                        filtered_data, l=0)
                    filtered_params2 = [p[1] for p in filtered_params]
                    filtered_params_k = [p[0] for p in filtered_params]
                    product_space = list(itertools.product(*filtered_params2))
                    data_best_regret = None
                    best_regret = -np.inf
                    for idx, params in enumerate(product_space):
                        selector = core.Selector(exps_data)
                        for k, v in zip(filtered_params_k, params):
                            selector = selector.where(k, str(v))
                        data = selector.extract()
                        if len(data) > 0:
                            progresses = [
                                exp.progress.get(plot_key,
                                                 np.array([np.nan]))
                                for exp in data
                            ]
                            sizes = list(map(len, progresses))
                            max_size = max(sizes)
                            # Pad shorter runs with NaN so curves align.
                            progresses = [
                                np.concatenate(
                                    [ps,
                                     np.ones(max_size - len(ps)) * np.nan])
                                for ps in progresses
                            ]
                            if use_median:
                                medians = np.nanmedian(progresses, axis=0)
                                regret = np.median(medians)
                            else:
                                means = np.nanmean(progresses, axis=0)
                                regret = np.mean(means)
                            if regret > best_regret:
                                best_regret = regret
                                data_best_regret = data
                            distinct_params_k = [p[0] for p in distinct_params]
                            distinct_params_v = [
                                v for k, v in zip(filtered_params_k, params)
                                if k in distinct_params_k
                            ]
                            distinct_params_kv = [(k, v) for k, v in zip(
                                distinct_params_k, distinct_params_v)]
                            distinct_params_kv_string = str(
                                distinct_params_kv).replace('), ', ')\t')
                            print('{}\t{}\t{}'.format(
                                regret, len(progresses),
                                distinct_params_kv_string))
                    print(group_selector._filters)
                    print('best regret: {}'.format(best_regret))
                    if best_regret != -np.inf:
                        progresses = [
                            exp.progress.get(plot_key, np.array([np.nan]))
                            for exp in data_best_regret
                        ]
                        sizes = list(map(len, progresses))
                        max_size = max(sizes)
                        progresses = [
                            np.concatenate(
                                [ps, np.ones(max_size - len(ps)) * np.nan])
                            for ps in progresses
                        ]
                        legend = '{} ({:.1f})'.format(group_legend,
                                                      best_regret)
                        # Smoothing window scales with curve length (~1/1000).
                        window_size = np.maximum(
                            int(np.round(max_size / float(1000))), 1)
                        if use_median:
                            percentile25 = np.nanpercentile(
                                progresses, q=25, axis=0)
                            percentile50 = np.nanpercentile(
                                progresses, q=50, axis=0)
                            percentile75 = np.nanpercentile(
                                progresses, q=75, axis=0)
                            percentile25 = sliding_mean(percentile25,
                                                        window=window_size)
                            percentile50 = sliding_mean(percentile50,
                                                        window=window_size)
                            percentile75 = sliding_mean(percentile75,
                                                        window=window_size)
                            if clip_plot_value is not None:
                                percentile25 = np.clip(
                                    percentile25, -clip_plot_value,
                                    clip_plot_value)
                                percentile50 = np.clip(
                                    percentile50, -clip_plot_value,
                                    clip_plot_value)
                                percentile75 = np.clip(
                                    percentile75, -clip_plot_value,
                                    clip_plot_value)
                            to_plot.append(
                                ext.AttrDict(percentile25=percentile25,
                                             percentile50=percentile50,
                                             percentile75=percentile75,
                                             legend=legend))
                        else:
                            means = np.nanmean(progresses, axis=0)
                            stds = np.nanstd(progresses, axis=0)
                            means = sliding_mean(means, window=window_size)
                            stds = sliding_mean(stds, window=window_size)
                            if clip_plot_value is not None:
                                means = np.clip(means, -clip_plot_value,
                                                clip_plot_value)
                                stds = np.clip(stds, -clip_plot_value,
                                               clip_plot_value)
                            to_plot.append(
                                ext.AttrDict(means=means,
                                             stds=stds,
                                             legend=legend))
                else:
                    progresses = [
                        exp.progress.get(plot_key, np.array([np.nan]))
                        for exp in filtered_data
                    ]
                    sizes = list(map(len, progresses))
                    max_size = max(sizes)
                    progresses = [
                        np.concatenate(
                            [ps, np.ones(max_size - len(ps)) * np.nan])
                        for ps in progresses
                    ]
                    window_size = np.maximum(
                        int(np.round(max_size / float(1000))), 1)
                    if use_median:
                        percentile25 = np.nanpercentile(
                            progresses, q=25, axis=0)
                        percentile50 = np.nanpercentile(
                            progresses, q=50, axis=0)
                        percentile75 = np.nanpercentile(
                            progresses, q=75, axis=0)
                        percentile25 = sliding_mean(percentile25,
                                                    window=window_size)
                        percentile50 = sliding_mean(percentile50,
                                                    window=window_size)
                        percentile75 = sliding_mean(percentile75,
                                                    window=window_size)
                        if clip_plot_value is not None:
                            percentile25 = np.clip(percentile25,
                                                   -clip_plot_value,
                                                   clip_plot_value)
                            percentile50 = np.clip(percentile50,
                                                   -clip_plot_value,
                                                   clip_plot_value)
                            percentile75 = np.clip(percentile75,
                                                   -clip_plot_value,
                                                   clip_plot_value)
                        to_plot.append(
                            ext.AttrDict(percentile25=percentile25,
                                         percentile50=percentile50,
                                         percentile75=percentile75,
                                         legend=group_legend))
                    else:
                        means = np.nanmean(progresses, axis=0)
                        stds = np.nanstd(progresses, axis=0)
                        means = sliding_mean(means, window=window_size)
                        stds = sliding_mean(stds, window=window_size)
                        if clip_plot_value is not None:
                            means = np.clip(means, -clip_plot_value,
                                            clip_plot_value)
                            stds = np.clip(stds, -clip_plot_value,
                                           clip_plot_value)
                        to_plot.append(
                            ext.AttrDict(means=means,
                                         stds=stds,
                                         legend=group_legend))
        if len(to_plot) > 0 and not gen_eps:
            plots.append("<div>%s: %s</div>" % (split_key, split_legend))
            plots.append(
                make_plot(to_plot,
                          use_median=use_median,
                          plot_width=plot_width,
                          plot_height=plot_height))
        if gen_eps:
            make_plot_eps(to_plot, use_median=use_median, counter=counter)
        counter += 1
    return "\n".join(plots)