def _init_flag_views(self):
    """Build one CDSView per known flag value for every tab.

    After this call ``self.env.flag_views`` has the shape::

        self.env.flag_views = {
            'TAB_NAME': {
                flag_value_0: view_object_0,
                flag_value_1: view_object_1,
            }
        }

    Each view selects, through an ``IndexFilter``, the rows of
    ``self.env.source`` whose flag column (``tab + FLAG_END``) equals the
    flag value. Flag values that do not occur in the column get a view
    with an empty index list.

    NOTE: ``self.env.all_flags`` must be populated before calling this
    method, since its keys decide which views are created.
    """
    lg.info('-- INIT FLAG VIEWS')

    # TODO: tabs with the same flag should share the views
    for tab in self.env.f_handler.tab_list:
        flag_column = self.env.source.data[tab + FLAG_END]

        # Group the row positions of the source by their integer flag value.
        rows_by_flag = {}
        for row, raw_value in enumerate(flag_column):
            rows_by_flag.setdefault(int(raw_value), []).append(row)

        # Flag values without any rows fall back to an empty index list.
        self.env.flag_views[tab] = {
            flag_value: CDSView(
                source=self.env.source,
                filters=[IndexFilter(rows_by_flag.get(flag_value, []))],
            )
            for flag_value in self.env.all_flags
        }
def _create_views(self, source, used_configs):
    """Create views in order of plotting, so more interesting views are plotted on top.

    Views are generated by iterating, from outermost to innermost, over
    type ('Candidate', 'Incumbent', 'Final Incumbent', 'Default'), origin
    ('Unknown', 'Random', 'Acquisition Function'), the zorder values of
    ``source`` (ascending by integer value) and the runs in
    ``self.configs_in_run``.

    Individual views are necessary, since bokeh can only plot one marker-type
    (circle, triangle, ...) per 'scatter'-call.

    Parameters
    ----------
    source: ColumnDataSource
        containing relevant information for plotting; must provide the
        columns 'type', 'origin' and 'zorder'
    used_configs: List[Configuration]
        configs that are contained in this source, in row order. Necessary to
        build one boolean mask per run, so glyphs of independent runs can be
        toggled. Not all configs are in every source because of efficiency:
        no need to have 0-runs configs.

    Returns
    -------
    views: List[CDSView]
        views in order of plotting
    views_by_run: Dict[ConfiguratorRun -> List[int]]
        maps each run to a list of indices of the related glyphs in the
        returned 'views'-list
    markers: List[string]
        markers (to the view with the same index)
    """

    def _get_marker(t, o):
        """Return the marker name for type ``t`` and origin ``o``."""
        if t == "Default":
            shape = 'triangle'
        elif t == 'Final Incumbent':
            shape = 'inverted_triangle'
        else:
            shape = 'square' if t == "Incumbent" else 'circle'
            # NOTE(review): the '_x' suffix is applied to circle/square only;
            # presumably because bokeh has no triangle '_x' markers - confirm.
            shape += '_x' if o.startswith("Acquisition Function") else ''
        return shape

    # Both of the following only depend on the inputs, not on the type/origin
    # combination, so compute them once instead of once per inner iteration.
    zorders = sorted(set(source.data['zorder']), key=int)
    run_masks = [
        (run, [c in configs for c in used_configs])
        for run, configs in self.configs_in_run.items()
    ]

    views, markers = [], []
    views_by_run = {run: [] for run in self.configs_in_run}
    for t in ['Candidate', 'Incumbent', 'Final Incumbent', 'Default']:
        for o in ['Unknown', 'Random', 'Acquisition Function']:
            marker = _get_marker(t, o)
            for z in zorders:
                for run, mask in run_masks:
                    # The index of the view about to be appended
                    views_by_run[run].append(len(views))
                    views.append(
                        CDSView(source=source,
                                filters=[
                                    GroupFilter(column_name='type', group=t),
                                    GroupFilter(column_name='origin', group=o),
                                    GroupFilter(column_name='zorder', group=z),
                                    # every filter gets its own copy of the mask
                                    BooleanFilter(list(mask)),
                                ]))
                    markers.append(marker)

    self.logger.debug("%d different glyph renderers, %d different zorder-values",
                      len(views), len(zorders))
    return (views, views_by_run, markers)
def _plot_create_views(self, source):
    """Create one view per (type, origin, zorder) combination, in plotting order.

    Combinations are enumerated with type as the slowest-changing and zorder
    (ascending by integer value) as the fastest-changing component, so views
    created later can be drawn on top of earlier ones.

    A separate view per combination is required because bokeh can only plot
    one marker-type (circle, triangle, ...) per 'scatter'-call.

    Parameters
    ----------
    source: ColumnDataSource
        data to plot; must provide the columns 'type', 'origin' and 'zorder'

    Returns
    -------
    views: List[CDSView]
        views in order of plotting
    markers: List[string]
        marker names, aligned by index with ``views``
    """
    fixed_shapes = {'Default': 'triangle', 'Final Incumbent': 'inverted_triangle'}

    def _get_marker(t, o):
        """Return the marker name for type ``t`` and origin ``o``."""
        if t in fixed_shapes:
            return fixed_shapes[t]
        shape = 'square' if t == "Incumbent" else 'circle'
        if o.startswith("Acquisition Function"):
            shape += '_x'
        return shape

    config_types = ['Candidate', 'Incumbent', 'Final Incumbent', 'Default']
    origins = ['Unknown', 'Random', 'Acquisition Function']
    zorders = sorted(set(source.data['zorder']), key=int)

    combinations = [
        (config_type, origin, zorder)
        for config_type in config_types
        for origin in origins
        for zorder in zorders
    ]

    views = [
        CDSView(source=source,
                filters=[
                    GroupFilter(column_name='type', group=config_type),
                    GroupFilter(column_name='origin', group=origin),
                    GroupFilter(column_name='zorder', group=zorder),
                ])
        for config_type, origin, zorder in combinations
    ]
    markers = [_get_marker(config_type, origin) for config_type, origin, _ in combinations]

    self.logger.debug(
        "%d different glyph renderers, %d different zorder-values",
        len(views), len(set(source.data['zorder'])))
    return (views, markers)
def generate_view(source, axis, positive):
    """Return a CDSView of ``source`` restricted by the sign of one column.

    The view uses a ``CustomJSFilter`` whose JavaScript walks over all rows
    of the source and keeps a row when the value in column ``axis`` is
    ``>= 0`` (if ``positive`` is truthy) or ``<= 0`` (otherwise).

    Parameters
    ----------
    source: ColumnDataSource
        data source the view is attached to
    axis: str
        name of the column that is compared against zero; it is inserted
        verbatim into the generated JavaScript
    positive: bool
        selects the comparison operator (``>=`` when truthy, else ``<=``)

    Returns
    -------
    CDSView
        view on ``source`` with the sign filter as its only filter
    """
    comparator = '>=' if positive else '<='

    # Assembled piecewise; the concatenation is the same text as a single literal.
    js_code = (
        " var indices = []; "
        "for (var i = 0; i < source.get_length(); i++){ "
        f"if (source.data['{axis}'][i] {comparator} 0){{ "
        "indices.push(true); "
        "} else { "
        "indices.push(false); "
        "} "
        "} "
        "return indices; "
    )

    sign_filter = CustomJSFilter(code=js_code)
    return CDSView(source=source, filters=[sign_filter])
def plot(self):
    """
    Plot performance over time, using all trajectory entries.

    One line is drawn per entry returned by the line-helpers
    (``_get_bohb_line`` if ``self.bohb_results`` is set, otherwise
    ``_get_avg`` and ``_get_all_runs``). Lines named 'average' or
    'all budgets' are visible initially, all others start hidden. Lines whose
    name is one of these or contains 'budget' additionally get a shaded band
    between their lower and upper values.

    The figure is exported via ``export_bokeh`` to
    ``os.path.join(self.output_dir, self.output_fn)`` and that path is
    appended to ``self.plots``.

    Returns
    -------
    layout: bokeh layout
        the figure next to the checkbox group and its select-all/select-none
        buttons

    Raises
    ------
    ValueError
        if any mean, upper or lower value of a line is not finite
    """
    rh = self.rh
    runs = self.runs
    output_fn = self.output_fn
    validator = self.validator

    # Collect the lines to be plotted (their attributes must be zippable)
    if self.bohb_results:
        lines = [self._get_bohb_line(validator, runs, rh)]
        for budget in self.bohb_results[0].HB_config['budgets']:
            lines.append(self._get_bohb_line(validator, runs, rh, budget))
    else:
        lines = [self._get_avg(validator, runs, rh)]
        lines.extend(self._get_all_runs(validator, runs, rh))

    # One flat table for all lines; a line is selected later via its 'name'
    hp_names = self.scenario.cs.get_hyperparameter_names()
    data = {key: [] for key in ('name', 'time', 'mean', 'upper', 'lower')}
    for hp in hp_names:
        data[hp] = []

    for line in lines:
        entries = zip(line.time, line.mean, line.upper, line.lower, line.config)
        for stamp, mean, upper, lower, config in entries:
            if not all(np.isfinite(value) for value in (mean, upper, lower)):
                self.logger.debug("Why is there a NaN? (%s)", str(line))
                raise ValueError(
                    "There is a NaN value in your data, this should be filtered out. "
                    "Please report this to github.com/automl/CAVE/issues and provide the "
                    "debug/debug.log and the output of `pip freeze`, if you can."
                )
            data['name'].append(line.name)
            data['time'].append(stamp)
            data['mean'].append(mean)
            data['upper'].append(upper)
            data['lower'].append(lower)
            for hp in hp_names:
                # Hyperparameters missing from the config are marked 'inactive'
                data[hp].append(config[hp] if (config and hp in config) else 'inactive')
    source = ColumnDataSource(data=data)

    # Create plot
    x_range = Range1d(min(source.data['time']), max(source.data['time']))
    run_obj = self.scenario.run_obj
    y_label = 'estimated {}'.format(run_obj if run_obj != 'quality' else 'cost')
    fig = figure(plot_width=700,
                 plot_height=500,
                 tools=['save', 'pan', 'box_zoom', 'wheel_zoom', 'reset'],
                 x_range=x_range,
                 x_axis_type='log',
                 y_axis_type='log' if self.scenario.run_obj == 'runtime' else 'linear',
                 x_axis_label='time (sec)',
                 y_axis_label=y_label,
                 title="Cost over time")

    aggregate_names = ['average', 'all budgets']
    renderers = []
    legend_it = []
    for line, color in zip(lines, itertools.cycle(Dark2_5)):
        name = line.name
        shown_initially = name in aggregate_names

        # CDSView with GroupFilter picks this line's rows from the shared source
        view = CDSView(source=source,
                       filters=[GroupFilter(column_name='name', group=str(name))])
        glyphs = [
            fig.line('time', 'mean',
                     source=source,
                     view=view,
                     line_color=color,
                     visible=shown_initially)
        ]
        renderers.append(glyphs)
        # The legend entry shares the glyph list, so a band added below is included
        legend_it.append((name, glyphs))

        if shown_initially or 'budget' in name:
            # Uncertainty band: lower bounds forward, upper bounds backward
            band_x = np.append(line.time, line.time[::-1])
            band_y = np.append(line.lower, line.upper[::-1])
            glyphs.append(
                fig.patch(band_x, band_y,
                          color='#7570B3',
                          fill_alpha=0.2,
                          visible=shown_initially))

    # Tooltips
    tooltips = [("estimated performance", "@mean"), ("at-time", "@time")]
    fig.add_tools(
        HoverTool(
            renderers=list(itertools.chain.from_iterable(renderers)),
            tooltips=tooltips,
        ))
    # TODO optional: activate different tooltips for different renderers, doesn't work properly

    # Renderers are already wrapped in nested lists, as the checkbox-code expects
    checkbox, select_all, select_none = get_checkbox(
        renderers, [entry[0] for entry in legend_it])
    checkbox.active = [0]

    # Tilt tick labels and configure axis labels
    fig.xaxis.major_label_orientation = 3 / 4
    fig.xaxis.axis_label_text_font_size = "15pt"
    fig.yaxis.axis_label_text_font_size = "15pt"
    fig.xaxis.major_label_text_font_size = "12pt"
    fig.yaxis.major_label_text_font_size = "12pt"
    fig.title.text_font_size = "15pt"

    legend = Legend(items=legend_it,
                    location='bottom_left',
                    label_text_font_size="8pt")
    legend.click_policy = "hide"
    fig.add_layout(legend, 'right')

    # Assign objects and save png's
    buttons = row(widgetbox(select_all, width=50),
                  widgetbox(select_none, width=50))
    layout = row(fig, column(widgetbox(checkbox, width=100), buttons))

    output_path = os.path.join(self.output_dir, output_fn)
    export_bokeh(fig, output_path, self.logger)
    self.plots.append(output_path)

    return layout
def _plot(self, result_object, learning_curves, hyperparameter_names, reset_times=False):
    """Create an interactive bokeh layout of learning curves.

    Every learning curve is drawn as a multi-line plus one scatter-marker per
    point. Curves are grouped by their HB-iteration (``config_id[0]``) so
    they can be toggled through checkboxes; a select-widget switches the
    colouring between final performance and iteration.

    Parameters
    ----------
    result_object:
        object providing ``get_id2config_mapping()``; it is also handed to
        ``self.get_longest_run``
    learning_curves: Dict[config_id -> List[List[Tuple[time, loss]]]]
        learning curves per configuration id; points whose loss is None or
        not finite are dropped
    hyperparameter_names: List[str]
        hyperparameters to be added as columns (and thereby tooltips)
    reset_times: bool
        if True, every curve is shifted so that it starts at time 0

    Returns
    -------
    layout: bokeh layout
        the figure next to checkboxes, select-all/none buttons and the
        colour-selection widget

    Raises
    ------
    ValueError
        if times and losses of a curve differ in length, or if no curve is
        left to plot
    """
    # Extract information from learning-curve-dict
    times, losses, config_ids = [], [], []
    for conf_id, curves in learning_curves.items():
        for lc in curves:
            if len(lc) == 0:
                continue
            # The None-check has to come first: np.isfinite(None) raises a TypeError
            tmp = list(
                zip(*[(time, loss) for time, loss in lc
                      if loss is not None and np.isfinite(loss)]))
            if len(tmp) == 0:
                self.logger.debug(
                    "Probably filtered NaNs or None's.., skipping %s, data %s",
                    str(conf_id), str(lc))
                continue
            times.append(tmp[0])
            losses.append(tmp[1])
            config_ids.append(conf_id)

    if reset_times:
        times = [np.array(ts) - ts[0] for ts in times]

    # Prepare ColumnDataSource
    data = OrderedDict([
        ('config_id', []),
        ('config_info', []),
        ('times', []),
        ('losses', []),
        ('duration', []),
        ('HB_iteration', []),
        ('colors', []),
        ('colors_performance', []),
        ('colors_iteration', []),
    ])
    for hp in hyperparameter_names:
        data[hp] = []

    # Populate
    id2conf = result_object.get_id2config_mapping()
    for counter, c_id in enumerate(config_ids):
        if len(times[counter]) != len(losses[counter]):
            raise ValueError(
                "Number of times and losses differ for config %s" % str(c_id))
        longest_run = self.get_longest_run(c_id, result_object)
        if not longest_run:
            continue
        data['config_id'].append(str(c_id))
        try:
            conf_info = '\n'.join([
                str(k) + "=" + str(v)
                for k, v in sorted(id2conf[c_id]['config_info'].items())
            ])
        except KeyError:
            conf_info = 'Not Available'
        data['config_info'].append(conf_info)
        data['times'].append(times[counter])
        data['losses'].append(losses[counter])
        # longest_run is guaranteed to be truthy here (see guard above)
        data['duration'].append(
            longest_run['time_stamps']['finished'] - longest_run['time_stamps']['started'])
        data['HB_iteration'].append(str(c_id[0]))
        for hp in hyperparameter_names:
            try:
                data[hp].append(id2conf[c_id]['config'][hp])
            except KeyError:
                data[hp].append("None")
        data['colors'].append(losses[counter][-1])
        data['colors_performance'].append(losses[counter][-1])
        data['colors_iteration'].append(c_id[0])

    if not data['config_id']:
        # min()/max() below would fail with a far less helpful ValueError
        raise ValueError("No learning curves with finite losses available to plot.")

    # Tooltips
    hidden_columns = ['times', 'duration', 'colors', 'colors_performance', 'colors_iteration']
    tooltips = [(key, '@' + key) for key in data if key not in hidden_columns]
    tooltips.insert(4, ('duration (sec)', '@duration'))
    tooltips.insert(5, ('Configuration', ' '))
    hover = HoverTool(tooltips=tooltips)

    # Create sources
    source_multiline = ColumnDataSource(data=data)
    # Special source for scattering points, since times and losses for multi_line are nested lists
    scatter_data = {key: [] for key in data}
    for idx in range(len(data['config_id'])):
        for t, l in zip(data['times'][idx], data['losses'][idx]):
            scatter_data['times'].append(t)
            scatter_data['losses'].append(l)
            # Repeat all other columns per point, so every column has equal length
            for key in data:
                if key in ['times', 'losses']:
                    continue
                scatter_data[key].append(data[key][idx])
    source_scatter = ColumnDataSource(data=scatter_data)

    # Color
    final_losses = [loss[-1] for loss in data['losses']]
    min_perf, max_perf = min(final_losses), max(final_losses)
    iterations = [int(i) for i in data['HB_iteration']]
    min_iter, max_iter = min(iterations), max(iterations)
    color_mapper = LinearColorMapper(palette=Spectral11, low=min_perf, high=max_perf)

    # Create plot (log-scale only if all losses are strictly positive)
    y_axis_type = "log" if all(a > 0 for a in scatter_data['losses']) else 'linear'
    x_min, x_max = min(scatter_data['times']), max(scatter_data['times'])
    x_pad = (x_max - x_min) / 10
    x_min -= x_pad
    x_max += x_pad
    y_min, y_max = min(scatter_data['losses']), max(scatter_data['losses'])
    y_pad = (y_max - y_min) / 10
    # because this must not be below 0 if it's a logscale
    y_min -= (y_min / 10) if y_axis_type == 'log' else y_pad
    y_max += y_pad * 10 if y_axis_type == 'log' else y_pad

    p = figure(
        plot_height=500,
        plot_width=600,
        y_axis_type=y_axis_type,
        tools=[hover, 'save', 'pan', 'wheel_zoom', 'box_zoom', 'reset'],
        x_axis_label='Time',
        y_axis_label='Cost',
        x_range=Range1d(x_min, x_max, bounds='auto'),
        y_range=Range1d(y_min, y_max, bounds='auto'),
    )

    # Plot per HB_iteration, each config individually
    HB_iterations = sorted(set(data['HB_iteration']))
    max_label_len = max([len(iteration) for iteration in HB_iterations])
    HB_handles, HB_labels = [], []
    self.logger.debug(
        "Assuming config_info to be either \"model_based_pick=True\" or \"model_based_pick=False\""
    )
    for it in HB_iterations:
        line_handles = []
        view = CDSView(source=source_multiline,
                       filters=[GroupFilter(column_name='HB_iteration', group=str(it))])
        line_handles.append(
            p.multi_line(
                xs='times',
                ys='losses',
                source=source_multiline,
                view=view,
                color={'field': 'colors', 'transform': color_mapper},
                alpha=0.5,
                line_width=5,
            ))
        # Separate modelbased and random
        view = CDSView(source=source_scatter,
                       filters=[
                           GroupFilter(column_name='HB_iteration', group=str(it)),
                           GroupFilter(column_name='config_info', group="model_based_pick=True")
                       ])
        line_handles.append(
            p.circle_x(
                x='times',
                y='losses',
                source=source_scatter,
                view=view,
                fill_color={'field': 'colors', 'transform': color_mapper},
                fill_alpha=0.5,
                line_color='colors',
                size=20,
            ))
        view = CDSView(source=source_scatter,
                       filters=[
                           GroupFilter(column_name='HB_iteration', group=str(it)),
                           GroupFilter(column_name='config_info', group="model_based_pick=False")
                       ])
        line_handles.append(
            p.circle(
                x='times',
                y='losses',
                source=source_scatter,
                view=view,
                fill_color={'field': 'colors', 'transform': color_mapper},
                fill_alpha=0.5,
                line_color='colors',
                size=20,
            ))
        HB_handles.append(line_handles)
        # Zero-pad labels so that sorting them as strings sorts them numerically
        HB_labels.append('warmstart data' if it in [-1, '-1'] else
                         '{number:0{width}d}'.format(width=max_label_len, number=int(it)))

    # Sort all lists according to label
    HB_iterations, HB_handles, HB_labels = zip(*sorted(
        zip(HB_iterations, HB_handles, HB_labels), key=lambda tup: tup[2]))
    HB_iterations, HB_handles, HB_labels = list(HB_iterations), list(HB_handles), list(HB_labels)
    self.logger.debug("HB_iterations to labels: %s",
                      str(list(zip(HB_iterations, HB_labels))))

    chckboxes, select_all, select_none = get_checkbox(
        HB_handles, HB_labels, max_checkbox_length=10)

    # Only names passed via `args` (plus cb_obj) exist inside the JS-code, so the
    # change has to be emitted on both sources explicitly.
    callback_color = CustomJS(args=dict(source_multiline=source_multiline,
                                        source_scatter=source_scatter,
                                        cm=color_mapper),
                              code="""
        var data_multiline = source_multiline.data;
        var data_scatter = source_scatter.data;
        var min_perf = {0};
        var max_perf = {1};
        var min_iter = {2};
        var max_iter = {3};
        if (cb_obj.value == 'performance') {{
            data_multiline['colors'] = data_multiline['colors_performance'];
            data_scatter['colors'] = data_scatter['colors_performance'];
            cm.low = min_perf;
            cm.high = max_perf;
        }} else {{
            data_multiline['colors'] = data_multiline['colors_iteration'];
            data_scatter['colors'] = data_scatter['colors_iteration'];
            cm.low = min_iter;
            cm.high = max_iter;
        }}
        source_multiline.change.emit();
        source_scatter.change.emit();
        """.format(min_perf, max_perf, min_iter, max_iter))

    select_color = Select(title="Select colors",
                          value="performance",
                          options=["performance", "iteration"],
                          callback=callback_color)

    # Put it all together in a layout (width of checkbox-field sizes with number of elements)
    chkbox_width = 650 if len(HB_labels) > 100 else 500 if len(HB_labels) > 70 else 400
    layout = row(
        p,
        column(
            *[
                widgetbox(chkbox, max_width=chkbox_width, width_policy="min")
                for chkbox in chckboxes
            ],
            row(widgetbox(select_all, width=50),
                widgetbox(select_none, width=50)),
            widgetbox(select_color, width=200)))
    return layout