Beispiel #1
0
    def plot(self):
        """
        Build the interactive cost-over-time figure from all trajectory entries.

        If ``self.bohb_results`` is set, one aggregated BOHB line plus one line
        per budget is drawn; otherwise the average over the runs followed by
        the individual runs. Every line is a filtered view on one shared
        ColumnDataSource. The figure is also exported to
        ``os.path.join(self.output_dir, self.output_fn)`` and that path is
        appended to ``self.plots``.

        Returns
        -------
        layout: bokeh-layout
            row with the figure and the checkbox / select-all / select-none widgets

        Raises
        ------
        ValueError
            if a mean, upper or lower value of any line is not finite
        """
        validator, runs, rh = self.validator, self.runs, self.rh
        # Lines with these names are visible right away, all others start hidden
        emphasized = ['average', 'all budgets']

        # Collect the lines to be plotted (their attributes must be zippable)
        if self.bohb_results:
            lines = [self._get_bohb_line(validator, runs, rh)]
            lines.extend(
                self._get_bohb_line(validator, runs, rh, budget)
                for budget in self.bohb_results[0].HB_config['budgets'])
        else:
            lines = [self._get_avg(validator, runs, rh)]
            lines.extend(self._get_all_runs(validator, runs, rh))

        # One column per statistic plus one column per hyperparameter
        hp_names = self.scenario.cs.get_hyperparameter_names()
        data = {key: [] for key in ('name', 'time', 'mean', 'upper', 'lower')}
        data.update((hp, []) for hp in hp_names)
        for line in lines:
            entries = zip(line.time, line.mean, line.upper, line.lower,
                          line.config)
            for time, mean, upper, lower, config in entries:
                if not all(np.isfinite(v) for v in (mean, upper, lower)):
                    self.logger.debug("Why is there a NaN? (%s)", str(line))
                    raise ValueError(
                        "There is a NaN value in your data, this should be filtered out. "
                        "Please report this to github.com/automl/CAVE/issues and provide the "
                        "debug/debug.log and the output of `pip freeze`, if you can."
                    )
                data['name'].append(line.name)
                data['time'].append(time)
                data['mean'].append(mean)
                data['upper'].append(upper)
                data['lower'].append(lower)
                for hp in hp_names:
                    if config and hp in config:
                        value = config[hp]
                    else:
                        value = 'inactive'
                    data[hp].append(value)
        source = ColumnDataSource(data=data)

        # Create the figure
        times = source.data['time']
        x_range = Range1d(min(times), max(times))
        run_obj = self.scenario.run_obj
        objective = 'cost' if run_obj == 'quality' else run_obj
        y_label = 'estimated {}'.format(objective)
        y_axis_type = 'log' if run_obj == 'runtime' else 'linear'
        fig = figure(plot_width=700,
                     plot_height=500,
                     tools=['save', 'pan', 'box_zoom', 'wheel_zoom', 'reset'],
                     x_range=x_range,
                     x_axis_type='log',
                     y_axis_type=y_axis_type,
                     x_axis_label='time (sec)',
                     y_axis_label=y_label,
                     title="Cost over time")

        palette = itertools.cycle(Dark2_5)
        renderers = []  # one list of glyphs per line (nested for the checkbox-code)
        legend_it = []
        for line, color in zip(lines, palette):
            name = line.name
            highlighted = name in emphasized

            # Select the rows of this line from the shared source
            view = CDSView(
                source=source,
                filters=[GroupFilter(column_name='name', group=str(name))])
            glyphs = [
                fig.line('time',
                         'mean',
                         source=source,
                         view=view,
                         line_color=color,
                         visible=highlighted)
            ]
            renderers.append(glyphs)

            # The legend entry shares the glyph list, so a band added below is
            # part of the entry as well
            legend_it.append((name, glyphs))

            if highlighted or 'budget' in name:
                # Uncertainty band: lower bound forwards, upper bound backwards
                band_x = np.append(line.time, line.time[::-1])
                band_y = np.append(line.lower, line.upper[::-1])
                glyphs.append(
                    fig.patch(band_x,
                              band_y,
                              color='#7570B3',
                              fill_alpha=0.2,
                              visible=highlighted))

        # Tooltips (shared by all glyphs)
        # TODO optional: fixed tooltip position / different tooltips per renderer
        tooltips = [("estimated performance", "@mean"), ("at-time", "@time")]
        all_glyphs = list(itertools.chain.from_iterable(renderers))
        fig.add_tools(HoverTool(renderers=all_glyphs, tooltips=tooltips))

        # Checkbox widgets toggling the individual lines
        labels = [label for label, _ in legend_it]
        checkbox, select_all, select_none = get_checkbox(renderers, labels)
        checkbox.active = [0]

        # Tilt tick labels and configure fonts
        fig.xaxis.major_label_orientation = 3 / 4
        fig.xaxis.axis_label_text_font_size = "15pt"
        fig.yaxis.axis_label_text_font_size = "15pt"
        fig.xaxis.major_label_text_font_size = "12pt"
        fig.yaxis.major_label_text_font_size = "12pt"
        fig.title.text_font_size = "15pt"

        legend = Legend(items=legend_it,
                        location='bottom_left',
                        label_text_font_size="8pt")
        legend.click_policy = "hide"
        fig.add_layout(legend, 'right')

        # Arrange figure and widgets
        buttons = row(widgetbox(select_all, width=50),
                      widgetbox(select_none, width=50))
        layout = row(fig, column(widgetbox(checkbox, width=100), buttons))

        # Save png
        output_path = os.path.join(self.output_dir, self.output_fn)
        export_bokeh(fig, output_path, self.logger)
        self.plots.append(output_path)

        return layout
    def plot(self,
             X,
             conf_list: list,
             runs_per_quantile,
             inc_list: list = None,
             contour_data=None,
             time_slider=False):
        """
        Plot the sampled configurations in 2d-space as an interactive
        bokeh-figure. If ``self.output_dir`` is set, one png per quantile and
        one png of the final figure are exported.

        Parameters
        ----------
        X: np.array
            np.array with 2-d coordinates for each configuration
        conf_list: list
            list of ALL configurations in the same order as X
        runs_per_quantile: list[np.array]
            configurator-run to be analyzed, as a np.array with
            the number of target-algorithm-runs per config per quantile.
        inc_list: list
            list of incumbents (Configuration)
        contour_data: list
            contour data (xx,yy,Z)
        time_slider: bool
            whether or not to have a time_slider-widget on cfp-plot
            INCREASES FILE-SIZE DRAMATICALLY

        Returns
        -------
        (script, div): str
            script and div of the bokeh-figure
        over_time_paths: List[str]
            list with paths to the different quantiled timesteps of the
            configurator run (for static evaluation)
        """
        inc_list = inc_list or []
        over_time_paths = []  # development of the search space over time

        config_space = conf_list[0].configuration_space
        hp_names = [hp.name for hp in config_space.get_hyperparameters()]

        # One data source per quantile
        sources = []
        for quantiled_run in runs_per_quantile:
            sources.append(
                self._plot_get_source(conf_list, quantiled_run, X, inc_list,
                                      hp_names))

        # Tooltip content
        # TODO add only important parameters (needs to change order of exec pimp before conf-footprints)
        base_tooltips = [('type', '@type'), ('origin', '@origin'),
                         ('runs', '@runs')]
        hp_tooltips = [(name, '@' + escape_parameter_name(name))
                       for name in hp_names]
        hover = HoverTool(tooltips=base_tooltips + hp_tooltips)

        # Axis ranges with a margin of 1 around the data
        first_dim, second_dim = X[:, 0], X[:, 1]
        x_range = [min(first_dim) - 1, max(first_dim) + 1]
        y_range = [min(second_dim) - 1, max(second_dim) + 1]

        scatter_glyph_render_groups = []
        for idx, source in enumerate(sources):
            # With a timeslider all quantiles share the first figure,
            # otherwise every quantile gets a figure of its own
            if idx == 0 or not time_slider:
                fig = figure(plot_height=500,
                             plot_width=600,
                             tools=[hover, 'save'],
                             x_range=x_range,
                             y_range=y_range)
                if contour_data is not None:
                    fig = self._plot_contour(fig, contour_data, x_range,
                                             y_range)
            views, markers = self._plot_create_views(source)
            self.logger.debug("Plotting quantile %d!", idx)
            glyphs = self._plot_scatter(fig, source, views, markers)
            scatter_glyph_render_groups.append(glyphs)
            if self.output_dir:
                file_path = "cfp_over_time/configurator_footprint{}.png".format(idx)
                png_path = os.path.join(self.output_dir, file_path)
                over_time_paths.append(png_path)
                self.logger.debug("Saving plot to %s", png_path)
                export_bokeh(fig, png_path, self.logger)

        if not time_slider:
            self.logger.debug("Not adding timeslider")
            layout = column(fig)
        else:
            self.logger.debug("Adding timeslider")
            slider = self._plot_get_timeslider(scatter_glyph_render_groups)
            layout = column(fig, widgetbox(slider))

        script, div = components(layout)

        if self.output_dir:
            path = os.path.join(self.output_dir,
                                "content/images/configurator_footprint.png")
            export_bokeh(fig, path, self.logger)

        return (script, div), over_time_paths
Beispiel #3
0
    def plot(self,
             X,
             conf_list: list,
             runs_per_quantile,
             inc_list: list=None,
             contour_data=None,
             use_timeslider=False,
             use_checkbox=True,
             timeslider_labels=None):
        """
        plots sampled configuration in 2d-space;
        uses bokeh for interactive plot
        saves results in self.output_dir, if set

        Parameters
        ----------
        X: np.array
            np.array with 2-d coordinates for each configuration
        conf_list: list
            list of ALL configurations in the same order as X
        runs_per_quantile: list[np.array]
            configurator-run to be analyzed, as a np.array with
            the number of target-algorithm-runs per config per quantile.
        inc_list: list
            list of incumbents (Configuration)
        contour_data: list
            contour data (xx,yy,Z); if None, no contours are plotted and no
            contour-widgets are added to the layout
        use_timeslider: bool
            whether or not to have a time_slider-widget on cfp-plot
            INCREASES FILE-SIZE DRAMATICALLY
        use_checkbox: bool
            have checkboxes to toggle individual runs
        timeslider_labels:
            forwarded as ``slider_labels`` to ``self._get_widgets``

        Returns
        -------
        layout: bokeh-layout
            the figure, combined with the requested widgets
        over_time_paths: List[str]
            list with paths to the different quantiled timesteps of the
            configurator run (for static evaluation)
        """
        if not inc_list:
            inc_list = []
        over_time_paths = []  # development of the search space over time

        hp_names = [k.name for k in  # Hyperparameter names
                    conf_list[0].configuration_space.get_hyperparameters()]

        # bokeh-figure
        x_range = [min(X[:, 0]) - 1, max(X[:, 0]) + 1]
        y_range = [min(X[:, 1]) - 1, max(X[:, 1]) + 1]

        # Get individual sources for quantiles
        sources, used_configs = zip(*[self._plot_get_source(conf_list, quantiled_run, X, inc_list, hp_names)
                                      for quantiled_run in runs_per_quantile])

        # We collect all glyphs in one list
        # Then we have two dicts to identify groups of glyphs (for interactivity)
        # They map the name of the group to a list of indices (of the respective glyphs that are in the group)
        # Those indices refer to the main list of all glyphs
        # This is necessary to enable interactivity for two inputs at the same time
        all_glyphs = []
        overtime_groups = {}
        run_groups = {run: [] for run in self.configs_in_run}

        # Only bound to real values if contours are plotted. Initialising them
        # here keeps the names defined when contour_data is None.
        contour_handles, color_mapper = None, None

        # Iterate over quantiles (this updates overtime_groups)
        for idx, (source, u_cfgs) in enumerate(zip(sources, used_configs)):
            # Create new plot if necessary (only plot all quantiles in one single plot if timeslider is on)
            if not use_timeslider or idx == 0:
                p = self._create_figure(x_range, y_range)
                if contour_data is not None:  # TODO
                    contour_handles, color_mapper = self._plot_contour(p, contour_data, x_range, y_range)

            # Create views and scatter
            views, views_by_run, markers = self._create_views(source, u_cfgs)
            scatter_handles = self._scatter(p, source, views, markers)
            self.logger.debug("Quantile %d: %d scatter-handles", idx, len(scatter_handles))
            if len(scatter_handles) == 0:
                self.logger.debug("No configs in quantile %d (?!)", idx)
                continue

            # Add to groups
            start = len(all_glyphs)
            all_glyphs.extend(scatter_handles)
            overtime_groups[str(idx)] = [str(i) for i in range(start, len(all_glyphs))]
            for run, indices in views_by_run.items():
                run_groups[run].extend([str(start + i) for i in indices])

            # Write to file
            if self.output_dir:
                file_path = "cfp_over_time/configurator_footprint" + str(idx) + ".png"
                over_time_paths.append(os.path.join(self.output_dir, file_path))
                self.logger.debug("Saving plot to %s", over_time_paths[-1])
                export_bokeh(p, over_time_paths[-1], self.logger)

        # Add hovertool (define what appears in tooltips)
        # TODO add only important parameters (needs to change order of exec pimp before conf-footprints)
        hover = HoverTool(tooltips=[('type', '@type'), ('origin', '@origin'), ('runs', '@runs')] +
                                   [(k, '@' + escape_parameter_name(k)) for k in hp_names],
                          renderers=all_glyphs)
        p.add_tools(hover)

        # Build dashboard
        timeslider, checkbox, select_all, select_none, checkbox_title = self._get_widgets(
            all_glyphs, overtime_groups, run_groups, slider_labels=timeslider_labels)

        # Contour-widgets can only be built if contours were actually plotted.
        # Without contour_data there are no handles to pass on.
        contour_widgets = []
        if contour_data is not None:
            contour_checkbox, contour_title = self._contour_radiobuttongroup(contour_handles, color_mapper)
            contour_widgets = [widgetbox(contour_title), widgetbox(contour_checkbox)]

        layout = p
        if use_timeslider:
            self.logger.debug("Adding timeslider")
            layout = column(layout, widgetbox(timeslider))
        if use_checkbox:
            self.logger.debug("Adding checkboxes")
            layout = row(layout,
                         column(widgetbox(checkbox_title),
                                widgetbox(checkbox),
                                row(widgetbox(select_all, width=100),
                                    widgetbox(select_none, width=100)),
                                *contour_widgets))

        if self.output_dir:
            path = os.path.join(self.output_dir, "content/images/configurator_footprint.png")
            export_bokeh(p, path, self.logger)

        return layout, over_time_paths
    def plot_interactive_footprint(self):
        """Create an interactive algorithm footprint with bokeh.

        Instances are scattered at their 2d-feature coordinates and colored
        blue or red depending on whether their index is in the "good" set
        returned by ``self._get_good_bad`` for the selected configuration. Two
        radio-button groups switch between default/incumbent coloring and
        between showing all, only train or only test instances. If
        ``self.output_dir`` is set, the figure is also exported as png.

        Returns
        -------
        layout: bokeh-layout
            column with the figure and a row holding both radio-button groups
        """
        features = np.array(self.features_2d)
        instances = self.insts
        runhistory = self.rh
        # Invert the mapping to look up the configurations by their name
        config_by_name = {name: config for config, name in self.algo_name.items()}
        incumbent = config_by_name['incumbent']
        default = config_by_name['default']

        source = ColumnDataSource(data={'x': features[:, 0], 'y': features[:, 1]})

        # Add all necessary information for incumbent and default
        source.add(instances, 'instance_name')
        train_names = self.train_feats.keys()
        instance_set = []
        for inst in instances:
            instance_set.append('train' if inst in train_names else 'test')
        source.add(instance_set, 'instance_set')  # train or test

        for config, name in ((incumbent, 'incumbent'), (default, 'default')):
            cost = get_cost_dict_for_config(runhistory, config)
            source.add([cost[inst] for inst in instances], '{}_cost'.format(name))
            # TODO should be in function
            good, _ = self._get_good_bad(config)
            color = [
                'blue' if idx in good else 'red'
                for idx, _ in enumerate(instances)
            ]
            # TODO end
            self.logger.debug("%s colors: %s", name, str(color))
            source.add(color, '{}_color'.format(name))
        # The coloring of the default is shown initially
        source.add(source.data['default_color'], 'color')

        # Define what appears in tooltips
        hover = HoverTool(tooltips=[
            ('instance name', '@instance_name'),
            ('def cost', '@default_cost'),
            ('inc_cost', '@incumbent_cost'),
            ('set', '@instance_set'),
        ])

        # Radio-button to switch between default- and incumbent-coloring
        def_inc_callback = CustomJS(
            args={'source': source},
            code="""
            var data = source.data;
            if (cb_obj.active == 0) {
                data['color'] = data['default_color'];
            } else {
                data['color'] = data['incumbent_color'];
            }
            source.change.emit();
            """)
        def_inc_radio_button = RadioButtonGroup(labels=["default", "incumbent"],
                                                active=0,
                                                callback=def_inc_callback)

        # Plot, with a margin of 1 around the features
        first_dim, second_dim = features[:, 0], features[:, 1]
        x_range = DataRange1d(bounds='auto',
                              start=min(first_dim) - 1,
                              end=max(first_dim) + 1)
        y_range = DataRange1d(bounds='auto',
                              start=min(second_dim) - 1,
                              end=max(second_dim) + 1)
        fig = figure(
            plot_height=500,
            plot_width=600,
            tools=[hover, 'save', 'wheel_zoom', 'box_zoom', 'pan', 'reset'],
            active_drag='box_zoom',
            x_range=x_range,
            y_range=y_range)

        # Scatter train and test individually to toggle them
        train_view = CDSView(
            source=source,
            filters=[GroupFilter(column_name='instance_set', group='train')])
        test_view = CDSView(
            source=source,
            filters=[GroupFilter(column_name='instance_set', group='test')])
        train = fig.scatter(x='x', y='y', source=source, view=train_view,
                            color='color')
        test = fig.scatter(x='x', y='y', source=source, view=test_view,
                           color='color')
        fig.xaxis.axis_label = 'principal component 1'
        fig.yaxis.axis_label = 'principal component 2'
        fig.xaxis.axis_label_text_font_size = "15pt"
        fig.yaxis.axis_label_text_font_size = "15pt"

        # Radio-button to show all / only train / only test instances
        # (the callback receives the scatter-renderers, not the CDSViews)
        train_test_callback = CustomJS(
            args={'source': source, 'train_view': train, 'test_view': test},
            code="""
            var data = source.data;
            if (cb_obj.active == 0) {
                train_view.visible = true;
                test_view.visible = true;
            } else if (cb_obj.active == 1) {
                train_view.visible = true;
                test_view.visible = false;
            } else {
                train_view.visible = false;
                test_view.visible = true;
            }
            """)
        train_test_radio_button = RadioButtonGroup(labels=["all", "train", "test"],
                                                   active=0,
                                                   callback=train_test_callback)

        # Export and return
        if self.output_dir:
            path = os.path.join(self.output_dir,
                                "content/images/algorithm_footprint.png")
            export_bokeh(fig, path, self.logger)

        buttons = row(widgetbox(def_inc_radio_button),
                      widgetbox(train_test_radio_button))
        return column(fig, buttons)
Beispiel #5
0
    def plot(self,
             rh: RunHistory,
             runs: List[ConfiguratorRun],
             output_fn: str = "performance_over_time.png",
             validator: Union[None, Validator] = None):
        """ Plot performance over time, using all trajectory entries.

            With more than one run, the median over the runs is plotted as
            the line and the 25%- and 75%-percentiles as the uncertainty band.
            With a single run, the band collapses onto the line. The figure is
            exported as png to ``os.path.join(self.output_dir, output_fn)``.

            Parameters
            ----------
            rh: RunHistory
                runhistory to use
            runs: List[SMACrun]
                list of configurator-runs
            output_fn: str
                path to output-png (relative to self.output_dir)
            validator: Union[None, Validator]
                forwarded to ``self._get_mean_var_time``
                (presumably used to estimate costs -- TODO confirm)

            Returns
            -------
            script, div: str
                script and div of the bokeh-figure
        """

        self.logger.debug(
            "Estimating costs over time for %d runs, save png in %s.",
            len(runs), output_fn)

        if len(runs) > 1:
            # If there is more than one run, we aggregate over the runs.
            # The plot only counts as validated if every single run is.
            validated = True
            means, times, all_times = [], [], []
            for run in runs:
                validated = validated and bool(run.validated_runhistory)
                # Ignore variances as we plot the spread over runs
                mean, _, time = self._get_mean_var_time(
                    validator, run.traj, not run.validated_runhistory, rh)
                means.append(mean.flatten())
                times.append(time)
                all_times.extend(time)
            # means/times stay plain lists: trajectories may differ in length
            # and a ragged np.array is rejected by recent numpy versions
            all_times = np.array(sorted(all_times))
            # keep track at which timestep each trajectory is
            at = [0] * len(runs)
            # latest known value of each trajectory (NaN until its first entry)
            latest = [np.nan] * len(runs)
            mean = np.full((len(all_times), 1), -1.0)
            upper = np.full((len(all_times), 1), -1.0)
            lower = np.full((len(all_times), 1), -1.0)
            for time_idx, t in enumerate(all_times):
                for traj_idx, entry_idx in enumerate(at):
                    try:
                        if t == times[traj_idx][entry_idx]:
                            latest[traj_idx] = means[traj_idx][entry_idx]
                            at[traj_idx] += 1
                    except IndexError:
                        pass  # Reached the end of one trajectory. No need to check it further
                (lower[time_idx][0],
                 mean[time_idx][0],
                 upper[time_idx][0]) = np.nanpercentile(latest, [25, 50, 75])
            time = all_times
        else:  # no new statistics computation necessary
            validated = bool(runs[0].validated_runhistory)
            mean, _, time = self._get_mean_var_time(validator, runs[0].traj,
                                                    not validated, rh)
            upper = lower = mean

        # Copy the bounds: the lower bound is clipped in place below. In the
        # single-run case it would otherwise alias `mean`, `upper` and the
        # array returned by the helper.
        uncertainty_upper = upper[:, 0].copy()
        uncertainty_lower = lower[:, 0].copy()
        mean = mean[:, 0]

        clip_y_lower = False
        if self.scenario.run_obj == 'runtime':  # y-axis on log -> clip plot
            # Determine clipping point from lowest legal value
            clip_y_lower = min(
                list(uncertainty_lower[uncertainty_lower > 0]) +
                list(mean)) * 0.8
            uncertainty_lower[uncertainty_lower <= 0] = clip_y_lower * 0.9

        # For the step-effect every point is doubled and arranged as
        # [(t0, v0), (t1, v0), (t1, v1), ... (tn, vn-1)]
        time_double = [t for sub in zip(time, time) for t in sub][1:-1]
        mean_double = [m for sub in zip(mean, mean) for m in sub][:-2]
        source = ColumnDataSource(
            data=dict(x=time_double, y=mean_double, epm_perf=mean_double))

        hover = HoverTool(tooltips=[("performance",
                                     "@epm_perf"), ("at-time", "@x")])

        p = figure(plot_width=700,
                   plot_height=500,
                   tools=[hover, 'save'],
                   x_range=Range1d(max(min(time), 1), max(time)),
                   x_axis_type='log',
                   y_axis_type='log'
                   if self.scenario.run_obj == 'runtime' else 'linear',
                   title="Cost over time")

        if clip_y_lower:
            p.y_range = Range1d(clip_y_lower, 1.2 * max(uncertainty_upper))

        # Plot
        label = self.scenario.run_obj
        label = '{}{}'.format('validated ' if validated else 'estimated ',
                              label)
        p.line('x', 'y', source=source, legend=label)

        # Fill area (uncertainty), doubled like the line for the step-effect
        uncertainty_lower_double = [
            u for sub in zip(uncertainty_lower, uncertainty_lower) for u in sub
        ][:-2]
        uncertainty_upper_double = [
            u for sub in zip(uncertainty_upper, uncertainty_upper) for u in sub
        ][:-2]
        band_x = np.append(time_double, time_double[::-1])
        band_y = np.append(uncertainty_lower_double,
                           uncertainty_upper_double[::-1])
        p.patch(band_x, band_y, color='#7570B3', fill_alpha=0.2)

        # Tilt tick labels
        p.xaxis.major_label_orientation = 3 / 4

        p.legend.location = "top_right"

        p.xaxis.axis_label = "time (sec)"
        p.yaxis.axis_label = label
        p.xaxis.axis_label_text_font_size = "15pt"
        p.yaxis.axis_label_text_font_size = "15pt"
        p.xaxis.major_label_text_font_size = "12pt"
        p.yaxis.major_label_text_font_size = "12pt"
        p.title.text_font_size = "15pt"
        p.legend.label_text_font_size = "15pt"

        script, div = components(p)

        export_bokeh(p, os.path.join(self.output_dir, output_fn), self.logger)

        return script, div