Example #1
 def _permutation_test(self,
                       epm_rh,
                       default,
                       incumbent,
                       num_permutations,
                       par=1):
     if par != 1 and not self.scenario.cutoff:
         return np.nan
     cutoff = self.scenario.cutoff
     def_cost = get_cost_dict_for_config(epm_rh,
                                         default,
                                         par=par,
                                         cutoff=cutoff)
     inc_cost = get_cost_dict_for_config(epm_rh,
                                         incumbent,
                                         par=par,
                                         cutoff=cutoff)
     data1, data2 = zip(*[(def_cost[i], inc_cost[i])
                          for i in def_cost.keys()])
     p = paired_permutation(data1,
                            data2,
                            self.rng,
                            num_permutations=num_permutations,
                            logger=self.logger)
     self.logger.debug(
         "p-value for def/inc-difference: %f (permutation test "
         "with %d permutations and par %d)", p, num_permutations, par)
     return p
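The `paired_permutation` helper above is CAVE-internal. As a hedged illustration of what such a test computes, the following standalone sketch (not CAVE's implementation) estimates a two-sided p-value for the mean paired difference by random sign flips:

import numpy as np

def paired_permutation_sketch(data1, data2, rng, num_permutations=10000):
    # Paired differences between default and incumbent costs per instance
    diffs = np.asarray(data1, dtype=float) - np.asarray(data2, dtype=float)
    observed = abs(diffs.mean())
    hits = 0
    for _ in range(num_permutations):
        # Randomly swap which configuration "won" on each instance
        signs = rng.choice([-1, 1], size=len(diffs))
        if abs((signs * diffs).mean()) >= observed:
            hits += 1
    return hits / num_permutations

# e.g. paired_permutation_sketch(data1, data2, np.random.RandomState(42))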
Example #2
    def plot_cdf_compare(self, default: Configuration,
                         incumbent: Configuration, rh: RunHistory):
        """
        Plot the cumulative distribution functions for the given configurations;
        the plots share the y-axis and, if desired, the x-axis.
        Saves plot to file.

        Parameters
        ----------
        default, incumbent: Configuration
            configurations to be compared
        rh: RunHistory
            runhistory to use for cost-estimations

        Returns
        -------
        output_fns: List[str]
            list with paths to generated plots
        """
        out_fn = os.path.join(self.output_dir, 'cdf')
        self.logger.info("... plotting eCDF")
        self.logger.debug("Plot CDF to %s_[train|test].png", out_fn)

        timeout = self.scenario.cutoff

        def prepare_data(x_data):
            """ Helper function to keep things easy, generates y_data and manages x_data-timeouts """
            x_data = sorted(x_data)
            y_data = np.array(range(len(x_data))) / (len(x_data) - 1)
            for idx in range(len(x_data)):
                if (timeout is not None) and (x_data[idx] >= timeout):
                    x_data[idx] = timeout
                    y_data[idx] = y_data[idx - 1]
            return (x_data, y_data)

        # Generate y_data
        def_costs = get_cost_dict_for_config(rh, default).items()
        inc_costs = get_cost_dict_for_config(rh, incumbent).items()
        train, test = self.scenario.train_insts, self.scenario.test_insts

        output_fns = []

        for insts, name in [(train, 'train'), (test, 'test')]:
            if insts == [None]:
                self.logger.debug("No %s instances, skipping cdf", name)
                continue
            data = [
                prepare_data(np.array([v for k, v in costs if k in insts]))
                for costs in [def_costs, inc_costs]
            ]
            x, y = (data[0][0], data[1][0]), (data[0][1], data[1][1])
            labels = ['default ' + name, 'incumbent ' + name]
            output_fns.append(
                plot_cdf(x,
                         y,
                         labels,
                         timeout=self.scenario.cutoff,
                         out_fn=out_fn + '_{}.png'.format(name)))

        return output_fns
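For readers without CAVE's `plot_cdf` helper, a minimal matplotlib sketch of the same idea (an empirical CDF in which runs at or above the cutoff are capped and not counted as solved) could look like this; the names here are illustrative, not CAVE's API:

import numpy as np
import matplotlib.pyplot as plt

def ecdf_with_timeout(costs, timeout=None):
    x = np.sort(np.asarray(costs, dtype=float))
    y = np.arange(len(x)) / max(len(x) - 1, 1)
    if timeout is not None:
        for idx in range(len(x)):
            if x[idx] >= timeout:
                x[idx] = timeout
                # Timed-out runs do not increase the fraction of solved instances
                y[idx] = y[idx - 1] if idx > 0 else 0.0
    return x, y

x, y = ecdf_with_timeout([3.2, 5.1, 7.9, 10.0], timeout=10.0)
plt.step(x, y, where='post', label='default train')
plt.xlabel('cost')
plt.ylabel('fraction of instances')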
Example #3
    def __init__(self,
                 original_rh,
                 validated_rh,
                 default,
                 incumbent,
                 train_test,
                 scenario,
                 validator,
                 output,
                 max_pimp_samples,
                 fanova_pairwise=True):
        """
        Parameters
        ----------
        original_rh: RunHistory
            runhistory containing all runs that have actually been run
        validated_rh: RunHistory
            runhistory containing all runs from original_rh + estimates for
            default and all incumbents for all instances
        default, incumbent: Configuration
            default and overall incumbent
        train_test: bool
            whether a distinction between train and test is made (in cdf and scatter)
        scenario: Scenario
            the scenario object
        validator: Validator
            validator object (to estimate using EPM)
        output: string
            output-directory
        """
        self.logger = logging.getLogger("cave.analyzer")

        # Important objects for analysis
        self.original_rh = original_rh
        self.validated_rh = validated_rh
        self.default = default
        self.incumbent = incumbent
        self.train_test = train_test
        self.scenario = scenario
        self.validator = validator
        self.pimp = None  # PIMP object for reuse
        self.feat_analysis = None  # feat_analysis object for reuse
        self.evaluators = []
        self.output = output

        self.importance = None  # Used to store dictionary containing parameter
        # importances, so it can be used by analysis
        self.feat_importance = None  # Used to store dictionary w feat_imp

        conf1_runs = get_cost_dict_for_config(self.validated_rh, self.default)
        conf2_runs = get_cost_dict_for_config(self.validated_rh,
                                              self.incumbent)
        self.plotter = Plotter(self.scenario,
                               self.train_test,
                               conf1_runs,
                               conf2_runs,
                               output=self.output)
        self.max_pimp_samples = max_pimp_samples
        self.fanova_pairwise = fanova_pairwise
Example #4
 def _paired_t_test(self, epm_rh, default, incumbent, num_permutations):
     def_cost, inc_cost = get_cost_dict_for_config(
         epm_rh, default), get_cost_dict_for_config(epm_rh, incumbent)
     data1, data2 = zip(*[(def_cost[i], inc_cost[i])
                          for i in def_cost.keys()])
     p = paired_t_student(data1, data2, logger=self.logger)
     self.logger.debug("p-value for def/inc-difference: %f (paired t-test)",
                       p)
     return p
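`paired_t_student` is CAVE's own helper; as a hedged alternative, SciPy's paired t-test yields a comparable p-value on the same per-instance cost pairs (assuming SciPy is available):

from scipy import stats

# data1/data2 are the per-instance costs of default and incumbent, as above
t_stat, p_value = stats.ttest_rel(data1, data2)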
Example #5
    def __init__(self, default: Configuration, incumbent: Configuration,
                 rh: RunHistory, train: List[str],
                 test: Union[List[str], None],
                 run_obj: str, cutoff, output_dir: str):
        """
        Creates a scatterplot of the two configurations on the given set of instances.
        Saves plot to file.

        Parameters
        ----------
        default, incumbent: Configuration
            configurations to be compared
        rh: RunHistory
            runhistory to use for cost-estimations
        output_dir: str
            output directory

        Returns
        -------
        output_fns: List[str]
            list with paths to generated plots
        """
        self.logger = logging.getLogger(self.__module__ + '.' +
                                        self.__class__.__name__)

        out_fn_base = os.path.join(output_dir, 'scatter_')
        self.logger.info("... plotting scatter")
        self.logger.debug("Plot scatter to %s[train|test].png", out_fn_base)

        metric = run_obj
        timeout = cutoff
        labels = ["default {}".format(run_obj), "incumbent {}".format(run_obj)]

        def_costs = get_cost_dict_for_config(rh, default).items()
        inc_costs = get_cost_dict_for_config(rh, incumbent).items()

        out_fns = []
        for insts, name in [(train, 'train'), (test, 'test')]:
            if insts == [None]:
                self.logger.debug("No %s instances, skipping scatter", name)
                continue
            default = np.array([v for k, v in def_costs if k in insts])
            incumbent = np.array([v for k, v in inc_costs if k in insts])
            min_val = min(min(default), min(incumbent))
            out_fn = out_fn_base + name + '.png'
            out_fns.append(
                plot_scatter_plot((default, ), (incumbent, ),
                                  labels,
                                  metric=metric,
                                  min_val=min_val,
                                  max_val=timeout,
                                  out_fn=out_fn))
        self.output_fns = out_fns
Example #6
    def _plot_ecdf(self, default: Configuration, incumbent: Configuration,
                   rh: RunHistory, train: List[str], test: List[str], cutoff,
                   output_dir: str):
        """
        Parameters
        ----------
        default, incumbent: Configuration
            configurations to be compared
        rh: RunHistory
            runhistory to use for cost-estimations
        train, test: List[str]
            lists with corresponding instances
        cutoff: Union[None, int]
            cutoff for target algorithms, if set
        output_dir: str
            directory to save plots in
        """
        out_fn_base = os.path.join(output_dir, 'cdf')
        self.logger.info("... plotting eCDF")

        def prepare_data(x_data):
            """ Helper function to keep things easy, generates y_data and manages x_data-timeouts """
            x_data = sorted(x_data)
            y_data = np.array(range(len(x_data))) / (len(x_data) - 1)
            for idx in range(len(x_data)):
                if (cutoff is not None) and (x_data[idx] >= cutoff):
                    x_data[idx] = cutoff
                    y_data[idx] = y_data[idx - 1]
            return (x_data, y_data)

        # Generate y_data
        def_costs = get_cost_dict_for_config(rh, default).items()
        inc_costs = get_cost_dict_for_config(rh, incumbent).items()

        output_fns = []
        if len(train) <= 1 and len(test) <= 1:
            raise NotApplicable("No instances, so no eCDF-plot.")
        for insts, name in [(train, 'train'), (test, 'test')]:
            if len(insts) <= 1:
                self.logger.debug("No %s instances, skipping cdf", name)
                continue
            data = [
                prepare_data(np.array([v for k, v in costs if k in insts]))
                for costs in [def_costs, inc_costs]
            ]
            x, y = (data[0][0], data[1][0]), (data[0][1], data[1][1])
            labels = ['default ' + name, 'incumbent ' + name]
            out_fn = out_fn_base + '_{}.png'.format(name)
            output_fns.append(
                plot_cdf(x, y, labels, timeout=cutoff, out_fn=out_fn))
            self.logger.debug("Plotted eCDF to %s", out_fn)
        return {'figure': output_fns if len(output_fns) > 0 else None}
Example #7
    def plot_scatter(self, default: Configuration, incumbent: Configuration,
                     rh: RunHistory):
        """
        Creates a scatterplot of the two configurations on the given set of
        instances.
        Saves plot to file.

        Parameters
        ----------
        default, incumbent: Configuration
            configurations to be compared
        rh: RunHistory
            runhistory to use for cost-estimations

        Returns
        -------
        output_fns: List[str]
            list with paths to generated plots
        """
        out_fn_base = os.path.join(self.output_dir, 'scatter_')
        self.logger.info("... plotting scatter")
        self.logger.debug("Plot scatter to %s[train|test].png", out_fn_base)

        metric = self.scenario.run_obj
        timeout = self.scenario.cutoff
        labels = [
            "default {}".format(self.scenario.run_obj),
            "incumbent {}".format(self.scenario.run_obj)
        ]

        def_costs = get_cost_dict_for_config(rh, default).items()
        inc_costs = get_cost_dict_for_config(rh, incumbent).items()
        train, test = self.scenario.train_insts, self.scenario.test_insts

        out_fns = []
        for insts, name in [(train, 'train'), (test, 'test')]:
            if insts == [None]:
                self.logger.debug("No %s instances, skipping scatter", name)
                continue
            default = np.array([v for k, v in def_costs if k in insts])
            incumbent = np.array([v for k, v in inc_costs if k in insts])
            min_val = min(min(default), min(incumbent))
            out_fn = out_fn_base + name + '.png'
            out_fns.append(
                plot_scatter_plot((default, ), (incumbent, ),
                                  labels,
                                  metric=metric,
                                  min_val=min_val,
                                  max_val=timeout,
                                  out_fn=out_fn))
        return out_fns
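`plot_scatter_plot` is the plotting helper imported by these examples; a minimal matplotlib sketch of the same default-vs-incumbent scatter (illustrative names, not CAVE's API) is:

import matplotlib.pyplot as plt

# default_costs / incumbent_costs: per-instance cost arrays, as built above
fig, ax = plt.subplots()
ax.scatter(default_costs, incumbent_costs, s=15)
lims = [min(default_costs.min(), incumbent_costs.min()),
        max(default_costs.max(), incumbent_costs.max())]
ax.plot(lims, lims, 'k--', linewidth=1)  # diagonal: equal performance
ax.set_xscale('log')  # log axes are common for runtime objectives
ax.set_yscale('log')
ax.set_xlabel('default cost')
ax.set_ylabel('incumbent cost')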
Example #8
    def _plot_scatter(self,
                      default: Configuration,
                      incumbent: Configuration,
                      rh: RunHistory,
                      train: List[str],
                      test: Union[List[str], None],
                      run_obj: str,
                      cutoff,
                      output_dir):
        """
        Parameters
        ----------
        default, incumbent: Configuration
            configurations to be compared
        rh: RunHistory
            runhistory to use for cost-estimations
        train[, test]: list(str)
            instance-names
        run_obj: str
            run-objective (time or quality)
        cutoff: float
            maximum runtime of ta
        output_dir: str
            output directory
        """
        out_fn_base = os.path.join(output_dir, 'scatter_')
        self.logger.info("... plotting scatter")

        metric = run_obj
        timeout = cutoff
        labels = ["default {}".format(run_obj), "incumbent {}".format(run_obj)]

        def_costs = get_cost_dict_for_config(rh, default).items()
        inc_costs = get_cost_dict_for_config(rh, incumbent).items()

        out_fns = []
        if len(train) <= 1 and len(test) <= 1:
            raise NotApplicable("No instances, so no scatter-plot.")
        for insts, name in [(train, 'train'), (test, 'test')]:
            if len(insts) <= 1:
                self.logger.debug("No %s instances, skipping scatter", name)
                continue
            default = np.array([v for k, v in def_costs if k in insts])
            incumbent = np.array([v for k, v in inc_costs if k in insts])
            min_val = min(min(default), min(incumbent))
            out_fn = out_fn_base + name + '.png'
            out_fns.append(plot_scatter_plot((default,), (incumbent,), labels, metric=metric,
                           min_val=min_val, max_val=timeout, out_fn=out_fn))
            self.logger.debug("Plotted scatter to %s", out_fn)
        return {'figure' : out_fns if len(out_fns) > 0 else None}
Example #9
    def get_oracle(self, instances, rh):
        """Estimation of oracle performance. Collects best performance seen for each instance in any run.

        Parameters
        ----------
        instances: List[str]
            list of instances in question
        rh: RunHistory or List[RunHistory]
            runhistory or list of runhistories (will be combined)

        Returns
        -------
        oracle: dict[str->float]
            best seen performance per instance {inst : performance}
        """
        if isinstance(rh, list):
            rh = combine_runhistories(rh)
        self.logger.debug("Calculating oracle performance")
        oracle = {}
        for c in rh.get_all_configs():
            costs = get_cost_dict_for_config(rh, c)
            for i in costs.keys():
                if i not in oracle:
                    oracle[i] = costs[i]
                elif oracle[i] > costs[i]:
                    oracle[i] = costs[i]
        return oracle
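A hedged usage sketch (the `analyzer`, `rh` and `incumbent` names are assumed to be in scope): the oracle dict can be used to compute a per-instance regret for a single configuration, i.e. its gap to the best performance observed in any run.

inc_costs = get_cost_dict_for_config(rh, incumbent)
oracle = analyzer.get_oracle(list(inc_costs.keys()), rh)

# Gap between the incumbent and the best cost seen anywhere, per instance
regret = {inst: inc_costs[inst] - oracle[inst] for inst in inc_costs}
mean_regret = sum(regret.values()) / len(regret)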
Example #10
 def get_performance(self, algorithm, instance):
     """
     Return performance according to (possibly EPM-)validated runhistory.
     """
     if algorithm not in self.algo_performance:
         self.algo_performance[algorithm] = get_cost_dict_for_config(self.rh, algorithm)
     return self.algo_performance[algorithm][instance]
Example #11
    def create_table(self, incumbents, budget_names, epm_rhs):
        """Create table.

        Parameters
        ----------
        incumbents: List[Configuration]
            incumbents per budget, assuming ascending order
        budget_names: List[str]
            budget-names as strings
        epm_rhs: List[RunHistory]
            estimated runhistories for budgets, same length and order as incumbents"""
        self.logger.info("... create performance table")
        if not (len(incumbents) == len(epm_rhs)
                and len(incumbents) == len(budget_names)):
            raise ValueError(
                "Number of incumbents must equal number of names and runhistories"
            )

        budget_names = [b.split('/')[-1] for b in budget_names]
        dec_place = 3

        # Get costs
        costs = []
        for inc, epm_rh in zip(incumbents, epm_rhs):
            cost_dict_inc = get_cost_dict_for_config(epm_rh, inc)
            costs.append(np.mean([float(v) for v in cost_dict_inc.values()]))

        keys = [
            k for k in incumbents[0].keys()
            if any([inc[k] for inc in incumbents])
        ]
        values = []
        for inc, c in zip(incumbents, costs):
            new_values = [
                inc[k] if inc[k] is not None else "inactive" for k in keys
            ]
            new_values.append(str(round(c, dec_place)))
            values.append(new_values)

        keys.append('Cost')
        table = list(zip(keys, *values))
        keys, table = [k[0] for k in table], [k[1:] for k in table]
        self.table = df = DataFrame(data=table,
                                    columns=budget_names,
                                    index=keys)
        self.html_table = df.to_html()
Example #12
    def _get_cost(self, algorithm, instance=None):
        """
        Return cost according to (possibly EPM-)validated runhistory.

        Parameters
        ----------
        algorithm: Configuration
            config
        instance: str
            instance name
        """
        if not hasattr(self, '__algo_cost'):
            self.__algo_cost = {
            }  # Use function self._get_cost!! Maps algo -> {instance -> cost}
        if algorithm not in self.__algo_cost:
            #self.logger.debug("Getting cost for %s, using PAR1-score", self.algo_name[algorithm])
            self.__algo_cost[algorithm] = get_cost_dict_for_config(
                self.rh, algorithm)
        if instance:
            return self.__algo_cost[algorithm][instance]
        else:
            return self.__algo_cost[algorithm]
Example #13
    def get_parX(self, config, par=10):
        """Calculate parX-values of default and incumbent configs.
        First, determine PAR-timeouts for each run on each instance;
        second, average over train/test if available, else just average.

        Parameters
        ----------
        config: Configuration
            config to be calculated
        par: int
            par-factor to use

        Returns
        -------
        (train, test) OR average -- tuple<float, float> OR float
            PAR10 values for train- and test-instances, if available as tuple
            else the general average
        """
        runs = get_cost_dict_for_config(self.validated_rh, config)
        # Penalize
        if self.scenario.cutoff:
            runs = [(k, runs[k]) if runs[k] < self.scenario.cutoff else
                    (k, self.scenario.cutoff * par) for k in runs]
        else:
            runs = [(k, runs[k]) for k in runs]
            self.logger.info("Calculating penalized average runtime without "
                             "cutoff...")

        # Average
        if self.train_test:
            train = np.mean(
                [c for i, c in runs if i in self.scenario.train_insts])
            test = np.mean(
                [c for i, c in runs if i in self.scenario.test_insts])
            return (train, test)
        else:
            return np.mean([c for i, c in runs])
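The PAR-X penalization used above can be written as a small standalone function (a sketch with illustrative names, not CAVE's API): costs at or above the cutoff are replaced by cutoff * par before averaging.

import numpy as np

def parx(cost_per_instance, cutoff, par=10):
    penalized = [c if c < cutoff else cutoff * par
                 for c in cost_per_instance.values()]
    return np.mean(penalized)

# parx({'inst1': 3.0, 'inst2': 300.0}, cutoff=300.0, par=10)  # -> 1501.5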
Example #14
    def create_performance_table(self, default, incumbent, epm_rh, oracle):
        """Create table, compare default against incumbent on train-,
        test- and combined instances. Listing PAR10, PAR1 and timeouts.
        Distinguishes between train and test, if available."""
        self.logger.info("... create performance table")
        cost_dict_def = get_cost_dict_for_config(epm_rh, default)
        cost_dict_inc = get_cost_dict_for_config(epm_rh, incumbent)

        def_par1, inc_par1 = self.get_parX(cost_dict_def, 1), self.get_parX(cost_dict_inc, 1)
        def_par10, inc_par10 = self.get_parX(cost_dict_def, 10), self.get_parX(cost_dict_inc, 10)
        ora_par1, ora_par10 = self.get_parX(oracle, 1), self.get_parX(oracle, 10)

        def_timeouts = get_timeout(epm_rh, default, self.scenario.cutoff)
        inc_timeouts = get_timeout(epm_rh, incumbent, self.scenario.cutoff)
        def_timeouts_tuple = self.timeouts_to_tuple(def_timeouts)
        inc_timeouts_tuple = self.timeouts_to_tuple(inc_timeouts)
        if self.scenario.cutoff:
            ora_timeout = self.timeouts_to_tuple(
                {i: c < self.scenario.cutoff
                 for i, c in oracle.items()})
            data1, data2 = zip(*[(int(def_timeouts[i]), int(inc_timeouts[i]))
                                 for i in def_timeouts.keys()])
            p_value_timeouts = "%.5f" % paired_permutation(
                data1,
                data2,
                self.rng,
                num_permutations=10000,
                logger=self.logger)
        else:
            ora_timeout = self.timeouts_to_tuple({})
            p_value_timeouts = "N/A"
        # p-values (paired permutation)
        p_value_par10 = self._permutation_test(epm_rh, default, incumbent,
                                               10000, 10)
        p_value_par10 = "%.5f" % p_value_par10 if np.isfinite(
            p_value_par10) else 'N/A'
        p_value_par1 = self._permutation_test(epm_rh, default, incumbent,
                                              10000, 1)
        p_value_par1 = "%.5f" % p_value_par1 if np.isfinite(
            p_value_par1) else 'N/A'

        dec_place = 3

        metrics = []
        if self.scenario.run_obj == 'runtime':
            metrics.append('PAR10')
            metrics.append('PAR1')
        else:
            metrics.append('Quality')
        if self.scenario.cutoff:
            metrics.append('Timeouts')

        train, test = len(self.scenario.train_insts) > 1, len(
            self.scenario.test_insts) > 1
        oracle = train or test  # oracle only makes sense with instances
        # Create table
        array = []
        if 'PAR10' in metrics:
            if train and test:
                values = [
                    def_par10[0], inc_par10[0], ora_par10[0], def_par10[1],
                    inc_par10[1], ora_par10[1]
                ]
            elif oracle:
                values = [def_par10, inc_par10,
                          ora_par10]  # oracle only with instances
            else:
                values = [def_par10, inc_par10]
            values = [
                round(value, dec_place) if np.isfinite(value) else 'N/A'
                for value in values
            ]
            if train or test:
                values.append(p_value_par10)
            array.append(values)
        if 'PAR1' in metrics or 'Quality' in metrics:
            if train and test:
                values = [
                    def_par1[0], inc_par1[0], ora_par1[0], def_par1[1],
                    inc_par1[1], ora_par1[1]
                ]
            elif oracle:
                values = [def_par1, inc_par1,
                          ora_par1]  # oracle only with instances
            else:
                values = [def_par1, inc_par1]
            values = [
                round(value, dec_place) if np.isfinite(value) else 'N/A'
                for value in values
            ]
            if train or test:
                values.append(p_value_par1)
            array.append(values)
        if 'Timeouts' in metrics:
            if train and test:
                values = [
                    "{}/{}".format(def_timeouts_tuple[0][0],
                                   def_timeouts_tuple[0][1]),
                    "{}/{}".format(inc_timeouts_tuple[0][0],
                                   inc_timeouts_tuple[0][1]),
                    "{}/{}".format(ora_timeout[0][0], ora_timeout[0][1]),
                    "{}/{}".format(def_timeouts_tuple[1][0],
                                   def_timeouts_tuple[1][1]),
                    "{}/{}".format(inc_timeouts_tuple[1][0],
                                   inc_timeouts_tuple[1][1]),
                    "{}/{}".format(ora_timeout[1][0], ora_timeout[1][1]),
                ]
            elif oracle:
                values = [
                    "{}/{}".format(def_timeouts_tuple[0],
                                   def_timeouts_tuple[1]),
                    "{}/{}".format(inc_timeouts_tuple[0],
                                   inc_timeouts_tuple[1]),
                    "{}/{}".format(ora_timeout[0], ora_timeout[1])
                ]
            else:
                values = [
                    "{}/{}".format(def_timeouts_tuple[0],
                                   def_timeouts_tuple[1]),
                    "{}/{}".format(inc_timeouts_tuple[0],
                                   inc_timeouts_tuple[1]),
                ]
            if train or test:
                values.append(p_value_timeouts)
            array.append(values)

        array = np.array(array)
        columns = ['Default', 'Incumbent']
        if oracle:
            columns.append('Oracle')
        if train and test:
            columns = columns + columns
        if train or test:
            columns.append('p-value')
        self.logger.debug(array)
        self.logger.debug(columns)
        df = DataFrame(data=array, index=metrics, columns=columns)
        table = df.to_html()
        if train and test:
            # Insert two-column-header
            table = table.split(sep='</thead>', maxsplit=1)[1]
            new_table = "<table border=\"3\" class=\"dataframe\">\n"\
                        "  <col>\n"\
                        "  <colgroup span=\"2\"></colgroup>\n"\
                        "  <colgroup span=\"2\"></colgroup>\n"\
                        "  <thead>\n"\
                        "    <tr>\n"\
                        "      <td rowspan=\"2\"></td>\n"\
                        "      <th colspan=\"3\" scope=\"colgroup\">Train</th>\n"\
                        "      <th colspan=\"3\" scope=\"colgroup\">Test</th>\n"\
                        "      <th colspan=\"1\" scope=\"colgroup\">p-value</th>\n"\
                        "    </tr>\n"\
                        "    <tr>\n"\
                        "      <th scope=\"col\">Default</th>\n"\
                        "      <th scope=\"col\">Incumbent</th>\n"\
                        "      <th scope=\"col\">Oracle</th>\n"\
                        "      <th scope=\"col\">Default</th>\n"\
                        "      <th scope=\"col\">Incumbent</th>\n"\
                        "      <th scope=\"col\">Oracle</th>\n"\
                        "    </tr>\n"\
                        "</thead>\n"
            table = new_table + table

        self.table = table
        self.dataframe = df
        return df
Example #15
    def plot_interactive_footprint(self):
        """Use bokeh to create an interactive algorithm footprint with zoom and
        hover tooltips. Should avoid problems with overplotting (since we can
        zoom) and provide better information about instances."""
        features = np.array(self.features_2d)
        instances = self.insts
        runhistory = self.rh
        algo = {v: k for k, v in self.algo_name.items()}
        incumbent = algo['incumbent']
        default = algo['default']
        source = ColumnDataSource(data=dict(x=features[:, 0],
                                            y=features[:, 1]))
        # Add all necessary information for incumbent and default
        source.add(instances, 'instance_name')
        instance_set = [
            'train' if i in self.train_feats.keys() else 'test'
            for i in instances
        ]
        source.add(instance_set, 'instance_set')  # train or test
        for config, name in [(incumbent, 'incumbent'), (default, 'default')]:
            cost = get_cost_dict_for_config(runhistory, config)
            source.add([cost[i] for i in instances], '{}_cost'.format(name))
            # TODO should be in function
            good, bad = self._get_good_bad(config)
            color = [
                1 if idx in good else 0 for idx, i in enumerate(instances)
            ]
            # TODO end
            color = ['blue' if c else 'red' for c in color]
            self.logger.debug("%s colors: %s", name, str(color))
            source.add(color, '{}_color'.format(name))
        source.add(source.data['default_color'], 'color')

        # Define what appears in tooltips
        hover = HoverTool(tooltips=[
            ('instance name', '@instance_name'),
            ('def cost', '@default_cost'),
            ('inc_cost', '@incumbent_cost'),
            ('set', '@instance_set'),
        ])

        # Add radio-button
        def_inc_callback = CustomJS(args=dict(source=source),
                                    code="""
            var data = source.data;
            if (cb_obj.active == 0) {
                data['color'] = data['default_color'];
            } else {
                data['color'] = data['incumbent_color'];
            }
            source.change.emit();
            """)

        def_inc_radio_button = RadioButtonGroup(
            labels=["default", "incumbent"],
            active=0,
            callback=def_inc_callback)

        # Plot
        x_range = DataRange1d(bounds='auto',
                              start=min(features[:, 0]) - 1,
                              end=max(features[:, 0]) + 1)
        y_range = DataRange1d(bounds='auto',
                              start=min(features[:, 1]) - 1,
                              end=max(features[:, 1]) + 1)
        p = figure(
            plot_height=500,
            plot_width=600,
            tools=[hover, 'save', 'wheel_zoom', 'box_zoom', 'pan', 'reset'],
            active_drag='box_zoom',
            x_range=x_range,
            y_range=y_range)
        # Scatter train and test individually to toggle them
        train_view = CDSView(
            source=source,
            filters=[GroupFilter(column_name='instance_set', group='train')])
        test_view = CDSView(
            source=source,
            filters=[GroupFilter(column_name='instance_set', group='test')])
        train = p.scatter(x='x',
                          y='y',
                          source=source,
                          view=train_view,
                          color='color')
        test = p.scatter(x='x',
                         y='y',
                         source=source,
                         view=test_view,
                         color='color')
        p.xaxis.axis_label, p.yaxis.axis_label = 'principal component 1', 'principal component 2'
        p.xaxis.axis_label_text_font_size = p.yaxis.axis_label_text_font_size = "15pt"

        train_test_callback = CustomJS(args=dict(source=source,
                                                 train_view=train,
                                                 test_view=test),
                                       code="""
            var data = source.data;
            if (cb_obj.active == 0) {
                train_view.visible = true;
                test_view.visible = true;
            } else if (cb_obj.active == 1) {
                train_view.visible = true;
                test_view.visible = false;
            } else {
                train_view.visible = false;
                test_view.visible = true;
            }
            """)
        train_test_radio_button = RadioButtonGroup(
            labels=["all", "train", "test"],
            active=0,
            callback=train_test_callback)

        # Export and return
        if self.output_dir:
            path = os.path.join(self.output_dir,
                                "content/images/algorithm_footprint.png")
            export_bokeh(p, path, self.logger)

        layout = column(
            p,
            row(widgetbox(def_inc_radio_button),
                widgetbox(train_test_radio_button)))
        return layout
Example #16
    def __init__(self,
                 default: Configuration,
                 incumbent: Configuration,
                 rh: RunHistory,
                 train: List[str],
                 test: List[str],
                 cutoff,
                 output_dir: str):
        """
        Plot the cumulative distribution functions for the given configurations;
        the plots share the y-axis and, if desired, the x-axis.
        Saves plot to file.

        Parameters
        ----------
        default, incumbent: Configuration
            configurations to be compared
        rh: RunHistory
            runhistory to use for cost-estimations
        train, test: List[str]
            lists with corresponding instances
        cutoff: Union[None, int]
            cutoff for target algorithms, if set
        output_dir: str
            directory to save plots in

        Returns
        -------
        output_fns: List[str]
            list with paths to generated plots
        """
        self.logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)

        self.output_dir = output_dir

        out_fn = os.path.join(output_dir, 'cdf')
        self.logger.info("... plotting eCDF")
        self.logger.debug("Plot CDF to %s_[train|test].png", out_fn)

        def prepare_data(x_data):
            """ Helper function to keep things easy, generates y_data and manages x_data-timeouts """
            x_data = sorted(x_data)
            y_data = np.array(range(len(x_data))) / (len(x_data) - 1)
            for idx in range(len(x_data)):
                if (cutoff is not None) and (x_data[idx] >= cutoff):
                    x_data[idx] = cutoff
                    y_data[idx] = y_data[idx - 1]
            return (x_data, y_data)

        # Generate y_data
        def_costs = get_cost_dict_for_config(rh, default).items()
        inc_costs = get_cost_dict_for_config(rh, incumbent).items()

        output_fns = []

        for insts, name in [(train, 'train'), (test, 'test')]:
            if len(insts) <= 1:
                self.logger.debug("No %s instances, skipping cdf", name)
                continue
            data = [prepare_data(np.array([v for k, v in costs if k in insts])) for costs in [def_costs, inc_costs]]
            x, y = (data[0][0], data[1][0]), (data[0][1], data[1][1])
            labels = ['default ' + name, 'incumbent ' + name]
            output_fns.append(plot_cdf(x, y, labels, timeout=cutoff,
                                       out_fn=out_fn + '_{}.png'.format(name)))

        self.output_fns = output_fns