예제 #1
0
    def plot_cdf_compare(self, default: Configuration,
                         incumbent: Configuration, rh: RunHistory):
        """
        Plot the empirical cumulative distribution functions (eCDF) of the
        costs of two configurations, one plot per instance set (train/test).

        Costs at or above the scenario's cutoff are clamped to the cutoff and
        are not counted as additional solved instances. Plotting itself is
        delegated to ``plot_cdf``, which receives the target filename
        ``<output_dir>/cdf_<train|test>.png``.

        Parameters
        ----------
        default, incumbent: Configuration
            configurations to be compared
        rh: RunHistory
            runhistory to use for cost-estimations

        Returns
        -------
        output_fns: List[str]
            one ``plot_cdf`` return value per plotted instance set
            (presumably the paths to the generated plots)
        """
        out_fn = os.path.join(self.output_dir, 'cdf')
        self.logger.info("... plotting eCDF")
        self.logger.debug("Plot CDF to %s_[train|test].png", out_fn)

        timeout = self.scenario.cutoff

        def prepare_data(x_data):
            """Sort the costs and derive matching eCDF y-values.

            Returns a tuple ``(x_data, y_data)`` where ``x_data`` is the sorted
            list of costs (clamped to ``timeout``) and ``y_data`` is a numpy
            array with values ``i / (n - 1)``; timed-out entries repeat the
            y-value of the last entry below the timeout.
            """
            x_data = sorted(x_data)
            n_points = len(x_data)
            # Guard the denominator: a single data point would otherwise
            # evaluate 0 / 0 and put NaN into the plot data.
            y_data = np.arange(n_points) / max(n_points - 1, 1)
            if timeout is not None:
                for idx, cost in enumerate(x_data):
                    if cost >= timeout:
                        x_data[idx] = timeout
                        # For idx == 0 there is no previous entry; idx - 1
                        # would wrap around to the last element (1.0) and mark
                        # every timed-out run as solved. Nothing finished
                        # below the timeout in that case, so use 0.0.
                        y_data[idx] = y_data[idx - 1] if idx > 0 else 0.0
            return (x_data, y_data)

        # Per-instance costs of both configurations as (instance, cost) pairs
        def_costs = get_cost_dict_for_config(rh, default).items()
        inc_costs = get_cost_dict_for_config(rh, incumbent).items()
        train, test = self.scenario.train_insts, self.scenario.test_insts

        output_fns = []

        for insts, name in [(train, 'train'), (test, 'test')]:
            # [None] is the placeholder used when no instances are specified
            if insts == [None]:
                self.logger.debug("No %s instances, skipping cdf", name)
                continue
            data = [
                prepare_data(np.array([v for k, v in costs if k in insts]))
                for costs in [def_costs, inc_costs]
            ]
            # Regroup into (default_x, incumbent_x), (default_y, incumbent_y)
            x, y = (data[0][0], data[1][0]), (data[0][1], data[1][1])
            labels = ['default ' + name, 'incumbent ' + name]
            output_fns.append(
                plot_cdf(x,
                         y,
                         labels,
                         timeout=timeout,
                         out_fn=out_fn + '_{}.png'.format(name)))

        return output_fns
예제 #2
0
    def _plot_ecdf(self, default: Configuration, incumbent: Configuration,
                   rh: RunHistory, train: List[str], test: List[str], cutoff,
                   output_dir: str):
        """
        Plot the empirical cumulative distribution functions (eCDF) of the
        costs of two configurations, one plot per instance set (train/test).

        Costs at or above ``cutoff`` are clamped to the cutoff and are not
        counted as additional solved instances. Plotting itself is delegated
        to ``plot_cdf``, which receives the target filename
        ``<output_dir>/cdf_<train|test>.png``.

        Parameters
        ----------
        default, incumbent: Configuration
            configurations to be compared
        rh: RunHistory
            runhistory to use for cost-estimations
        train, test: List[str]
            lists with corresponding instances
        cutoff: Union[None, int]
            cutoff for target algorithms, if set
        output_dir: str
            directory to save plots in

        Returns
        -------
        result: dict
            dictionary with the single key ``'figure'``; its value is the list
            of ``plot_cdf`` return values (one per plotted instance set), or
            ``None`` if nothing was plotted

        Raises
        ------
        NotApplicable
            if neither ``train`` nor ``test`` holds more than one instance
        """
        out_fn_base = os.path.join(output_dir, 'cdf')
        self.logger.info("... plotting eCDF")

        def prepare_data(x_data):
            """Sort the costs and derive matching eCDF y-values.

            Returns a tuple ``(x_data, y_data)`` where ``x_data`` is the sorted
            list of costs (clamped to ``cutoff``) and ``y_data`` is a numpy
            array with values ``i / (n - 1)``; timed-out entries repeat the
            y-value of the last entry below the cutoff.
            """
            x_data = sorted(x_data)
            n_points = len(x_data)
            # Guard the denominator: a single data point would otherwise
            # evaluate 0 / 0 and put NaN into the plot data.
            y_data = np.arange(n_points) / max(n_points - 1, 1)
            if cutoff is not None:
                for idx, cost in enumerate(x_data):
                    if cost >= cutoff:
                        x_data[idx] = cutoff
                        # For idx == 0 there is no previous entry; idx - 1
                        # would wrap around to the last element (1.0) and mark
                        # every timed-out run as solved. Nothing finished
                        # below the cutoff in that case, so use 0.0.
                        y_data[idx] = y_data[idx - 1] if idx > 0 else 0.0
            return (x_data, y_data)

        # Per-instance costs of both configurations as (instance, cost) pairs
        def_costs = get_cost_dict_for_config(rh, default).items()
        inc_costs = get_cost_dict_for_config(rh, incumbent).items()

        output_fns = []
        if len(train) <= 1 and len(test) <= 1:
            raise NotApplicable("No instances, so no eCDF-plot.")
        for insts, name in [(train, 'train'), (test, 'test')]:
            if len(insts) <= 1:
                self.logger.debug("No %s instances, skipping cdf", name)
                continue
            # Build the lookup once instead of scanning the list per cost entry
            inst_set = set(insts)
            data = [
                prepare_data(np.array([v for k, v in costs if k in inst_set]))
                for costs in [def_costs, inc_costs]
            ]
            # Regroup into (default_x, incumbent_x), (default_y, incumbent_y)
            x, y = (data[0][0], data[1][0]), (data[0][1], data[1][1])
            labels = ['default ' + name, 'incumbent ' + name]
            out_fn = out_fn_base + '_{}.png'.format(name)
            output_fns.append(
                plot_cdf(x, y, labels, timeout=cutoff, out_fn=out_fn))
            self.logger.debug("Plotted eCDF to %s", out_fn)
        return {'figure': output_fns if len(output_fns) > 0 else None}
예제 #3
0
파일: plot_ecdf.py 프로젝트: WrightKD/CAVE
    def __init__(self,
                 default: Configuration,
                 incumbent: Configuration,
                 rh: RunHistory,
                 train: List[str],
                 test: List[str],
                 cutoff,
                 output_dir: str):
        """
        Plot the empirical cumulative distribution functions (eCDF) of the
        costs of two configurations, one plot per instance set (train/test).

        Costs at or above ``cutoff`` are clamped to the cutoff and are not
        counted as additional solved instances. Plotting itself is delegated
        to ``plot_cdf``, which receives the target filename
        ``<output_dir>/cdf_<train|test>.png``.

        Nothing is returned; the ``plot_cdf`` return values (one per plotted
        instance set, presumably the paths to the generated plots) are stored
        in ``self.output_fns``.

        Parameters
        ----------
        default, incumbent: Configuration
            configurations to be compared
        rh: RunHistory
            runhistory to use for cost-estimations
        train, test: List[str]
            lists with corresponding instances
        cutoff: Union[None, int]
            cutoff for target algorithms, if set
        output_dir: str
            directory to save plots in
        """
        self.logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)

        self.output_dir = output_dir

        out_fn = os.path.join(output_dir, 'cdf')
        self.logger.info("... plotting eCDF")
        self.logger.debug("Plot CDF to %s_[train|test].png", out_fn)

        def prepare_data(x_data):
            """Sort the costs and derive matching eCDF y-values.

            Returns a tuple ``(x_data, y_data)`` where ``x_data`` is the sorted
            list of costs (clamped to ``cutoff``) and ``y_data`` is a numpy
            array with values ``i / (n - 1)``; timed-out entries repeat the
            y-value of the last entry below the cutoff.
            """
            x_data = sorted(x_data)
            n_points = len(x_data)
            # Guard the denominator: a single data point would otherwise
            # evaluate 0 / 0 and put NaN into the plot data.
            y_data = np.arange(n_points) / max(n_points - 1, 1)
            if cutoff is not None:
                for idx, cost in enumerate(x_data):
                    if cost >= cutoff:
                        x_data[idx] = cutoff
                        # For idx == 0 there is no previous entry; idx - 1
                        # would wrap around to the last element (1.0) and mark
                        # every timed-out run as solved. Nothing finished
                        # below the cutoff in that case, so use 0.0.
                        y_data[idx] = y_data[idx - 1] if idx > 0 else 0.0
            return (x_data, y_data)

        # Per-instance costs of both configurations as (instance, cost) pairs
        def_costs = get_cost_dict_for_config(rh, default).items()
        inc_costs = get_cost_dict_for_config(rh, incumbent).items()

        output_fns = []

        for insts, name in [(train, 'train'), (test, 'test')]:
            if len(insts) <= 1:
                self.logger.debug("No %s instances, skipping cdf", name)
                continue
            # Build the lookup once instead of scanning the list per cost entry
            inst_set = set(insts)
            data = [prepare_data(np.array([v for k, v in costs if k in inst_set]))
                    for costs in [def_costs, inc_costs]]
            # Regroup into (default_x, incumbent_x), (default_y, incumbent_y)
            x, y = (data[0][0], data[1][0]), (data[0][1], data[1][1])
            labels = ['default ' + name, 'incumbent ' + name]
            output_fns.append(plot_cdf(x, y, labels, timeout=cutoff,
                                       out_fn=out_fn + '_{}.png'.format(name)))

        self.output_fns = output_fns