def get_oracle(self, instances, rh):
    """Estimate the oracle: the lowest cost observed for every instance.

    Walks over all configurations stored in the runhistory and, for each
    key of the configuration's cost dict, remembers the smallest cost
    encountered across all configurations.

    Parameters
    ----------
    instances: List[str]
        list of instances in question (not read by this method; the
        result contains whatever keys the cost dicts provide)
    rh: RunHistory or List[RunHistory]
        runhistory, or a list of runhistories that is merged via
        ``combine_runhistories`` before evaluation

    Returns
    -------
    oracle: dict[str->float]
        best seen performance per instance {inst : performance}
    """
    if isinstance(rh, list):
        rh = combine_runhistories(rh)

    self.logger.debug("Calculating oracle performance")

    best_per_instance = {}
    for config in rh.get_all_configs():
        cost_per_instance = get_cost_dict_for_config(rh, config)
        for inst, cost in cost_per_instance.items():
            # First sighting of the instance, or a strictly better cost.
            if inst not in best_per_instance or best_per_instance[inst] > cost:
                best_per_instance[inst] = cost
    return best_per_instance
def __init__(self,
             scenario: Scenario,
             rhs: list,
             incs: list = None,
             final_incumbent=None,
             rh_labels=None,
             max_plot: int = -1,
             contour_step_size=0.2,
             use_timeslider: bool = False,
             num_quantiles: int = 10,
             timeslider_log: bool = True,
             output_dir: str = None,
             reduction_method: str = "classic",
             ):
    '''
    Creating an interactive plot, visualizing the configuration search space.
    The runhistories are correlated to the individual runs.
    Each run consists of a runhistory (in the smac-format) and a list of incumbents.
    If the dict "additional_info" in the RunValues of the runhistory contains a nested dict with
    additional_info["timestamps"]["finished"], those timestamps are used to sort data.

    Parameters
    ----------
    scenario: Scenario
        scenario
    rhs: List[RunHistory]
        runhistories from configurator runs, only data collected during optimization (no validation!)
    incs: List[List[Configuration]]
        incumbents per run, last entry is final incumbent
    final_incumbent: Configuration
        final configuration (best of all runs)
    rh_labels: List[str]
        one label per runhistory in ``rhs``; if empty or None, the labels default to the
        stringified indices "0", "1", ...
    max_plot: int
        maximum number of configs to plot, if -1 plot all
    contour_step_size: float
        step size of meshgrid to compute contour of fitness landscape
    use_timeslider: bool
        whether or not to have a time_slider-widget on cfp-plot
        INCREASES FILE-SIZE DRAMATICALLY
    num_quantiles: int
        number of quantiles for the slider/ number of static pictures
    timeslider_log: bool
        whether to use a logarithmic scale for the timeslider/quantiles
    output_dir: str
        output directory
    reduction_method: str
        identifier of the reduction method. Accepted because the analyzer's constructor
        forwards this keyword; it is only stored on the instance here.
        NOTE(review): confirm how the plotting code is supposed to consume it.
    '''
    self.logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)

    self.scenario = scenario
    self.rhs = rhs
    self.combined_rh = combine_runhistories(self.rhs)
    self.incs = incs
    # Fall back to positional labels when none (or an empty list) are given.
    self.rh_labels = rh_labels if rh_labels else [str(idx) for idx in range(len(self.rhs))]
    self.max_plot = max_plot
    self.use_timeslider = use_timeslider
    self.num_quantiles = num_quantiles
    self.contour_step_size = contour_step_size
    self.output_dir = output_dir
    self.timeslider_log = timeslider_log
    self.reduction_method = reduction_method

    # Preprocess input
    self.default = scenario.cs.get_default_configuration()
    self.final_incumbent = final_incumbent
    # Map each run label to the configurations found in that run's runhistory.
    self.configs_in_run = {label: rh.get_all_configs() for label, rh in zip(self.rh_labels, self.rhs)}
def __init__(self,
             scenario,
             runs,
             runhistory,
             final_incumbent,
             output_dir,
             max_confs=1000,
             use_timeslider=False,
             num_quantiles=10,
             timeslider_log: bool = True,
             reduction_method: str = "classic"):
    """Prepare the visualization of configurations, highlighting the incumbents.

    Stores the arguments on the instance and builds a
    ``ConfiguratorFootprintPlotter`` (kept as ``self.cfp``) from the original
    runhistories and trajectories of ``runs``, so the explored configspace
    can be estimated. The constructor itself returns nothing.

    Parameters
    ----------
    scenario: Scenario
        deepcopy of scenario-object; if its ``feature_array`` is None it is
        replaced in place by an empty 2d array
    runs: List[ConfiguratorRun]
        holding information about original runhistories, trajectories, incumbents, etc.
    runhistory: RunHistory
        with maximum number of real (not estimated) runs to train best-possible epm;
        if falsy, the ``combined_runhistory`` of all ``runs`` are merged instead
    final_incumbent: Configuration
        final incumbent (best of all (highest budget) runs)
    output_dir: str
        output directory, forwarded to the plotter
    max_confs: int
        maximum number of data-points to plot
    use_timeslider: bool
        whether or not to have a time_slider-widget on cfp-plot
        INCREASES FILE-SIZE DRAMATICALLY
        (only forwarded as True to the plotter if ``num_quantiles`` > 1)
    num_quantiles: int
        if use_timeslider is not off, defines the number of quantiles for the
        slider/ number of static pictures
    timeslider_log: bool
        whether to use a logarithmic scale for the timeslider/quantiles
    reduction_method: str
        identifier of the reduction method, forwarded unchanged to the plotter
    """
    self.logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
    self.logger.info(
        "... visualizing explored configspace (this may take "
        "a long time, if there is a lot of data - deactive with --no_configurator_footprint)"
    )

    self.scenario = scenario
    self.runs = runs
    self.runhistory = runhistory if runhistory else combine_runhistories(
        [r.combined_runhistory for r in runs])
    self.final_incumbent = final_incumbent
    self.output_dir = output_dir
    self.max_confs = max_confs
    self.use_timeslider = use_timeslider
    self.num_quantiles = num_quantiles
    self.timeslider_log = timeslider_log
    self.reduction_method = reduction_method

    # Guarantee an array-valued feature_array (note: mutates the passed scenario).
    if scenario.feature_array is None:
        scenario.feature_array = np.array([[]])

    self.cfp = ConfiguratorFootprintPlotter(
        scenario=self.scenario,
        rhs=[r.original_runhistory for r in self.runs],
        incs=[[entry['incumbent'] for entry in r.traj] for r in self.runs],
        final_incumbent=self.final_incumbent,
        # Label each run by its folder name, with underscores shown as spaces.
        rh_labels=[os.path.basename(r.folder).replace('_', ' ') for r in self.runs],
        max_plot=self.max_confs,
        # A slider needs more than one quantile to be meaningful.
        use_timeslider=self.use_timeslider and self.num_quantiles > 1,
        num_quantiles=self.num_quantiles,
        timeslider_log=self.timeslider_log,
        output_dir=self.output_dir,
        reduction_method=self.reduction_method)