def _permutation_test(self, epm_rh, default, incumbent, num_permutations, par=1):
    """Paired permutation test on the per-instance costs of default vs. incumbent.

    Returns np.nan if a PAR-factor other than 1 is requested but the scenario
    defines no cutoff (PAR-penalization is undefined without a cutoff).
    """
    if par != 1 and not self.scenario.cutoff:
        return np.nan
    cutoff = self.scenario.cutoff
    def_cost = get_cost_dict_for_config(epm_rh, default, par=par, cutoff=cutoff)
    inc_cost = get_cost_dict_for_config(epm_rh, incumbent, par=par, cutoff=cutoff)
    data1, data2 = zip(*[(def_cost[i], inc_cost[i]) for i in def_cost.keys()])
    p = paired_permutation(data1, data2, self.rng,
                           num_permutations=num_permutations, logger=self.logger)
    self.logger.debug("p-value for def/inc-difference: %f (permutation test "
                      "with %d permutations and par %d)", p, num_permutations, par)
    return p
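# The `get_cost_dict_for_config` helper is used throughout this section. A minimal,
# self-contained sketch of its assumed behavior -- mapping each instance to the
# (optionally PAR-penalized) cost of one configuration -- follows. The name
# `sketch_cost_dict` and the plain-dict input are hypothetical; CAVE's actual
# implementation works on a RunHistory and may differ in detail.
def sketch_cost_dict(runs_per_instance, par=1, cutoff=None):
    """runs_per_instance: dict mapping instance -> observed cost (e.g. runtime)."""
    costs = {}
    for instance, cost in runs_per_instance.items():
        if cutoff is not None and cost >= cutoff:
            cost = cutoff * par  # PAR-penalization: a timeout counts as par * cutoff
        costs[instance] = cost
    return costs

# Example: with cutoff=300 and par=10, a timed-out run contributes 3000.
# sketch_cost_dict({'inst1': 12.3, 'inst2': 300.0}, par=10, cutoff=300)
# -> {'inst1': 12.3, 'inst2': 3000.0}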
def plot_cdf_compare(self, default: Configuration, incumbent: Configuration, rh: RunHistory):
    """
    Plot the cumulative distribution functions for the given configurations;
    plots will share the y-axis and, if desired, the x-axis. Saves plot to file.

    Parameters
    ----------
    default, incumbent: Configuration
        configurations to be compared
    rh: RunHistory
        runhistory to use for cost-estimations

    Returns
    -------
    output_fns: List[str]
        list with paths to generated plots
    """
    out_fn = os.path.join(self.output_dir, 'cdf')
    self.logger.info("... plotting eCDF")
    self.logger.debug("Plot CDF to %s_[train|test].png", out_fn)
    timeout = self.scenario.cutoff

    def prepare_data(x_data):
        """ Helper function to keep things easy, generates y_data and
        manages x_data-timeouts """
        x_data = sorted(x_data)
        y_data = np.array(range(len(x_data))) / (len(x_data) - 1)
        for idx in range(len(x_data)):
            if (timeout is not None) and (x_data[idx] >= timeout):
                x_data[idx] = timeout
                y_data[idx] = y_data[idx - 1]
        return (x_data, y_data)

    # Generate y_data
    def_costs = get_cost_dict_for_config(rh, default).items()
    inc_costs = get_cost_dict_for_config(rh, incumbent).items()

    train, test = self.scenario.train_insts, self.scenario.test_insts
    output_fns = []
    for insts, name in [(train, 'train'), (test, 'test')]:
        if insts == [None]:
            self.logger.debug("No %s instances, skipping cdf", name)
            continue
        data = [prepare_data(np.array([v for k, v in costs if k in insts]))
                for costs in [def_costs, inc_costs]]
        x, y = (data[0][0], data[1][0]), (data[0][1], data[1][1])
        labels = ['default ' + name, 'incumbent ' + name]
        output_fns.append(plot_cdf(x, y, labels, timeout=self.scenario.cutoff,
                                   out_fn=out_fn + '_{}.png'.format(name)))
    return output_fns
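# Worked toy example of `prepare_data` above with timeout = 10 (values rounded):
# sorted costs [1, 4, 10, 10] first get y = [0, 1/3, 2/3, 1]; the two runs at the
# cutoff are clamped to the timeout and their y-values flattened to the previous
# point's, so the eCDF plateaus at the fraction of non-timed-out runs:
#   prepare_data([1, 4, 10, 10])  ->  ([1, 4, 10, 10], [0.0, 0.333, 0.333, 0.333])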
def __init__(self, original_rh, validated_rh, default, incumbent, train_test,
             scenario, validator, output, max_pimp_samples, fanova_pairwise=True):
    """
    Parameters
    ----------
    original_rh: RunHistory
        runhistory containing all runs that have actually been run
    validated_rh: RunHistory
        runhistory containing all runs from original_rh + estimates for
        default and all incumbents for all instances
    default, incumbent: Configuration
        default and overall incumbent
    train_test: bool
        whether a distinction is made between train and test instances
        (in cdf and scatter)
    scenario: Scenario
        the scenario object
    validator: Validator
        validator object (to estimate using EPM)
    output: string
        output-directory
    """
    self.logger = logging.getLogger("cave.analyzer")

    # Important objects for analysis
    self.original_rh = original_rh
    self.validated_rh = validated_rh
    self.default = default
    self.incumbent = incumbent
    self.train_test = train_test
    self.scenario = scenario
    self.validator = validator
    self.pimp = None  # PIMP object for reuse
    self.feat_analysis = None  # feat_analysis object for reuse
    self.evaluators = []
    self.output = output

    self.importance = None  # Used to store dictionary containing parameter
                            # importances, so it can be used by analysis
    self.feat_importance = None  # Used to store dictionary w feat_imp

    conf1_runs = get_cost_dict_for_config(self.validated_rh, self.default)
    conf2_runs = get_cost_dict_for_config(self.validated_rh, self.incumbent)
    self.plotter = Plotter(self.scenario, self.train_test, conf1_runs, conf2_runs,
                           output=self.output)
    self.max_pimp_samples = max_pimp_samples
    self.fanova_pairwise = fanova_pairwise
def _paired_t_test(self, epm_rh, default, incumbent, num_permutations):
    """Paired t-test on the per-instance costs of default vs. incumbent.

    Note: `num_permutations` is not used by the t-test itself.
    """
    def_cost, inc_cost = (get_cost_dict_for_config(epm_rh, default),
                          get_cost_dict_for_config(epm_rh, incumbent))
    data1, data2 = zip(*[(def_cost[i], inc_cost[i]) for i in def_cost.keys()])
    p = paired_t_student(data1, data2, logger=self.logger)
    self.logger.debug("p-value for def/inc-difference: %f (paired t-test)", p)
    return p
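# `paired_t_student` is a CAVE helper; below is a minimal equivalent using scipy
# (an assumption -- the real helper may differ, e.g. in logging or edge-case
# handling). `sketch_paired_t_student` is a hypothetical name.
from scipy import stats

def sketch_paired_t_student(data1, data2):
    """Two-sided paired t-test; returns the p-value."""
    t_stat, p_value = stats.ttest_rel(data1, data2)
    return p_value

# Example: sketch_paired_t_student([1.2, 3.4, 2.2], [0.9, 2.8, 2.0])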
def __init__(self, default: Configuration, incumbent: Configuration, rh: RunHistory,
             train: List[str], test: Union[List[str], None], run_obj: str,
             cutoff, output_dir: str):
    """
    Creates a scatterplot of the two configurations on the given set of
    instances. Saves plot to file.

    Parameters
    ----------
    default, incumbent: Configuration
        configurations to be compared
    rh: RunHistory
        runhistory to use for cost-estimations
    output_dir: str
        output directory

    The paths to the generated plots are stored in `self.output_fns`.
    """
    self.logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
    out_fn_base = os.path.join(output_dir, 'scatter_')
    self.logger.info("... plotting scatter")
    self.logger.debug("Plot scatter to %s[train|test].png", out_fn_base)

    metric = run_obj
    timeout = cutoff
    labels = ["default {}".format(run_obj), "incumbent {}".format(run_obj)]

    def_costs = get_cost_dict_for_config(rh, default).items()
    inc_costs = get_cost_dict_for_config(rh, incumbent).items()

    out_fns = []
    for insts, name in [(train, 'train'), (test, 'test')]:
        if insts == [None]:
            self.logger.debug("No %s instances, skipping scatter", name)
            continue
        default = np.array([v for k, v in def_costs if k in insts])
        incumbent = np.array([v for k, v in inc_costs if k in insts])
        min_val = min(min(default), min(incumbent))
        out_fn = out_fn_base + name + '.png'
        out_fns.append(plot_scatter_plot((default, ), (incumbent, ), labels,
                                         metric=metric, min_val=min_val,
                                         max_val=timeout, out_fn=out_fn))
    self.output_fns = out_fns
def _plot_ecdf(self, default: Configuration, incumbent: Configuration,
               rh: RunHistory, train: List[str], test: List[str], cutoff,
               output_dir: str):
    """
    Parameters
    ----------
    default, incumbent: Configuration
        configurations to be compared
    rh: RunHistory
        runhistory to use for cost-estimations
    train, test: List[str]
        lists with corresponding instances
    cutoff: Union[None, int]
        cutoff for target algorithms, if set
    output_dir: str
        directory to save plots in
    """
    out_fn_base = os.path.join(output_dir, 'cdf')
    self.logger.info("... plotting eCDF")

    def prepare_data(x_data):
        """ Helper function to keep things easy, generates y_data and
        manages x_data-timeouts """
        x_data = sorted(x_data)
        y_data = np.array(range(len(x_data))) / (len(x_data) - 1)
        for idx in range(len(x_data)):
            if (cutoff is not None) and (x_data[idx] >= cutoff):
                x_data[idx] = cutoff
                y_data[idx] = y_data[idx - 1]
        return (x_data, y_data)

    # Generate y_data
    def_costs = get_cost_dict_for_config(rh, default).items()
    inc_costs = get_cost_dict_for_config(rh, incumbent).items()

    output_fns = []
    if len(train) <= 1 and len(test) <= 1:
        raise NotApplicable("No instances, so no eCDF-plot.")
    for insts, name in [(train, 'train'), (test, 'test')]:
        if len(insts) <= 1:
            self.logger.debug("No %s instances, skipping cdf", name)
            continue
        data = [prepare_data(np.array([v for k, v in costs if k in insts]))
                for costs in [def_costs, inc_costs]]
        x, y = (data[0][0], data[1][0]), (data[0][1], data[1][1])
        labels = ['default ' + name, 'incumbent ' + name]
        out_fn = out_fn_base + '_{}.png'.format(name)
        output_fns.append(plot_cdf(x, y, labels, timeout=cutoff, out_fn=out_fn))
        self.logger.debug("Plotted eCDF to %s", out_fn)
    return {'figure': output_fns if len(output_fns) > 0 else None}
def plot_scatter(self, default: Configuration, incumbent: Configuration, rh: RunHistory):
    """
    Creates a scatterplot of the two configurations on the given set of
    instances. Saves plot to file.

    Parameters
    ----------
    default, incumbent: Configuration
        configurations to be compared
    rh: RunHistory
        runhistory to use for cost-estimations

    Returns
    -------
    output_fns: List[str]
        list with paths to generated plots
    """
    out_fn_base = os.path.join(self.output_dir, 'scatter_')
    self.logger.info("... plotting scatter")
    self.logger.debug("Plot scatter to %s[train|test].png", out_fn_base)

    metric = self.scenario.run_obj
    timeout = self.scenario.cutoff
    labels = ["default {}".format(self.scenario.run_obj),
              "incumbent {}".format(self.scenario.run_obj)]

    def_costs = get_cost_dict_for_config(rh, default).items()
    inc_costs = get_cost_dict_for_config(rh, incumbent).items()
    train, test = self.scenario.train_insts, self.scenario.test_insts

    out_fns = []
    for insts, name in [(train, 'train'), (test, 'test')]:
        if insts == [None]:
            self.logger.debug("No %s instances, skipping scatter", name)
            continue
        default = np.array([v for k, v in def_costs if k in insts])
        incumbent = np.array([v for k, v in inc_costs if k in insts])
        min_val = min(min(default), min(incumbent))
        out_fn = out_fn_base + name + '.png'
        out_fns.append(plot_scatter_plot((default, ), (incumbent, ), labels,
                                         metric=metric, min_val=min_val,
                                         max_val=timeout, out_fn=out_fn))
    return out_fns
def _plot_scatter(self, default: Configuration, incumbent: Configuration,
                  rh: RunHistory, train: List[str], test: Union[List[str], None],
                  run_obj: str, cutoff, output_dir):
    """
    Parameters
    ----------
    default, incumbent: Configuration
        configurations to be compared
    rh: RunHistory
        runhistory to use for cost-estimations
    train[, test]: list(str)
        instance-names
    run_obj: str
        run-objective (time or quality)
    cutoff: float
        maximum runtime of ta
    output_dir: str
        output directory
    """
    out_fn_base = os.path.join(output_dir, 'scatter_')
    self.logger.info("... plotting scatter")

    metric = run_obj
    timeout = cutoff
    labels = ["default {}".format(run_obj), "incumbent {}".format(run_obj)]

    def_costs = get_cost_dict_for_config(rh, default).items()
    inc_costs = get_cost_dict_for_config(rh, incumbent).items()

    out_fns = []
    if len(train) <= 1 and len(test) <= 1:
        raise NotApplicable("No instances, so no scatter-plot.")
    for insts, name in [(train, 'train'), (test, 'test')]:
        if len(insts) <= 1:
            self.logger.debug("No %s instances, skipping scatter", name)
            continue
        default = np.array([v for k, v in def_costs if k in insts])
        incumbent = np.array([v for k, v in inc_costs if k in insts])
        min_val = min(min(default), min(incumbent))
        out_fn = out_fn_base + name + '.png'
        out_fns.append(plot_scatter_plot((default,), (incumbent,), labels,
                                         metric=metric, min_val=min_val,
                                         max_val=timeout, out_fn=out_fn))
        self.logger.debug("Plotted scatter to %s", out_fn)
    return {'figure': out_fns if len(out_fns) > 0 else None}
def get_oracle(self, instances, rh):
    """Estimation of oracle performance. Collects the best performance seen
    for each instance in any run.

    Parameters
    ----------
    instances: List[str]
        list of instances in question
    rh: RunHistory or List[RunHistory]
        runhistory or list of runhistories (will be combined)

    Returns
    -------
    oracle: dict[str->float]
        best seen performance per instance {inst : performance}
    """
    if isinstance(rh, list):
        rh = combine_runhistories(rh)
    self.logger.debug("Calculating oracle performance")
    oracle = {}
    for c in rh.get_all_configs():
        costs = get_cost_dict_for_config(rh, c)
        for i in costs.keys():
            if i not in oracle:
                oracle[i] = costs[i]
            elif oracle[i] > costs[i]:
                oracle[i] = costs[i]
    return oracle
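# Toy illustration of the oracle computed above: the element-wise minimum over
# all configurations' per-instance cost dicts (instance names and values below
# are hypothetical).
costs_per_config = [{'inst1': 5.0, 'inst2': 9.0},   # config A
                    {'inst1': 7.0, 'inst2': 4.0}]   # config B
oracle = {}
for costs in costs_per_config:
    for inst, cost in costs.items():
        oracle[inst] = min(cost, oracle.get(inst, float('inf')))
# oracle == {'inst1': 5.0, 'inst2': 4.0}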
def get_performance(self, algorithm, instance):
    """ Return performance according to (possibly EPM-)validated runhistory. """
    if algorithm not in self.algo_performance:
        self.algo_performance[algorithm] = get_cost_dict_for_config(self.rh, algorithm)
    return self.algo_performance[algorithm][instance]
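# Design note: the manual per-config cache above could also be written with
# functools.lru_cache. A sketch under the assumption that the configuration
# objects are hashable; the runhistory is captured via closure rather than
# passed as an argument, since it need not be hashable. `make_cost_getter` is
# a hypothetical helper, not part of the module.
from functools import lru_cache

def make_cost_getter(rh):
    @lru_cache(maxsize=None)
    def costs_for(algorithm):
        return get_cost_dict_for_config(rh, algorithm)
    return costs_for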
def create_table(self, incumbents, budget_names, epm_rhs):
    """Create table.

    Parameters
    ----------
    incumbents: List[Configuration]
        incumbents per budget, assuming ascending order
    budget_names: List[str]
        budget-names as strings
    epm_rhs: List[RunHistory]
        estimated runhistories for budgets, same length and order as incumbents
    """
    self.logger.info("... create performance table")
    if not (len(incumbents) == len(epm_rhs) and len(incumbents) == len(budget_names)):
        raise ValueError("Number of incumbents must equal number of names and runhistories")

    budget_names = [b.split('/')[-1] for b in budget_names]
    dec_place = 3

    # Get costs
    costs = []
    for inc, epm_rh in zip(incumbents, epm_rhs):
        cost_dict_inc = get_cost_dict_for_config(epm_rh, inc)
        costs.append(np.mean([float(v) for v in cost_dict_inc.values()]))

    keys = [k for k in incumbents[0].keys() if any([inc[k] for inc in incumbents])]
    values = []
    for inc, c in zip(incumbents, costs):
        new_values = [inc[k] if inc[k] is not None else "inactive" for k in keys]
        new_values.append(str(round(c, dec_place)))
        values.append(new_values)
    keys.append('Cost')

    table = list(zip(keys, *values))
    keys, table = [k[0] for k in table], [k[1:] for k in table]
    self.table = df = DataFrame(data=table, columns=budget_names, index=keys)
    self.html_table = df.to_html()
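# The zip-transpose above turns per-budget rows into per-parameter rows.
# Toy example (hypothetical parameter names and values):
keys = ['p1', 'p2', 'Cost']
values = [['a', 1, '0.5'],    # budget 1
          ['b', 2, '0.3']]    # budget 2
table = list(zip(keys, *values))
# -> [('p1', 'a', 'b'), ('p2', 1, 2), ('Cost', '0.5', '0.3')]
# each tuple is one table row: (index, value@budget1, value@budget2)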
def _get_cost(self, algorithm, instance=None):
    """ Return cost according to (possibly EPM-)validated runhistory.

    Parameters
    ----------
    algorithm: Configuration
        config
    instance: str
        instance name
    """
    # Note: a double-underscore attribute would be name-mangled to
    # '_ClassName__algo_cost', so `hasattr(self, '__algo_cost')` would always
    # return False and the cache would be rebuilt on every call; a single
    # leading underscore avoids that pitfall.
    if not hasattr(self, '_algo_cost'):
        self._algo_cost = {}  # Use function self._get_cost!! Maps algo -> {instance -> cost}
    if algorithm not in self._algo_cost:
        # self.logger.debug("Getting cost for %s, using PAR1-score", self.algo_name[algorithm])
        self._algo_cost[algorithm] = get_cost_dict_for_config(self.rh, algorithm)
    if instance:
        return self._algo_cost[algorithm][instance]
    else:
        return self._algo_cost[algorithm]
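# A minimal demonstration of the name-mangling pitfall fixed above (the class
# name `Demo` is purely illustrative):
class Demo:
    def touch(self):
        self.__cache = {}                    # stored as _Demo__cache
        return hasattr(self, '__cache')      # looks up the literal name '__cache'

d = Demo()
assert d.touch() is False                    # the check never sees the attribute
assert hasattr(d, '_Demo__cache') is True    # it lives under the mangled name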
def get_parX(self, config, par=10):
    """Calculate parX-values of default and incumbent configs.
    First determine PAR-timeouts for each run on each instance, then average
    over train/test if available, else just average.

    Parameters
    ----------
    config: Configuration
        config to be calculated
    par: int
        par-factor to use

    Returns
    -------
    (train, test) OR average -- tuple<float, float> OR float
        PAR10 values for train- and test-instances, if available as tuple
        else the general average
    """
    runs = get_cost_dict_for_config(self.validated_rh, config)
    # Penalize
    if self.scenario.cutoff:
        runs = [(k, runs[k]) if runs[k] < self.scenario.cutoff
                else (k, self.scenario.cutoff * par) for k in runs]
    else:
        runs = [(k, runs[k]) for k in runs]
        self.logger.info("Calculating penalized average runtime without cutoff...")
    # Average
    if self.train_test:
        train = np.mean([c for i, c in runs if i in self.scenario.train_insts])
        test = np.mean([c for i, c in runs if i in self.scenario.test_insts])
        return (train, test)
    else:
        return np.mean([c for i, c in runs])
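# Worked PAR10 example (hypothetical runtimes, cutoff = 300 s): runs of
# 10 s, 100 s and one timeout give
#   PAR10 = (10 + 100 + 10 * 300) / 3 = 3110 / 3 ≈ 1036.7
# whereas PAR1 counts the timeout as 300 s: (10 + 100 + 300) / 3 ≈ 136.7.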
def create_performance_table(self, default, incumbent, epm_rh, oracle):
    """Create table comparing default against incumbent on train-, test- and
    combined instances, listing PAR10, PAR1 and timeouts. Distinguishes between
    train and test, if available."""
    self.logger.info("... create performance table")
    cost_dict_def = get_cost_dict_for_config(epm_rh, default)
    cost_dict_inc = get_cost_dict_for_config(epm_rh, incumbent)

    def_par1, inc_par1 = self.get_parX(cost_dict_def, 1), self.get_parX(cost_dict_inc, 1)
    def_par10, inc_par10 = self.get_parX(cost_dict_def, 10), self.get_parX(cost_dict_inc, 10)
    ora_par1, ora_par10 = self.get_parX(oracle, 1), self.get_parX(oracle, 10)

    def_timeouts = get_timeout(epm_rh, default, self.scenario.cutoff)
    inc_timeouts = get_timeout(epm_rh, incumbent, self.scenario.cutoff)
    def_timeouts_tuple = self.timeouts_to_tuple(def_timeouts)
    inc_timeouts_tuple = self.timeouts_to_tuple(inc_timeouts)
    if self.scenario.cutoff:
        ora_timeout = self.timeouts_to_tuple(
            {i: c < self.scenario.cutoff for i, c in oracle.items()})
        data1, data2 = zip(*[(int(def_timeouts[i]), int(inc_timeouts[i]))
                             for i in def_timeouts.keys()])
        p_value_timeouts = "%.5f" % paired_permutation(
            data1, data2, self.rng, num_permutations=10000, logger=self.logger)
    else:
        ora_timeout = self.timeouts_to_tuple({})
        p_value_timeouts = "N/A"

    # p-values (paired permutation)
    p_value_par10 = self._permutation_test(epm_rh, default, incumbent, 10000, 10)
    p_value_par10 = "%.5f" % p_value_par10 if np.isfinite(p_value_par10) else 'N/A'
    p_value_par1 = self._permutation_test(epm_rh, default, incumbent, 10000, 1)
    p_value_par1 = "%.5f" % p_value_par1 if np.isfinite(p_value_par1) else 'N/A'

    dec_place = 3

    metrics = []
    if self.scenario.run_obj == 'runtime':
        metrics.append('PAR10')
        metrics.append('PAR1')
    else:
        metrics.append('Quality')
    if self.scenario.cutoff:
        metrics.append('Timeouts')

    train, test = (len(self.scenario.train_insts) > 1,
                   len(self.scenario.test_insts) > 1)
    oracle = train or test  # oracle only makes sense with instances

    # Create table
    array = []
    if 'PAR10' in metrics:
        if train and test:
            values = [def_par10[0], inc_par10[0], ora_par10[0],
                      def_par10[1], inc_par10[1], ora_par10[1]]
        elif oracle:
            values = [def_par10, inc_par10, ora_par10]  # oracle only with instances
        else:
            values = [def_par10, inc_par10]
        values = [round(value, dec_place) if np.isfinite(value) else 'N/A'
                  for value in values]
        if train or test:
            values.append(p_value_par10)
        array.append(values)
    if 'PAR1' in metrics or 'Quality' in metrics:
        if train and test:
            values = [def_par1[0], inc_par1[0], ora_par1[0],
                      def_par1[1], inc_par1[1], ora_par1[1]]
        elif oracle:
            values = [def_par1, inc_par1, ora_par1]  # oracle only with instances
        else:
            values = [def_par1, inc_par1]
        values = [round(value, dec_place) if np.isfinite(value) else 'N/A'
                  for value in values]
        if train or test:
            values.append(p_value_par1)
        array.append(values)
    if 'Timeouts' in metrics:
        if train and test:
            values = ["{}/{}".format(def_timeouts_tuple[0][0], def_timeouts_tuple[0][1]),
                      "{}/{}".format(inc_timeouts_tuple[0][0], inc_timeouts_tuple[0][1]),
                      "{}/{}".format(ora_timeout[0][0], ora_timeout[0][1]),
                      "{}/{}".format(def_timeouts_tuple[1][0], def_timeouts_tuple[1][1]),
                      "{}/{}".format(inc_timeouts_tuple[1][0], inc_timeouts_tuple[1][1]),
                      "{}/{}".format(ora_timeout[1][0], ora_timeout[1][1])]
        elif oracle:
            values = ["{}/{}".format(def_timeouts_tuple[0], def_timeouts_tuple[1]),
                      "{}/{}".format(inc_timeouts_tuple[0], inc_timeouts_tuple[1]),
                      "{}/{}".format(ora_timeout[0], ora_timeout[1])]
        else:
            values = ["{}/{}".format(def_timeouts_tuple[0], def_timeouts_tuple[1]),
                      "{}/{}".format(inc_timeouts_tuple[0], inc_timeouts_tuple[1])]
        if train or test:
            values.append(p_value_timeouts)
        array.append(values)

    array = np.array(array)
    columns = ['Default', 'Incumbent']
    if oracle:
        columns.append('Oracle')
    if train and test:
        columns = columns + columns
    if train or test:
        columns.append('p-value')
    self.logger.debug(array)
    self.logger.debug(columns)
    df = DataFrame(data=array, index=metrics, columns=columns)
    table = df.to_html()
    if train and test:
        # Insert two-column-header
        table = table.split(sep='</thead>', maxsplit=1)[1]
        new_table = "<table border=\"3\" class=\"dataframe\">\n"\
                    "  <col>\n"\
                    "  <colgroup span=\"2\"></colgroup>\n"\
                    "  <colgroup span=\"2\"></colgroup>\n"\
                    "  <thead>\n"\
                    "    <tr>\n"\
                    "      <td rowspan=\"2\"></td>\n"\
                    "      <th colspan=\"3\" scope=\"colgroup\">Train</th>\n"\
                    "      <th colspan=\"3\" scope=\"colgroup\">Test</th>\n"\
                    "      <th colspan=\"1\" scope=\"colgroup\">p-value</th>\n"\
                    "    </tr>\n"\
                    "    <tr>\n"\
                    "      <th scope=\"col\">Default</th>\n"\
                    "      <th scope=\"col\">Incumbent</th>\n"\
                    "      <th scope=\"col\">Oracle</th>\n"\
                    "      <th scope=\"col\">Default</th>\n"\
                    "      <th scope=\"col\">Incumbent</th>\n"\
                    "      <th scope=\"col\">Oracle</th>\n"\
                    "    </tr>\n"\
                    "</thead>\n"
        table = new_table + table
    self.table = table
    self.dataframe = df
    return df
def plot_interactive_footprint(self):
    """Use bokeh to create an interactive algorithm footprint with zoom and
    hover tooltips. Should avoid problems with overplotting (since we can zoom)
    and provide better information about instances."""
    features = np.array(self.features_2d)
    instances = self.insts
    runhistory = self.rh
    algo = {v: k for k, v in self.algo_name.items()}
    incumbent = algo['incumbent']
    default = algo['default']
    source = ColumnDataSource(data=dict(x=features[:, 0], y=features[:, 1]))

    # Add all necessary information for incumbent and default
    source.add(instances, 'instance_name')
    instance_set = ['train' if i in self.train_feats.keys() else 'test'
                    for i in instances]
    source.add(instance_set, 'instance_set')  # train or test
    for config, name in [(incumbent, 'incumbent'), (default, 'default')]:
        cost = get_cost_dict_for_config(runhistory, config)
        source.add([cost[i] for i in instances], '{}_cost'.format(name))
        # TODO should be in function
        good, bad = self._get_good_bad(config)
        color = [1 if idx in good else 0 for idx, i in enumerate(instances)]
        # TODO end
        color = ['blue' if c else 'red' for c in color]
        self.logger.debug("%s colors: %s", name, str(color))
        source.add(color, '{}_color'.format(name))
    source.add(source.data['default_color'], 'color')

    # Define what appears in tooltips
    hover = HoverTool(tooltips=[('instance name', '@instance_name'),
                                ('def cost', '@default_cost'),
                                ('inc_cost', '@incumbent_cost'),
                                ('set', '@instance_set')])

    # Add radio-button
    def_inc_callback = CustomJS(args=dict(source=source), code="""
        var data = source.data;
        if (cb_obj.active == 0) {
            data['color'] = data['default_color'];
        } else {
            data['color'] = data['incumbent_color'];
        }
        source.change.emit();
        """)
    def_inc_radio_button = RadioButtonGroup(labels=["default", "incumbent"],
                                            active=0, callback=def_inc_callback)

    # Plot
    x_range = DataRange1d(bounds='auto', start=min(features[:, 0]) - 1,
                          end=max(features[:, 0]) + 1)
    y_range = DataRange1d(bounds='auto', start=min(features[:, 1]) - 1,
                          end=max(features[:, 1]) + 1)
    p = figure(plot_height=500, plot_width=600,
               tools=[hover, 'save', 'wheel_zoom', 'box_zoom', 'pan', 'reset'],
               active_drag='box_zoom', x_range=x_range, y_range=y_range)

    # Scatter train and test individually to toggle them
    train_view = CDSView(source=source,
                         filters=[GroupFilter(column_name='instance_set', group='train')])
    test_view = CDSView(source=source,
                        filters=[GroupFilter(column_name='instance_set', group='test')])
    train = p.scatter(x='x', y='y', source=source, view=train_view, color='color')
    test = p.scatter(x='x', y='y', source=source, view=test_view, color='color')

    p.xaxis.axis_label, p.yaxis.axis_label = 'principal component 1', 'principal component 2'
    p.xaxis.axis_label_text_font_size = p.yaxis.axis_label_text_font_size = "15pt"

    train_test_callback = CustomJS(args=dict(source=source, train_view=train,
                                             test_view=test), code="""
        var data = source.data;
        if (cb_obj.active == 0) {
            train_view.visible = true;
            test_view.visible = true;
        } else if (cb_obj.active == 1) {
            train_view.visible = true;
            test_view.visible = false;
        } else {
            train_view.visible = false;
            test_view.visible = true;
        }
        """)
    train_test_radio_button = RadioButtonGroup(labels=["all", "train", "test"],
                                               active=0, callback=train_test_callback)

    # Export and return
    if self.output_dir:
        path = os.path.join(self.output_dir, "content/images/algorithm_footprint.png")
        export_bokeh(p, path, self.logger)

    layout = column(p, row(widgetbox(def_inc_radio_button),
                           widgetbox(train_test_radio_button)))
    return layout
def __init__(self, default: Configuration, incumbent: Configuration, rh: RunHistory,
             train: List[str], test: List[str], cutoff, output_dir: str):
    """
    Plot the cumulative distribution functions for the given configurations;
    plots will share the y-axis and, if desired, the x-axis. Saves plot to file.

    Parameters
    ----------
    default, incumbent: Configuration
        configurations to be compared
    rh: RunHistory
        runhistory to use for cost-estimations
    train, test: List[str]
        lists with corresponding instances
    cutoff: Union[None, int]
        cutoff for target algorithms, if set
    output_dir: str
        directory to save plots in

    The paths to the generated plots are stored in `self.output_fns`.
    """
    self.logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
    self.output_dir = output_dir
    out_fn = os.path.join(output_dir, 'cdf')
    self.logger.info("... plotting eCDF")
    self.logger.debug("Plot CDF to %s_[train|test].png", out_fn)

    def prepare_data(x_data):
        """ Helper function to keep things easy, generates y_data and
        manages x_data-timeouts """
        x_data = sorted(x_data)
        y_data = np.array(range(len(x_data))) / (len(x_data) - 1)
        for idx in range(len(x_data)):
            if (cutoff is not None) and (x_data[idx] >= cutoff):
                x_data[idx] = cutoff
                y_data[idx] = y_data[idx - 1]
        return (x_data, y_data)

    # Generate y_data
    def_costs = get_cost_dict_for_config(rh, default).items()
    inc_costs = get_cost_dict_for_config(rh, incumbent).items()

    output_fns = []
    for insts, name in [(train, 'train'), (test, 'test')]:
        if len(insts) <= 1:
            self.logger.debug("No %s instances, skipping cdf", name)
            continue
        data = [prepare_data(np.array([v for k, v in costs if k in insts]))
                for costs in [def_costs, inc_costs]]
        x, y = (data[0][0], data[1][0]), (data[0][1], data[1][1])
        labels = ['default ' + name, 'incumbent ' + name]
        output_fns.append(plot_cdf(x, y, labels, timeout=cutoff,
                                   out_fn=out_fn + '_{}.png'.format(name)))
    self.output_fns = output_fns