def _get_mean_var_time(self, validator, traj, use_epm, rh):
    """
    Estimate the mean cost and variance of the trajectory's incumbents over
    time, either via an EPM or directly from the validated runhistory.

    Parameters
    ----------
    validator: Validator
        validator (smac-based)
    traj: List[dict]
        trajectory to set in validator; each entry holds the incumbent
        ("incumbent") and its timestamp ("wallclock_time")
    use_epm: bool
        validated or not (no need to use epm if validated)
    rh: RunHistory
        runhistory to train the epm on / to read the costs from

    Returns
    -------
    mean: np.ndarray
        mean cost per incumbent (y-values)
    var: np.ndarray
        variance per incumbent (zeroed here; the variance over multiple
        optimizer runs is computed in an outer loop)
    times: List[float]
        times to plot (x-values)
    configs: List[Configuration]
        incumbents of the trajectory
    """
    if validator:
        validator.traj = traj  # set trajectory
    time, configs = [], []

    if use_epm and not self.block_epm:
        # Collect timestamps and incumbents from the trajectory
        for entry in traj:
            time.append(entry["wallclock_time"])
            configs.append(entry["incumbent"])
        self.logger.debug("Using %d samples (%d distinct) from trajectory.",
                          len(time), len(set(configs)))

        # Initialize EPM
        if validator.epm:  # not log, as the validator-epm is trained on cost, not log-cost
            epm = validator.epm
        else:
            self.logger.debug("No EPM passed! Training new one from runhistory.")
            # Train random forest and transform training data (from given rh).
            # Not using the validator here because we want to plot uncertainties.
            rh2epm = RunHistory2EPM4Cost(num_params=len(self.scenario.cs.get_hyperparameters()),
                                         scenario=self.scenario)
            X, y = rh2epm.transform(rh)
            self.logger.debug("Training model with data of shape X: %s, y: %s",
                              str(X.shape), str(y.shape))
            types, bounds = get_types(self.scenario.cs, self.scenario.feature_array)
            epm = RandomForestWithInstances(self.scenario.cs,
                                            types=types,
                                            bounds=bounds,
                                            seed=self.rng.randint(MAXINT),
                                            instance_features=self.scenario.feature_array,
                                            ratio_features=1.0)
            epm.train(X, y)

        config_array = convert_configurations_to_array(configs)
        mean, var = epm.predict_marginalized_over_instances(config_array)
        # We don't want to show the uncertainty of the model, but the uncertainty
        # over multiple optimizer runs; that variance is computed in an outer loop.
        var = np.zeros(mean.shape)
    else:
        mean, var = [], []
        for entry in traj:
            time.append(entry["wallclock_time"])
            configs.append(entry["incumbent"])
            costs = _cost(configs[-1], rh, rh.get_runs_for_config(configs[-1]))
            if not costs:  # no validated runs for this incumbent -> drop its timestamp
                time.pop()
            else:
                mean.append(np.mean(costs))
                var.append(0)  # No variance over instances
        mean = np.array(mean).reshape(-1, 1)
        var = np.array(var).reshape(-1, 1)

    return mean, var, time, configs
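# Illustrative sketch (not part of the original module): one way the values
# returned by _get_mean_var_time could be consumed, e.g. plotting mean cost
# over wallclock time with an uncertainty band. The matplotlib calls and the
# dummy arrays below are assumptions for demonstration only.
def _example_plot_cost_over_time():
    import numpy as np
    import matplotlib.pyplot as plt
    # Dummy values in the shapes produced above: mean/var are (n, 1),
    # times is a list of wallclock timestamps.
    mean = np.array([[0.90], [0.70], [0.55]])
    var = np.array([[0.010], [0.005], [0.002]])
    times = [10.0, 120.0, 900.0]
    m, s = mean.ravel(), np.sqrt(var.ravel())
    plt.step(times, m, where='post', label='incumbent cost')
    plt.fill_between(times, m - s, m + s, step='post', alpha=0.3)
    plt.xscale('log')
    plt.xlabel('wallclock time [s]')
    plt.ylabel('cost')
    plt.legend()
    plt.show()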
def_dict = def_.get_dictionary()

# Switch it around such that statistics about the default are gathered first
if configs[0] != def_:
    configs[0], configs[1] = configs[1], configs[0]

logging.info('Found %d configs', len(configs))
logging.info('Cost per config:')

# For each config, gather statistics such as mean cost, mean runtime and the
# number of timeouts
for config in configs:
    config_dict = config.get_dictionary()
    costs = np.array(_cost(config, runhist))  # the cost for running on each instance
    runtime = np.mean(np.array(_runtime(config, runhist)))  # the mean runtime
    cost = np.mean(costs)  # the mean cost
    timeouts = np.sum(costs > scenario.cutoff)  # and count the number of timeouts
    default = config == def_

    # This is just cosmetics for the output
    config_str = repr(config).split('\n')  # get the string representation
    num_changed = 0
    for idx, line in enumerate(config_str):
        try:
            name, value = [x.strip() for x in line.split(', ')]
            if name in def_dict:
                changed = config_dict[name] != def_dict[name]
            else: