# Module-level imports these methods rely on (paths follow SMAC3's 0.x
# package layout).
import os
import typing
from typing import Union

from ConfigSpace import Configuration

from smac.optimizer.objective import average_cost
from smac.runhistory.runhistory import RunHistory
from smac.scenario.scenario import Scenario
from smac.stats.stats import Stats
from smac.tae.execute_ta_run import ExecuteTARun
from smac.tae.execute_ta_run_old import ExecuteTARunOld


def validate(self, config_mode: str = 'def',
             instance_mode: str = 'test',
             repetitions: int = 1,
             n_jobs: int = 1,
             backend: str = 'threading',
             runhistory: typing.Union[RunHistory, str, None] = None,
             tae: ExecuteTARun = None):
    """
    Validate configs on instances and save result in runhistory.

    Parameters
    ----------
    config_mode: str
        what configurations to validate, from [def, inc, def+inc, time, all];
        time means evaluation at timesteps 2^-4, 2^-3, 2^-2, 2^-1, 2^0, 2^1, ...
    instance_mode: str
        what instances to use for validation, from [train, test, train+test]
    repetitions: int
        number of repetitions for nondeterministic algorithms
    n_jobs: int
        number of parallel processes used by joblib
    backend: str
        what backend joblib should use for parallel runs
    runhistory: RunHistory or str or None
        runhistory to take data from; a string is interpreted as the path to
        a runhistory-json to be loaded
    tae: ExecuteTARun
        tae to be used; if None, will initialize ExecuteTARunOld

    Returns
    -------
    runhistory: RunHistory
        runhistory with validated runs
    """
    self.logger.debug("Validating configs '%s' on instances '%s', repeating "
                      "%d times with %d parallel runs on backend '%s'.",
                      config_mode, instance_mode, repetitions, n_jobs, backend)

    # Reset runhistory
    self.rh = RunHistory(average_cost)

    # Get relevant configurations and instances
    configs = self._get_configs(config_mode)
    instances = self._get_instances(instance_mode)

    # If the runhistory is given as a string, load it into memory
    if isinstance(runhistory, str):
        fn = runhistory
        runhistory = RunHistory(average_cost)
        runhistory.load_json(fn, self.scen.cs)

    # Get all runs needed as a list
    runs = self.get_runs(configs, instances, repetitions=repetitions,
                         runhistory=runhistory)

    # Create new Stats without limits
    inf_scen = Scenario({'run_obj': self.scen.run_obj,
                         'cutoff_time': self.scen.cutoff,
                         'output_dir': None})
    inf_stats = Stats(inf_scen)
    inf_stats.start_timing()

    # Create TAE
    if not tae:
        tae = ExecuteTARunOld(ta=self.scen.ta,
                              stats=inf_stats,
                              run_obj=self.scen.run_obj,
                              par_factor=self.scen.par_factor,
                              cost_for_crash=self.scen.cost_for_crash)
    else:
        # Inject endless-stats
        tae.stats = inf_stats

    # Validate!
    run_results = self._validate_parallel(tae, runs, n_jobs, backend)

    # The tae returns (status, cost, runtime, additional_info) per run;
    # add every validated run to the RunHistory
    for run, (status, cost, runtime, additional_info) in zip(runs, run_results):
        self.rh.add(config=run['config'], cost=cost, time=runtime,
                    status=status, instance_id=run['inst'], seed=run['seed'],
                    additional_info=additional_info)

    # Save runhistory
    if not self.output.endswith('.json'):
        old = self.output
        self.output = os.path.join(self.output, 'validated_runhistory.json')
        self.logger.debug("Output is \"%s\", changing to \"%s\"!",
                          old, self.output)
    base = os.path.split(self.output)[0]
    if not os.path.exists(base):
        self.logger.debug("Folder (\"%s\") doesn't exist, creating.", base)
        os.makedirs(base)
    self.logger.info("Saving validation-results in %s", self.output)
    self.rh.save_json(self.output)
    return self.rh
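
# A minimal sketch of the runhistory round-trip the method above relies on.
# It only uses the RunHistory API already shown: the average_cost aggregation
# function, load_json with the scenario's configuration space, and save_json.
# The helper name and file names are illustrative, not part of the module.
def _runhistory_io_sketch(scen, fn="runhistory.json"):
    rh = RunHistory(average_cost)
    # Deserializing needs the configuration space to reconstruct the
    # stored configurations
    rh.load_json(fn, scen.cs)
    rh.save_json("validated_runhistory.json")
    return rh
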
def validate(self,
             config_mode: Union[str, typing.List[Configuration]] = 'def',
             instance_mode: Union[str, typing.List[str]] = 'test',
             repetitions: int = 1,
             n_jobs: int = 1,
             backend: str = 'threading',
             runhistory: RunHistory = None,
             tae: ExecuteTARun = None,
             output_fn: str = "",
             ) -> RunHistory:
    """
    Validate configs on instances and save result in runhistory.
    If a runhistory is provided as input, it is important that the validation
    runs on the same or comparable hardware.

    Side effect: if output_fn is specified, saves the runhistory to that
    location.

    Parameters
    ----------
    config_mode: str or list<Configuration>
        string or directly a list of Configuration;
        string from [def, inc, def+inc, wallclock_time, cpu_time, all].
        the time-modes evaluate at cpu- or wallclock-timesteps of
        [max_time/2^0, max_time/2^1, max_time/2^2, ..., default],
        with max_time being the highest recorded time
    instance_mode: str or list<str>
        what instances to use for validation, either from
        [train, test, train+test] or directly a list of instances
    repetitions: int
        number of repetitions for nondeterministic algorithms
    n_jobs: int
        number of parallel processes used by joblib
    backend: str
        what backend joblib should use for parallel runs
    runhistory: RunHistory
        optional, RunHistory-object to reuse runs from
    tae: ExecuteTARun
        tae to be used; if None, will initialize ExecuteTARunOld
    output_fn: str
        path the runhistory is saved to. if the suffix is not '.json', it is
        interpreted as a directory and the filename will be
        'validated_runhistory.json'

    Returns
    -------
    runhistory: RunHistory
        runhistory with validated runs
    """
    self.logger.debug("Validating configs '%s' on instances '%s', repeating "
                      "%d times with %d parallel runs on backend '%s'.",
                      config_mode, instance_mode, repetitions, n_jobs, backend)

    # Get all runs to be evaluated as a list, together with a runhistory
    # pre-filled with any reusable runs
    runs, validated_rh = self._get_runs(config_mode, instance_mode,
                                        repetitions, runhistory)

    # Create new Stats without limits
    inf_scen = Scenario({'run_obj': self.scen.run_obj,
                         'cutoff_time': self.scen.cutoff,
                         'output_dir': ""})
    inf_stats = Stats(inf_scen)
    inf_stats.start_timing()

    # Create TAE
    if not tae:
        tae = ExecuteTARunOld(ta=self.scen.ta,
                              stats=inf_stats,
                              run_obj=self.scen.run_obj,
                              par_factor=self.scen.par_factor,
                              cost_for_crash=self.scen.cost_for_crash)
    else:
        # Inject endless-stats
        tae.stats = inf_stats

    # Validate!
    run_results = self._validate_parallel(tae, runs, n_jobs, backend)

    # The tae returns (status, cost, runtime, additional_info) per run;
    # add every validated run to the RunHistory
    for run, (status, cost, runtime, additional_info) in zip(runs, run_results):
        validated_rh.add(config=run.config, cost=cost, time=runtime,
                         status=status, instance_id=run.inst, seed=run.seed,
                         additional_info=additional_info)

    if output_fn:
        self._save_results(validated_rh, output_fn,
                           backup_fn="validated_runhistory.json")
    return validated_rh
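
# Usage sketch (illustrative, with placeholder file names): a typical
# validation pass over a finished SMAC run. Assumes SMAC's Validator and
# TrajLogger (smac.utils.validate / smac.utils.io.traj_logging), which wrap
# the method above; constructor signatures may differ between SMAC versions.
def _usage_sketch():
    from smac.scenario.scenario import Scenario
    from smac.utils.io.traj_logging import TrajLogger
    from smac.utils.validate import Validator

    scenario = Scenario("scenario.txt")  # placeholder scenario file
    trajectory = TrajLogger.read_traj_aclib_format(fn="traj_aclib2.json",
                                                   cs=scenario.cs)
    validator = Validator(scenario, trajectory, rng=42)
    # Validate default and incumbent configurations on the test instances,
    # two repetitions per <config, instance> pair, four joblib workers;
    # the validated runhistory is also written to the given json file.
    return validator.validate(config_mode='def+inc',
                              instance_mode='test',
                              repetitions=2,
                              n_jobs=4,
                              output_fn="validated_runhistory.json")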