def __init__(self, scenario: Scenario, file_system=LocalFS()):
    """Constructor

    Parameters
    ----------
    scenario : Scenario
    """
    self.file_system = file_system
    self.__scenario = scenario

    self.ta_runs = 0
    self.n_configs = 0
    self.wallclock_time_used = 0
    self.ta_time_used = 0
    self.inc_changed = 0

    # debug stats
    self._n_configs_per_intensify = 0
    self._n_calls_of_intensify = 0
    # exponential moving average
    self._ema_n_configs_per_intensifiy = 0
    self._EMA_ALPHA = 0.2

    self._start_time = None
    self._logger = logging.getLogger(
        self.__module__ + "." + self.__class__.__name__)
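# A minimal sketch of how the exponential-moving-average fields above
# are typically updated; `update_ema` is a hypothetical helper, not a
# method of the original class.
def update_ema(current_ema: float, new_value: float, alpha: float = 0.2) -> float:
    """Blend a new observation into the running average (alpha = _EMA_ALPHA)."""
    return alpha * new_value + (1 - alpha) * current_ema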
def __init__(self,
             aggregate_func: typing.Callable,
             overwrite_existing_runs: bool = False,
             file_system=LocalFS(),
             db_type="sqlite",
             db_args=None,
             db_kwargs=None,
             config_space=None) -> None:
    """Constructor

    Parameters
    ----------
    aggregate_func: callable
        function to aggregate performance across instances
    overwrite_existing_runs: bool
        allows overwriting old results if pairs of
        algorithm-instance-seed were measured multiple times
    file_system: FileSystem
        file system abstraction used for I/O
    db_type: str
        backend for the run history database; only "sqlite" is
        currently supported
    db_args, db_kwargs:
        passed through to RunHistoryDB
    config_space: ConfigurationSpace
        passed through to RunHistoryDB
    """
    if db_type == "sqlite":
        self.db: RunHistoryDB = RunHistoryDB(config_space, self, db_args,
                                             db_kwargs)
    else:
        raise NotImplementedError()
    self.file_system = file_system
    self.logger = PickableLoggerAdapter(
        self.__module__ + "." + self.__class__.__name__)

    # By having the data in a deterministic order we can do useful tests
    # when we serialize the data and can assume it's still in the same
    # order as it was added.
    self.data = collections.OrderedDict()  # type: typing.Dict[RunKey, RunValue]

    # For fast access, we also keep an unordered data structure
    # to get all instance-seed pairs of a configuration.
    self._configid_to_inst_seed = {}  # type: typing.Dict[int, InstSeedKey]

    self.config_ids = {}  # type: typing.Dict[Configuration, str]
    self.ids_config = {}  # type: typing.Dict[str, Configuration]

    # Stores cost for each configuration ID
    self.cost_per_config = {}  # type: typing.Dict[str, float]
    # runs_per_config maps the configuration ID to the number of runs
    # for that configuration and is necessary for computing the moving
    # average.
    self.runs_per_config = {}  # type: typing.Dict[str, int]

    # Store whether a datapoint is "external", which means it was read
    # from a JSON file. Can be chosen to not be written to disk.
    self.external = {}  # type: typing.Dict[RunKey, DataOrigin]

    self.aggregate_func = aggregate_func
    self.overwrite_existing_runs = overwrite_existing_runs
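# A minimal sketch of an aggregate_func, assuming per-instance costs
# arrive as an iterable of floats; the exact signature this RunHistory
# expects of its aggregate_func may differ.
import numpy as np

def average_cost(costs):
    """Aggregate a configuration's per-instance costs by averaging."""
    return float(np.mean(list(costs)))

# Hypothetical construction, assuming the sqlite-backed RunHistoryDB
# accepts the default arguments:
# rh = RunHistory(aggregate_func=average_cost)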
def init_data(self):
    if not self.file_system:
        self.file_system = LocalFS()
    self.ml_task = self.data_manager.ml_task
    # Fall back to a regularized linear model if no meta-learner was
    # set: logistic regression for classification, Lasso otherwise.
    if not self.meta_learner:
        if self.ml_task.mainTask == "classification":
            self.meta_learner = LogisticRegression(
                penalty='l2', solver="lbfgs", multi_class="auto",
                random_state=10)
        else:
            self.meta_learner = Lasso()
    # Accept a single path as a convenience and normalize it to a list.
    if isinstance(self.dataset_paths, str):
        self.dataset_paths = [self.dataset_paths]
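# Sketch of the default meta-learner choice above, assuming standard
# scikit-learn estimators (as the snippet's names suggest);
# `default_meta_learner` is a hypothetical helper for illustration.
from sklearn.linear_model import Lasso, LogisticRegression

def default_meta_learner(main_task: str):
    """Return a regularized linear model matching the task type."""
    if main_task == "classification":
        return LogisticRegression(penalty="l2", solver="lbfgs",
                                  multi_class="auto", random_state=10)
    return Lasso()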
def __init__(self,
             project_path=None,
             file_system=None,
             max_persistent_model=50,
             persistent_mode="fs",
             db_type="sqlite"):
    self.persistent_mode = persistent_mode
    assert self.persistent_mode in ("fs", "db")
    self.db_type = db_type
    self.max_persistent_model = max_persistent_model
    if not file_system:
        file_system = LocalFS()
    self.file_system = file_system
    if not project_path:
        # Default to a timestamped directory under the current working
        # directory. Note: the ":" characters in the timestamp are not
        # portable to Windows paths.
        project_path = os.path.join(
            os.getcwd(),
            "auto-pipeline-" + time.strftime("%Y-%m-%d-%H:%M:%S",
                                             time.localtime()))
    self.project_path = project_path
    self.file_system.mkdir(self.project_path)
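# Minimal usage sketch; `PersistenceManager` is a hypothetical name for
# the class this constructor belongs to, since the snippet does not
# show it. Omitting project_path falls back to a timestamped
# "auto-pipeline-..." directory under the current working directory.
pm = PersistenceManager(project_path="/tmp/my-automl-run",
                        persistent_mode="fs")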
def update_from_json(self,
                     fn: str,
                     cs: ConfigurationSpace,
                     origin: DataOrigin = DataOrigin.EXTERNAL_SAME_INSTANCES,
                     id_set: typing.Optional[set] = None,
                     file_system=LocalFS()):
    """Update the current runhistory by adding new runs from a json file.

    Parameters
    ----------
    fn : str
        File name to load from.
    cs : ConfigSpace
        Instance of configuration space.
    origin : DataOrigin
        What to store as data origin.
    id_set : set, optional
        Set of already-known ids; currently unused in this method.
    file_system : FileSystem
        File system abstraction used to read the file.
    """
    # None replaces the former mutable default ``set()``, which would
    # have been shared across calls.
    if id_set is None:
        id_set = set()
    new_runhistory = RunHistory(self.aggregate_func, file_system=file_system)
    updated_id_set = new_runhistory.load_json(fn, cs)
    self.update(runhistory=new_runhistory, origin=origin)
    return updated_id_set
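# Minimal usage sketch, assuming `rh` is an existing RunHistory, `cs`
# is the matching ConfigurationSpace, and "runhistory.json" was written
# by a previous run:
updated_ids = rh.update_from_json("runhistory.json", cs,
                                  origin=DataOrigin.EXTERNAL_SAME_INSTANCES)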
def __init__(self,
             scenario=None,
             cmd_options: dict = None,
             runtime='local',
             runtime_config=None,
             initial_runs=20,
             filter_callback: typing.Optional[typing.Callable] = None,
             after_run_callback: typing.Optional[typing.Callable] = None,
             db_type="sqlite",
             db_params=None,
             anneal_func=None):
    """Creates a scenario object.

    The output_dir will be "output_dir/run_id/" and if that exists
    already, the old folder and its content will be moved (without any
    checks on whether it's still used by another process) to
    "output_dir/run_id.OLD". If that exists, ".OLD"s will be appended
    until possible.

    Parameters
    ----------
    scenario : str or dict or None
        If str, it will be interpreted as a path to a scenario file.
        If dict, it will be used directly to get all scenario-related
        information. If None, only cmd_options will be used.
    cmd_options : dict
        Options from parsed command line arguments.
    """
    self.logger = logging.getLogger(
        self.__module__ + '.' + self.__class__.__name__)
    if isinstance(anneal_func, str):
        # Note: eval() executes arbitrary code; only trusted strings
        # should be passed here.
        try:
            anneal_func = eval(anneal_func)
        except Exception as e:
            self.logger.error("Cannot eval anneal_func\n" + str(e))
            anneal_func = None
    self.anneal_func = anneal_func
    self.db_params = db_params
    self.db_type = db_type
    self.after_run_callback = after_run_callback
    self.filter_callback = filter_callback
    self.initial_runs = initial_runs
    self.runtime_config: dict = runtime_config
    self.runtime = runtime
    if self.runtime == 'hdfs':
        self.file_system = HDFS(
            self.runtime_config.get('hdfs_url', 'http://0.0.0.0:50070'))
    else:
        self.file_system = LocalFS()
    self.PCA_DIM = 7
    self.in_reader = InputReader()
    self.out_writer = OutputWriter(self.file_system)
    self.output_dir_for_this_run = None
    self._arguments = {}
    self._arguments.update(CMDReader().scen_cmd_actions)

    if scenario is None:
        scenario = {}
    if isinstance(scenario, str):
        scenario_fn = scenario
        scenario = {}
        if cmd_options:
            scenario.update(cmd_options)
        cmd_reader = CMDReader()
        self.logger.info("Reading scenario file: %s", scenario_fn)
        smac_args_, scen_args_ = cmd_reader.read_smac_scenario_dict_cmd(
            scenario, scenario_fn)
        scenario = {}
        scenario.update(vars(smac_args_))
        scenario.update(vars(scen_args_))
    elif isinstance(scenario, dict):
        scenario = copy.copy(scenario)
        if cmd_options:
            scenario.update(cmd_options)
        cmd_reader = CMDReader()
        smac_args_, scen_args_ = cmd_reader.read_smac_scenario_dict_cmd(
            scenario)
        scenario = {}
        scenario.update(vars(smac_args_))
        scenario.update(vars(scen_args_))
    else:
        raise TypeError("Wrong type of scenario (str or dict are supported)")

    for arg_name, arg_value in scenario.items():
        setattr(self, arg_name, arg_value)

    self._transform_arguments()

    self.logger.debug("SMAC and Scenario Options:")
    if cmd_options:
        for arg_name, arg_value in cmd_options.items():
            if isinstance(arg_value, (int, str, float)):
                self.logger.debug("%s = %s" % (arg_name, arg_value))
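# Minimal usage sketch; the scenario keys below follow common SMAC
# conventions and are assumptions, not verified against this fork.
scenario = Scenario({"run_obj": "quality", "runcount-limit": 50},
                    runtime="local", initial_runs=20)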
def __init__(self, file_system=LocalFS()):
    self.file_system = file_system
    self.logger = PickableLoggerAdapter(
        name=self.__module__ + "." + self.__class__.__name__)
class TrajLogger(object):
    """Writes trajectory log files and creates the output directory if it
    does not already exist.

    Attributes
    ----------
    stats
    logger
    output_dir
    aclib_traj_fn
    old_traj_fn
    trajectory
    """

    static_file_system = LocalFS()

    def __init__(self, output_dir, stats, file_system=LocalFS()):
        """Constructor

        Parameters
        ----------
        output_dir: str
            directory for logging (or None to disable logging)
        stats: Stats()
            Stats object
        """
        self.file_system = file_system
        TrajLogger.static_file_system = file_system
        self.stats = stats
        self.logger = logging.getLogger(
            self.__module__ + "." + self.__class__.__name__)

        self.output_dir = output_dir
        if output_dir is None or output_dir == "":
            self.output_dir = None
            self.logger.info("No output directory for trajectory logging "
                             "specified -- trajectory will not be logged.")
        else:
            if not self.file_system.isdir(output_dir):
                try:
                    self.file_system.mkdir(output_dir)
                except OSError:
                    self.logger.debug("Could not make output directory.",
                                      exc_info=1)
                    raise OSError("Could not make output directory: "
                                  "{}.".format(output_dir))

            self.old_traj_fn = self.file_system.join(output_dir,
                                                     "traj_old.csv")
            if not self.file_system.isfile(self.old_traj_fn):
                txt = ('"CPU Time Used","Estimated Training Performance",'
                       '"Wallclock Time","Incumbent ID",'
                       '"Automatic Configurator (CPU) Time",'
                       '"Configuration..."\n')
                self.file_system.write_txt(self.old_traj_fn, txt)

            self.aclib_traj_fn = self.file_system.join(output_dir,
                                                       "traj_aclib2.json")

        self.trajectory = []

    def add_entry(self, train_perf: float, incumbent_id: int,
                  incumbent: Configuration):
        """Adds entries to the trajectory files (several formats), using
        the same timestamps for each entry.

        Parameters
        ----------
        train_perf: float
            estimated performance on training (sub)set
        incumbent_id: int
            id of incumbent
        incumbent: Configuration()
            current incumbent configuration
        """
        ta_runs = self.stats.ta_runs
        ta_time_used = self.stats.ta_time_used
        wallclock_time = self.stats.get_used_wallclock_time()
        self.trajectory.append(
            TrajEntry(train_perf, incumbent_id, incumbent,
                      ta_runs, ta_time_used, wallclock_time))
        if self.output_dir is not None:
            self._add_in_old_format(train_perf, incumbent_id, incumbent,
                                    ta_time_used, wallclock_time)
            self._add_in_aclib_format(train_perf, incumbent_id, incumbent,
                                      ta_time_used, wallclock_time)

    def _add_in_old_format(self, train_perf: float, incumbent_id: int,
                           incumbent: Configuration, ta_time_used: float,
                           wallclock_time: float):
        """Adds entries to the old SMAC2-like trajectory file

        Parameters
        ----------
        train_perf: float
            Estimated performance on training (sub)set
        incumbent_id: int
            Id of incumbent
        incumbent: Configuration()
            Current incumbent configuration
        ta_time_used: float
            CPU time used by the target algorithm
        wallclock_time: float
            Wallclock time used so far
        """
        conf = []
        for p in incumbent:
            if incumbent.get(p) is not None:
                conf.append("%s='%s'" % (p, repr(incumbent[p])))

        txt = ("%f, %f, %f, %d, %f, %s\n" % (
            ta_time_used, train_perf, wallclock_time, incumbent_id,
            wallclock_time - ta_time_used, ", ".join(conf)))
        self.file_system.write_txt(self.old_traj_fn, txt, append=True)

    def _add_in_aclib_format(self, train_perf: float, incumbent_id: int,
                             incumbent: Configuration, ta_time_used: float,
                             wallclock_time: float):
        """Adds entries to the AClib2-like trajectory file

        Parameters
        ----------
        train_perf: float
            Estimated performance on training (sub)set
        incumbent_id: int
            Id of incumbent
        incumbent: Configuration()
            Current incumbent configuration
        ta_time_used: float
            CPU time used by the target algorithm
        wallclock_time: float
            Wallclock time used so far
        """
        conf = []
        for p in incumbent:
            if incumbent.get(p) is not None:
                conf.append("%s='%s'" % (p, repr(incumbent[p])))

        traj_entry = {"cpu_time": ta_time_used,
                      "total_cpu_time": None,  # TODO: fix this
                      "wallclock_time": wallclock_time,
                      "evaluations": self.stats.ta_runs,
                      "cost": train_perf,
                      "incumbent": conf}
        try:
            traj_entry["origin"] = incumbent.origin
        except AttributeError:
            traj_entry["origin"] = "UNKNOWN"
        txt = json.dumps(traj_entry) + "\n"
        self.file_system.write_txt(self.aclib_traj_fn, txt, True)

    @staticmethod
    def read_traj_aclib_format(fn: str, cs: ConfigurationSpace):
        """Reads a trajectory from file

        Parameters
        ----------
        fn: str
            Filename of a trajectory saved in the
            self._add_in_aclib_format format
        cs: ConfigurationSpace
            Configuration space used to translate the dict object into
            a Configuration object

        Returns
        -------
        trajectory: list
            Each entry in the list is a dictionary of the form
            {
            "cpu_time": float,
            "total_cpu_time": None,  # TODO
            "wallclock_time": float,
            "evaluations": int,
            "cost": float,
            "incumbent": Configuration
            }
        """
        trajectory = []
        fp = TrajLogger.static_file_system.read_txt(fn).splitlines()
        for line in fp:
            entry = json.loads(line)
            entry["incumbent"] = TrajLogger._convert_dict_to_config(
                entry["incumbent"], cs=cs)
            trajectory.append(entry)
        return trajectory

    @staticmethod
    def _convert_dict_to_config(config_list: typing.List[str],
                                cs: ConfigurationSpace):  # CAN BE DONE IN CONFIGSPACE
        """Since a configuration is saved as a list of "str='str'"
        entries, we have to try to figure out the type (int, float, str)
        of each parameter value.

        Parameters
        ----------
        config_list: typing.List[str]
            Configuration as a list of "str='str'"
        cs: ConfigurationSpace
            Configuration space used to translate the dict object into
            a Configuration object
        """
        config_dict = {}
        for param in config_list:
            # Split only on the first "=" so values containing "=" survive.
            k, v = param.split("=", 1)
            v = v.strip("'")
            hp = cs.get_hyperparameter(k)
            if isinstance(hp, FloatHyperparameter):
                v = float(v)
            elif isinstance(hp, IntegerHyperparameter):
                v = int(v)
            config_dict[k] = v

        config = Configuration(configuration_space=cs, values=config_dict)
        config.origin = "External Trajectory"

        return config
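# Minimal usage sketch, assuming a previous run wrote
# "smac-output/traj_aclib2.json" and `cs` matches the hyperparameters
# logged there:
trajectory = TrajLogger.read_traj_aclib_format(
    "smac-output/traj_aclib2.json", cs)
best_incumbent = trajectory[-1]["incumbent"]  # most recent incumbent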