def __init__(self, config_space: ConfigurationSpace, runhistory, db_type="sqlite",
             db_params=frozendict(), db_table_name="runhistory"):
    """Open the database connection and build the run-history table model.

    Parameters
    ----------
    config_space : ConfigurationSpace
        Stored as ``self.config_space``.
    runhistory
        Stored as ``self.runhistory``.
    db_type : str
        Backend name. A JSON field is available for ``"sqlite"``,
        ``"postgresql"`` and ``"mysql"``.
    db_params : mapping, optional
        Keyword arguments forwarded to the database class returned by
        ``get_db_class_by_db_type``. ``None`` means "no parameters".
    db_table_name : str
        Stored as ``self.db_table_name``.

    Raises
    ------
    NotImplementedError
        If no JSON field is known for ``db_type``.
    """
    self.db_table_name = db_table_name
    self.runhistory = runhistory
    self.db_type = db_type
    # ``None`` cannot be unpacked with ``**`` below, so map it to an empty
    # mapping instead of failing with a TypeError.
    self.db_params = frozendict() if db_params is None else db_params
    self.Datebase = get_db_class_by_db_type(self.db_type)
    self.db: pw.Database = self.Datebase(**self.db_params)
    self.logger = PickableLoggerAdapter(__name__)
    # --JSONField-----------------------------------------
    # Imported lazily so only the extension of the selected backend is needed.
    if self.db_type == "sqlite":
        from playhouse.sqlite_ext import JSONField
    elif self.db_type == "postgresql":
        from playhouse.postgres_ext import JSONField
    elif self.db_type == "mysql":
        from playhouse.mysql_ext import JSONField
    else:
        # Fail here with a clear message rather than with an AttributeError
        # on ``self.JSONField`` once the model is built.
        raise NotImplementedError(
            f"No JSONField available for db_type={self.db_type!r}")
    self.JSONField = JSONField
    # -----------------------------------------------------
    self.Model: pw.Model = self.get_model()
    self.config_space: ConfigurationSpace = config_space
def __init__(
    self,
    aggregate_func: typing.Callable,
    overwrite_existing_runs: bool = False,
    file_system=LocalFS(),
    db_type="sqlite",
    db_args=None,
    db_kwargs=None,
    config_space=None
) -> None:
    """Constructor

    Parameters
    ----------
    aggregate_func: callable
        function to aggregate perf across instances
    overwrite_existing_runs: bool
        allows to overwrites old results if pairs of
        algorithm-instance-seed were measured
        multiple times
    file_system
        object stored as ``self.file_system``
    db_type: str
        only ``"sqlite"`` is accepted; anything else raises
        ``NotImplementedError``
    db_args, db_kwargs
        forwarded positionally to ``RunHistoryDB``
    config_space
        forwarded to ``RunHistoryDB``
    """
    # Guard clause: every backend other than sqlite is rejected up front.
    if db_type != "sqlite":
        raise NotImplementedError()
    # NOTE(review): db_args / db_kwargs land in the third and fourth
    # positional slots of RunHistoryDB -- confirm these match the
    # parameters RunHistoryDB expects there.
    self.db: RunHistoryDB = RunHistoryDB(config_space, self, db_args, db_kwargs)

    self.file_system = file_system
    logger_name = "{}.{}".format(self.__module__, self.__class__.__name__)
    self.logger = PickableLoggerAdapter(logger_name)

    self.aggregate_func = aggregate_func
    self.overwrite_existing_runs = overwrite_existing_runs

    # Insertion-ordered so that serialized data keeps the order in which
    # runs were added (useful for deterministic tests).
    self.data = collections.OrderedDict()  # type: typing.Dict[RunKey, RunValue]

    # Unordered lookup of all instance-seed pairs per configuration id.
    self._configid_to_inst_seed = {}  # type: typing.Dict[int, InstSeedKey]

    # Two-way mapping between configurations and their ids.
    self.config_ids = {}  # type: typing.Dict[Configuration, str]
    self.ids_config = {}  # type: typing.Dict[str, Configuration]

    # Cost per configuration id.
    self.cost_per_config = {}  # type: typing.Dict[str, float]
    # Number of runs per configuration id; needed for the moving average.
    self.runs_per_config = {}  # type: typing.Dict[str, int]

    # Origin of each datapoint; "external" ones were read from a JSON file
    # and can be left out when writing to disk.
    self.external = {}  # type: typing.Dict[RunKey, DataOrigin]
def __init__(self, config_space: ConfigurationSpace, runhistory, db_type="sqlite",
             db_params=frozendict(), db_table_name="runhistory"):
    """Open the database connection and build the run-history table model.

    Parameters
    ----------
    config_space : ConfigurationSpace
        Stored as ``self.config_space``.
    runhistory
        Stored as ``self.runhistory``.
    db_type : str
        Backend name, passed to ``get_db_class_by_db_type`` and
        ``get_JSONField``.
    db_params : mapping, optional
        Keyword arguments forwarded to the database class. ``None`` means
        "no parameters".
    db_table_name : str
        Stored as ``self.db_table_name``.
    """
    self.db_table_name = db_table_name
    self.runhistory = runhistory
    self.db_type = db_type
    # ``None`` cannot be unpacked with ``**`` below, so map it to an empty
    # mapping instead of failing with a TypeError.
    self.db_params = frozendict() if db_params is None else db_params
    self.Datebase = get_db_class_by_db_type(self.db_type)
    self.db: pw.Database = self.Datebase(**self.db_params)
    self.logger = PickableLoggerAdapter(__name__)
    # --JSONField-----------------------------------------
    self.JSONField = get_JSONField(self.db_type)
    # -----------------------------------------------------
    # The model is built last because get_model() is called after the
    # connection and JSON field are in place.
    self.Model: pw.Model = self.get_model()
    self.config_space: ConfigurationSpace = config_space
def __init__(self, model: AbstractEPM):
    """Store the model and set up a class-specific logger.

    Parameters
    ----------
    model : AbstractEPM
        Models the objective function.
    """
    # Logger is named "<module>.<class name>".
    logger_name = "{}.{}".format(self.__module__, self.__class__.__name__)
    self.logger = PickableLoggerAdapter(logger_name)
    self.model = model
def __init__(
    self,
    configspace: ConfigurationSpace,
    types: np.ndarray,
    bounds: typing.List[typing.Tuple[float, float]],
    seed: int,
    instance_features: np.ndarray = None,
    pca_components: float = None,
):
    """Constructor

    Parameters
    ----------
    configspace : ConfigurationSpace
        Configuration space to tune for.
    types : np.ndarray (D)
        Number of categorical values per input dimension; the i-th entry
        belongs to the i-th dimension. For two dimensions where the first
        has 3 categorical choices and the second is continuous, pass
        np.array([3, 0]). Counting starts at 0.
    bounds : list
        Bounds of the input dimensions: (lower, upper) for continuous
        dims; (n_cat, np.nan) for categorical dims.
    seed : int
        The seed that is passed to the model library.
    instance_features : np.ndarray (I, K)
        The K dimensional instance features of the I different instances.
    pca_components : float
        Number of components to keep when PCA is used to reduce the
        dimensionality of the instance features. PCA is only set up when
        the number of features exceeds this value.
    """
    self.configspace = configspace
    self.seed = seed
    self.instance_features = instance_features
    self.pca_components = pca_components

    # Number of instance features; zero when no features were given.
    self.n_feats = 0 if instance_features is None else instance_features.shape[1]
    self.n_params = None  # will be updated on train()

    # PCA and scaler exist only when there are more features than
    # requested components.
    reduce_features = self.pca_components and self.n_feats > self.pca_components
    self.pca = PCA(n_components=self.pca_components) if reduce_features else None
    self.scaler = MinMaxScaler() if reduce_features else None

    # Never use a lower variance than this
    self.var_threshold = VERY_SMALL_NUMBER

    self.bounds = bounds
    self.types = types
    # Pristine copy used to reset the type array at every call to train()
    self._initial_types = types.copy()

    logger_name = "{}.{}".format(self.__module__, self.__class__.__name__)
    self.logger = PickableLoggerAdapter(logger_name)
class RunHistory(object):
    """Container for target algorithm run information.

    **Note:** Guaranteed to be picklable.

    Attributes
    ----------
    data : collections.OrderedDict()
        Maps RunKey -> RunValue, in insertion order
    config_ids : dict
        Maps config -> id
    ids_config : dict
        Maps id -> config
    cost_per_config : dict
        Maps config_id -> cost
    runs_per_config : dict
        Maps config_id -> number of runs
    aggregate_func
    overwrite_existing_runs
    """

    def __init__(
        self,
        aggregate_func: typing.Callable,
        overwrite_existing_runs: bool = False,
        file_system=LocalFS(),
        config_space=None,
        db_type="sqlite",
        db_params=None,
    ) -> None:
        """Constructor

        Parameters
        ----------
        aggregate_func: callable
            function to aggregate perf across instances
        overwrite_existing_runs: bool
            allows to overwrites old results if pairs of
            algorithm-instance-seed were measured
            multiple times
        file_system
            object providing ``read_txt`` / ``write_txt``; used by
            ``save_json`` and ``load_json``
        config_space
            forwarded to ``RunHistoryDB``
        db_type: str
            forwarded to ``RunHistoryDB``
        db_params
            forwarded to ``RunHistoryDB``; when ``None`` the default of
            ``RunHistoryDB`` is used
        """
        # RunHistoryDB unpacks db_params with ``**``; forwarding ``None``
        # would raise a TypeError, so fall back to its own default.
        if db_params is None:
            self.db: RunHistoryDB = RunHistoryDB(config_space, self, db_type)
        else:
            self.db: RunHistoryDB = RunHistoryDB(config_space, self, db_type, db_params)
        self.file_system = file_system
        self.logger = PickableLoggerAdapter(self.__module__ + "." + self.__class__.__name__)

        # By having the data in a deterministic order we can do useful tests
        # when we serialize the data and can assume it's still in the same
        # order as it was added.
        self.data = collections.OrderedDict()  # type: typing.Dict[RunKey, RunValue]

        # for fast access, we have also an unordered data structure
        # to get all instance seed pairs of a configuration
        self._configid_to_inst_seed = {}  # type: typing.Dict[str, typing.List[InstSeedKey]]

        self.config_ids = {}  # type: typing.Dict[Configuration, str]
        self.ids_config = {}  # type: typing.Dict[str, Configuration]

        # Stores cost for each configuration ID
        self.cost_per_config = {}  # type: typing.Dict[str, float]
        # runs_per_config maps the configuration ID to the number of runs for that configuration
        # and is necessary for computing the moving average
        self.runs_per_config = {}  # type: typing.Dict[str, int]

        # Store whether a datapoint is "external", which means it was read from
        # a JSON file. Can be chosen to not be written to disk
        self.external = {}  # type: typing.Dict[RunKey, DataOrigin]

        self.aggregate_func = aggregate_func
        self.overwrite_existing_runs = overwrite_existing_runs

    def add(self, config: Configuration, cost: float, time: float,
            status: StatusType, instance_id: str = None,
            seed: int = None,
            additional_info: dict = None,
            origin: DataOrigin = DataOrigin.INTERNAL):
        """Adds a data of a new target algorithm (TA) run;
        it will update data if the same key values are used
        (config, instance_id, seed)

        Parameters
        ----------
            config : dict (or other type -- depending on config space module)
                Parameter configuration
            cost: float
                Cost of TA run (will be minimized)
            time: float
                Runtime of TA run
            status: StatusType
                Status of the run
            instance_id: str
                String representing an instance (default: None);
                any falsy value is stored as None
            seed: int
                Random seed used by TA (default: None)
            additional_info: dict
                Additional run infos (could include further returned
                information from TA or fields such as start time and host_id)
            origin: DataOrigin
                Defines how data will be used.
        """
        if not instance_id:
            instance_id = None

        config_id = self.config_ids.get(config)
        if config_id is None:  # it's a new config
            new_id = get_id_of_config(config)
            self.config_ids[config] = new_id
            config_id = self.config_ids.get(config)
            self.ids_config[new_id] = config

        k = RunKey(config_id, instance_id, seed)
        v = RunValue(cost, time, status, additional_info)

        # Each runkey is supposed to be used only once. Repeated tries to add
        # the same runkey will be ignored silently if not capped.
        if self.overwrite_existing_runs or self.data.get(k) is None:
            self._add(k, v, status, origin)
        elif status != StatusType.CAPPED and self.data[k].status == StatusType.CAPPED:
            # overwrite capped runs with uncapped runs
            self._add(k, v, status, origin)
        elif status == StatusType.CAPPED and self.data[k].status == StatusType.CAPPED \
                and cost > self.data[k].cost:
            # overwrite if censored with a larger cutoff
            self._add(k, v, status, origin)

    def _add(self, k: RunKey, v: RunValue, status: StatusType,
             origin: DataOrigin):
        """Store one run and update the derived lookup and cost structures.

        The instance-seed lookup and the per-configuration cost are only
        updated for uncapped runs whose origin is ``INTERNAL`` or
        ``EXTERNAL_SAME_INSTANCES``.

        Parameters
        ----------
        k : RunKey
            Key of the run.
        v : RunValue
            Value of the run.
        status : StatusType
            Status of the run.
        origin : DataOrigin
            Origin recorded in ``self.external``.
        """
        self.data[k] = v
        self.external[k] = origin

        if origin in (DataOrigin.INTERNAL, DataOrigin.EXTERNAL_SAME_INSTANCES) \
                and status != StatusType.CAPPED:
            # also add to fast data structure
            is_k = InstSeedKey(k.instance_id, k.seed)
            inst_seeds = self._configid_to_inst_seed.setdefault(k.config_id, [])
            if is_k not in inst_seeds:
                inst_seeds.append(is_k)

            if not self.overwrite_existing_runs:
                # assumes an average across runs as cost function aggregation
                self.incremental_update_cost(self.ids_config[k.config_id], v.cost)
            else:
                self.update_cost(config=self.ids_config[k.config_id])

    def update_cost(self, config: Configuration):
        """Store the performance of a configuration across the instances in
        self.cost_per_config and also updates self.runs_per_config;
        uses self.aggregate_func

        Parameters
        ----------
        config: Configuration
            configuration to update cost based on all runs in runhistory
        """
        inst_seeds = set(self.get_runs_for_config(config))
        perf = self.aggregate_func(config, self, inst_seeds)
        config_id = self.config_ids[config]
        self.cost_per_config[config_id] = perf
        self.runs_per_config[config_id] = len(inst_seeds)

    def compute_all_costs(self, instances: typing.List[str] = None):
        """Computes the cost of all configurations from scratch and overwrites
        self.cost_per_config and self.runs_per_config accordingly;

        Parameters
        ----------
        instances: typing.List[str]
            list of instances; if given, cost is only computed wrt to this
            instance set
        """
        self.cost_per_config = {}
        self.runs_per_config = {}
        for config, config_id in self.config_ids.items():
            inst_seeds = set(self.get_runs_for_config(config))
            if instances is not None:
                inst_seeds = [x for x in inst_seeds if x.instance in instances]

            if inst_seeds:  # can be empty if never saw any runs on <instances>
                perf = self.aggregate_func(config, self, inst_seeds)
                self.cost_per_config[config_id] = perf
                self.runs_per_config[config_id] = len(inst_seeds)

    def incremental_update_cost(self, config: Configuration, cost: float):
        """Incrementally updates the performance of a configuration by using a
        moving average;

        Parameters
        ----------
        config: Configuration
            configuration to update cost based on all runs in runhistory
        cost: float
            cost of new run of config
        """
        config_id = self.config_ids[config]
        n_runs = self.runs_per_config.get(config_id, 0)
        old_cost = self.cost_per_config.get(config_id, 0.)
        self.cost_per_config[config_id] = ((old_cost * n_runs) + cost) / (n_runs + 1)
        self.runs_per_config[config_id] = n_runs + 1

    def get_cost(self, config: Configuration):
        """Returns empirical cost for a configuration; uses self.cost_per_config

        Parameters
        ----------
        config: Configuration

        Returns
        -------
        cost: float
            Computed cost for configuration; ``np.nan`` if no cost is stored

        Raises
        ------
        KeyError
            If the configuration is not known to this runhistory.
        """
        config_id = self.config_ids[config]
        return self.cost_per_config.get(config_id, np.nan)

    def get_runs_for_config(self, config: Configuration):
        """Return all runs (instance seed pairs) for a configuration.

        Parameters
        ----------
        config : Configuration from ConfigSpace
            Parameter configuration

        Returns
        -------
        instance_seed_pairs : list<tuples of instance, seed>
        """
        config_id = self.config_ids.get(config)
        return self._configid_to_inst_seed.get(config_id, [])

    def get_instance_costs_for_config(self, config: Configuration):
        """ Returns the average cost per instance (across seeds)
            for a configuration

            Parameters
            ----------
            config : Configuration from ConfigSpace
                Parameter configuration

            Returns
            -------
            cost_per_inst: dict<instance name<str>, cost<float>>
        """
        config_id = self.config_ids.get(config)
        runs_ = self._configid_to_inst_seed.get(config_id, [])
        costs_by_inst = {}
        for inst, seed in runs_:
            run_value = self.data[RunKey(config_id, inst, seed)]
            costs_by_inst.setdefault(inst, []).append(run_value.cost)
        return {inst: np.mean(costs) for inst, costs in costs_by_inst.items()}

    def get_all_configs(self):
        """Return all configurations in this RunHistory object

        Returns
        -------
            parameter configurations: list
        """
        return list(self.config_ids.keys())

    def empty(self):
        """Check whether or not the RunHistory is empty.

        Returns
        -------
        emptiness: bool
            True if no runs have been added to the RunHistory,
            False otherwise
        """
        return len(self.data) == 0

    def save_json(self, fn: str = "runhistory.json", save_external: bool = False):
        """
        saves runhistory on disk

        Parameters
        ----------
        fn : str
            file name
        save_external : bool
            Whether to save external data in the runhistory file.
        """
        # A missing seed is serialized as 0, a missing instance as None.
        data = [
            (
                [
                    k.config_id,
                    str(k.instance_id) if k.instance_id is not None else None,
                    int(k.seed) if k.seed is not None else 0,
                ],
                list(v),
            )
            for k, v in self.data.items()
            if save_external or self.external[k] == DataOrigin.INTERNAL
        ]
        # Only configurations that actually appear in the saved runs are written.
        config_ids_to_serialize = {entry[0][0] for entry in data}
        configs = {
            id_: conf.get_dictionary()
            for id_, conf in self.ids_config.items()
            if id_ in config_ids_to_serialize
        }
        config_origins = {
            id_: conf.origin
            for id_, conf in self.ids_config.items()
            if (id_ in config_ids_to_serialize and conf.origin is not None)
        }
        txt = json.dumps(
            {
                "data": data,
                "config_origins": config_origins,
                "configs": configs
            },
            cls=EnumEncoder,
            indent=2)
        self.file_system.write_txt(fn, txt)

    def load_json(self, fn: str, cs: ConfigurationSpace):
        """Load a runhistory in json representation from disk.

        Replaces the known configurations (``ids_config`` / ``config_ids``)
        with the ones from the file and then adds the stored runs through
        ``add``. If the file cannot be read or parsed, a warning is logged
        and nothing is changed.

        Parameters
        ----------
        fn : str
            file name to load from
        cs : ConfigSpace
            instance of configuration space
        """
        try:
            txt = self.file_system.read_txt(fn)
            all_data = json.loads(txt, object_hook=StatusType.enum_hook)
        except Exception as e:
            self.logger.warning(
                'Encountered exception %s while reading runhistory from %s. '
                'Not adding any runs!',
                e,
                fn,
            )
            return

        config_origins = all_data.get("config_origins", {})

        self.ids_config = {
            id_: Configuration(cs, values=values, origin=config_origins.get(id_, None))
            for id_, values in all_data["configs"].items()
        }
        self.config_ids = {config: id_ for id_, config in self.ids_config.items()}

        self._n_id = len(self.config_ids)

        # important to use add method to use all data structure correctly
        for k, v in all_data["data"]:
            id_ = k[0]
            # Runs that reference a configuration missing from the file are skipped.
            if id_ in self.ids_config:
                self.add(config=self.ids_config[id_],
                         cost=float(v[0]),
                         time=float(v[1]),
                         status=StatusType(v[2]),
                         instance_id=k[1],
                         seed=int(k[2]),
                         additional_info=v[3])

    def update_from_json(
            self, fn: str, cs: ConfigurationSpace,
            origin: DataOrigin = DataOrigin.EXTERNAL_SAME_INSTANCES,
            id_set: set = set(),
            file_system=LocalFS()):
        """Update the current runhistory by adding new runs from a json file.

        Parameters
        ----------
        fn : str
            File name to load from.
        cs : ConfigSpace
            Instance of configuration space.
        origin : DataOrigin
            What to store as data origin.
        id_set : set
            Not used by this method.
        file_system
            File system object used to read ``fn``.

        Returns
        -------
        The return value of ``load_json`` (currently always ``None``).
        """
        new_runhistory = RunHistory(self.aggregate_func, file_system=file_system)
        updated_id_set = new_runhistory.load_json(fn, cs)
        self.update(runhistory=new_runhistory, origin=origin)
        return updated_id_set

    def update(self, runhistory: 'RunHistory',
               origin: DataOrigin = DataOrigin.EXTERNAL_SAME_INSTANCES):
        """Update the current runhistory by adding new runs from a RunHistory.

        Parameters
        ----------
        runhistory: RunHistory
            Runhistory with additional data to be added to self
        origin: DataOrigin
            If set to ``INTERNAL`` or ``EXTERNAL_SAME_INSTANCES`` the data
            will be added to the internal data structure
            self._configid_to_inst_seed and be available through
            :meth:`get_runs_for_config`.
        """
        # Configurations might be already known, but by a different ID. This
        # does not matter here because the add() method handles this
        # correctly by assigning an ID to unknown configurations and re-using
        # the ID
        for key, value in runhistory.data.items():
            config_id, instance_id, seed = key
            cost, time, status, additional_info = value
            config = runhistory.ids_config[config_id]
            self.add(config=config, cost=cost, time=time,
                     status=status, instance_id=instance_id,
                     seed=seed, additional_info=additional_info,
                     origin=origin)
class OutputWriter(object):
    """Writing scenario to file."""

    def __init__(self, file_system=LocalFS()):
        """Store the file system abstraction and set up a logger.

        Parameters
        ----------
        file_system
            Object used for all path and file operations of this writer
            (``isdir``, ``mkdir``, ``join``, ``write_txt``, ...).
        """
        self.file_system = file_system
        self.logger = PickableLoggerAdapter(name=self.__module__ + "." + self.__class__.__name__)

    def write_scenario_file(self, scenario):
        """Write scenario to a file (format is compatible with input_reader).
        Will overwrite if file exists. If you have arguments that need special
        parsing when saving, specify so in the _parse_argument-function.
        Creates output-dir if necessesary.

        Parameters
        ----------
        scenario: Scenario
            Scenario to be written to file

        Returns
        -------
        status: False or None
            False indicates that writing process failed

        Raises
        ------
        OSError
            If the output directory cannot be created.
        """
        if scenario.output_dir_for_this_run is None or scenario.output_dir_for_this_run == "":
            scenario.logger.info("No output directory for scenario logging "
                                 "specified -- scenario will not be logged.")
            return False
        # Create output-dir if necessary
        if not self.file_system.isdir(scenario.output_dir_for_this_run):
            scenario.logger.debug("Output directory does not exist! Will be "
                                  "created.")
            try:
                self.file_system.mkdir(scenario.output_dir_for_this_run)
            except OSError as err:
                scenario.logger.debug("Could not make output directory.", exc_info=1)
                raise OSError("Could not make output directory: "
                              "{}.".format(scenario.output_dir_for_this_run)) from err

        # options_dest2name maps scenario._arguments from dest -> name
        options_dest2name = {
            (scenario._arguments[v]['dest'] if scenario._arguments[v]['dest'] else v):
                v.lstrip('-').replace('-', '_')
            for v in scenario._arguments
        }

        # Write all options into "output_dir/scenario.txt"
        path = self.file_system.join(scenario.output_dir_for_this_run, "scenario.txt")
        scenario.logger.debug("Writing scenario-file to {}.".format(path))
        lines = []
        for dest, name in options_dest2name.items():
            # The attribute name is the normalised key; the option name is
            # looked up with the original key so normalisation cannot
            # cause a KeyError.
            attr = dest.lstrip('-').replace('-', '_')
            new_value = self._parse_argument(scenario, attr, getattr(scenario, attr))
            if new_value is not None:
                lines.append("{} = {}\n".format(name, new_value))
        # Write the file once with every option. Calling write_txt per
        # option would leave only the last one if write_txt replaces the
        # file content on each call.
        if lines:
            self.file_system.write_txt(path, "".join(lines))

    def _parse_argument(self, scenario, key: str, value):
        """Some values of the scenario-file need to be changed upon writing,
        such as the 'ta' (target algorithm), due to it's callback. Also,
        the configspace, features, train_inst- and test-inst-lists are saved
        to output_dir, if they exist.

        Parameters:
        -----------
            scenario: Scenario
                Scenario-file to be written
            key: string
                Name of the attribute in scenario-file
            value: Any
                Corresponding attribute

        Returns:
        --------
            new value: string
                The altered value, to be written to file; None if the
                attribute should not be written

        Sideeffects:
        ------------
          - copies files pcs_fn, train_inst_fn, test_inst_fn and feature_fn to
            output if possible, creates the files from attributes otherwise
        """
        if key in ['pcs_fn', 'train_inst_fn', 'test_inst_fn', 'feature_fn']:
            # Copy if file exists, else write to new file
            if value is not None and self.file_system.isfile(value):
                try:
                    new_path = shutil.copy(value, scenario.output_dir_for_this_run)
                except shutil.SameFileError:
                    new_path = value  # File is already in output_dir
                # For .pcs-file, also save with the same basename as json and use json-path!
                if key == 'pcs_fn' and scenario.cs is not None and value.endswith('.pcs'):
                    file_name = self.file_system.splitext(self.file_system.basename(value))[0]
                    new_path = self.file_system.join(scenario.output_dir_for_this_run,
                                                     file_name + '.json')
                    self.save_configspace(scenario.cs, new_path, 'json')
                    scenario.logger.debug("Setting the pcs_fn-attr of written scenario from %s to %s",
                                          value, new_path)
            elif key == 'pcs_fn' and scenario.cs is not None:
                # The pcs file is best effort; the json file written below
                # is the one whose path is returned.
                try:
                    pcs_path = self.file_system.join(scenario.output_dir_for_this_run,
                                                     'configspace.pcs')
                    self.save_configspace(scenario.cs, pcs_path, 'pcs_new')
                except TypeError:
                    self.logger.error("Could not write pcs file to disk."
                                      " ConfigSpace not compatible with (new) pcs format.")
                new_path = self.file_system.join(scenario.output_dir_for_this_run,
                                                 'configspace.json')
                self.save_configspace(scenario.cs, new_path, 'json')
            elif key == 'train_inst_fn' and scenario.train_insts != [None]:
                new_path = self.file_system.join(scenario.output_dir_for_this_run,
                                                 'train_insts.txt')
                self.write_inst_file(scenario.train_insts, new_path)
            elif key == 'test_inst_fn' and scenario.test_insts != [None]:
                new_path = self.file_system.join(scenario.output_dir_for_this_run,
                                                 'test_insts.txt')
                self.write_inst_file(scenario.test_insts, new_path)
            elif key == 'feature_fn' and scenario.feature_dict != {}:
                new_path = self.file_system.join(scenario.output_dir_for_this_run,
                                                 'features.txt')
                self.write_inst_features_file(scenario.n_features,
                                              scenario.feature_dict, new_path)
            else:
                return None
            # New value -> new path
            return new_path
        elif key == 'ta' and value is not None:
            # Reversing the callback on 'ta' (shlex.split)
            return " ".join(value)
        elif key in ['train_insts', 'test_insts', 'cs', 'feature_dict']:
            # No need to log, recreated from files
            return None
        else:
            return value

    def write_inst_file(self, insts: typing.List[str], fn: str):
        """Writes instance-list to file.

        Parameters
        ----------
            insts: list<string>
                Instance list to be written, one instance per line
            fn: string
                Output path
        """
        self.file_system.write_txt(fn, "\n".join(insts))

    def write_inst_features_file(self, n_features: int, feat_dict, fn: str):
        """Writes features to file.

        Parameters
        ----------
            n_features: int
                Number of features
            feat_dict: dict
                Features to be written, keyed by instance
            fn: string
                File name of instance feature file
        """
        header = "Instance, " + ", ".join(
            ["feature" + str(i) for i in range(n_features)]) + "\n"
        body = [", ".join([inst] + [str(f) for f in feat_dict[inst]]) + "\n"
                for inst in feat_dict]
        txt = header + "".join(body)
        self.file_system.write_txt(fn, txt)

    def save_configspace(self, cs: ConfigurationSpace, fn: str, output_format: str):
        """Writing ConfigSpace to file.

        Parameters
        ----------
            cs : ConfigurationSpace
                Config-space to be written
            fn : str
                Output-file-path
            output_format : str
                Output format of the configuration space file. Currently,
                ``json`` and ``pcs_new`` are supported.

        Raises
        ------
        ValueError
            If ``output_format`` is not supported.
        """
        writers = {
            'pcs_new': pcs_new.write,
            'json': json.write
        }
        writer = writers.get(output_format)
        if writer:
            txt = writer(cs)
            self.file_system.write_txt(fn, txt)
        else:
            # Both placeholders need a value; a single non-tuple argument
            # makes the ``%`` operator raise TypeError before the
            # ValueError is ever built.
            raise ValueError(
                "Configuration space output format %s not supported. "
                "Please choose one of %s" % (output_format, sorted(writers))
            )
def __init__(self, file_system=LocalFS()):
    """Store the file system object and create a class-specific logger.

    Parameters
    ----------
    file_system
        Object kept as ``self.file_system``.
    """
    # Logger is named "<module>.<class name>".
    logger_name = "{}.{}".format(self.__module__, self.__class__.__name__)
    self.logger = PickableLoggerAdapter(name=logger_name)
    self.file_system = file_system
class RunHistoryDB():
    """Stores run-history records in a peewee-managed database table and
    feeds records written by other processes back into a runhistory object.
    """

    def __init__(self, config_space: ConfigurationSpace, runhistory, db_type="sqlite",
                 db_params=frozendict(), db_table_name="runhistory"):
        """Open the database connection and create the table model.

        Parameters
        ----------
        config_space : ConfigurationSpace
            Used to rebuild configurations from their JSON form when the
            pickled form cannot be loaded.
        runhistory
            Object whose ``add`` method receives fetched records.
        db_type : str
            Backend name. A JSON field is available for ``"sqlite"``,
            ``"postgresql"`` and ``"mysql"``.
        db_params : mapping, optional
            Keyword arguments forwarded to the database class. ``None``
            means "no parameters".
        db_table_name : str
            Name of the table used by the model.

        Raises
        ------
        NotImplementedError
            If no JSON field is known for ``db_type``.
        """
        self.db_table_name = db_table_name
        self.runhistory = runhistory
        self.db_type = db_type
        # ``None`` cannot be unpacked with ``**`` below, so map it to an
        # empty mapping instead of failing with a TypeError.
        self.db_params = frozendict() if db_params is None else db_params
        self.Datebase = get_db_class_by_db_type(self.db_type)
        self.db: pw.Database = self.Datebase(**self.db_params)
        self.logger = PickableLoggerAdapter(__name__)
        # --JSONField-----------------------------------------
        # Imported lazily so only the extension of the selected backend is needed.
        if self.db_type == "sqlite":
            from playhouse.sqlite_ext import JSONField
        elif self.db_type == "postgresql":
            from playhouse.postgres_ext import JSONField
        elif self.db_type == "mysql":
            from playhouse.mysql_ext import JSONField
        else:
            # Fail here with a clear message rather than with an
            # AttributeError on ``self.JSONField`` inside get_model().
            raise NotImplementedError(
                f"No JSONField available for db_type={self.db_type!r}")
        self.JSONField = JSONField
        # -----------------------------------------------------
        self.Model: pw.Model = self.get_model()
        self.config_space: ConfigurationSpace = config_space

    def get_model(self) -> pw.Model:
        """Define the ``Run_History`` model, create its table and return it.

        Returns
        -------
        pw.Model
            Model class bound to ``self.db`` and ``self.db_table_name``.
        """
        class Run_History(pw.Model):
            run_id = pw.CharField(primary_key=True)
            config_id = pw.CharField(default="")
            # Callable default so every row gets its own dict.
            config = self.JSONField(default=dict)
            # NOTE(review): this column receives ``pickle.dumps(config)``
            # (bytes) although it is declared as a BitField -- confirm
            # whether a BlobField is intended.
            config_bin = pw.BitField(default=0)
            config_origin = pw.TextField(default="")
            cost = pw.FloatField(default=65535)
            time = pw.FloatField(default=0.0)
            instance_id = pw.CharField(default="")
            seed = pw.IntegerField(default=0)
            status = pw.IntegerField(default=0)
            # Holds the JSON text of the additional run information.
            additional_info = pw.CharField(default="")
            origin = pw.IntegerField(default=0)
            weight = pw.FloatField(default=0.0)
            pid = pw.IntegerField(default=os.getpid)
            timestamp = pw.DateTimeField(default=datetime.datetime.now)

            class Meta:
                database = self.db
                table_name = self.db_table_name

        self.db.create_tables([Run_History])
        return Run_History

    def get_run_id(self, instance_id, config_id):
        """Build the primary key ``"<instance_id>-<config_id>"``.

        A ``None`` instance id is treated like the empty string, matching
        the value stored in the ``instance_id`` column.
        """
        if instance_id is None:
            instance_id = ""
        return instance_id + "-" + config_id

    def appointment_config(self, config, instance_id) -> bool:
        """Try to reserve the run of ``config`` on ``instance_id``.

        A placeholder row with ``origin=-1`` is created for the run id.

        Returns
        -------
        bool
            True if the placeholder row was created; False if a row with
            that run id already exists or the insert failed.
        """
        config_id = get_id_of_config(config)
        run_id = self.get_run_id(instance_id, config_id)
        query = self.Model.select().where(self.Model.run_id == run_id)
        if query.exists():
            return False
        try:
            self.Model.create(run_id=run_id, origin=-1)
        except Exception as e:
            # Best effort: any failure counts as "not reserved", but leave
            # a trace instead of swallowing it silently.
            self.logger.debug(f"Could not reserve run_id {run_id}: {e}")
            return False
        return True

    def insert_runhistory(self, config: Configuration, cost: float, time: float,
                          status: StatusType, instance_id: str = "",
                          seed: int = 0,
                          additional_info: dict = frozendict(),
                          origin: DataOrigin = DataOrigin.INTERNAL):
        """Insert one run, or update the existing row with the same run id.

        Parameters
        ----------
        config : Configuration
            Stored both as dictionary and as pickle.
        cost : float
        time : float
        status : StatusType
            Stored by its ``value``.
        instance_id : str
            ``None`` is stored as the empty string.
        seed : int
            ``None`` is stored as 0.
        additional_info : dict
            Stored as JSON text; ``None`` is stored as an empty object.
        origin : DataOrigin
            Stored by its ``value``.
        """
        # Normalise before the run id is built: concatenating ``None``
        # with a string raises a TypeError.
        if instance_id is None:
            instance_id = ""
        # The columns do not declare ``null=True``. A ``None`` value would
        # raise an IntegrityError, which the handler below would mistake
        # for "row already exists".
        if seed is None:
            seed = 0
        config_origin = "" if config.origin is None else config.origin
        config_id = get_id_of_config(config)
        run_id = self.get_run_id(instance_id, config_id)
        # Serialise as JSON because fetch_new_runhistory() reads the column
        # back with json.loads().
        try:
            info_txt = json.dumps(dict(additional_info) if additional_info else {})
        except (TypeError, ValueError) as e:
            self.logger.warning(f"{e}\nadditional_info is not JSON serializable, storing {{}}.")
            info_txt = "{}"
        record = dict(
            run_id=run_id,
            config_id=config_id,
            config=config.get_dictionary(),
            config_origin=config_origin,
            config_bin=pickle.dumps(config),
            cost=cost,
            time=time,
            instance_id=instance_id,
            seed=seed,
            status=status.value,
            additional_info=info_txt,
            origin=origin.value,
        )
        try:
            self.Model.create(**record)
        except pw.IntegrityError:
            # The run id already exists (e.g. reserved by
            # appointment_config): update that row instead.
            self.Model(**record).save()
        self.timestamp = datetime.datetime.now()

    def fetch_new_runhistory(self, is_init=False):
        """Feed stored runs into ``self.runhistory``.

        Parameters
        ----------
        is_init : bool
            If True, rows with a negative origin are deleted first and all
            remaining rows are loaded. Otherwise only rows written by other
            processes (different pid) with non-negative origin are loaded.
        """
        if is_init:
            n_del = self.Model.delete().where(self.Model.origin < 0).execute()
            if n_del > 0:
                self.logger.info(
                    f"Delete {n_del} invalid records in run_history database.")
            query = self.Model.select().where(self.Model.origin >= 0)
        else:
            query = self.Model.select().where(
                self.Model.pid != os.getpid()).where(self.Model.origin >= 0)
        for model in query:
            # SECURITY NOTE: pickle.loads executes code embedded in the
            # stored bytes; the database must only be writable by trusted
            # processes.
            try:
                config = pickle.loads(model.config_bin)
            except Exception as e:
                self.logger.error(
                    f"{e}\nUsing config json instead to build Configuration.")
                config = Configuration(self.config_space, values=model.config,
                                       origin=model.config_origin)
            try:
                additional_info = json.loads(model.additional_info)
            except Exception as e:
                self.logger.error(f"{e}\nSet default to additional_info.")
                additional_info = {}
            self.runhistory.add(config, model.cost, model.time,
                                StatusType(model.status), model.instance_id,
                                model.seed, additional_info,
                                DataOrigin(model.origin))
        self.timestamp = datetime.datetime.now()