def insert_runhistory(self, config: Configuration, cost: float, time: float,
                      status: StatusType, instance_id: str = "", seed: int = 0,
                      additional_info: dict = frozendict(),
                      origin: DataOrigin = DataOrigin.INTERNAL):
    """Persist one run-history record to the backing database.

    Parameters
    ----------
    config : Configuration
        The evaluated configuration.
    cost : float
        Cost of the run (to be minimized).
    time : float
        Runtime of the run.
    status : StatusType
        Result status of the run; stored as its ``.value``.
    instance_id : str
        Instance identifier; ``None`` is normalized to ``""``.
    seed : int
        Random seed used by the run.
    additional_info : dict
        Extra run information; copied into a plain ``dict`` before storage.
    origin : DataOrigin
        How the data will be used; stored as its ``.value``.
    """
    # Normalize instance_id BEFORE computing run_id: the original code called
    # get_run_id(instance_id, ...) first, so None and "" yielded different
    # run ids for what is logically the same run.
    if instance_id is None:
        instance_id = ""
    config_id = get_id_of_config(config)
    run_id = self.get_run_id(instance_id, config_id)
    try:
        self.Model(
            run_id=run_id,
            config_id=config_id,
            config=config.get_dictionary(),
            config_origin=config.origin,
            config_bin=pickle.dumps(config),
            cost=cost,
            time=time,
            instance_id=instance_id,
            seed=seed,
            status=status.value,
            additional_info=dict(additional_info),
            origin=origin.value,
        ).save()
    except Exception:
        # Best-effort insert: concurrent writers may already have stored this
        # run_id. NOTE(review): swallowing every exception also hides real
        # failures — consider logging it.
        pass
    self.timestamp = datetime.datetime.now()
def add(self, config: Configuration, cost: float, time: float,
        status: StatusType, instance_id: str = None, seed: int = None,
        additional_info: dict = None,
        origin: DataOrigin = DataOrigin.INTERNAL):
    """Adds a data of a new target algorithm (TA) run;
    it will update data if the same key values are used
    (config, instance_id, seed)

    Parameters
    ----------
    config : dict (or other type -- depending on config space module)
        Parameter configuration
    cost: float
        Cost of TA run (will be minimized)
    time: float
        Runtime of TA run
    status: str
        Status in {SUCCESS, TIMEOUT, CRASHED, ABORT, MEMOUT}
    instance_id: str
        String representing an instance (default: None)
    seed: int
        Random seed used by TA (default: None)
    additional_info: dict
        Additional run infos (could include further returned
        information from TA or fields such as start time and host_id)
    origin: DataOrigin
        Defines how data will be used.
    """
    # Empty-string instance ids are treated the same as "no instance".
    if not instance_id:
        instance_id = None

    config_id = self.config_ids.get(config)
    if config_id is None:  # it's a new config
        # Register the config in both directions (config -> id, id -> config).
        # The original re-read self.config_ids right after writing it; use the
        # freshly computed id directly instead.
        config_id = get_id_of_config(config)
        self.config_ids[config] = config_id
        self.ids_config[config_id] = config

    k = RunKey(config_id, instance_id, seed)
    v = RunValue(cost, time, status, additional_info)

    # Each runkey is supposed to be used only once. Repeated tries to add
    # the same runkey will be ignored silently if not capped.
    if self.overwrite_existing_runs or self.data.get(k) is None:
        self._add(k, v, status, origin)
    elif (status != StatusType.CAPPED
          and self.data[k].status == StatusType.CAPPED):
        # overwrite capped runs with uncapped runs
        self._add(k, v, status, origin)
    elif (status == StatusType.CAPPED
          and self.data[k].status == StatusType.CAPPED
          and cost > self.data[k].cost):
        # overwrite if censored with a larger cutoff
        self._add(k, v, status, origin)
def appointment_config(self, config) -> bool:
    """Try to claim *config* by inserting a marker row (origin=-1).

    Returns True when this caller won the claim, False when the config
    was already claimed or the insert failed (e.g. a concurrent worker
    created the row between the existence check and the create).
    """
    config_id = get_id_of_config(config)
    already_claimed = self.Model.select().where(
        self.Model.config_id == config_id).exists()
    if already_claimed:
        return False
    try:
        self.Model.create(config_id=config_id, origin=-1)
    except Exception:
        # Lost the race (or the insert failed for another reason).
        return False
    return True
def appointment_config(self, config, instance_id) -> bool:
    """Try to claim the (config, instance) run by inserting a marker row.

    Returns True when this caller won the claim, False when the run was
    already claimed or the insert failed (e.g. a concurrent worker created
    the row between the existence check and the create).
    """
    config_id = get_id_of_config(config)
    run_id = self.get_run_id(instance_id, config_id)
    already_claimed = self.Model.select().where(
        self.Model.run_id == run_id).exists()
    if already_claimed:
        return False
    try:
        self.Model.create(run_id=run_id, origin=-1)
    except Exception:
        # Lost the race (or the insert failed for another reason).
        return False
    return True
def insert_runhistory(self, config: Configuration, cost: float, time: float,
                      status: StatusType, instance_id: str = "", seed: int = 0,
                      additional_info: dict = None,
                      origin: DataOrigin = DataOrigin.INTERNAL):
    """Persist one run-history record, tolerating duplicate inserts.

    Parameters
    ----------
    config : Configuration
        The evaluated configuration.
    cost : float
        Cost of the run (to be minimized).
    time : float
        Runtime of the run.
    status : StatusType
        Result status; stored as its ``.value``.
    instance_id : str
        Instance identifier; ``None`` is normalized to ``""``.
    seed : int
        Random seed used by the run.
    additional_info : dict
        Extra run information, JSON-serialized before storage.
        (The original default was a mutable ``{}``; use ``None`` instead.)
    origin : DataOrigin
        How the data will be used; stored as its ``.value``.
    """
    if additional_info is None:
        additional_info = {}
    config_id = get_id_of_config(config)
    if instance_id is None:
        instance_id = ""
    # The original duplicated this whole field list in the try and except
    # branches; build it once.
    fields = dict(
        config_id=config_id,
        config=json.dumps(config.get_dictionary()),
        config_origin=config.origin,
        config_bit=pickle.dumps(config),
        cost=cost,
        time=time,
        instance_id=instance_id,
        seed=seed,
        status=status.value,
        additional_info=json.dumps(additional_info),
        origin=origin.value,
        pid=os.getpid(),
        timestamp=datetime.datetime.now(),
    )
    try:
        self.Model.create(**fields)
    except pw.IntegrityError:
        # A row with this key already exists; fall back to save().
        # NOTE(review): save() on a fresh instance may attempt another
        # INSERT rather than an UPDATE — confirm against the peewee model.
        self.Model(**fields).save()
    self.timestamp = datetime.datetime.now()
def __call__(self, shp: Configuration):
    """Evaluate one configuration and record the trial; return loss/status."""
    config_id = get_id_of_config(shp)
    started_at = time()
    # 1. Turn the configuration into a model.
    dhp, model = self.shp2model(shp)
    # 2. Fetch the data.
    X_train, y_train, X_test, y_test = self.get_Xy()
    # 3. Evaluate the model.
    info = self.evaluate(model, X_train, y_train, X_test, y_test)
    # TODO: handle the failure case
    # 4. Persist the trial.
    elapsed = time() - started_at
    info["config_id"] = config_id
    info["program_hyper_param"] = shp
    info["dict_hyper_param"] = dhp
    # First key of the PHASE2 sub-dict names the estimator; "unk" if absent.
    info["estimator"] = next(iter(dhp.get(PHASE2, {"unk": ""})))
    info["cost_time"] = elapsed
    self.resource_manager.insert_to_trials_table(info)
    return {"loss": info["loss"], "status": info["status"]}
def __call__(self, php: Configuration):
    """Evaluate one configuration, persist the trial, and return its loss."""
    trial_id = get_id_of_config(php)
    start = time()
    # 1. Turn the configuration into a model.
    dhp, model = self.php2model(php)
    # 2. Fetch the data.
    X_train, y_train, X_test, y_test = self.get_Xy()
    # 3. Evaluate the model.
    loss, info = self.evaluate(model, X_train, y_train, X_test, y_test)
    # TODO: handle the failure case
    # 4. Persist the trial.
    cost_time = time() - start
    # The original assigned info["trial_id"] twice; set it once.
    info["trial_id"] = trial_id
    info["status"] = "success"
    info["program_hyper_param"] = php
    info["dict_hyper_param"] = dhp
    # First key of the "estimator" sub-dict names the estimator; "unk" if absent.
    info["estimator"] = next(iter(dhp.get("estimator", {"unk": ""})))
    info["cost_time"] = cost_time
    self.resource_manager.insert_to_db(info)
    return loss
from dsmac.runhistory.runhistory import RunHistoryDB
from dsmac.runhistory.utils import get_id_of_config
from dsmac.tae.execute_ta_run import StatusType

if __name__ == '__main__':
    # Smoke-test script: load a saved runhistory, claim a config, and write
    # two runs into the shared database from two independent DB handles.
    from ConfigSpace import ConfigurationSpace
    import joblib
    from dsmac.optimizer.objective import average_cost
    from dsmac.runhistory.runhistory import RunHistory

    runhistory = RunHistory(average_cost, db_args="test.db")
    cs: ConfigurationSpace = joblib.load(
        "/home/tqc/PycharmProjects/auto-pipeline/test/php.bz2")
    runhistory.load_json(
        "/home/tqc/PycharmProjects/auto-pipeline/test/test_runhistory/default_dataset_name/smac_output/runhistory.json",
        cs)
    all_configs = runhistory.get_all_configs()
    first_config = all_configs[0]
    config_id = get_id_of_config(first_config)
    first_cost = runhistory.get_cost(first_config)

    db = RunHistoryDB(cs, runhistory, "test.db")
    db.delete_all()
    print(db.appointment_config(first_config))
    db.insert_runhistory(first_config, first_cost, 0.1, StatusType.SUCCESS)

    # A second handle simulates another worker writing to the same DB file.
    db2 = RunHistoryDB(cs, runhistory, "test.db")
    db2.insert_runhistory(all_configs[1], runhistory.get_cost(all_configs[1]),
                          0.1, StatusType.SUCCESS)
    db.fetch_new_runhistory()