def del_training_sessions(self, sessions: [int, list] = None):
    """Delete training sessions of the current model and their iterations.

    Args:
        sessions: a single session id, a list of session ids, or ``None``
            to delete every session stored for ``self._model_id``.  Any
            other value results in nothing being deleted.

    Raises:
        AssertionError: if a requested id is not one of the model's sessions.
    """
    lookup_sql = '''SELECT id FROM training_session WHERE sim_model_id = {}'''.format(P_MARKER)
    known_ids = {row[0] for row in select_all(self.db, sql=lookup_sql, params=(self._model_id, ))}

    if isinstance(sessions, int):
        assert sessions in known_ids, "Invalid session id {}".format(sessions)
        targets = (sessions, )
    elif isinstance(sessions, list):
        assert set(sessions).issubset(known_ids), "Invalid sessions list {}".format(sessions)
        targets = tuple(sessions)
    elif sessions is None:
        targets = tuple(known_ids)
    else:
        targets = ()

    if not targets:
        return

    placeholders = SQLParamList(len(targets))
    cursor = self.db.cursor()
    # Iterations reference their session, so they are removed first.
    cursor.execute(
        '''DELETE FROM training_iteration WHERE training_session_id IN ({})'''.format(placeholders),
        targets)
    cursor.execute(
        '''DELETE FROM training_session WHERE id IN ({})'''.format(placeholders),
        targets)
    self.db.commit()
def _add_iteration(self, n, session_id, start_time, best_checkpoint, result):
    """Insert one ``training_iteration`` row and return its row id.

    Args:
        n: iteration number, stored in the ``id`` column.
        session_id: id of the owning training session.
        start_time: datetime the iteration started; also used to compute
            the stored duration in seconds.
        best_checkpoint: value stored in the ``checkpoint`` column.
        result: mapping holding at least the ``episode_reward_*`` keys; it
            is also passed through ``filter_dict`` and stored as JSON.
    """
    other_data_keys = {
        'info', 'training_iteration', 'experiment_id', 'date', 'timestamp',
        'time_this_iter_s'
    }
    reward_mean = result['episode_reward_mean']
    reward_min = result['episode_reward_min']
    reward_max = result['episode_reward_max']
    elapsed_seconds = (datetime.now() - start_time).total_seconds()
    other_data = json.dumps(filter_dict(result, other_data_keys))

    row = (session_id, n, reward_mean, reward_min, reward_max,
           best_checkpoint, elapsed_seconds, start_time, other_data)
    insert_sql = (
        '''INSERT INTO training_iteration ( training_session_id, id, reward_mean, '''
        '''reward_min, reward_max, checkpoint, duration, time_start, other_data) '''
        '''VALUES ({})'''
    ).format(SQLParamList(9))

    cursor = self.db.cursor()
    cursor.execute(insert_sql, row)
    self.db.commit()
    return cursor.lastrowid
def _get_sim_config(self, sim_config: dict):
    """Return the id of the stored sim config matching ``sim_config``.

    Args:
        sim_config: the simulation config to look up.  When ``None`` the id
            returned by ``self._get_sim_base_config()`` is used.

    Returns:
        The ``sim_config.id`` of the matching row.  If no stored config of
        the current model decodes to a value equal to ``sim_config``, a new
        row named ``"Config <number of existing rows>"`` is inserted and its
        id is returned.
    """
    if sim_config is None:
        sim_config_id = self._get_sim_base_config()
    else:
        sql = '''SELECT id, config FROM sim_config WHERE sim_model_id = {}'''.format(P_MARKER)
        row_list = select_all(self.db, sql=sql, params=(self._model_id, ))
        try:
            # list.index raises ValueError when the config is not stored yet
            # (json.JSONDecodeError is a ValueError subclass as well).
            # Anything else is a real error and is no longer swallowed.
            idx = [json.loads(config) for _, config in row_list].index(sim_config)
        except ValueError:
            params = (self._model_id,
                      "Config {}".format(len(row_list)),
                      self._get_baseline_avg(sim_config),
                      json.dumps(sim_config))
            cursor = self.db.cursor()
            cursor.execute(
                '''INSERT INTO sim_config (sim_model_id, name, baseline_avg, config) VALUES ({})'''
                .format(SQLParamList(4)), params)
            sim_config_id = cursor.lastrowid
        else:
            sim_config_id, _ = row_list[idx]
    self.db.commit()
    return sim_config_id
def _add_baseline_run(self, policy_run_data: tuple):
    """Insert one ``baseline_run`` row and return its row id.

    ``policy_run_data`` supplies the five column values in order:
    sim_config_id, time_start, simulations, duration, results.
    """
    insert_sql = (
        '''INSERT INTO baseline_run ( sim_config_id, time_start, simulations, '''
        '''duration, results) VALUES ({})'''
    ).format(SQLParamList(5))
    db_cursor = self.db.cursor()
    db_cursor.execute(insert_sql, policy_run_data)
    self.db.commit()
    return db_cursor.lastrowid
def run_baselines(self, sim_config: [int, list] = None, simulations: int = 1):
    """Run the baseline simulation for one or more sim configs and store the results.

    Args:
        sim_config: a sim_config id, a list of ids, or ``None`` to use every
            sim config stored for the current model.
        simulations: number of baseline runs launched (as Ray remote tasks)
            per sim config.

    Raises:
        Exception: if ``sim_config`` is neither ``None``, an int nor a list.
    """

    @ray.remote
    def base_run(base):
        return base.run()

    if sim_config is None:
        # Every sim config that belongs to the current model.
        query = '''SELECT id, config FROM sim_config WHERE sim_config.sim_model_id = {}'''.format(
            P_MARKER)
        query_params = (self._model_id, )
    else:
        if isinstance(sim_config, int):
            sim_config = [sim_config]
        if not isinstance(sim_config, list):
            raise Exception("Invalid Sim Config {}".format(sim_config))
        # Only the explicitly requested sim configs.
        query = '''SELECT id, config FROM sim_config WHERE id IN ({})'''.format(
            SQLParamList(len(sim_config)))
        query_params = tuple(sim_config)

    rows = select_all(self.db, sql=query, params=query_params)

    for sim_config_id, raw_config in rows:
        baseline = self._sim_baseline(sim_config=json.loads(raw_config))
        print("# Baseline Simulation for Config {} started at {}!".format(
            sim_config_id, datetime.now()))
        time_start = datetime.now()
        pending = [base_run.remote(baseline) for _ in range(simulations)]
        result_list = ray.get(pending)
        elapsed_seconds = (datetime.now() - time_start).total_seconds()
        self._add_baseline_run(
            (sim_config_id, time_start, simulations, elapsed_seconds, json.dumps(result_list)))
        print("# Baseline Simulation for Config {} ended at {}!".format(
            sim_config_id, datetime.now()))
def _add_session(self, session_data: tuple):
    """Insert a ``training_session`` row for the current model and return its id.

    Args:
        session_data: ``(agent_config, sim_config_id)``.  The agent config is
            stored as JSON without its ``"env"`` and ``"env_config"`` entries.

    Returns:
        The row id of the inserted session.
    """
    agent_config, sim_config_id = session_data
    # Serialize a filtered copy instead of popping keys from the caller's
    # dict, so the config passed in stays usable after this call.
    stored_config = {
        key: value for key, value in agent_config.items()
        if key not in ("env", "env_config")
    }
    cursor = self.db.cursor()
    _session_data = (self._model_id, sim_config_id, datetime.now(),
                     json.dumps(stored_config))
    cursor.execute(
        ('''INSERT INTO training_session ( sim_model_id, sim_config_id, '''
         '''time_start, agent_config) VALUES ({})''').format(SQLParamList(4)),
        _session_data)
    self.db.commit()
    return cursor.lastrowid
def _add_policy(self, policy_data: tuple):
    """Insert a ``policy`` row for the current model and return its id.

    Args:
        policy_data: ``(session_id, best_iteration, best_checkpoint,
            agent_config, sim_config_id)``.  The agent config is stored as
            JSON without its ``"env"`` and ``"env_config"`` entries.

    Returns:
        The row id of the inserted policy.
    """
    session_id, best_iteration, best_checkpoint, agent_config, sim_config_id = policy_data
    # Serialize a filtered copy instead of popping keys from the caller's
    # dict, so the config passed in stays usable after this call.
    stored_config = json.dumps({
        key: value for key, value in agent_config.items()
        if key not in ("env", "env_config")
    })
    row = (self._model_id, sim_config_id, session_id, best_iteration,
           best_checkpoint, stored_config)
    cursor = self.db.cursor()
    cursor.execute(
        ('''INSERT INTO policy ( sim_model_id, sim_config_id, session_id, '''
         '''iteration_id, checkpoint, agent_config) VALUES ({})''').format(SQLParamList(6)),
        row)
    self.db.commit()
    return cursor.lastrowid
def launch_trainer(cluster_name: str = None, cloud_provider: str = '', cluster_config: dict = None):
    """Create (if needed) and launch a trainer cluster.

    The trainer directory is probed with ``ls``; when it is missing it is
    copied from ``simpy_template`` and a ``trainer_cluster`` row is inserted.
    Otherwise the id of the existing row with ``stop IS NULL`` is looked up.
    When ``cloud_provider`` is not empty, the cluster YAML is (re)written and
    the cluster is started with ``ray up``, its remote trainer folder is
    cleared and the local trainer directory is synced up.

    Args:
        cluster_name: name of the trainer cluster.
        cloud_provider: cloud provider name; ``''`` skips the YAML/ray steps.
        cluster_config: config stored as JSON and passed to
            ``trainer_cluster_config``.

    Returns:
        ``(trainer_id, result)`` where ``result`` is the last
        ``subprocess.run`` result assigned: the ``ray up`` call when a cloud
        provider is given, otherwise the ``ls``/``cp`` call.
    """
    trainer_path = _TRAINER_PATH(cluster_name, cloud_provider)
    result = subprocess.run(['ls', trainer_path], capture_output=True, text=True)
    # Create the Trainer Cluster if it does not exist.
    # No distinction exists between cloud providers, therefore training results are shared
    # between runs in different clouds
    if result.returncode != 0:
        # Create trainer folder
        result = subprocess.run(['cp', '-r', 'simpy_template', trainer_path],
                                capture_output=True, text=True)
        if result.returncode:
            print("Error Creating Trainer Directory {}".format(trainer_path))
            print(result.stderr)
        cursor = _BACKOFFICE_DB.cursor()
        sql = "INSERT INTO trainer_cluster (name, cloud_provider, start, config) VALUES ({})".format(
            SQLParamList(4))
        params = (cluster_name, cloud_provider, datetime.now(), json.dumps(cluster_config))
        cursor.execute(sql, params)
        _BACKOFFICE_DB.commit()
        trainer_id = cursor.lastrowid
    else:
        sql = ('''SELECT id FROM trainer_cluster WHERE name = {} and cloud_provider = {} '''
               '''and stop IS NULL''').format(P_MARKER, P_MARKER)
        trainer_id, = select_record(_BACKOFFICE_DB, sql=sql, params=(cluster_name, cloud_provider))

    # Create trainer yaml config file
    # When a cluster with the same name and provider is relaunched the configuration is overridden
    if cloud_provider != '':
        trainer_yaml = _TRAINER_YAML(cluster_name, cloud_provider)
        # ToDo: Test aws
        # Render the config before opening the file so a failure here does not
        # leave a truncated YAML behind; the context manager guarantees the
        # handle is closed even if the write fails.
        yaml_text = trainer_cluster_config(cloud_provider, cluster_name, trainer_path,
                                           config=cluster_config)
        with open(trainer_yaml, "wt") as config_file:
            config_file.write(yaml_text)
        # launch the cluster
        result = subprocess.run(
            _CMD_PREFIX + "ray up {} --no-config-cache -y".format(trainer_yaml),
            shell=True, capture_output=True, text=True, executable=_SHELL)
        subprocess.run(
            _CMD_PREFIX + "ray exec {} 'rm -r /home/ubuntu/trainer/*'".format(trainer_yaml),
            shell=True, capture_output=True, text=True, executable=_SHELL)
        subprocess.run(
            _CMD_PREFIX + "ray rsync_up {} '{}/' '/home/ubuntu/trainer/'".format(
                trainer_yaml, trainer_path),
            shell=True, capture_output=True, text=True, executable=_SHELL)
    _BACKOFFICE_DB.commit()
    return trainer_id, result
def deploy_policy(backend_server: ServeClient, trainer_id: int, policy_id: int, policy_config: dict = None):
    """Deploy a trained policy as a backend on ``backend_server``.

    The policy's model name, checkpoint and saved agent config are read from
    the trainer's database, a backend named ``trainer<id>_policy<id>`` is
    created, and the deployment is recorded in the backoffice ``policy``
    table (``INSERT OR IGNORE``).

    Args:
        backend_server: client used to create the backend.
        trainer_id: id of the trainer cluster that owns the policy.
        policy_id: id of the policy inside the trainer database.
        policy_config: backend config; defaults to ``{'num_replicas': 1}``.

    Returns:
        The name of the created backend.

    Raises:
        AssertionError: if the policy is not found in the trainer database.
    """

    class ServeModel:
        """Backend callable: restores a PPO checkpoint and answers action requests."""

        def __init__(self, agent_config: dict, checkpoint_path: str, trainer_path: str, model_name: str):
            sim_path = '{}.models.{}'.format(trainer_path, model_name)
            import_stmt = ("from {} import SimBaseline, N_ACTIONS, OBSERVATION_SPACE, "
                           "SimModel, BASE_CONFIG").format(sim_path)
            exec_locals = {}
            try:
                exec(import_stmt, {}, exec_locals)
            except ModuleNotFoundError:
                raise Exception(" Model '{}' not found!!".format(sim_path))

            agent_config.update(
                num_workers=0,
                env=SimpyEnv,
                env_config={
                    "n_actions": exec_locals['N_ACTIONS'],
                    "observation_space": exec_locals['OBSERVATION_SPACE'],
                    "sim_model": exec_locals['SimModel'],
                    "sim_config": exec_locals['BASE_CONFIG'],
                },
            )

            # The stored checkpoint path has its first character dropped and
            # the trainer path prepended.
            checkpoint_path = trainer_path + checkpoint_path[1:]
            print(checkpoint_path)

            self.trainer = ppo.PPOTrainer(config=agent_config)
            self.trainer.restore(checkpoint_path)

        async def __call__(self, request: Request):
            payload = await request.json()
            chosen_action = self.trainer.compute_action(payload["observation"])
            return {"action": int(chosen_action)}

    # Get Trainer DB
    trainer_name, cloud_provider = _get_trainer_and_cloud(trainer_id=trainer_id)
    trainer_path = _TRAINER_PATH(trainer_name, cloud_provider)
    trainer_db = db_connect(trainer_path + "/" + TRAINER_DB_NAME)

    # Get Policy info
    policy_sql = (
        '''SELECT sim_model.name, policy.checkpoint, policy.agent_config FROM policy '''
        '''INNER JOIN sim_model ON policy.sim_model_id = sim_model.id '''
        '''WHERE policy.id = {}'''
    ).format(P_MARKER)
    row = select_record(trainer_db, sql=policy_sql, params=(policy_id,))
    assert row is not None, "Invalid Trainer ID {} and Policy ID {}".format(trainer_id, policy_id)
    model_name, checkpoint, raw_agent_config = row
    saved_agent_config = json.loads(raw_agent_config)

    if policy_config is None:
        policy_config = {'num_replicas': 1}

    policy_name = "trainer{}_policy{}".format(trainer_id, policy_id)
    backend_server.create_backend(policy_name, ServeModel, saved_agent_config, checkpoint,
                                  trainer_path, model_name,
                                  config=policy_config,
                                  ray_actor_options=_POLICY_ACTOR_CONFIG,
                                  env=CondaEnv(_CURRENT_ENV))

    record_sql = '''INSERT OR IGNORE INTO policy ( trainer_id, policy_id, backend_name ) VALUES ({})'''.format(
        SQLParamList(3))
    backoffice_cursor = _BACKOFFICE_DB.cursor()
    backoffice_cursor.execute(record_sql, (trainer_id, policy_id, policy_name))
    _BACKOFFICE_DB.commit()
    print("# Policy '{}' Deployed".format(policy_name))
    return policy_name
def __init__(self, sim_name: str, log_level: str = "ERROR", checkpoint_path=None):
    """Set up the trainer for the simulation model ``models.<sim_name>``.

    Imports the model's definitions, connects to the trainer database,
    registers the model (and its base sim config) when it is not stored yet,
    and builds the agent config used for training.

    Args:
        sim_name: module name under ``models`` that provides ``SimBaseline``,
            ``N_ACTIONS``, ``OBSERVATION_SPACE``, ``SimModel`` and
            ``BASE_CONFIG``.
        log_level: one of ``"DEBUG"``, ``"INFO"``, ``"WARN"``, ``"ERROR"``.
        checkpoint_path: checkpoint location; when ``None`` the value of
            ``self.default_sim_checkpoint_path`` is used.

    Raises:
        Exception: if the model module cannot be found.
        AssertionError: if ``BASE_CONFIG`` is not a dict or ``log_level`` is
            not one of the accepted values.
    """
    exec_locals = {}
    try:
        # NOTE(review): sim_name is interpolated into exec'd code; it must
        # only ever come from a trusted source.
        exec(
            "from models.{} import SimBaseline, N_ACTIONS, OBSERVATION_SPACE, SimModel, BASE_CONFIG"
            .format(sim_name), {}, exec_locals)
    except ModuleNotFoundError:
        raise Exception(" Model '{}' not found!!".format(sim_name))

    self.db = db_connect(TRAINER_DB_NAME)

    assert isinstance(exec_locals['BASE_CONFIG'], dict), "Simulation Config {} must be a dict!".format(
        exec_locals['BASE_CONFIG'])
    assert log_level in ["DEBUG", "INFO", "WARN", "ERROR"], "Invalid log_level {}".format(log_level)

    if not ray.is_initialized():
        my_ray_init()

    self._sim_baseline = exec_locals['SimBaseline']

    sql = '''SELECT id FROM sim_model WHERE name = {}'''.format(P_MARKER)
    params = (sim_name, )
    row = select_record(self.db, sql=sql, params=params)
    if row is None:
        # First use of this model: register it together with its base config.
        cursor = self.db.cursor()
        cursor.execute(
            '''INSERT INTO sim_model (name) VALUES ({})'''.format(P_MARKER), params)
        self._model_id = cursor.lastrowid
        params = (self._model_id, self.default_sim_config_name,
                  self._get_baseline_avg(exec_locals['BASE_CONFIG']),
                  json.dumps(exec_locals['BASE_CONFIG']))
        cursor.execute(
            '''INSERT INTO sim_config (sim_model_id, name, baseline_avg, config) VALUES ({})'''.format(
                SQLParamList(4)), params)
        self.db.commit()
        print("# {} Created!".format(sim_name))
    else:
        self._model_id, = row

    self._config = self.ppo_config.copy()
    self._config["log_level"] = log_level
    self._config["env"] = SimpyEnv
    # ToDo: Change the Observation Space to a function that receives a Sim Config as a parameter.
    # In this part of the code it receives exec_locals['BASE_CONFIG']
    self._config["env_config"] = {
        "n_actions": exec_locals['N_ACTIONS'],
        "observation_space": exec_locals['OBSERVATION_SPACE'],
        "sim_model": exec_locals['SimModel'],
        "sim_config": exec_locals['BASE_CONFIG']
    }

    # Honour an explicitly supplied checkpoint path; previously the attribute
    # was only assigned when the argument was None.
    if checkpoint_path is None:
        self.checkpoint_path = self.default_sim_checkpoint_path
    else:
        self.checkpoint_path = checkpoint_path
def run_policies(self, policy: [int, list] = None, simulations: int = 1):
    """Run stored AI policies in the simulation and record the episode rewards.

    Args:
        policy: a policy id, a list of policy ids, or any other value
            (including ``None``) to run every policy of the current model.
        simulations: number of episodes to run per policy.

    Raises:
        AssertionError: if a requested id is not a policy of the current model.
    """
    select_policy_sql = '''SELECT id FROM policy WHERE sim_model_id = {}'''.format(P_MARKER)
    all_policies = {
        row[0]
        for row in select_all(self.db, sql=select_policy_sql, params=(self._model_id, ))
    }

    if isinstance(policy, int):
        assert policy in all_policies, "Invalid policy id {}".format(policy)
        policies = (policy, )
    elif isinstance(policy, list):
        assert set(policy).issubset(all_policies), "Invalid policies list {}".format(policy)
        policies = tuple(policy)
    else:
        policies = tuple(all_policies)

    select_policy_sql = (
        '''SELECT policy.id, checkpoint, agent_config, sim_config.config as s_config '''
        '''FROM policy INNER JOIN sim_config ON policy.sim_config_id = sim_config.id '''
        '''WHERE policy.id IN ({})'''
    ).format(SQLParamList(len(policies)))
    policy_data = select_all(self.db, sql=select_policy_sql, params=policies)

    for policy_id, checkpoint, saved_agent_config, saved_sim_config in policy_data:
        print("# Running AI Policy {} started at {}!".format(policy_id, datetime.now()))
        agent_config = self._config.copy()
        agent_config.update(json.loads(saved_agent_config))
        sim_config = json.loads(saved_sim_config)

        agent = ppo.PPOTrainer(config=agent_config)
        agent.restore(checkpoint)

        time_start = datetime.now()
        # instantiate env class
        # self._config.copy() is shallow, so updating the nested "sim_config"
        # in place would modify the dict shared with self._config and leak
        # this policy's settings into later runs.  Build a private merged
        # env_config for the evaluation environment instead.
        env_config = dict(agent_config["env_config"])
        merged_sim_config = dict(env_config["sim_config"])
        merged_sim_config.update(sim_config)
        env_config["sim_config"] = merged_sim_config
        he = SimpyEnv(env_config)

        result_list = []
        for i in range(simulations):
            # run until episode ends
            episode_reward = 0
            done = False
            obs = he.reset()
            while not done:
                action = agent.compute_action(obs)
                obs, reward, done, info = he.step(action)
                episode_reward += reward
            result_list.append(episode_reward)
            print("# Progress: {:2.1%} ".format((i + 1) / simulations), end="\r")

        policy_run_data = (policy_id, time_start, simulations,
                           (datetime.now() - time_start).total_seconds(),
                           json.dumps(result_list))
        self._add_policy_run(policy_run_data)
        print("# Progress: {:2.1%} ".format(1))
        print("# Running AI Policy {} ended at {}!".format(policy_id, datetime.now()))