import os
from distutils.dir_util import copy_tree
from shutil import rmtree

import torch

# autonomous-learning-library imports. NOTE: these module paths follow the
# v0.6-era API (which matches the GreedyAgent / experiment._writer usage
# below); they have moved in later releases of the library.
from all.bodies import TimeFeature
from all.environments import GymEnvironment
from all.experiments import GreedyAgent, SingleEnvExperiment
from all.presets.classic_control import rainbow
from all.presets.continuous import ddpg

# Project-local modules (import paths assumed from the surrounding codebase).
import dqn
import prediction
from settings import Settings


class RainbowAgent(dqn.RLAgent):
    """Discrete-action jerk controller backed by a Rainbow DQN preset."""

    def __init__(self):
        super().__init__()
        self.device = "cuda" if Settings.CUDA else "cpu"
        self.env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=self.device)
        self.agent = None

    @classmethod
    def load(cls, path):
        """Restore a trained policy for greedy (evaluation-only) use."""
        rl_agent = cls()
        rl_agent.agent = GreedyAgent.load(path, rl_agent.env)
        return rl_agent

    @classmethod
    def train(cls, num_frames: int):
        rl_agent = cls()
        preset = rainbow(
            device=rl_agent.device,
            lr=Settings.LEARNING_RATE,
        )
        experiment = SingleEnvExperiment(preset, rl_agent.env)
        experiment.train(frames=num_frames)
        # Move the experiment's logs into the configured log directory.
        default_log_dir = experiment._writer.log_dir
        copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
        rmtree(default_log_dir)
        rl_agent.env.close()

    @classmethod
    def resume_training(cls, path, num_frames: int):
        rl_agent = cls()
        preset = rainbow(device=rl_agent.device, lr=Settings.LEARNING_RATE)
        # Load the saved distributional Q-network and copy its weights into
        # the freshly constructed agent before continuing training.
        q_dist_module = torch.load(
            os.path.join(path, "q_dist.pt"), map_location="cpu"
        ).to(rl_agent.device)
        experiment = SingleEnvExperiment(preset, rl_agent.env)
        agent = experiment._agent
        agent.q_dist.model.load_state_dict(q_dist_module.state_dict())
        experiment.train(frames=num_frames)
        default_log_dir = experiment._writer.log_dir
        copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
        rmtree(default_log_dir)
        rl_agent.env.close()

    def get_control(self, state: prediction.HighwayState) -> float:
        # Encode the highway state the same way the training environment does,
        # then map the greedy discrete action to a jerk value.
        vector_state = dqn.get_state_vector_from_base_state(state)
        encoded_state = self.env._make_state(vector_state, False)
        action = self.agent.eval(encoded_state, 0).item()
        return Settings.JERK_VALUES_DQN[action]

    def _cleanup(self):
        self.env.close()
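

# Both resume_training implementations in this module repeat one pattern:
# torch.load() a serialized module from the checkpoint directory, then copy
# its parameters into the matching network of a freshly built preset. A
# generic helper for that pattern could look like the sketch below
# (hypothetical; not part of the original code, and `target` is assumed to
# be an all-library Approximation exposing a `.model` attribute).
def _restore_module(target, path, filename, device):
    """Copy weights saved in `filename` under `path` into `target.model`."""
    saved = torch.load(os.path.join(path, filename), map_location="cpu").to(device)
    target.model.load_state_dict(saved.state_dict())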


class DDPGAgent(dqn.RLAgent):
    """Continuous-action jerk controller backed by a DDPG preset."""

    def __init__(self):
        super().__init__()
        self.device = "cuda" if Settings.CUDA else "cpu"
        self.env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=self.device)
        self.agent = None

    @classmethod
    def load(cls, path) -> dqn.RLAgent:
        rl_agent = cls()
        agent = GreedyAgent.load(path, rl_agent.env)
        # DDPG is trained with the TimeFeature body, so evaluation must wrap
        # the greedy agent the same way to keep the observations aligned.
        agent = TimeFeature(agent)
        rl_agent.env._lazy_init()
        rl_agent.agent = agent
        return rl_agent

    @classmethod
    def train(cls, num_frames: int):
        rl_agent = cls()
        preset = ddpg(
            device=rl_agent.device,
            lr_q=Settings.LEARNING_RATE,
            lr_pi=Settings.LEARNING_RATE,
        )
        experiment = SingleEnvExperiment(preset, rl_agent.env)
        experiment.train(frames=num_frames)
        default_log_dir = experiment._writer.log_dir
        copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
        rmtree(default_log_dir)
        rl_agent.env.close()

    @classmethod
    def resume_training(cls, path, num_frames: int):
        rl_agent = cls()
        lr = Settings.LEARNING_RATE
        preset = ddpg(device=rl_agent.device, lr_q=lr, lr_pi=lr)
        # Restore both the critic (q.pt) and the actor (policy.pt).
        q_module = torch.load(
            os.path.join(path, "q.pt"), map_location="cpu"
        ).to(rl_agent.device)
        policy_module = torch.load(
            os.path.join(path, "policy.pt"), map_location="cpu"
        ).to(rl_agent.device)
        experiment = SingleEnvExperiment(preset, rl_agent.env)
        # The preset wraps the DDPG agent in a TimeFeature body; unwrap it
        # to reach the underlying networks.
        agent = experiment._agent.agent
        agent.q.model.load_state_dict(q_module.state_dict())
        agent.policy.model.load_state_dict(policy_module.state_dict())
        experiment.train(frames=num_frames)
        default_log_dir = experiment._writer.log_dir
        copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
        rmtree(default_log_dir)
        rl_agent.env.close()

    def get_control(self, state: prediction.HighwayState) -> float:
        vector_state = dqn.get_state_vector_from_base_state(state)
        encoded_state = self.env._make_state(vector_state, False)
        # eval() returns a continuous action tensor; take its scalar value.
        return self.agent.eval(encoded_state, 0).item()

    def end_episode_callback(self, last_state):
        # Feed the terminal state through eval() so the TimeFeature body can
        # reset its internal time step between episodes.
        self.agent.eval(
            self.env._make_state(
                dqn.get_state_vector_from_base_state(last_state), True
            ),
            0,
        )

    def _cleanup(self):
        self.env.close()
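

# Hypothetical usage sketch (not part of the original module): train a DDPG
# controller, reload it, and query a control value. `saved_dir` is a
# placeholder; where checkpoints actually land depends on how the experiment
# saves its modules, and the HighwayState comes from the prediction pipeline.
if __name__ == "__main__":
    DDPGAgent.train(num_frames=100_000)
    saved_dir = "path/to/checkpoint"  # placeholder checkpoint directory
    controller = DDPGAgent.load(saved_dir)
    # highway_state = prediction.HighwayState(...)  # built by the caller
    # jerk = controller.get_control(highway_state)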