Code example #1
# NOTE: the import paths below assume the autonomous-learning-library ("all")
# package layout; exact module locations may vary between library versions.
import os
from distutils.dir_util import copy_tree
from shutil import rmtree

import torch
from all.environments import GymEnvironment
from all.experiments import GreedyAgent, SingleEnvExperiment
from all.presets.classic_control import rainbow  # assumption: vector-state preset

# Project-local modules (not shown here): Settings, dqn, prediction.

class RainbowAgent(dqn.RLAgent):

    def __init__(self):
        super().__init__()

        if Settings.CUDA:
            self.device = "cuda"
        else:
            self.device = "cpu"

        self.env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=self.device)
        self.agent = None

    @classmethod
    def load(cls, path):
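        # Rebuild the wrapper and attach an evaluation-only GreedyAgent
        # restored from the checkpoint directory at path.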
        rl_agent = cls()
        agent = GreedyAgent.load(path, rl_agent.env)
        rl_agent.agent = agent
        return rl_agent

    @classmethod
    def train(cls, num_frames: int):
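        # Build the Rainbow preset and train it for num_frames frames in a
        # single-environment experiment.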
        rl_agent = cls()
        preset = rainbow(
            device=rl_agent.device,
            lr=Settings.LEARNING_RATE,
        )
        experiment = SingleEnvExperiment(preset, rl_agent.env)
        experiment.train(num_frames)
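        # Move the experiment output to the configured log directory and
        # remove the default location.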
        default_log_dir = experiment._writer.log_dir
        copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
        rmtree(default_log_dir)
        rl_agent.env.close()

    @classmethod
    def resume_training(cls, path, num_frames: int):
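        # Rebuild the Rainbow preset, restore the saved distributional
        # Q-network weights from path, and continue training.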
        rl_agent = cls()
        lr = Settings.LEARNING_RATE
        preset = rainbow(device=rl_agent.device, lr=lr)
        q_dist_module = torch.load(os.path.join(path, "q_dist.pt"),
                                   map_location='cpu').to(rl_agent.device)
        experiment = SingleEnvExperiment(preset, rl_agent.env)
        agent = experiment._agent
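        # Copy the checkpointed weights into the freshly built distributional
        # Q-network.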
        old_q_dist = agent.q_dist
        old_q_dist.model.load_state_dict(q_dist_module.state_dict())
        experiment.train(frames=num_frames)
        default_log_dir = experiment._writer.log_dir
        copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
        rmtree(default_log_dir)
        rl_agent.env.close()

    def get_control(self, state: prediction.HighwayState) -> float:
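        # Encode the highway state for the agent and map the greedy discrete
        # action index to the corresponding jerk value.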
        vector_state = dqn.get_state_vector_from_base_state(state)
        encoded_state = self.env._make_state(vector_state, False)
        action = self.agent.eval(encoded_state, 0).item()
        return Settings.JERK_VALUES_DQN[action]

    def _cleanup(self):
        self.env.close()
Code example #2
# NOTE: as above, the import paths assume the autonomous-learning-library
# ("all") package layout and may differ between library versions.
import os
from distutils.dir_util import copy_tree
from shutil import rmtree

import torch
from all.bodies import TimeFeature
from all.environments import GymEnvironment
from all.experiments import GreedyAgent, SingleEnvExperiment
from all.presets.continuous import ddpg

# Project-local modules (not shown here): Settings, dqn, prediction.

class DDPGAgent(dqn.RLAgent):
    def __init__(self):
        super().__init__()

        if Settings.CUDA:
            self.device = "cuda"
        else:
            self.device = "cpu"

        self.env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=self.device)
        self.agent = None

    @classmethod
    def load(cls, path) -> "DDPGAgent":
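        # Restore the trained policy as an evaluation-only GreedyAgent and
        # wrap it with TimeFeature, matching the body used during DDPG training.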
        rl_agent = cls()
        agent = GreedyAgent.load(path, rl_agent.env)
        agent = TimeFeature(agent)
        rl_agent.env._lazy_init()
        rl_agent.agent = agent
        return rl_agent

    @classmethod
    def train(cls, num_frames: int):
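        # Build the DDPG preset (shared learning rate for critic and actor)
        # and train it in a single-environment experiment.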
        rl_agent = cls()
        preset = ddpg(device=rl_agent.device,
                      lr_q=Settings.LEARNING_RATE,
                      lr_pi=Settings.LEARNING_RATE)
        experiment = SingleEnvExperiment(preset, rl_agent.env)
        experiment.train(num_frames)
        default_log_dir = experiment._writer.log_dir
        copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
        rmtree(default_log_dir)
        rl_agent.env.close()

    @classmethod
    def resume_training(cls, path, num_frames: int):
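        # Rebuild the DDPG preset, restore the saved critic (q.pt) and actor
        # (policy.pt) weights from path, and continue training.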
        rl_agent = cls()
        lr = Settings.LEARNING_RATE
        preset = ddpg(device=rl_agent.device, lr_q=lr, lr_pi=lr)
        q_module = torch.load(os.path.join(path, "q.pt"),
                              map_location='cpu').to(rl_agent.device)
        policy_module = torch.load(os.path.join(path, "policy.pt"),
                                   map_location='cpu').to(rl_agent.device)

        experiment = SingleEnvExperiment(preset, rl_agent.env)
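        # The experiment wraps the DDPG agent in a TimeFeature body; unwrap it
        # before restoring the critic and actor weights.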
        agent = experiment._agent.agent
        old_q = agent.q
        old_q.model.load_state_dict(q_module.state_dict())
        old_policy = agent.policy
        old_policy.model.load_state_dict(policy_module.state_dict())
        experiment.train(frames=num_frames)
        default_log_dir = experiment._writer.log_dir
        copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
        rmtree(default_log_dir)
        rl_agent.env.close()

    def get_control(self, state: prediction.HighwayState) -> float:
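        # DDPG produces a continuous action; return it directly as the
        # control (jerk) value.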
        vector_state = dqn.get_state_vector_from_base_state(state)
        encoded_state = self.env._make_state(vector_state, False)
        first_action = self.agent.eval(encoded_state, 0).item()
        return first_action

    def end_episode_callback(self, last_state):
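        # Pass the terminal state through the agent at the end of an episode
        # so the TimeFeature wrapper's internal state stays consistent.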
        self.agent.eval(
            self.env._make_state(
                dqn.get_state_vector_from_base_state(last_state), True), 0)

    def _cleanup(self):
        self.env.close()