예제 #1
0
    def _setup_actor_critic_agent(self, config: Config, load_from_ckpt: bool,
                                  ckpt_path: str) -> None:
        r"""Builds the policy network and its Adam optimizer, optionally
        restoring the policy weights from a checkpoint.

        Args:
            config: MODEL config. Its ``TORCH_GPU_ID`` is overwritten in place
                with the value from ``self.config`` before the policy is built.
            load_from_ckpt: if true, the policy's state dict is loaded from
                ``ckpt_path``.
            ckpt_path: checkpoint location; only read when ``load_from_ckpt``
                is true.
        Returns:
            None
        """
        # The assignment is bracketed by defrost()/freeze() so the model config
        # carries the same GPU id as the trainer-level config.
        config.defrost()
        config.TORCH_GPU_ID = self.config.TORCH_GPU_ID
        config.freeze()

        # Both policies take the same constructor arguments, so choose the
        # class first and instantiate it once.
        policy_cls = CMAPolicy if config.CMA.use else Seq2SeqPolicy
        obs_space = self.envs.observation_spaces[0]
        act_space = self.envs.action_spaces[0]
        self.actor_critic = policy_cls(
            observation_space=obs_space,
            action_space=act_space,
            model_config=config,
        )
        self.actor_critic.to(self.device)

        # The learning rate comes from the trainer-level config (DAGGER.LR),
        # not from the model config passed in.
        trainable_params = self.actor_critic.parameters()
        self.optimizer = torch.optim.Adam(trainable_params,
                                          lr=self.config.DAGGER.LR)

        if load_from_ckpt:
            # Only the "state_dict" entry of the checkpoint is used here.
            checkpoint = self.load_checkpoint(ckpt_path, map_location="cpu")
            self.actor_critic.load_state_dict(checkpoint["state_dict"])
            logger.info(f"Loaded weights from checkpoint: {ckpt_path}")

        logger.info("Finished setting up actor critic model.")
예제 #2
0
def get_defaut_config():
    """Return a new ``Config`` populated with the default agent settings.

    The function name keeps its original spelling so existing callers
    continue to work.
    """
    cfg = Config()

    # Input modality and the checkpoint path that goes with it.
    cfg.INPUT_TYPE = "blind"
    cfg.MODEL_PATH = "data/checkpoints/blind.pth"

    # NOTE(review): this assumes a fresh Config() already exposes RL.PPO;
    # confirm against the Config class, which is defined outside this block.
    cfg.RL.PPO.hidden_size = 512

    cfg.RANDOM_SEED = 7
    cfg.TORCH_GPU_ID = 0
    return cfg