def __init__(
    self,
    args: argparse.Namespace,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    log_cfg: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
):
    """Store configs, size the actor/critic heads from the env, build networks.

    Args:
        args: parsed command-line arguments forwarded to the base Learner.
        env_info: environment info (observation/action spaces).
        hyper_params: algorithm hyper-parameters.
        log_cfg: logging configuration.
        backbone: backbone network config.
        head: head network configs (actor, critic_vf, critic_qf).
        optim_cfg: optimizer configuration (incl. lr_entropy).
    """
    Learner.__init__(self, args, env_info, hyper_params, log_cfg)

    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg
    self.update_step = 0

    obs_shape = self.env_info.observation_space.shape
    action_dim = self.env_info.action_space.shape[0]
    # Actor and V-critic consume raw observations; the Q-critic takes the
    # observation concatenated with the action.
    self.head_cfg.actor.configs.state_size = obs_shape
    self.head_cfg.critic_vf.configs.state_size = obs_shape
    self.head_cfg.critic_qf.configs.state_size = (obs_shape[0] + action_dim,)
    self.head_cfg.actor.configs.output_size = action_dim

    if self.hyper_params.auto_entropy_tuning:
        # Target entropy is the negative action dimension; alpha is learned
        # in log-space with its own Adam optimizer.
        self.target_entropy = -np.prod((action_dim,)).item()
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.alpha_optim = optim.Adam([self.log_alpha], lr=optim_cfg.lr_entropy)

    self._init_network()
def __init__(
    self,
    loss_type: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    gru: ConfigDict,
    optim_cfg: ConfigDict,
    hyper_params: ConfigDict,
    log_cfg: ConfigDict,
    env_name: str,
    state_size: tuple,
    output_size: int,
    is_test: bool,
    load_from: str,
):
    """Store configs for a recurrent (GRU) learner and build its networks.

    Args:
        loss_type: loss function config.
        backbone: backbone network config.
        head: head network config.
        gru: GRU layer config.
        optim_cfg: optimizer configuration.
        hyper_params: algorithm hyper-parameters (incl. n_step).
        log_cfg: logging configuration.
        env_name: environment identifier passed to the base Learner.
        state_size: input (observation) size for the head.
        output_size: output size for the head.
        is_test: evaluation-mode flag.
        load_from: checkpoint path to restore from.
    """
    Learner.__init__(self, hyper_params, log_cfg, env_name, is_test)

    self.backbone_cfg = backbone
    self.gru_cfg = gru
    self.head_cfg = head
    self.optim_cfg = optim_cfg
    self.loss_type = loss_type
    self.load_from = load_from

    # Wire the environment dimensions into the head config.
    self.head_cfg.configs.state_size = state_size
    self.head_cfg.configs.output_size = output_size

    # N-step returns are active only when more than one step is configured.
    self.use_n_step = self.hyper_params.n_step > 1

    self._init_network()
def __init__(
    self,
    args: argparse.Namespace,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    log_cfg: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
    noise_cfg: ConfigDict,
    device: torch.device,
):
    """Size actor/critic heads from the env spaces and build the networks.

    Args:
        args: parsed command-line arguments forwarded to the base Learner.
        env_info: environment info (observation/action spaces).
        hyper_params: algorithm hyper-parameters.
        log_cfg: logging configuration.
        backbone: backbone network config.
        head: head network configs (actor, critic).
        optim_cfg: optimizer configuration.
        noise_cfg: exploration-noise configuration.
        device: torch device the learner runs on.
    """
    Learner.__init__(self, args, env_info, hyper_params, log_cfg, device)

    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg
    self.noise_cfg = noise_cfg

    obs_shape = self.env_info.observation_space.shape
    action_dim = self.env_info.action_space.shape[0]
    # Critic scores (state, action) pairs; actor maps states to actions.
    self.head_cfg.critic.configs.state_size = (obs_shape[0] + action_dim,)
    self.head_cfg.actor.configs.state_size = obs_shape
    self.head_cfg.actor.configs.output_size = action_dim

    self._init_network()
def __init__(
    self,
    args: argparse.Namespace,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    log_cfg: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
    device: str,
):
    """Size the head from the (possibly frame-stacked) env and build networks.

    Args:
        args: parsed command-line arguments (may carry stack_size).
        env_info: environment info (observation/action spaces).
        hyper_params: algorithm hyper-parameters (incl. n_step).
        log_cfg: logging configuration.
        backbone: backbone network config.
        head: head network config.
        optim_cfg: optimizer configuration.
        device: device identifier forwarded to the base Learner.
    """
    Learner.__init__(self, args, env_info, hyper_params, log_cfg, device)

    # When frames are stacked, the flat input grows by the stack factor.
    obs_dim = self.env_info.observation_space.shape[0]
    state_dim = obs_dim * args.stack_size if "stack_size" in args else obs_dim

    self.backbone_cfg = backbone
    self.head_cfg = head
    self.head_cfg.configs.state_size = (state_dim,)
    self.head_cfg.configs.output_size = self.env_info.action_space.n
    self.optim_cfg = optim_cfg

    # N-step returns are active only when more than one step is configured.
    self.use_n_step = self.hyper_params.n_step > 1

    self._init_network()
def __init__(
    self,
    args: argparse.Namespace,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    log_cfg: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
    noise_cfg: ConfigDict,
):
    """Size actor/critic heads, set up target-policy noise, build networks.

    Args:
        args: parsed command-line arguments forwarded to the base Learner.
        env_info: environment info (observation/action spaces).
        hyper_params: algorithm hyper-parameters.
        log_cfg: logging configuration.
        backbone: backbone network config.
        head: head network configs (actor, critic).
        optim_cfg: optimizer configuration.
        noise_cfg: noise configuration (incl. target_policy_noise).
    """
    Learner.__init__(self, args, env_info, hyper_params, log_cfg)

    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg
    self.noise_cfg = noise_cfg
    self.update_step = 0

    obs_shape = self.env_info.observation_space.shape
    action_dim = self.env_info.action_space.shape[0]
    # Critic scores (state, action) pairs; actor maps states to actions.
    self.head_cfg.critic.configs.state_size = (obs_shape[0] + action_dim,)
    self.head_cfg.actor.configs.state_size = obs_shape
    self.head_cfg.actor.configs.output_size = action_dim

    # Gaussian noise applied to the target policy's actions.
    self.target_policy_noise = GaussianNoise(
        self.head_cfg.actor.configs.output_size,
        self.noise_cfg.target_policy_noise,
        self.noise_cfg.target_policy_noise,
    )

    self._init_network()
def __init__(
    self,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
    trust_region: ConfigDict,
    hyper_params: ConfigDict,
    log_cfg: ConfigDict,
    env_info: ConfigDict,
    is_test: bool,
    load_from: str,
):
    """Wire discrete env dimensions into both heads and build the networks.

    Args:
        backbone: backbone network config.
        head: head network configs (actor, critic).
        optim_cfg: optimizer configuration.
        trust_region: trust-region constraint config.
        hyper_params: algorithm hyper-parameters (incl. gradient_clip).
        log_cfg: logging configuration.
        env_info: environment info (name, observation/action spaces).
        is_test: evaluation-mode flag.
        load_from: checkpoint path to restore from.
    """
    Learner.__init__(self, hyper_params, log_cfg, env_info.name, is_test)

    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg
    self.load_from = load_from
    self.gradient_clip = hyper_params.gradient_clip
    self.trust_region = trust_region

    obs_shape = env_info.observation_space.shape
    n_actions = env_info.action_space.n
    # Discrete action space: both heads take raw observations and emit one
    # value per action.
    for head_configs in (self.head_cfg.actor.configs, self.head_cfg.critic.configs):
        head_configs.state_size = obs_shape
        head_configs.output_size = n_actions

    self._init_network()
def __init__(
    self,
    hyper_params: ConfigDict,
    log_cfg: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
    noise_cfg: ConfigDict,
    env_name: str,
    state_size: tuple,
    output_size: int,
    is_test: bool,
    load_from: str,
):
    """Size actor/critic heads, set up target-policy noise, build networks.

    Args:
        hyper_params: algorithm hyper-parameters.
        log_cfg: logging configuration.
        backbone: backbone network config.
        head: head network configs (actor, critic).
        optim_cfg: optimizer configuration.
        noise_cfg: noise configuration (incl. target_policy_noise).
        env_name: environment identifier passed to the base Learner.
        state_size: observation size for the actor head.
        output_size: action dimension.
        is_test: evaluation-mode flag.
        load_from: checkpoint path to restore from.
    """
    Learner.__init__(self, hyper_params, log_cfg, env_name, is_test)

    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg
    self.noise_cfg = noise_cfg
    self.load_from = load_from
    self.update_step = 0

    # Critic scores (state, action) pairs; actor maps states to actions.
    self.head_cfg.critic.configs.state_size = (state_size[0] + output_size,)
    self.head_cfg.actor.configs.state_size = state_size
    self.head_cfg.actor.configs.output_size = output_size

    # Gaussian noise applied to the target policy's actions.
    self.target_policy_noise = GaussianNoise(
        self.head_cfg.actor.configs.output_size,
        self.noise_cfg.target_policy_noise,
        self.noise_cfg.target_policy_noise,
    )

    self._init_network()
def __init__(
    self,
    hyper_params: ConfigDict,
    log_cfg: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
    env_name: str,
    state_size: tuple,
    output_size: int,
    is_test: bool,
    load_from: str,
):
    """Size actor/critic heads from the given dimensions and build networks.

    Args:
        hyper_params: algorithm hyper-parameters.
        log_cfg: logging configuration.
        backbone: backbone network config.
        head: head network configs (actor, critic).
        optim_cfg: optimizer configuration.
        env_name: environment identifier passed to the base Learner.
        state_size: observation size shared by both heads.
        output_size: action dimension for the actor.
        is_test: evaluation-mode flag.
        load_from: checkpoint path to restore from.
    """
    Learner.__init__(self, hyper_params, log_cfg, env_name, is_test)

    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg
    self.load_from = load_from

    # Actor and critic share the same observation input size.
    self.head_cfg.actor.configs.state_size = state_size
    self.head_cfg.critic.configs.state_size = state_size
    self.head_cfg.actor.configs.output_size = output_size

    self._init_network()
def save_params(self, n_episode: int):
    """Save actor/critic networks and their optimizers via the base Learner."""
    components = {
        "actor": self.actor,
        "actor_optim": self.actor_optim,
        "critic": self.critic,
        "critic_optim": self.critic_optim,
    }
    # Checkpoint keys follow the "<name>_state_dict" convention.
    params = {f"{name}_state_dict": c.state_dict() for name, c in components.items()}
    Learner._save_params(self, params, n_episode)
def load_params(self, path: str):
    """Load model and optimizer parameters."""
    Learner.load_params(self, path)

    params = torch.load(path)
    # Restore each component from its matching checkpoint entry.
    for component, key in (
        (self.dqn, "dqn_state_dict"),
        (self.dqn_target, "dqn_target_state_dict"),
        (self.dqn_optim, "dqn_optim_state_dict"),
    ):
        component.load_state_dict(params[key])
    print("[INFO] loaded the model and optimizer from", path)
def save_params(self, n_episode: int):
    """Save model and optimizer parameters."""
    components = {
        "actor": self.actor,
        "critic": self.critic,
        "actor_optim": self.actor_optim,
        "critic_optim": self.critic_optim,
    }
    # Checkpoint keys follow the "<name>_state_dict" convention.
    params = {f"{name}_state_dict": c.state_dict() for name, c in components.items()}
    Learner._save_params(self, params, n_episode)
def save_params(self, n_episode: int):
    """Save model and optimizer parameters."""
    components = {
        "dqn": self.dqn,
        "dqn_target": self.dqn_target,
        "dqn_optim": self.dqn_optim,
    }
    # Checkpoint keys follow the "<name>_state_dict" convention.
    params = {f"{name}_state_dict": c.state_dict() for name, c in components.items()}
    Learner._save_params(self, params, n_episode)
def load_params(self, path: str):
    """Restore actor/critic networks and optimizers from a checkpoint."""
    Learner.load_params(self, path)

    params = torch.load(path)
    # Restore each component from its matching checkpoint entry.
    for component, key in (
        (self.actor, "actor_state_dict"),
        (self.critic, "critic_state_dict"),
        (self.actor_optim, "actor_optim_state_dict"),
        (self.critic_optim, "critic_optim_state_dict"),
    ):
        component.load_state_dict(params[key])
    print("[INFO] Loaded the model and optimizer from", path)
def load_params(self, path: str):
    """Load model and optimizer parameters."""
    Learner.load_params(self, path)

    params = torch.load(path)
    # Restore each component from the checkpoint entry of the same name.
    for name, component in (
        ("critic1", self.critic1),
        ("critic2", self.critic2),
        ("critic_target1", self.critic_target1),
        ("critic_target2", self.critic_target2),
        ("critic_optim", self.critic_optim),
        ("actor", self.actor),
        ("actor_target", self.actor_target),
        ("actor_optim", self.actor_optim),
    ):
        component.load_state_dict(params[name])
    print("[INFO] loaded the model and optimizer from", path)
def save_params(self, n_episode: int):
    """Save model and optimizer parameters."""
    components = {
        "actor": self.actor,
        "actor_target": self.actor_target,
        "actor_optim": self.actor_optim,
        "critic1": self.critic1,
        "critic2": self.critic2,
        "critic_target1": self.critic_target1,
        "critic_target2": self.critic_target2,
        "critic_optim": self.critic_optim,
    }
    params = {name: c.state_dict() for name, c in components.items()}
    Learner._save_params(self, params, n_episode)
def save_params(self, n_episode: int):
    """Save model and optimizer parameters."""
    components = {
        "actor": self.actor,
        "qf_1": self.qf_1,
        "qf_2": self.qf_2,
        "vf": self.vf,
        "vf_target": self.vf_target,
        "actor_optim": self.actor_optim,
        "qf_1_optim": self.qf_1_optim,
        "qf_2_optim": self.qf_2_optim,
        "vf_optim": self.vf_optim,
    }
    params = {name: c.state_dict() for name, c in components.items()}

    # The entropy-temperature optimizer exists only when auto tuning is on.
    if self.hyper_params.auto_entropy_tuning:
        params["alpha_optim"] = self.alpha_optim.state_dict()

    Learner._save_params(self, params, n_episode)
def load_params(self, path: str):
    """Load model and optimizer parameters."""
    Learner.load_params(self, path)

    params = torch.load(path)
    # Restore each component from the checkpoint entry of the same name.
    for name, component in (
        ("actor", self.actor),
        ("qf_1", self.qf_1),
        ("qf_2", self.qf_2),
        ("vf", self.vf),
        ("vf_target", self.vf_target),
        ("actor_optim", self.actor_optim),
        ("qf_1_optim", self.qf_1_optim),
        ("qf_2_optim", self.qf_2_optim),
        ("vf_optim", self.vf_optim),
    ):
        component.load_state_dict(params[name])

    # The entropy-temperature optimizer exists only when auto tuning is on.
    if self.hyper_params.auto_entropy_tuning:
        self.alpha_optim.load_state_dict(params["alpha_optim"])
    print("[INFO] loaded the model and optimizer from", path)
def __init__(
    self,
    args: argparse.Namespace,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    log_cfg: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
    loss_type: ConfigDict,
):
    """Size the head from the discrete env spaces and build the networks.

    Args:
        args: parsed command-line arguments forwarded to the base Learner.
        env_info: environment info (observation/action spaces).
        hyper_params: algorithm hyper-parameters (incl. n_step).
        log_cfg: logging configuration.
        backbone: backbone network config.
        head: head network config.
        optim_cfg: optimizer configuration.
        loss_type: loss function config.
    """
    Learner.__init__(self, args, env_info, hyper_params, log_cfg)

    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg
    self.loss_type = loss_type

    # Discrete control: one output per action.
    self.head_cfg.configs.state_size = self.env_info.observation_space.shape
    self.head_cfg.configs.output_size = self.env_info.action_space.n

    # N-step returns are active only when more than one step is configured.
    self.use_n_step = self.hyper_params.n_step > 1

    self._init_network()
def __init__(
    self,
    hyper_params: ConfigDict,
    log_cfg: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
    env_name: str,
    state_size: tuple,
    output_size: int,
    is_test: bool,
    load_from: str,
):
    """Size actor/critic heads, set up entropy tuning, and build networks.

    Fix: the original assigned ``head_cfg.actor.configs.state_size`` twice
    (once in a chained assignment with ``critic_vf`` and again two statements
    later with the same value); the redundant assignment is removed.

    Args:
        hyper_params: algorithm hyper-parameters (incl. auto_entropy_tuning).
        log_cfg: logging configuration.
        backbone: backbone network config.
        head: head network configs (actor, critic_vf, critic_qf).
        optim_cfg: optimizer configuration (incl. lr_entropy).
        env_name: environment identifier passed to the base Learner.
        state_size: observation size for actor and V-critic.
        output_size: action dimension.
        is_test: evaluation-mode flag.
        load_from: checkpoint path to restore from.
    """
    Learner.__init__(self, hyper_params, log_cfg, env_name, is_test)
    self.backbone_cfg = backbone
    self.head_cfg = head

    # Actor and V-critic consume raw observations; the Q-critic takes the
    # observation concatenated with the action.
    self.head_cfg.actor.configs.state_size = state_size
    self.head_cfg.critic_vf.configs.state_size = state_size
    self.head_cfg.critic_qf.configs.state_size = (state_size[0] + output_size,)
    self.head_cfg.actor.configs.output_size = output_size

    self.optim_cfg = optim_cfg
    self.load_from = load_from
    self.update_step = 0

    if self.hyper_params.auto_entropy_tuning:
        # Target entropy is the negative action dimension; alpha is learned
        # in log-space with its own Adam optimizer.
        self.target_entropy = -np.prod((output_size,)).item()
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.alpha_optim = optim.Adam([self.log_alpha], lr=optim_cfg.lr_entropy)

    self._init_network()