def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99,
         learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0):
    AgentBase.init(self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim,
                   reward_scale=reward_scale, gamma=gamma, learning_rate=learning_rate,
                   if_per_or_gae=if_per_or_gae, env_num=env_num, gpu_id=gpu_id, )
    if if_per_or_gae:  # if_use_per
        self.criterion = torch.nn.MSELoss(reduction='none')
        self.get_obj_critic = self.get_obj_critic_per
    else:
        self.criterion = torch.nn.MSELoss(reduction='mean')
        self.get_obj_critic = self.get_obj_critic_raw
def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99, learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0): """ Explict call ``self.init()`` to overwrite the ``self.object`` in ``__init__()`` for multiprocessing. """ AgentBase.init( self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim, reward_scale=reward_scale, gamma=gamma, learning_rate=learning_rate, if_per_or_gae=if_per_or_gae, env_num=env_num, gpu_id=gpu_id, ) if if_per_or_gae: # if_use_per self.criterion = torch.nn.SmoothL1Loss(reduction='none') self.get_obj_critic = self.get_obj_critic_per else: self.criterion = torch.nn.SmoothL1Loss(reduction='mean') self.get_obj_critic = self.get_obj_critic_raw
def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99,
         learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0):
    AgentBase.init(self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim,
                   reward_scale=reward_scale, gamma=gamma, learning_rate=learning_rate,
                   if_per_or_gae=if_per_or_gae, env_num=env_num, gpu_id=gpu_id, )

    # actor and critic share one network, and so do their target networks
    self.act = self.cri = self.ClassAct(net_dim, state_dim, action_dim).to(self.device)
    if self.if_use_act_target:
        self.act_target = self.cri_target = deepcopy(self.act)
    else:
        self.act_target = self.cri_target = self.act

    # single optimizer over the shared network, with a higher learning rate
    # for the state encoder and the shared trunk
    self.cri_optim = torch.optim.Adam([
        {'params': self.act.enc_s.parameters(), 'lr': learning_rate * 1.25},
        {'params': self.act.enc_a.parameters(), },
        {'params': self.act.mid_n.parameters(), 'lr': learning_rate * 1.25},
        {'params': self.act.dec_a.parameters(), },
        {'params': self.act.dec_q.parameters(), },
    ], lr=learning_rate)
    self.act_optim = self.cri_optim

    if if_per_or_gae:  # if_use_per
        self.criterion = torch.nn.MSELoss(reduction='none')
        self.get_obj_critic = self.get_obj_critic_per
    else:
        self.criterion = torch.nn.MSELoss(reduction='mean')
        self.get_obj_critic = self.get_obj_critic_raw
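The parameter groups above assume the shared actor-critic exposes five sub-modules: a state encoder ``enc_s``, an action encoder ``enc_a``, a shared trunk ``mid_n``, a policy head ``dec_a`` and a Q-value head ``dec_q``. The encoder and trunk get a slightly higher learning rate, plausibly because they receive gradients from both the actor and the critic objectives. A minimal sketch of such a module follows; only the attribute names are taken from the code above, the class name, layer sizes and wiring are illustrative assumptions.

import torch
import torch.nn as nn

class SharedActorCritic(nn.Module):  # hypothetical name; stands in for self.ClassAct
    def __init__(self, net_dim, state_dim, action_dim):
        super().__init__()
        self.enc_s = nn.Sequential(nn.Linear(state_dim, net_dim), nn.ReLU())   # state encoder
        self.enc_a = nn.Sequential(nn.Linear(action_dim, net_dim), nn.ReLU())  # action encoder
        self.mid_n = nn.Sequential(nn.Linear(net_dim, net_dim), nn.ReLU())     # shared trunk
        self.dec_a = nn.Linear(net_dim, action_dim)                            # policy head
        self.dec_q = nn.Linear(net_dim, 1)                                     # Q-value head

    def forward(self, state):                        # actor path: state -> action
        return self.dec_a(self.mid_n(self.enc_s(state))).tanh()

    def critic(self, state, action):                 # critic path: (state, action) -> Q
        return self.dec_q(self.mid_n(self.enc_s(state) + self.enc_a(action)))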
def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99, learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0): """ Explict call ``self.init()`` to overwrite the ``self.object`` in ``__init__()`` for multiprocessing. """ AgentBase.init(self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim, reward_scale=reward_scale, gamma=gamma, learning_rate=learning_rate, if_per_or_gae=if_per_or_gae, env_num=env_num, gpu_id=gpu_id, ) self.traj_list = [list() for _ in range(env_num)] self.env_num = env_num if if_per_or_gae: # if_use_gae self.get_reward_sum = self.get_reward_sum_gae else: self.get_reward_sum = self.get_reward_sum_raw if env_num == 1: self.explore_env = self.explore_one_env else: self.explore_env = self.explore_vec_env
def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99, learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0): """ Explict call ``self.init()`` to overwrite the ``self.object`` in ``__init__()`` for multiprocessing. """ AgentBase.init( self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim, reward_scale=reward_scale, gamma=gamma, learning_rate=learning_rate, if_per_or_gae=if_per_or_gae, env_num=env_num, gpu_id=gpu_id, ) self.alpha_log = torch.tensor( (-np.log(action_dim) * np.e, ), dtype=torch.float32, requires_grad=True, device=self.device) # trainable parameter self.alpha_optim = torch.optim.Adam((self.alpha_log, ), lr=learning_rate) self.target_entropy = np.log(action_dim) if if_per_or_gae: # if_use_per self.criterion = torch.nn.SmoothL1Loss(reduction='none') self.get_obj_critic = self.get_obj_critic_per else: self.criterion = torch.nn.SmoothL1Loss(reduction='mean') self.get_obj_critic = self.get_obj_critic_raw
def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99,
         learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0):
    AgentBase.init(self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim,
                   reward_scale=reward_scale, gamma=gamma, learning_rate=learning_rate,
                   if_per_or_gae=if_per_or_gae, env_num=env_num, gpu_id=gpu_id, )

    # self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
    #                               requires_grad=True, device=self.device)  # trainable parameter
    self.alpha_log = torch.zeros(1, dtype=torch.float32, requires_grad=True, device=self.device)
    self.alpha_optim = torch.optim.Adam((self.alpha_log,), lr=learning_rate)
    self.target_entropy = np.log(action_dim)
    self.alpha = self.alpha_log.cpu().exp().item()
    self.trajectory_list = list()

    if if_per_or_gae:  # if_use_per
        self.criterion = torch.nn.SmoothL1Loss(reduction='none')
        self.get_obj_critic = self.get_obj_critic_per
    else:
        self.criterion = torch.nn.MSELoss(reduction='mean')
        self.get_obj_critic = self.get_obj_critic_raw
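Because construction and initialization are split, a worker process first builds the agent object and then explicitly calls ``init()`` so that networks, optimizers and buffers are created on that process's own device. A usage sketch follows; the agent class name and hyper-parameter values are illustrative.

# usage sketch: explicit two-step construction, e.g. inside a worker process
agent = AgentModSAC()                      # AgentModSAC is an illustrative class name
agent.init(net_dim=256, state_dim=8, action_dim=2,
           reward_scale=1.0, gamma=0.99, learning_rate=1e-4,
           if_per_or_gae=False, env_num=1, gpu_id=0)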