Example #1
 def init(self,
          net_dim=256,
          state_dim=8,
          action_dim=2,
          reward_scale=1.0,
          gamma=0.99,
          learning_rate=1e-4,
          if_per_or_gae=False,
          env_num=1,
          gpu_id=0):
     AgentBase.init(
         self,
         net_dim=net_dim,
         state_dim=state_dim,
         action_dim=action_dim,
         reward_scale=reward_scale,
         gamma=gamma,
         learning_rate=learning_rate,
         if_per_or_gae=if_per_or_gae,
         env_num=env_num,
         gpu_id=gpu_id,
     )
     if if_per_or_gae:  # if_use_per
         self.criterion = torch.nn.MSELoss(reduction='none')
         self.get_obj_critic = self.get_obj_critic_per
     else:
         self.criterion = torch.nn.MSELoss(reduction='mean')
         self.get_obj_critic = self.get_obj_critic_raw
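
The reduction='none' vs. reduction='mean' split exists because prioritized experience replay (PER) needs a per-sample TD error, both to apply importance-sampling weights and to refresh the buffer's priorities, while uniform replay only needs one averaged loss. Below is a minimal sketch of what the two dispatched methods typically look like; the buffer and network names (sample_batch, td_error_update, cri, cri_target, act_target) are illustrative assumptions, not the exact library API.

    import torch

    def get_obj_critic_raw(self, buffer, batch_size):
        # Uniform replay: an averaged TD loss is enough.
        with torch.no_grad():
            reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
            next_q = self.cri_target(next_s, self.act_target(next_s))
            q_label = reward + mask * next_q              # mask already folds in gamma * (1 - done)
        q_value = self.cri(state, action)
        obj_critic = self.criterion(q_value, q_label)     # MSELoss(reduction='mean') -> scalar
        return obj_critic, state

    def get_obj_critic_per(self, buffer, batch_size):
        # PER: keep per-sample errors, weight them, and push them back as priorities.
        with torch.no_grad():
            reward, mask, action, state, next_s, is_weight = buffer.sample_batch(batch_size)
            next_q = self.cri_target(next_s, self.act_target(next_s))
            q_label = reward + mask * next_q
        q_value = self.cri(state, action)
        td_error = self.criterion(q_value, q_label)       # MSELoss(reduction='none') -> per sample
        obj_critic = (td_error * is_weight).mean()
        buffer.td_error_update(td_error.detach())         # hypothetical priority-update hook
        return obj_critic, state
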
Example #2
 def init(self,
          net_dim=256,
          state_dim=8,
          action_dim=2,
          reward_scale=1.0,
          gamma=0.99,
          learning_rate=1e-4,
          if_per_or_gae=False,
          env_num=1,
          gpu_id=0):
     """
      Explicitly call ``self.init()`` to overwrite the ``self.object`` placeholders created in ``__init__()`` for multiprocessing.
     """
     AgentBase.init(
         self,
         net_dim=net_dim,
         state_dim=state_dim,
         action_dim=action_dim,
         reward_scale=reward_scale,
         gamma=gamma,
         learning_rate=learning_rate,
         if_per_or_gae=if_per_or_gae,
         env_num=env_num,
         gpu_id=gpu_id,
     )
     if if_per_or_gae:  # if_use_per
         self.criterion = torch.nn.SmoothL1Loss(reduction='none')
         self.get_obj_critic = self.get_obj_critic_per
     else:
         self.criterion = torch.nn.SmoothL1Loss(reduction='mean')
         self.get_obj_critic = self.get_obj_critic_raw
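
The docstring describes a two-step construction: ``__init__()`` stays lightweight so the agent object can be pickled and sent to worker processes, and ``init()`` is then called in the worker to allocate networks, optimizers, and the loss dispatch on the target device. A hedged usage sketch follows; the class name and argument values are placeholders, not taken from the source.

    agent = SomeAgent()                  # hypothetical class name; cheap __init__, no networks yet
    agent.init(net_dim=256,
               state_dim=8,
               action_dim=2,
               learning_rate=1e-4,
               if_per_or_gae=False,      # False -> SmoothL1Loss(reduction='mean') + raw objective
               env_num=1,
               gpu_id=0)                 # builds nets and optimizers for the chosen device
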
Example #3
    def init(self,
             net_dim=256,
             state_dim=8,
             action_dim=2,
             reward_scale=1.0,
             gamma=0.99,
             learning_rate=1e-4,
             if_per_or_gae=False,
             env_num=1,
             gpu_id=0):
        AgentBase.init(
            self,
            net_dim=net_dim,
            state_dim=state_dim,
            action_dim=action_dim,
            reward_scale=reward_scale,
            gamma=gamma,
            learning_rate=learning_rate,
            if_per_or_gae=if_per_or_gae,
            env_num=env_num,
            gpu_id=gpu_id,
        )
        self.act = self.cri = self.ClassAct(net_dim, state_dim,
                                            action_dim).to(self.device)
        if self.if_use_act_target:
            self.act_target = self.cri_target = deepcopy(self.act)
        else:
            self.act_target = self.cri_target = self.act

        self.cri_optim = torch.optim.Adam(
            [{'params': self.act.enc_s.parameters(), 'lr': learning_rate * 1.25},
             {'params': self.act.enc_a.parameters()},
             {'params': self.act.mid_n.parameters(), 'lr': learning_rate * 1.25},
             {'params': self.act.dec_a.parameters()},
             {'params': self.act.dec_q.parameters()}],
            lr=learning_rate)
        self.act_optim = self.cri_optim

        if if_per_or_gae:  # if_use_per
            self.criterion = torch.nn.MSELoss(reduction='none')
            self.get_obj_critic = self.get_obj_critic_per
        else:
            self.criterion = torch.nn.MSELoss(reduction='mean')
            self.get_obj_critic = self.get_obj_critic_raw
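
The per-group dictionaries passed to torch.optim.Adam let the shared actor/critic train its state encoder (enc_s) and middle trunk (mid_n) 25% faster than the rest: groups without an explicit 'lr' key fall back to the top-level lr=learning_rate default. A small self-contained demonstration of that PyTorch behavior, where the two Linear layers merely stand in for the sub-networks:

    import torch

    enc = torch.nn.Linear(8, 32)   # stand-in for a faster-learning encoder
    dec = torch.nn.Linear(32, 2)   # stand-in for a module using the default rate

    base_lr = 1e-4
    optim = torch.optim.Adam(
        [{'params': enc.parameters(), 'lr': base_lr * 1.25},  # explicit per-group learning rate
         {'params': dec.parameters()}],                       # no 'lr' key -> uses the default below
        lr=base_lr)

    print([group['lr'] for group in optim.param_groups])      # [0.000125, 0.0001]
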
Example #4
    def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99,
             learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0):
        """
        Explicitly call ``self.init()`` to overwrite the ``self.object`` placeholders created in ``__init__()`` for multiprocessing.
        """
        AgentBase.init(self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim,
                       reward_scale=reward_scale, gamma=gamma,
                       learning_rate=learning_rate, if_per_or_gae=if_per_or_gae,
                       env_num=env_num, gpu_id=gpu_id, )
        self.traj_list = [list() for _ in range(env_num)]
        self.env_num = env_num

        if if_per_or_gae:  # if_use_gae
            self.get_reward_sum = self.get_reward_sum_gae
        else:
            self.get_reward_sum = self.get_reward_sum_raw
        if env_num == 1:
            self.explore_env = self.explore_one_env
        else:
            self.explore_env = self.explore_vec_env
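
get_reward_sum_raw and get_reward_sum_gae are the two ways this on-policy agent turns a stored trajectory into learning targets: plain discounted returns, or Generalized Advantage Estimation (GAE). The library's own methods operate on its internal trajectory buffers; the sketch below is a standalone, simplified version of the math only, with signatures and the lambda value as assumptions.

    import torch

    def get_reward_sum_raw(rewards, masks, gamma=0.99):
        # Discounted return: R_t = r_t + gamma * (1 - done_t) * R_{t+1}
        returns = torch.empty_like(rewards)
        running = 0.0
        for t in range(len(rewards) - 1, -1, -1):
            running = rewards[t] + gamma * masks[t] * running
            returns[t] = running
        return returns

    def get_reward_sum_gae(rewards, masks, values, gamma=0.99, lam=0.95):
        # GAE: delta_t = r_t + gamma * (1 - done_t) * V(s_{t+1}) - V(s_t)
        #      A_t     = delta_t + gamma * lam * (1 - done_t) * A_{t+1}
        advantages = torch.empty_like(rewards)
        running = 0.0
        next_value = 0.0
        for t in range(len(rewards) - 1, -1, -1):
            delta = rewards[t] + gamma * masks[t] * next_value - values[t]
            running = delta + gamma * lam * masks[t] * running
            advantages[t] = running
            next_value = values[t]
        return values + advantages, advantages   # (value targets, advantages)
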
Example #5
    def init(self,
             net_dim=256,
             state_dim=8,
             action_dim=2,
             reward_scale=1.0,
             gamma=0.99,
             learning_rate=1e-4,
             if_per_or_gae=False,
             env_num=1,
             gpu_id=0):
        """
        Explicitly call ``self.init()`` to overwrite the ``self.object`` placeholders created in ``__init__()`` for multiprocessing.
        """
        AgentBase.init(
            self,
            net_dim=net_dim,
            state_dim=state_dim,
            action_dim=action_dim,
            reward_scale=reward_scale,
            gamma=gamma,
            learning_rate=learning_rate,
            if_per_or_gae=if_per_or_gae,
            env_num=env_num,
            gpu_id=gpu_id,
        )

        self.alpha_log = torch.tensor(
            (-np.log(action_dim) * np.e, ),
            dtype=torch.float32,
            requires_grad=True,
            device=self.device)  # trainable parameter
        self.alpha_optim = torch.optim.Adam((self.alpha_log, ),
                                            lr=learning_rate)
        self.target_entropy = np.log(action_dim)

        if if_per_or_gae:  # if_use_per
            self.criterion = torch.nn.SmoothL1Loss(reduction='none')
            self.get_obj_critic = self.get_obj_critic_per
        else:
            self.criterion = torch.nn.SmoothL1Loss(reduction='mean')
            self.get_obj_critic = self.get_obj_critic_raw
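
alpha_log parameterizes the SAC entropy temperature as alpha = exp(alpha_log), and alpha_optim adapts it so the policy's entropy tracks target_entropy. The sketch below shows the standard form of that temperature update; the exact sign convention and target value differ between implementations, so this is illustrative rather than the library's own update, and log_prob is assumed to come from actions sampled by the current policy.

    import torch

    def update_alpha(alpha_log, alpha_optim, log_prob, target_entropy):
        # Standard SAC form: since entropy ~= -log_prob.mean(), this loss raises
        # alpha when entropy falls below target_entropy and lowers it otherwise.
        obj_alpha = -(alpha_log * (log_prob + target_entropy).detach()).mean()
        alpha_optim.zero_grad()
        obj_alpha.backward()
        alpha_optim.step()
        return alpha_log.exp().detach()   # temperature used to weight the entropy bonus
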
Example #6
    def init(self,
             net_dim=256,
             state_dim=8,
             action_dim=2,
             reward_scale=1.0,
             gamma=0.99,
             learning_rate=1e-4,
             if_per_or_gae=False,
             env_num=1,
             gpu_id=0):
        AgentBase.init(
            self,
            net_dim=net_dim,
            state_dim=state_dim,
            action_dim=action_dim,
            reward_scale=reward_scale,
            gamma=gamma,
            learning_rate=learning_rate,
            if_per_or_gae=if_per_or_gae,
            env_num=env_num,
            gpu_id=gpu_id,
        )

        #self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
        #                              requires_grad=True, device=self.device)  # trainable parameter
        self.alpha_log = torch.zeros(1,
                                     dtype=torch.float32,
                                     requires_grad=True,
                                     device=self.device)
        self.alpha_optim = torch.optim.Adam((self.alpha_log, ),
                                            lr=learning_rate)
        self.target_entropy = np.log(action_dim)
        self.alpha = self.alpha_log.cpu().exp().item()
        self.trajectory_list = list()
        if if_per_or_gae:  # if_use_per
            self.criterion = torch.nn.SmoothL1Loss(reduction='none')
            self.get_obj_critic = self.get_obj_critic_per
        else:
            self.criterion = torch.nn.MSELoss(reduction='mean')
            self.get_obj_critic = self.get_obj_critic_raw
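
Compared with Example #5, this variant starts alpha_log at zero (and caches the resulting temperature as self.alpha), so the initial entropy weight is exp(0) = 1 rather than the smaller warm start used above. A quick numeric comparison for action_dim = 2:

    import numpy as np

    action_dim = 2
    print(np.exp(-np.log(action_dim) * np.e))   # Example #5 warm start: alpha ~= 0.152
    print(np.exp(0.0))                          # Example #6 warm start: alpha  = 1.0
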