Example #1
    def __init__(self, num_inputs, action_space, param, action_bound):

        self.action_bound = action_bound
        self.alpha = param['alpha']
        self.gamma = param['gamma']
        self.tau = param['tau']
        self.target_update_interval = param['target_update_interval']
        self.automatic_entropy_tuning = param['automatic_entropy_tuning']
        self.lr = param['lr']

        self.device = torch.device("cuda" if param['cuda'] else "cpu")

        self.critic = QNetwork(num_inputs, action_space).to(device=self.device)
        self.critic_optim = Adam(self.critic.parameters(), lr=self.lr)

        self.critic_target = QNetwork(num_inputs, action_space).to(self.device)
        hard_update(self.critic_target, self.critic)

        # Target Entropy = -dim(A) (e.g., -6 for HalfCheetah-v2) as given in the paper
        if self.automatic_entropy_tuning:
            # action_space is the action dimension here (an int); wrap it in a
            # list so the tensor holds that value instead of allocating an
            # uninitialized tensor of that size.
            self.target_entropy = -torch.prod(
                torch.Tensor([action_space]).to(self.device)).item()
            self.log_alpha = torch.zeros(1,
                                         requires_grad=True,
                                         device=self.device)
            self.alpha_optim = Adam([self.log_alpha], lr=self.lr)

        self.policy = CNNPolicy(num_inputs, action_space).to(self.device)
        self.policy_optim = Adam(self.policy.parameters(), lr=self.lr)
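
The examples on this page rely on a hard_update helper to copy the online network's weights into the freshly created target network; it is not shown here. A minimal sketch of how hard_update (and the soft_update counterpart used for Polyak averaging during training) is commonly written, assuming both networks share the same architecture:

def hard_update(target, source):
    # Overwrite every target parameter with the corresponding online parameter.
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)


def soft_update(target, source, tau):
    # Polyak averaging: target <- (1 - tau) * target + tau * source.
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau)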
Example #2
        if os.path.exists(file_policy):
            print('###########################################')
            print('############Loading Policy Model###########')
            print('###########################################')
            state_dict = torch.load(file_policy)
            agent.policy.load_state_dict(state_dict)
        else:
            print('###########################################')
            print('############Start policy Training###########')
            print('###########################################')

        if os.path.exists(file_critic_1):
            print('###########################################')
            print('############Loading critic_1 Model###########')
            print('###########################################')
            state_dict = torch.load(file_critic_1)
            agent.critic_1.load_state_dict(state_dict)
            hard_update(agent.critic_1_target, agent.critic_1)

        else:
            print('###########################################')
            print('############Start critic_1 Training###########')
            print('###########################################')
    
        if os.path.exists(file_critic_2):
            print('###########################################')
            print('############Loading critic_2 Model###########')
            print('###########################################')
            state_dict = torch.load(file_critic_2)
            agent.critic_2.load_state_dict(state_dict)
            hard_update(agent.critic_2_target, agent.critic_2)
        else:
            print('###########################################')
            print('############Start critic_2 Training###########')
            print('###########################################')
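
The if/else chains above expect one checkpoint file per module, written with torch.save on the module's state_dict. A minimal sketch of the saving side that matches this loading logic, reusing the file_policy, file_critic_1 and file_critic_2 paths assumed above:

# Write one state_dict per module so the os.path.exists() checks above
# succeed on the next run; torch.load(...) then restores the weights.
torch.save(agent.policy.state_dict(), file_policy)
torch.save(agent.critic_1.state_dict(), file_critic_1)
torch.save(agent.critic_2.state_dict(), file_critic_2)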
    # np.random.seed(1)
    if rank == 0:
        policy_path = 'policy_0819'

        # actor network, its optimizer, and a target copy
        actor = Actor(frames=LASER_HIST, action_space=2, max_action=MAX_ACTION)
        actor.cuda()

        actor_opt = Adam(actor.parameters(), lr=ACTOR_LEARNING_RATE)

        actor_target = Actor(frames=LASER_HIST,
                             action_space=2,
                             max_action=MAX_ACTION)
        actor_target.cuda()

        hard_update(actor_target, actor)

        # critic network, its optimizer, and a target copy
        critic = Critic(frames=LASER_HIST, action_space=2)
        critic.cuda()

        critic_opt = Adam(critic.parameters(), lr=CRITIC_LEARNING_RATE)

        critic_target = Critic(frames=LASER_HIST, action_space=2)
        critic_target.cuda()

        hard_update(critic_target, critic)

        if not os.path.exists(policy_path):
            os.makedirs(policy_path)
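
After this setup, a training loop typically refreshes actor_target and critic_target with soft_update after each optimization step and periodically saves the online networks into policy_path. A hedged sketch under those assumptions; the TAU constant and the checkpoint file names are illustrative and not defined on this page:

TAU = 0.001  # assumed soft-update coefficient, not taken from this page

# Polyak-average the target networks toward the online networks.
soft_update(actor_target, actor, TAU)
soft_update(critic_target, critic, TAU)

# Persist the online networks so a later run can reload them.
torch.save(actor.state_dict(), os.path.join(policy_path, 'actor.pth'))
torch.save(critic.state_dict(), os.path.join(policy_path, 'critic.pth'))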