Code Example
import os

import numpy as np
import torch

# Actor, Critic, ReplayBuffer and OU, together with the hyperparameters
# (state_dim, action_dim, BATCH_SIZE, BUFFER_SIZE, TAU, LRA, LRC, GAMMA),
# are assumed to be defined elsewhere in the project.


class ddpgAgent:
    def __init__(self, Testing=False):
        self.testing = Testing
        os.makedirs("./models/controller", exist_ok=True)

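        # The Actor wraps the online policy network (model) and its
        # slowly-tracking target copy (target_model).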
        self.actor = Actor(state_dim, action_dim, BATCH_SIZE, TAU, LRA)

        # The online and the target network both load the same checkpoint,
        # so their weights are identical after initialization.
        try:
            self.actor.model.load_state_dict(
                torch.load("./models/controller/actor.pt"))
            self.actor.target_model.load_state_dict(
                torch.load("./models/controller/actor.pt"))
            print("Loaded actor model successfully")
        except (FileNotFoundError, RuntimeError):
            print("Cannot find actor weights in this directory")

        if not self.testing:
            # Training mode: create the replay buffer, the exploration
            # noise process and the critic network.
            self.buff = ReplayBuffer(BUFFER_SIZE)
            self.OU = OU()
            self.critic = Critic(state_dim, action_dim, BATCH_SIZE, TAU, LRC)
            try:
                self.critic.model.load_state_dict(
                    torch.load("./models/controller/critic.pt"))
                self.critic.target_model.load_state_dict(
                    torch.load("./models/controller/critic.pt"))
                print("Loaded critic model successfully")
            except (FileNotFoundError, RuntimeError):
                print("Cannot find critic weights in this directory")
        else:
            # Evaluation mode: switch the actor to inference behaviour.
            self.actor.model.eval()

    def loadTestModels(self, actor):
        current_actor = "./models/controller/intermittent/" + actor
        try:
            self.actor.model.load_state_dict(torch.load(current_actor))
            self.actor.target_model.load_state_dict(torch.load(current_actor))
            print("Loaded intermittent actor model successfully:", current_actor)
        except (FileNotFoundError, RuntimeError):
            print("Cannot find intermittent actor weights in this directory")
        self.actor.model.eval()

    def getAction(self, state, epsilon):
        action = np.zeros([1, action_dim])
        noise = np.zeros([1, action_dim])
        with torch.no_grad():
            action_original = self.actor.model(
                self.to_tensor(state.reshape(1, state.shape[0])))
        action_original = self.to_array(action_original)
        if not self.testing:
            # Ornstein-Uhlenbeck exploration noise, scaled by the decaying
            # epsilon (mu=0, theta=0.60, sigma=0.10).
            noise[0][0] = max(epsilon, 0) * self.OU.function(
                action_original[0][0], 0, 0.60, 0.10)
        # Clamp the noisy action to the valid range [0, 1].
        action[0][0] = np.clip(action_original[0][0] + noise[0][0], 0.0, 1.0)
        return action

    def storeTrajectory(self, s, a, r, s_, done):
        # a has shape (1, action_dim); store the flat action vector.
        self.buff.add(s, a[0], r, s_, done)

    def learn(self):
        # Sample a minibatch of transitions from the replay buffer.
        batch = self.buff.getBatch(BATCH_SIZE)
        states = np.asarray([e[0] for e in batch])
        actions = np.asarray([e[1] for e in batch])
        rewards = np.asarray([e[2] for e in batch])
        new_states = np.asarray([e[3] for e in batch])
        dones = np.asarray([e[4] for e in batch])
        # y_t only borrows the shape of the action array; every entry is
        # overwritten with the TD target below.
        y_t = np.asarray([e[1] for e in batch])

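        # Evaluate the target critic on the target actor's actions for the
        # next states: Q'(s', mu'(s')).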
        with torch.no_grad():
            target_q_values = self.critic.target_model([
                self.to_tensor(new_states),
                self.actor.target_model(self.to_tensor(new_states))
            ])
        target_q_values = self.to_array(target_q_values)

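        # Bellman targets: y = r for terminal transitions, otherwise
        # y = r + GAMMA * Q'(s', mu'(s')).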
        for k in range(len(batch)):
            if dones[k]:
                y_t[k] = rewards[k]
            else:
                y_t[k] = rewards[k] + GAMMA * target_q_values[k]

        # Train the critic: regress Q(s, a) toward the Bellman targets y_t.
        self.critic.model.zero_grad()
        q_values = self.critic.model(
            [self.to_tensor(states),
             self.to_tensor(actions)])
        self.critic.train(q_values, self.to_tensor(y_t))

        # Train the actor: maximize Q(s, mu(s)) by minimizing -Q.
        self.actor.model.zero_grad()
        policy_loss = -self.critic.model(
            [self.to_tensor(states),
             self.actor.model(self.to_tensor(states))])
        self.actor.train(policy_loss)

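        # Soft-update both target networks toward the online networks
        # (presumably Polyak averaging with rate TAU, the usual DDPG scheme).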
        self.actor.target_train()
        self.critic.target_train()

    def save_model(self):
        print("Saving model now...")
        torch.save(self.actor.model.state_dict(),
                   "./models/controller/actor.pt")
        torch.save(self.critic.model.state_dict(),
                   "./models/controller/critic.pt")

    def save_intermittent_model(self, episode):
        print("Saving intermittent model now...")
        # Make sure the checkpoint directory exists before saving.
        os.makedirs("./models/controller/intermittent", exist_ok=True)
        actor_name = "./models/controller/intermittent/actor{}.pt".format(episode)
        critic_name = "./models/controller/intermittent/critic{}.pt".format(episode)
        torch.save(self.actor.model.state_dict(), actor_name)
        torch.save(self.critic.model.state_dict(), critic_name)

    def to_tensor(self, numpy_array):
        # Assumes a CUDA device is available and the networks live on the
        # GPU; use .to(device) instead for CPU-only setups.
        return torch.from_numpy(numpy_array).float().cuda()

    def to_array(self, torch_tensor):
        return torch_tensor.cpu().float().numpy()
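
Two sketches for context. First, the OU helper used in getAction is not shown above; in the common DDPG agents this code follows, it implements one step of an Ornstein-Uhlenbeck process. A minimal sketch under that assumption:

import numpy as np

class OU:
    def function(self, x, mu, theta, sigma):
        # Mean-reverting drift toward mu plus Gaussian diffusion noise.
        return theta * (mu - x) + sigma * np.random.randn(1)

Second, a minimal training-loop sketch showing how the agent is typically driven. The environment env (with Gym-style reset/step), MAX_EPISODES, MAX_STEPS, and the epsilon schedule are hypothetical stand-ins, not part of the original code.

agent = ddpgAgent(Testing=False)
epsilon = 1.0
for episode in range(MAX_EPISODES):
    state = env.reset()
    for step in range(MAX_STEPS):
        action = agent.getAction(state, epsilon)   # shape (1, action_dim)
        next_state, reward, done, _ = env.step(action[0])
        agent.storeTrajectory(state, action, reward, next_state, done)
        agent.learn()               # assumes the buffer can serve a batch
        state = next_state
        epsilon -= 1.0 / 100000     # hypothetical linear decay
        if done:
            break
    if episode % 100 == 0:
        agent.save_intermittent_model(episode)
agent.save_model()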