# Module-level imports assumed: numpy as np, torch, torch.nn as nn, torch.optim as optim
def train(self, batches=5000):
    losses = np.zeros(batches)
    batch_size = 100

    criterion = nn.MSELoss()
    optimiser = optim.SGD(self.actor.parameters(), lr=0.001)

    for i in range(batches):
        # Sample a mini-batch of recorded (state, action) pairs from the buffer
        x, u = self.buffer.sample(batch_size)
        state = torch.FloatTensor(x)
        action = torch.FloatTensor(u)

        optimiser.zero_grad()

        # Supervised regression: fit the actor's first output to the recorded action
        outputs = self.actor(state)
        actor_loss = criterion(outputs[:, 0], action)
        actor_loss.backward()
        optimiser.step()

        # .item() detaches the scalar loss before storing it in the numpy array
        losses[i] = actor_loss.item()

        if i % 500 == 0:
            print(f"Batch: {i} --> Loss: {actor_loss.item():.4f}")

            lib.plot(losses, 100)

            self.save()

    return losses
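The method above relies on a `self.buffer` object that returns matched arrays of states and actions. The source does not show that class, so the following is a minimal sketch of a buffer compatible with the `sample()` call; the name `ReplayBuffer` and its internals (pre-allocated numpy arrays, ring-buffer overwrite) are illustrative assumptions, not the author's implementation.

import numpy as np

class ReplayBuffer:
    """Illustrative (state, action) buffer matching the sample() call above."""

    def __init__(self, state_dim, capacity=10000):
        self.states = np.zeros((capacity, state_dim), dtype=np.float32)
        self.actions = np.zeros(capacity, dtype=np.float32)
        self.capacity = capacity
        self.ptr = 0   # next write position
        self.size = 0  # number of valid entries

    def add(self, state, action):
        # Overwrite the oldest entries once the buffer is full (ring buffer)
        self.states[self.ptr] = state
        self.actions[self.ptr] = action
        self.ptr = (self.ptr + 1) % self.capacity
        self.size = min(self.size + 1, self.capacity)

    def sample(self, batch_size):
        # Uniform random mini-batch of stored pairs
        idx = np.random.randint(0, self.size, size=batch_size)
        return self.states[idx], self.actions[idx]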
Example #2
def print_update(self, plot_reward=True):
    # Wait until a few runs have been recorded before reporting
    if self.ptr < 5:
        return

    mean = np.mean(self.rewards[0:self.ptr])
    # The last filled entry is at ptr - 1; rewards[-1] would read the
    # unfilled end of the pre-allocated array
    score = self.rewards[self.ptr - 1]
    print(f"Run: {self.t_counter} --> Score: {score:.2f} --> Mean: {mean:.2f}")

    if plot_reward:
        lib.plot(self.rewards[0:self.ptr], figure_n=2)
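`print_update` assumes a pre-allocated `self.rewards` array, a write pointer `self.ptr`, and a run counter `self.t_counter`, none of which are defined in the excerpt. A minimal sketch of the bookkeeping that would feed it follows; the class name `RewardLogger` and the method `add_reward` are hypothetical.

import numpy as np

class RewardLogger:
    """Illustrative holder for the fields print_update reads."""

    def __init__(self, max_runs=10000):
        self.rewards = np.zeros(max_runs)  # pre-allocated; rewards[0:ptr] are valid
        self.ptr = 0        # next free slot
        self.t_counter = 0  # total runs seen

    def add_reward(self, reward):
        # Record one run's score and advance the pointer print_update reads
        self.rewards[self.ptr] = reward
        self.ptr += 1
        self.t_counter += 1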