Ejemplo n.º 1
0
    """
    def __init__(self, size):
        """Create a bounded FIFO replay buffer holding at most `size` items.

        Older transitions are evicted automatically once the deque is full.
        """
        self.size = size
        # deque with maxlen drops the oldest entry on overflow — no manual pruning.
        self.memory = deque(maxlen=size)

    def update(self, SARS):
        """Store one (state, action, reward, next_state, ...) transition tuple."""
        self.memory.append(SARS)

    def sample(self, batch_size):
        """Draw `batch_size` distinct transitions and transpose them.

        Returns an iterator of per-field tuples (states, actions, ...),
        i.e. the columns of the sampled batch.
        """
        batch = random.sample(self.memory, batch_size)
        return zip(*batch)


# Experience replay buffer for transitions collected during play.
# NOTE(review): `memory_size`, `batch_size`, `DQN`, `Adam`, and `index_action`
# are defined elsewhere in this file/project — not visible here.
r_memory = ReplayMemory(memory_size)
# Online network and a structurally identical target network.
# NOTE(review): the meaning of the (12, 12, 16) constructor args depends on
# DQN's definition — presumably input dims and a hidden size; confirm.
agent = DQN(12, 12, 16)
target = DQN(12, 12, 16)
# Start the target network as an exact copy of the online network's weights.
target.load_state_dict(agent.state_dict())
# Only the online network's parameters are optimized; the target is refreshed
# by copying weights.
optimizer = Adam(agent.parameters())


def update_target():
    """Run one DQN learning step from a replayed minibatch.

    NOTE(review): this function appears TRUNCATED in this chunk —
    `q_values` and `p_q_values_next` are computed but no loss/backward/
    optimizer step follows in view.
    """
    # Not enough stored transitions yet to assemble a full batch.
    if len(r_memory.memory) < batch_size:
        return
    # Each unpacked name is a tuple of batch_size per-transition values.
    observation, action, reward, observation_next, done = r_memory.sample(
        batch_size)
    # Concatenate the per-sample tensors into batched tensors.
    observations = torch.cat(observation)
    observation_next = torch.cat(observation_next)
    actions = index_action(torch.LongTensor(action))
    # NOTE(review): LongTensor truncates fractional rewards — confirm rewards
    # are integral; FloatTensor is the usual choice here.
    rewards = torch.LongTensor(reward)
    done = torch.FloatTensor(done)
    # Q-values for the current states from the online network.
    q_values = agent(observations)
    # Next-state Q-values also come from the ONLINE net, not `target` —
    # NOTE(review): vanilla DQN would use the target network here; confirm.
    p_q_values_next = agent(observation_next)
Ejemplo n.º 2
0
    # Load pre-recorded transitions gathered offline.
    # NOTE(review): column layout assumed from the indexing below — col 0 is a
    # 2-D state grid, col 3 the action label; confirm against the recorder.
    data = np.load('training_data.npy')
    # Stack the state grids into shape (N, 1, H, W) for a conv network.
    X_train = np.stack(data[:, 0], axis=0).reshape(
        (len(data), 1, len(data[0][0]), len(data[0][0][0])))
    Y_train = data[:, 3]

    # Training loop
    for epoch in range(100):
        # NOTE: np.array_split(a, 8) produces 8 chunks, so despite the name
        # this is the number of batches per epoch, not the per-batch size.
        batchsize = 8
        # BUGFIX: shuffle states and labels with the SAME permutation.
        # Independently calling np.random.shuffle on X_train and Y_train
        # (as before) destroys the state<->action pairing and turns the
        # supervision signal into noise.
        perm = np.random.permutation(len(X_train))
        X_train = X_train[perm]
        Y_train = Y_train[perm]
        # Batch
        X = np.array_split(X_train, batchsize)  # States
        Y = np.array_split(Y_train, batchsize)  # Actions

        # Accumulate the epoch loss across batches (whatever `train` returns).
        loss = 0.
        for X_batch, Y_batch in zip(X, Y):
            X_batch = Variable(FloatTensor(X_batch), requires_grad=True)
            Y_batch = Variable(LongTensor(Y_batch), requires_grad=False)
            loss += train(X_batch, Y_batch)

        # Periodic checkpoint of the model weights.
        if epoch % 10 == 0:
            save_checkpoint(
                {
                    'epoch': epoch,
                    'best_score': 0.,
                    'state_dict': model.state_dict()
                }, 'supervised_checkpoint.pth.tar')
            print('[{0}] loss: {1}'.format(epoch + 1, loss))