Example No. 1
0
def select_action(state, qnet, epsilon, action_size):
    """Epsilon-greedy action selection.

    With probability (1 - epsilon) exploit by taking the greedy action
    from the Q-network; otherwise explore with a uniform random action.

    Args:
        state: current environment state, forwarded to the Q-network.
        qnet: Q-value network used for greedy selection.
        epsilon: exploration rate in [0, 1].
        action_size: number of discrete actions to sample from.

    Returns:
        A 1x1 LongTensor holding the chosen action index.
    """
    if random.random() > epsilon:
        # Exploit: greedy action w.r.t. the current Q-estimates.
        return select_maxq_action(state, qnet)
    # Explore: uniform random action index.
    return LongTensor([[random.randrange(action_size)]])
Example No. 2
0
            # Reshape/normalize the raw next state for the network input.
            next_state = preprocess_state(next_state,state_dim)
            #print(state.shape, next_state.shape, reward.shape, done)
            # Store the transition in memory
            memory.push(state, action, next_state, reward, done, None)
            # Move to the next state
            state = next_state
            n_steps += 1
        # Episode length is the episode's score (survival-time style reward).
        scores.append(n_steps)

        # Evaluation rollout: one fully greedy (no-exploration) episode with
        # the current Q-network. The observation is augmented with Gaussian
        # noise of size noise_dim -- presumably to match a noise-conditioned
        # input used during training; confirm with the setup above.
        state = np.append(env.reset(), np.random.normal(size = noise_dim))
        state = preprocess_state(state, state_dim)
        done = False
        n_steps = 0
        while not done:
            # Select and perform an action
            action = select_maxq_action(state, qnet)
            next_state, reward, done, _ = env.step(action.item())
            next_state = np.append(next_state, np.random.normal(size = noise_dim))
            next_state = preprocess_state(next_state, state_dim)
            state = next_state
            n_steps += 1
        # Greedy-episode length tracked separately from exploration scores.
        dp_scores.append(n_steps)
        # NOTE(review): mean is computed over *all* recorded greedy episodes,
        # not only the last 100 as the log message below states -- confirm.
        mean_score = np.mean(dp_scores)

        # Set the value we want to achieve
        if mean_score >= 195 and i_episode >= 30:
            print('Ran {} episodes. Solved after {} trials ✔'.format(i_episode, i_episode - 30))
            break
        if i_episode % 100 == 0:
            print('[Episode {}] - Mean survival time over last 100 episodes was {} ticks. Epsilon is {}'
                  .format(i_episode, mean_score, epsilon))