Example #1
            # Advance the game to the next frame based on the action.
            # Reward is 1 for every frame the pole stays upright.
            next_state, reward, done, _ = env.step(action)
            # Override the reward with a -10 penalty when the episode ends.
            reward = reward if not done else -10
            # Reshape next_state into a 1 x state_size row vector (a batch of one
            # 4-value observation) so it matches the model's input shape when we
            # later estimate the maximum future reward for this state.
            next_state = np.reshape(next_state, [1, state_size])
            # Remember the previous state, action, reward, and done
            agent.remember(state, action, reward, next_state, done)
            # make next_state the new current state for the next frame.
            state = next_state
            # done becomes True when the episode ends,
            # e.g. the pole falls over or the cart leaves the track.
            if done:
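                # Sync the target network with the online network's current weights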
                agent.update_target_model()
                print("episode: {}/{}, score: {}, e: {:.2}"
                      .format(e, EPISODES, time, agent.epsilon))
                break
            if len(agent.memory) > batch_size:
                # Train the agent on a random minibatch drawn from replay memory
                # loss = agent.replay(batch_size)
                agent.replay(batch_size)
                # Logging training loss every 10 timesteps
                # if time % 10 == 0:
                #     print("episode: {}/{}, time: {}, loss: {:.4f}"
                #         .format(e, EPISODES, time, loss))  
#         if e % 10 == 0:
#             agent.save("cartpole-dqn.h5")
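The snippet above uses agent.memory, agent.epsilon, agent.remember(), agent.replay(), agent.update_target_model(), and agent.save(), but the agent itself is defined earlier in the tutorial and not shown here. Below is a minimal, hypothetical sketch of an agent exposing those names, assuming a small Keras MLP, a deque replay memory, an epsilon-greedy policy, and a separate target network. The 24-unit hidden layers, gamma, the epsilon schedule, the memory size, the Adam learning rate, the use of tensorflow.keras, and the act() method are all assumptions; only the attribute and method names come from the snippet.

import random
from collections import deque

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam


class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)   # replay memory of (s, a, r, s', done) tuples
        self.gamma = 0.95                  # discount factor (assumed value)
        self.epsilon = 1.0                 # exploration rate, decayed during training
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = self._build_model()          # online network
        self.target_model = self._build_model()   # target network
        self.update_target_model()

    def _build_model(self):
        # Small MLP: state_size inputs -> two hidden layers -> one Q-value per action.
        model = Sequential([
            Dense(24, input_dim=self.state_size, activation="relu"),
            Dense(24, activation="relu"),
            Dense(self.action_size, activation="linear"),
        ])
        model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))
        return model

    def update_target_model(self):
        # Copy the online network's weights into the target network.
        self.target_model.set_weights(self.model.get_weights())

    def act(self, state):
        # Epsilon-greedy action selection (not shown in the snippet above).
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def remember(self, state, action, reward, next_state, done):
        # Store one transition in replay memory.
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        # Sample a random minibatch and do one Q-learning update per transition:
        # target = r + gamma * max_a' Q_target(s', a') for non-terminal transitions.
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(
                    self.target_model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def save(self, name):
        self.model.save_weights(name)

Using a separate, periodically synced target network for the max-Q estimate keeps the bootstrapped targets from chasing the constantly changing online network, which stabilizes training.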

# # --------------------------------------------------------------------------------------------------------------------------------------------------------
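For completeness, the per-frame code at the top of this example sits inside two loops: an outer loop over episodes and an inner loop over timesteps. A rough sketch of that scaffolding, continuing from the agent class sketched above and assuming CartPole-v1 with the classic gym API (reset() returns the observation, step() returns four values, as in the snippet); EPISODES = 1000 and batch_size = 32 are also assumptions, only the variable names come from the snippet:

import gym
import numpy as np

EPISODES = 1000   # assumed; the snippet only references the name
batch_size = 32   # assumed minibatch size

env = gym.make("CartPole-v1")
state_size = env.observation_space.shape[0]    # 4 observations for CartPole
action_size = env.action_space.n               # 2 discrete actions
agent = DQNAgent(state_size, action_size)      # the class sketched above

for e in range(EPISODES):
    state = np.reshape(env.reset(), [1, state_size])
    for time in range(500):                    # CartPole-v1 caps episodes at 500 steps
        action = agent.act(state)              # epsilon-greedy action choice
        # --- the per-frame body shown above goes here ---
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            agent.update_target_model()
            break
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)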