Example #1
            next_state = np.reshape(next_state, [1, state_size])
            # Remember the previous state, action, reward, and done
            agent.remember(state, action, reward, next_state, done)
            # make next_state the new current state for the next frame.
            state = next_state
            # done becomes True when the game ends,
            # e.g. when the agent drops the pole
            if done:
                agent.update_target_model()
                print("episode: {}/{}, score: {}, e: {:.2}"
                      .format(e, EPISODES, time, agent.epsilon))
                break
            if len(agent.memory) > batch_size:
                # train the agent with the experience of the episode
                # loss = agent.replay(batch_size)
                agent.replay(batch_size)
                # Logging training loss every 10 timesteps
                # if time % 10 == 0:
                #     print("episode: {}/{}, time: {}, loss: {:.4f}"
                #         .format(e, EPISODES, time, loss))  
#         if e % 10 == 0:
#             agent.save("cartpole-dqn.h5")
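
# The loop above relies on the agent's remember / replay / update_target_model
# methods. The class below is only a minimal sketch (an assumption, not the
# original DQNAgent definition used above) of how those methods are commonly
# implemented: a Keras network, a deque replay buffer, and a separate target
# network that update_target_model() keeps in sync.
import random
from collections import deque

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


class SketchDQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)          # experience replay buffer
        self.gamma = 0.95                         # discount factor
        self.epsilon = 1.0                        # exploration rate
        self.model = self._build_model()          # online network
        self.target_model = self._build_model()   # target network

    def _build_model(self):
        model = Sequential([
            Dense(24, input_dim=self.state_size, activation='relu'),
            Dense(24, activation='relu'),
            Dense(self.action_size, activation='linear'),
        ])
        model.compile(loss='mse', optimizer='adam')
        return model

    def remember(self, state, action, reward, next_state, done):
        # store one transition for later replay
        self.memory.append((state, action, reward, next_state, done))

    def update_target_model(self):
        # copy the online network's weights into the target network
        self.target_model.set_weights(self.model.get_weights())

    def replay(self, batch_size):
        # sample a random minibatch and fit the online network toward
        # TD targets bootstrapped from the target network
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = self.model.predict(state)
            if done:
                target[0][action] = reward
            else:
                target[0][action] = reward + self.gamma * np.amax(
                    self.target_model.predict(next_state)[0])
            self.model.fit(state, target, epochs=1, verbose=0)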

# # --------------------------------------------------------------------------------------------------------------------------------------------------------

# SARSA Q-TABLE UPDATE EQUATION
# Q(s,a) = Q(s,a) + alpha * (R + gamma*Q(s',a') - Q(s,a))
# => the TD error can be negative or positive, so we'll use MSE as our loss function
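
# A tiny tabular illustration of the SARSA update above; the table shape and
# the alpha/gamma values here are placeholder assumptions for the sketch.
import numpy as np

n_states, n_actions = 16, 2
Q_table = np.zeros((n_states, n_actions))
alpha, gamma = 0.1, 0.99

def sarsa_update(s, a, r, s_next, a_next):
    # Q(s,a) <- Q(s,a) + alpha * (r + gamma*Q(s',a') - Q(s,a))
    td_error = r + gamma * Q_table[s_next, a_next] - Q_table[s, a]
    Q_table[s, a] += alpha * td_error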

# KERAS-RL

ENV_NAME = 'CartPole-v0'