# Reshape next_state to shape [1, state_size].
next_state = np.reshape(next_state, [1, state_size])
# NOTE(review): everything below is comment text and is NOT executed. It was
# found after a `#` on the same physical line as the reshape above and looks
# like the remainder of a training-loop body (plus later notes) whose line
# breaks were lost. The layout shown here is a best-effort reconstruction --
# TODO confirm against the original script and restore it as real code inside
# its enclosing loop (the `break` below needs one).
#
#     # Remember the previous state, action, reward, and done
#     agent.remember(state, action, reward, next_state, done)
#     # make next_state the new current state for the next frame.
#     state = next_state
#     # done becomes True when the game ends
#     # ex) The agent drops the pole
#     if done:
#         agent.update_target_model()
#         print("episode: {}/{}, score: {}, e: {:.2}"
#               .format(e, EPISODES, time, agent.epsilon))
#         break
#     if len(agent.memory) > batch_size:
#         # train the agent with the experience of the episode
#         # loss = agent.replay(batch_size)
#         agent.replay(batch_size)
#         # Logging training loss every 10 timesteps
#         # if time % 10 == 0:
#         #     print("episode: {}/{}, time: {}, loss: {:.4f}"
#         #           .format(e, EPISODES, time, loss))
#     # if e % 10 == 0:
#     #     agent.save("cartpole-dqn.h5")
#
# --------------------------------------------------------------------------------------------------------------------------------------------------------
# SARSA Q-TABLE UPDATE EQUATION
# Q(s, a) = Q(s, a) + alpha * (R + gamma * Q(s', a') - Q(s, a))
#   => regardless of whether the value is negative or positive, we'll use
#      MSE as our loss function
#
# KERAS-RL
#     ENV_NAME = 'CartPole-v0'