# NOTE(review): in this view the whole fragment below sits on ONE physical line
# that begins with '#', so Python treats all of it as a comment and none of it
# executes. It reads like the body of a per-timestep training loop whose
# enclosing loop header(s) are not visible here (the `break` needs one).
# TODO confirm whether the line breaks were lost, and if so restore these
# statements inside that loop. Below, the statements are only laid out one per
# line for readability (code lines are indented after the '#'); the code text
# itself is unchanged and is still commented out.
#
# Advance the game to the next frame based on the action.
# Reward is 1 for every frame the pole survived.
#     next_state, reward, done, _ = env.step(action)
#
# When the step ends the episode (done is true) the reward is replaced by -10.
#     reward = reward if not done else -10
#
# Reshape next_state to shape [1, state_size], i.e. a single row holding one
# state vector, so it can be fed to the model when estimating the maximum
# future reward of the next state. (The original note says the model input has
# 4 neurons -- presumably state_size == 4; confirm.)
#     next_state = np.reshape(next_state, [1, state_size])
#
# Remember the previous state, action, reward, next state, and done flag.
#     agent.remember(state, action, reward, next_state, done)
#
# Make next_state the new current state for the next frame.
#     state = next_state
#
# done becomes True when the game ends, e.g. the agent drops the pole.
# On that step: update the target model, report the episode, leave the loop.
#     if done:
#         agent.update_target_model()
#         print("episode: {}/{}, score: {}, e: {:.2}" .format(e, EPISODES, time, agent.epsilon))
#         break
#
# Replay only once the memory holds more than batch_size entries.
#     if len(agent.memory) > batch_size:
#         # train the agent with the experience of the episode
#         # loss = agent.replay(batch_size)
#         agent.replay(batch_size)
#         # Logging training loss every 10 timesteps
#         # (nesting of this disabled snippet is a guess -- verify)
#         # if time % 10 == 0:
#         #     print("episode: {}/{}, time: {}, loss: {:.4f}"
#         #           .format(e, EPISODES, time, loss))
#
# Disabled periodic checkpoint (its original nesting level is not recoverable
# from the collapsed line -- verify before re-enabling):
#     # if e % 10 == 0:
#     #     agent.save("cartpole-dqn.h5")
#
# --------------------------------------------------------------------------------------------------------------------------------------------------------