Example #1
 # Goal: keep the pole upright for as long as possible, up to a score of 500.
 # The longer the pole stays up, the higher the score.
 for time in range(700):
     # Decide action
     action = agent.act(state) # maximum action ; pass our vector state to our NN in which we have state_size neurons
     # Advance the game to the next frame based on the action.
     # Reward is 1 for every frame the pole survived
     next_state, reward, done, _ = env.step(action)
     reward = reward if not done else -10
     # Reshape next_state into a 1 x state_size array (a single row vector),
     # because that is the input shape our model expects (4 values in our case)
     # when it estimates the maximum future reward for the next state.
     next_state = np.reshape(next_state, [1, state_size])
     # Remember the previous state, action, reward, and done
     agent.remember(state, action, reward, next_state, done)
     # make next_state the new current state for the next frame.
     state = next_state
     # done becomes True when the game ends,
     # e.g. when the agent drops the pole.
     if done:
         print("episode: {}/{}, score: {}, e: {:.2}"
               .format(e, EPISODES, time, agent.epsilon))
     if len(agent.memory) > batch_size:
         # train the agent with the experience of the episode
         # loss = agent.replay(batch_size)
         # Logging training loss every 10 timesteps
         # if time % 10 == 0: