smodel.summary() # agent.load("cartpole-dqn.h5") done = False batch_size = 32 for e in range(EPISODES): # reset state in the beginning of each game state = env.reset() state = np.reshape(state, [1, state_size]) # turn the state into a one dimensional matrix which is a vector # time represents each frame of the game # Our goal is to keep the pole upright as long as possible until score of 500 # the more time the more score for time in range(700): env.render() # Decide action action = agent.act(state) # maximum action ; pass our vector state to our NN in which we have state_size neurons # Advance the game to the next frame based on the action. # Reward is 1 for every frame the pole survived next_state, reward, done, _ = env.step(action) reward = reward if not done else -10 # we are turning our next_state into a one dimensional matrix which is a vector # to calculate the maximum future reward for next state ; cause our model input # is a one dimensional matrix which is a vector in which in our case is 4 neurons next_state = np.reshape(next_state, [1, state_size]) # Remember the previous state, action, reward, and done agent.remember(state, action, reward, next_state, done) # make next_state the new current state for the next frame. state = next_state # done becomes True when the game ends # ex) The agent drops the pole if done: