Example #1
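The main training loop for a DQN agent on CartPole: each episode the agent reshapes the observed state, chooses an action, and stores the resulting transition for experience replay.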
    model.summary()  # print the network architecture (layers and parameter counts)
    # agent.load("cartpole-dqn.h5")
    done = False
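    # how many stored transitions the agent samples per experience-replay training step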
    batch_size = 32

    for e in range(EPISODES):
        # reset the state at the beginning of each episode
        state = env.reset()
        state = np.reshape(state, [1, state_size])  # reshape into a 1 x state_size row vector, the shape the network expects
        # time counts the frames of the current episode;
        # the goal is to keep the pole upright as long as possible, and the
        # longer it survives the higher the score (CartPole-v1 caps episodes at 500 frames)
        for time in range(700):
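            # draw the current frame (optional; rendering slows training considerably)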
            env.render()
            # Decide on an action: the state vector is fed to the network, whose
            # input layer has state_size neurons, and the agent picks the action
            # with the maximum predicted value
            action = agent.act(state)
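            # (sketch, assuming a typical epsilon-greedy act(); the agent class is not shown here)
            #   if np.random.rand() <= self.epsilon:
            #       return random.randrange(self.action_size)   # explore with probability epsilon
            #   q_values = self.model.predict(state)            # one Q-value per action, shape (1, action_size)
            #   return np.argmax(q_values[0])                   # exploit: action with the maximum Q-value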
            # Advance the game to the next frame based on the action.
            # Reward is 1 for every frame the pole survived
            next_state, reward, done, _ = env.step(action)
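            # penalize termination: overriding the final reward with -10 gives the
            # network a strong negative signal for states that end the episode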
            reward = reward if not done else -10
            # reshape next_state the same way: the model's input must be a
            # 1 x state_size vector (4 values for CartPole) before we can
            # estimate the maximum future reward for the next state
            next_state = np.reshape(next_state, [1, state_size])
            # Remember the transition: state, action, reward, next_state, and done
            agent.remember(state, action, reward, next_state, done)
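            # (sketch, assuming remember() appends to a bounded replay buffer; the agent class is not shown here)
            #   self.memory.append((state, action, reward, next_state, done))
            # the agent later samples batch_size of these transitions to train on (experience replay)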
            # make next_state the new current state for the next frame.
            state = next_state
            # done becomes True when the episode ends,
            # e.g. when the agent drops the pole
            if done: