예제 #1
0
            action = np.random.randint(0, 4)
        else:
            qvalues = model.predict(currentState)[0]
            action = np.argmax(qvalues)

        # Updating the environment: apply the chosen action and unpack the
        # three values env.step returns.
        frame, reward, gameOver = env.step(action)

        # We need to reshape the frame (2D, per the original note) so it can be
        # added to nextState (4D). The reshaped frame is appended as the last
        # slice along axis 3 and the slice at index 0 of that axis is removed,
        # so the length of axis 3 stays the same.
        # NOTE(review): presumably axis 3 is a sliding window of recent frames;
        # confirm where nextState is initialised.
        # np.append and np.delete both return new arrays, so the array still
        # bound to currentState is not modified here.
        frame = np.reshape(frame, (1, env.nColumns, env.nRows, 1))
        nextState = np.append(nextState, frame, axis=3)
        nextState = np.delete(nextState, 0, axis=3)

        # Remembering new experience and training the AI: hand the transition
        # and the gameOver flag to DQN.remember, then run one train_on_batch
        # call on the (inputs, targets) pair produced by DQN.getBatch.
        DQN.remember([currentState, action, reward, nextState], gameOver)
        inputs, targets = DQN.getBatch(model, batchSize)
        model.train_on_batch(inputs, targets)

        # Updating the score: count this step when env.collected is truthy.
        if env.collected:
            nCollected += 1

        # The state just produced becomes the current state for the next step.
        currentState = nextState

    # Lower epsilon by epsilonDecayRate, clamped from below at minLastEpsilon
    epsilon = max(epsilon - epsilonDecayRate, minLastEpsilon)

    # Save the model only when nCollected exceeds both the best count seen so
    # far and the fixed threshold of 2, then record the new best count
    if nCollected > max(maxNCollected, 2):
        model.save(filePathToSave)
        maxNCollected = nCollected
예제 #2
0
        # Taking an action: with probability epsilon draw a random action index
        # from {0, 1, 2} (randint's upper bound is exclusive); otherwise take
        # the index of the largest predicted value for the current state.
        if np.random.rand() <= epsilon:
            action = np.random.randint(0, 3)
        else:
            qvalues = model.predict(currentState)[0]
            action = np.argmax(qvalues)

        # Updating the environment.
        # currentState is bound to the previous nextState object at the bottom
        # of this block, so writing the new observation into nextState in place
        # would overwrite currentState as well, and the transition passed to
        # dqn.remember would hold the same object as both its "current" and
        # "next" state. Rebinding nextState to a fresh copy first keeps the
        # two states distinct.
        # NOTE(review): assumes nextState is a NumPy array (it is later passed
        # to model.predict as currentState) -- confirm at its initialisation.
        nextState = nextState.copy()
        nextState[0], reward, gameOver, _ = env.step(action)
        env.render()

        totReward += reward

        # Remembering new experience, training the AI and updating current
        # state: one train_on_batch call per environment step, on the
        # (inputs, targets) pair produced by dqn.getBatch.
        dqn.remember([currentState, action, reward, nextState], gameOver)
        inputs, targets = dqn.getBatch(model, batchSize)
        model.train_on_batch(inputs, targets)

        # Safe to share the reference: the next pass copies before writing.
        currentState = nextState

    # Scale epsilon down by epsilonDecayRate, then report the epoch; the
    # printed epsilon is the already-decayed value
    epsilon = epsilon * epsilonDecayRate

    print('Epoch: {} Epsilon: {:.5f} Total Reward: {:.2f}'.format(
        epoch, epsilon, totReward))

    # Store the accumulated reward and reset the accumulator
    rewards.append(totReward)
    totReward = 0

    # Plot the reward history gathered so far
    plt.plot(rewards)
    plt.xlabel('Epoch')