#Taking an action
if np.random.rand() <= epsilon:
    action = np.random.randint(0, 4)
else:
    qvalues = model.predict(currentState)[0]
    action = np.argmax(qvalues)

#Updating the environment
frame, reward, gameOver = env.step(action)

#We need to reshape the frame (2D) to add it to the nextState (4D)
frame = np.reshape(frame, (1, env.nColumns, env.nRows, 1))
nextState = np.append(nextState, frame, axis=3)
nextState = np.delete(nextState, 0, axis=3)

#Remembering the new experience and training the AI
DQN.remember([currentState, action, reward, nextState], gameOver)
inputs, targets = DQN.getBatch(model, batchSize)
model.train_on_batch(inputs, targets)

#Updating the score and current state
if env.collected:
    nCollected += 1
currentState = nextState

#Updating the epsilon and saving the model
epsilon -= epsilonDecayRate
epsilon = max(epsilon, minLastEpsilon)
if nCollected > maxNCollected and nCollected > 2:
    model.save(filePathToSave)
    maxNCollected = nCollected
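#For reference, a minimal sketch of the replay-memory object behind the
#DQN.remember() and DQN.getBatch() calls above. Only those two calls come from
#the script itself; the class name Dqn and the maxMemory/discount defaults are
#assumptions, not taken from the source.
import numpy as np

class Dqn(object):

    def __init__(self, maxMemory=50000, discount=0.9):
        self.memory = list()        #list of [[currentState, action, reward, nextState], gameOver]
        self.maxMemory = maxMemory  #cap on the number of stored transitions
        self.discount = discount    #gamma in the Bellman target

    def remember(self, transition, gameOver):
        #Append the new experience, dropping the oldest one when memory is full
        self.memory.append([transition, gameOver])
        if len(self.memory) > self.maxMemory:
            del self.memory[0]

    def getBatch(self, model, batchSize):
        #Sample random transitions and build the Q-learning targets for them
        lenMemory = len(self.memory)
        inputShape = self.memory[0][0][0].shape[1:]
        nOutputs = model.output_shape[-1]
        nSamples = min(batchSize, lenMemory)
        inputs = np.zeros((nSamples,) + inputShape)
        targets = np.zeros((nSamples, nOutputs))
        for i, idx in enumerate(np.random.randint(0, lenMemory, size=nSamples)):
            (currentState, action, reward, nextState), gameOver = self.memory[idx]
            inputs[i] = currentState[0]
            #Start from the model's own predictions so only the taken action's target changes
            targets[i] = model.predict(currentState)[0]
            if gameOver:
                targets[i, action] = reward
            else:
                targets[i, action] = reward + self.discount * np.max(model.predict(nextState)[0])
        return inputs, targets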
#Taking an action
if np.random.rand() <= epsilon:
    action = np.random.randint(0, 3)
else:
    qvalues = model.predict(currentState)[0]
    action = np.argmax(qvalues)

#Updating the environment
nextState[0], reward, gameOver, _ = env.step(action)
env.render()
totReward += reward

#Remembering the new experience, training the AI and updating the current state
#(copies avoid aliasing, since nextState is updated in place each step)
dqn.remember([currentState, action, reward, nextState.copy()], gameOver)
inputs, targets = dqn.getBatch(model, batchSize)
model.train_on_batch(inputs, targets)
currentState = nextState.copy()

#Lowering epsilon and displaying the results
epsilon *= epsilonDecayRate
print('Epoch: ' + str(epoch) + ' Epsilon: {:.5f}'.format(epsilon) + ' Total Reward: {:.2f}'.format(totReward))
rewards.append(totReward)
totReward = 0

plt.plot(rewards)
plt.xlabel('Epoch')
plt.ylabel('Total Reward')
plt.show()
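#Note the two different epsilon schedules used in these scripts: the first
#decays epsilon linearly and clamps it at a floor (minLastEpsilon), while the
#second multiplies it by a decay rate each epoch, so it only approaches zero.
#A quick standalone comparison (the rates and floor below are assumed values
#for illustration, not ones taken from the scripts):
epsilonLinear = 1.
epsilonGeometric = 1.
epsilonDecayRate = 0.0002   #subtracted at every step, as in the first script
geometricDecayRate = 0.995  #multiplied in at every epoch, as in the second script
minEpsilon = 0.05           #floor, used only by the linear schedule

for step in range(5000):
    epsilonLinear = max(epsilonLinear - epsilonDecayRate, minEpsilon)
    epsilonGeometric *= geometricDecayRate

#The linear schedule stops at its floor and keeps exploring a little;
#the geometric one keeps shrinking toward pure exploitation.
print(epsilonLinear, epsilonGeometric)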