def PlotValueFunction(AI):
    """Save a 3-D surface of the agent's best-action Q value to 'optimalQ.png'.

    The surface is sampled on a grid over the 2nd and 4th arguments of
    ``RL.FB_GS`` (mainGame passes the player's y position and y velocity in
    those slots); every other state input is zeroed out.

    Does nothing when ``AI`` has no callable ``QueryQBestAction`` attribute.
    """
    if not callable(getattr(AI, 'QueryQBestAction', None)):
        return

    # Grid axes: positions every 20 px up to BASEY + 30, velocities -10..9.
    positions = range(0, int(BASEY + 30), 20)
    velocities = range(-10, 10, 1)
    X, Y = np.meshgrid(positions, velocities)
    Z = np.zeros(X.shape)

    # Visit every grid cell in row-major order and query the agent there.
    for cell in np.ndindex(*X.shape):
        # Fresh placeholder pipes for each query, all at the origin.
        zeroUpper = [{'x': 0, 'y': 0}, {'x': 0, 'y': 0}]
        zeroLower = [{'x': 0, 'y': 0}, {'x': 0, 'y': 0}]
        Z[cell] = AI.QueryQBestAction(
            RL.FB_GS(0, X[cell], 0, Y[cell], zeroUpper, zeroLower))

    figure = plt.figure()
    axes3d = figure.gca(projection='3d')
    axes3d.plot_surface(
        X, Y, Z,
        rstride=1,
        cstride=1,
        cmap=cm.coolwarm,
        linewidth=0,
        antialiased=False)
    plt.savefig('optimalQ.png')
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(figure)
def mainGame(movementInfo, AI):
    """Play one episode with ``AI`` choosing the actions; return on crash.

    Every frame the loop: handles pygame events, asks ``AI.MakeMove`` whether
    to flap, checks for a crash, advances the player and the pipes, reports
    the transition through ``AI.Reinforce`` and redraws the screen.

    Args:
        movementInfo: mapping that provides 'playerIndexGen' (an iterator of
            player sprite indices), 'playery' and 'basex'.
        AI: agent object. ``MakeMove(state)``, ``Reinforce(state, action,
            next_state, reward)`` and ``RestartEpisode()`` are called here;
            it is also handed to ``PlotValueFunction`` every 5000 frames.

    Returns:
        A dict built when ``checkCrash`` reports a crash, with the keys
        'y', 'groundCrash', 'basex', 'upperPipes', 'lowerPipes', 'score',
        'playerVelY', 'gameScore' (discounted sum of the feedback handed to
        the agent) and 'gamePipeScore' (same value as 'score').

    The function does not return otherwise: a QUIT event or the Escape key
    shuts pygame down and calls ``sys.exit()``.
    """
    # Declared once, before any use of FPS. Repeating the declaration in the
    # middle of the body after FPS has been assigned is a SyntaxWarning on
    # Python 2 and a SyntaxError on Python 3.
    global FPS

    score = playerIndex = loopIter = 0
    playerIndexGen = movementInfo['playerIndexGen']
    playerx, playery = int(SCREENWIDTH * 0.2), movementInfo['playery']

    basex = movementInfo['basex']
    baseShift = IMAGES['base'].get_width() - IMAGES['background'].get_width()

    # get 2 new pipes to add to upperPipes / lowerPipes lists
    newPipe1 = getRandomPipe()
    newPipe2 = getRandomPipe()

    firstPipeX = SCREENWIDTH + FIRST_PIPE_LOC
    secondPipeX = firstPipeX + (SCREENWIDTH / 2) + EXTRA_X_DIST

    # list of upper pipes
    upperPipes = [
        {'x': firstPipeX, 'y': newPipe1[0]['y']},
        {'x': secondPipeX, 'y': newPipe2[0]['y']},
    ]

    # list of lower pipes
    lowerPipes = [
        {'x': firstPipeX, 'y': newPipe1[1]['y']},
        {'x': secondPipeX, 'y': newPipe2[1]['y']},
    ]

    pipeVelX = -4

    # player velocity, max velocity, downward acceleration, acceleration on flap
    playerVelY = -9         # player's velocity along Y, default same as playerFlapAcc
    playerMaxVelY = 10      # max vel along Y, max descend speed
    playerAccY = 1          # player's downward acceleration
    playerFlapAcc = -9      # player's speed on flapping
    playerFlapped = False   # True when player flaps

    curGameScore = 0        # discounted sum of feedback for this episode
    cum_discount = 1        # GAMMA ** (number of completed steps)
    timesteps = 0

    while True:
        for event in pygame.event.get():
            if event.type == QUIT or (event.type == KEYDOWN
                                      and event.key == K_ESCAPE):
                pygame.quit()
                sys.exit()
            # Up / Down arrows change the frame-rate cap in steps of 10.
            # Single-argument print parses the same on Python 2 and 3 and
            # emits the same text as the old ``print 'FPS: ', FPS``.
            if event.type == KEYDOWN and event.key == K_UP:
                FPS += 10
                print('FPS:  %s' % (FPS,))
            if event.type == KEYDOWN and event.key == K_DOWN:
                FPS -= 10
                print('FPS:  %s' % (FPS,))

        timesteps += 1
        if timesteps % 5000 == 0:
            PlotValueFunction(AI)

        ## RL: Get player (bot) move
        GS = RL.FB_GS(playerx, playery, pipeVelX, playerVelY,
                      upperPipes, lowerPipes)
        botActionTaken = AI.MakeMove(GS)
        # Equality (not truthiness) is kept on purpose: MakeMove's return
        # type is not visible from here.
        if botActionTaken == True:  # noqa: E712
            # Ignore the flap when the player is already far above the screen.
            if playery > -2 * IMAGES['player'][0].get_height():
                playerVelY = playerFlapAcc
                playerFlapped = True
                SOUNDS['wing'].play()
        else:
            playerFlapped = False

        # check for crash here (uses the position from before this frame's move)
        crashTest = checkCrash(
            {'x': playerx, 'y': playery, 'index': playerIndex},
            upperPipes, lowerPipes)
        if crashTest[0]:
            ## RL: handle terminal state (0 is passed in place of a next state)
            AI.Reinforce(GS, botActionTaken, 0, FEEDBACK_DEATH)
            AI.RestartEpisode()
            curGameScore += FEEDBACK_DEATH * cum_discount
            return {
                'y': playery,
                'groundCrash': crashTest[1],
                'basex': basex,
                'upperPipes': upperPipes,
                'lowerPipes': lowerPipes,
                'score': score,
                'playerVelY': playerVelY,
                'gameScore': curGameScore,
                'gamePipeScore': score,
            }

        # check for score
        ## RL: feedback for a non-terminal step is the reward for staying alive
        feedback = FEEDBACK_LIFE
        playerMidPos = playerx + IMAGES['player'][0].get_width() / 2
        for pipe in upperPipes:
            pipeMidPos = pipe['x'] + IMAGES['pipe'][0].get_width() / 2
            if pipeMidPos <= playerMidPos < pipeMidPos + 4:
                ## RL: passing a pipe raises the score; feedback is unchanged
                SOUNDS['point'].play()
                score += 1

        # playerIndex basex change
        if (loopIter + 1) % 3 == 0:
            playerIndex = next(playerIndexGen)
        loopIter = (loopIter + 1) % 30
        basex = -((-basex + 100) % baseShift)

        # player's movement: gravity applies only on frames without a flap
        if playerVelY < playerMaxVelY and not playerFlapped:
            playerVelY += playerAccY
        if playerFlapped:
            playerFlapped = False
        playerHeight = IMAGES['player'][playerIndex].get_height()
        # never move the player below the base
        playery += min(playerVelY, BASEY - playery - playerHeight)

        # move pipes to left
        for uPipe, lPipe in zip(upperPipes, lowerPipes):
            uPipe['x'] += pipeVelX
            lPipe['x'] += pipeVelX

        # add new pipe when first pipe is about to touch left of screen
        if 0 < upperPipes[0]['x'] < 5:
            newPipe = getRandomPipe()
            upperPipes.append(newPipe[0])
            lowerPipes.append(newPipe[1])

        # remove first pipe if it is out of the screen
        if upperPipes[0]['x'] < -IMAGES['pipe'][0].get_width():
            upperPipes.pop(0)
            lowerPipes.pop(0)

        ## RL: perform feedback
        NEXT_GS = RL.FB_GS(playerx, playery, pipeVelX, playerVelY,
                           upperPipes, lowerPipes)
        AI.Reinforce(GS, botActionTaken, NEXT_GS, feedback)
        curGameScore += feedback * cum_discount
        cum_discount = cum_discount * GAMMA

        # draw sprites
        SCREEN.blit(IMAGES['background'], (0, 0))

        for uPipe, lPipe in zip(upperPipes, lowerPipes):
            SCREEN.blit(IMAGES['pipe'][0], (uPipe['x'], uPipe['y']))
            SCREEN.blit(IMAGES['pipe'][1], (lPipe['x'], lPipe['y']))

        SCREEN.blit(IMAGES['base'], (basex, BASEY))
        # print score so player overlaps the score
        showScore(score)
        SCREEN.blit(IMAGES['player'][playerIndex], (playerx, playery))

        pygame.display.update()
        FPSCLOCK.tick(FPS)