Esempio n. 1
0
def PlotValueFunction(AI):
    """Save a 3-D surface of the agent's best-action Q value to optimalQ.png.

    The surface is sampled over a grid of two state components only (the
    second and fourth positional arguments of ``RL.FB_GS``, i.e. the
    player's vertical position and vertical velocity); every other
    component of the state is held at zero.

    Nothing happens when ``AI`` has no callable ``QueryQBestAction``
    attribute.
    """
    if not (hasattr(AI, 'QueryQBestAction')
            and callable(AI.QueryQBestAction)):
        return

    def originPipes():
        # A fresh pair of pipes at the origin for every state that is built.
        return [{'x': 0, 'y': 0}, {'x': 0, 'y': 0}]

    # Sample positions every 20 units up to BASEY + 30, velocities -10..9.
    positions = range(0, int(BASEY + 30), 20)
    velocities = range(-10, 10, 1)
    X, Y = np.meshgrid(positions, velocities)

    # Fill the value surface one grid cell at a time, in row-major order.
    Z = np.zeros(X.shape)
    for cell in np.ndindex(*X.shape):
        state = RL.FB_GS(0, X[cell], 0, Y[cell], originPipes(),
                         originPipes())
        Z[cell] = AI.QueryQBestAction(state)

    fig = plt.figure()
    ax = fig.gca(projection='3d')
    surfaceStyle = dict(rstride=1,
                        cstride=1,
                        cmap=cm.coolwarm,
                        linewidth=0,
                        antialiased=False)
    ax.plot_surface(X, Y, Z, **surfaceStyle)
    plt.savefig('optimalQ.png')
    # Close the figure explicitly so repeated calls do not pile up figures.
    plt.close(fig)
Esempio n. 2
0
def mainGame(movementInfo, AI):
    """Play one episode with ``AI`` choosing the move on every frame.

    Per frame the loop handles window/keyboard events, asks the agent for
    an action, checks for a crash, advances the player and the pipes,
    reinforces the agent with the observed transition and redraws the
    screen.  The loop only ends when the player crashes (or the process
    exits on QUIT / ESC).

    Args:
        movementInfo: dict supplying 'playerIndexGen' (an iterator of
            sprite indices), 'playery' and 'basex'.
        AI: agent object; ``MakeMove(state)``, ``Reinforce(state, action,
            next_state, feedback)`` and ``RestartEpisode()`` are called
            on it.

    Returns:
        dict describing the crash, with keys 'y', 'groundCrash', 'basex',
        'upperPipes', 'lowerPipes', 'score', 'playerVelY', 'gameScore'
        (discounted sum of feedback) and 'gamePipeScore' (same value as
        'score').
    """
    # Declared once, before any use: a ``global`` statement that follows
    # an assignment to the same name is a SyntaxError on Python 3.
    global FPS

    score = playerIndex = loopIter = 0
    playerIndexGen = movementInfo['playerIndexGen']
    playerx, playery = int(SCREENWIDTH * 0.2), movementInfo['playery']

    basex = movementInfo['basex']
    baseShift = IMAGES['base'].get_width() - IMAGES['background'].get_width()

    # get 2 new pipes to add to upperPipes lowerPipes list
    newPipe1 = getRandomPipe()
    newPipe2 = getRandomPipe()

    # Floor division keeps the pixel coordinate integral on Python 3 too.
    firstPipeX = SCREENWIDTH + FIRST_PIPE_LOC
    secondPipeX = firstPipeX + (SCREENWIDTH // 2) + EXTRA_X_DIST

    # list of upper pipes
    upperPipes = [
        {'x': firstPipeX, 'y': newPipe1[0]['y']},
        {'x': secondPipeX, 'y': newPipe2[0]['y']},
    ]

    # list of lower pipes
    lowerPipes = [
        {'x': firstPipeX, 'y': newPipe1[1]['y']},
        {'x': secondPipeX, 'y': newPipe2[1]['y']},
    ]

    pipeVelX = -4

    # player velocity, max velocity, downward acceleration, flap speed
    playerVelY = -9  # player's velocity along Y, default same as playerFlapAcc
    playerMaxVelY = 10  # max vel along Y, max descend speed
    playerAccY = 1  # player's downward acceleration
    playerFlapAcc = -9  # player's speed on flapping
    playerFlapped = False  # True when player flaps

    curGameScore = 0  # discounted sum of all feedback in this episode
    cum_discount = 1  # GAMMA ** (number of completed steps)
    timesteps = 0
    while True:
        for event in pygame.event.get():
            if event.type == QUIT or (event.type == KEYDOWN
                                      and event.key == K_ESCAPE):
                pygame.quit()
                sys.exit()
            # UP / DOWN arrows change the frame rate in steps of 10.
            if event.type == KEYDOWN and event.key in (K_UP, K_DOWN):
                FPS += 10 if event.key == K_UP else -10
                # Same text the old print statement produced (two spaces).
                print('FPS:  %s' % FPS)

        timesteps += 1
        if timesteps % 5000 == 0:
            PlotValueFunction(AI)

        ## RL: Get player (bot) move
        GS = RL.FB_GS(playerx, playery, pipeVelX, playerVelY, upperPipes,
                      lowerPipes)
        botActionTaken = AI.MakeMove(GS)

        if botActionTaken == True:
            # A flap is ignored once the player is far above the screen.
            if playery > -2 * IMAGES['player'][0].get_height():
                playerVelY = playerFlapAcc
                playerFlapped = True
                SOUNDS['wing'].play()
        else:
            playerFlapped = False

        # check for crash here
        crashTest = checkCrash(
            {
                'x': playerx,
                'y': playery,
                'index': playerIndex
            }, upperPipes, lowerPipes)
        if crashTest[0]:

            ## RL: handle terminal state (0 is passed as the next state)
            AI.Reinforce(GS, botActionTaken, 0, FEEDBACK_DEATH)
            AI.RestartEpisode()
            curGameScore += FEEDBACK_DEATH * cum_discount

            return {
                'y': playery,
                'groundCrash': crashTest[1],
                'basex': basex,
                'upperPipes': upperPipes,
                'lowerPipes': lowerPipes,
                'score': score,
                'playerVelY': playerVelY,
                'gameScore': curGameScore,
                'gamePipeScore': score,
            }

        # check for score
        ## RL: feedback for a non-terminal step is the stay-alive reward
        feedback = FEEDBACK_LIFE
        playerMidPos = playerx + IMAGES['player'][0].get_width() // 2
        for pipe in upperPipes:
            pipeMidPos = pipe['x'] + IMAGES['pipe'][0].get_width() // 2
            if pipeMidPos <= playerMidPos < pipeMidPos + 4:
                # NOTE(review): passing a pipe only raises the displayed
                # score; ``feedback`` is left unchanged. The earlier
                # comment here suggested a pipe reward may have been
                # intended -- confirm.
                SOUNDS['point'].play()
                score += 1

        # playerIndex basex change
        if (loopIter + 1) % 3 == 0:
            playerIndex = next(playerIndexGen)
        loopIter = (loopIter + 1) % 30
        basex = -((-basex + 100) % baseShift)

        # player's movement
        if playerVelY < playerMaxVelY and not playerFlapped:
            playerVelY += playerAccY
        if playerFlapped:
            playerFlapped = False
        playerHeight = IMAGES['player'][playerIndex].get_height()
        # Never move the player below the base.
        playery += min(playerVelY, BASEY - playery - playerHeight)

        # move pipes to left
        for uPipe, lPipe in zip(upperPipes, lowerPipes):
            uPipe['x'] += pipeVelX
            lPipe['x'] += pipeVelX

        # add new pipe when first pipe is about to touch left of screen
        if 0 < upperPipes[0]['x'] < 5:
            newPipe = getRandomPipe()
            upperPipes.append(newPipe[0])
            lowerPipes.append(newPipe[1])

        # remove first pipe if it is out of the screen
        if upperPipes[0]['x'] < -IMAGES['pipe'][0].get_width():
            upperPipes.pop(0)
            lowerPipes.pop(0)

        ## RL: perform feedback
        NEXT_GS = RL.FB_GS(playerx, playery, pipeVelX, playerVelY, upperPipes,
                           lowerPipes)
        AI.Reinforce(GS, botActionTaken, NEXT_GS, feedback)
        curGameScore += feedback * cum_discount
        cum_discount = cum_discount * GAMMA

        # draw sprites
        SCREEN.blit(IMAGES['background'], (0, 0))

        for uPipe, lPipe in zip(upperPipes, lowerPipes):
            SCREEN.blit(IMAGES['pipe'][0], (uPipe['x'], uPipe['y']))
            SCREEN.blit(IMAGES['pipe'][1], (lPipe['x'], lPipe['y']))

        SCREEN.blit(IMAGES['base'], (basex, BASEY))
        # print score so player overlaps the score
        showScore(score)
        SCREEN.blit(IMAGES['player'][playerIndex], (playerx, playery))

        pygame.display.update()
        FPSCLOCK.tick(FPS)