Exemple #1
0
def game():
    """Run one keyboard-controlled round of snake, then call ``gameOver()``.

    Uses the module-level ``width``, ``height``, ``fps`` and ``clock`` and
    raises the global ``highscore`` whenever this round's score exceeds it.
    The round ends when the window is closed (``pygame.QUIT``) or when
    ``snake.isDead(height, width)`` reports a death.
    """
    global highscore

    # Setup: a width x height grid of zeros, indexed as grid[x][y].
    score = 0
    grid = [[0] * height for _ in range(width)]

    # Three-segment starting snake.
    # NOTE(review): the tail is created at head.Y + 1, i.e. on the same cell
    # as the body; possibly meant to be body.Y + 1 - confirm against Snake.
    head = Segment(width / 2, height / 5)
    body = Segment(head.X, head.Y + 1)
    tail = Segment(body.X, head.Y + 1)
    head.setNext(body)
    body.setNext(tail)
    snake = Snake(head, tail)
    apple = Apple(width, height)

    # Game loop
    finished = False
    while not finished:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                # The current frame is still played out before leaving.
                finished = True

        # Change direction. Only a perpendicular turn is accepted, and at
        # most one turn per frame: the second test is an ``elif`` so that a
        # turn made by the first test cannot be followed by another turn in
        # the same frame (which would allow a 180-degree reversal when two
        # arrow keys are held at once).
        keyPressed = pygame.key.get_pressed()
        if snake.direction in ("up", "down"):
            if keyPressed[pygame.K_RIGHT]:
                snake.direction = "right"
            if keyPressed[pygame.K_LEFT]:
                snake.direction = "left"
        elif snake.direction in ("right", "left"):
            if keyPressed[pygame.K_UP]:
                snake.direction = "up"
            if keyPressed[pygame.K_DOWN]:
                snake.direction = "down"

        snake.move()

        # Check if collected an apple
        if snake.head.X == apple.X and snake.head.Y == apple.Y:
            snake.grow()
            score += 1
            if score > highscore:
                highscore = score
            # Re-roll the apple until it lands on a cell whose grid value
            # is 0. The grid is not written in this function; presumably
            # draw() maintains it - verify.
            apple.generate()
            while grid[apple.X][apple.Y] != 0:
                apple.generate()

        # Death check: leave immediately, skipping the final draw and tick.
        if snake.isDead(height, width):
            break

        draw(grid, snake, apple, score)
        clock.tick(fps)

    gameOver()
Exemple #2
0
def run():
    """Train the Q-learning snake agent, playing one game per episode.

    A previously saved Q-table and episode counter are loaded from the
    ``saves`` directory when available; otherwise training starts from a
    zero table at episode 0. Every 100 episodes the table and the counter
    are written back and the share of explored states is printed.

    Keyboard controls while running:
        SPACE  - pause            RETURN - resume
        UP     - "Training" mode (fps 10000, dynamic epsilon)
        DOWN   - "Demo" mode (fps 30, epsilon reset to the demo value)
        END    - quit (same as closing the window)

    Relies on module-level ``width``, ``height``, ``actions``,
    ``state_dict``, ``clock`` and the helpers ``init_state_dict``,
    ``getState``, ``distanceToApple`` and ``draw``.
    """
    alpha = 0.15  # learning rate
    gamma = 0.7  # discount factor
    epsilon = 0  # exploration value
    demoEpsilon = 0  # epsilon value for the demo mode
    # If True, the exploration rate is reduced as more of the state space is
    # discovered; if False the rate stays static.
    dynamicEpsilon = False
    training_episodes = 1000000
    # Kill the snake after this many moves without an apple to prevent it
    # from getting stuck in a cycle.
    deathAfter = 250
    qTableSave = os.path.join(os.sys.path[0], "saves", "QTable.txt")
    counterSave = os.path.join(os.sys.path[0], "saves", "GameCounter.txt")
    fps = 30
    displayMode = "Demo"

    # Relative turns for every heading except "up" (where the action name is
    # already the new heading): heading -> {action -> new heading}.
    turnTable = {
        "down": {"right": "left", "left": "right"},
        "left": {"right": "up", "left": "down"},
        "right": {"right": "down", "left": "up"},
    }

    # Load existing Q-Table and episode counter or create new ones
    numberOfStates = init_state_dict()
    Q = np.zeros((numberOfStates, len(actions)))
    try:
        Q = np.loadtxt(qTableSave).reshape(numberOfStates, len(actions))
    except (OSError, ValueError):
        # Missing/unreadable file or a table of the wrong size.
        print("Could not load an existing Q-Table")
    try:
        with open(counterSave, "r") as counterFile:
            startEpisode = int(counterFile.read())
    except (OSError, ValueError):
        startEpisode = 0

    highscore = 0
    for episode in range(startEpisode, training_episodes + 1):
        killApp = False

        # Periodic checkpoint. Saving is best-effort: a failure is reported
        # but never stops training.
        if episode % 100 == 0:
            try:
                with open(qTableSave, "w") as qFile:
                    # One value per line, row after row (the format the
                    # loader above reshapes back into a table).
                    for row in Q:
                        np.savetxt(qFile, row)
                with open(counterSave, "w") as counterFile:
                    counterFile.write(str(episode))

                # A state counts as explored once any of its action values
                # is non-zero.
                count = int(np.count_nonzero(Q.any(axis=1)))
                exploredPercentage = count / numberOfStates
                if dynamicEpsilon:
                    if exploredPercentage < 0.95:
                        # Reduce exploration as more states get explored
                        epsilon = (1 - exploredPercentage) / 2
                    else:
                        epsilon = 0.05
                print(
                    str(exploredPercentage * 100) + "%" +
                    " of state-space explored")
            except Exception:
                print("Save error")

        # Initialise the game grid (grid[x][y]) and the score counter
        score = 0
        grid = [[0] * height for _ in range(width)]

        # Create the snake and the apple
        # NOTE(review): the tail is created at head.Y + 1, i.e. on the same
        # cell as the body; possibly meant to be body.Y + 1 - confirm.
        head = Segment(width / 2, height / 5)
        body = Segment(head.X, head.Y + 1)
        tail = Segment(body.X, head.Y + 1)
        head.setNext(body)
        body.setNext(tail)
        snake = Snake(head, tail)
        apple = Apple(width, height)
        nextStateID = state_dict[getState(snake, apple)]
        prevDistToApple = distanceToApple(snake, apple)
        deathCountdown = deathAfter

        # Game loop
        finished = False
        paused = False
        while not finished:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    killApp = True

            # Pause/unpause, switch the mode between training and demo
            keyPressed = pygame.key.get_pressed()
            if keyPressed[pygame.K_SPACE]:
                paused = True
            elif keyPressed[pygame.K_RETURN]:
                paused = False
            elif keyPressed[pygame.K_UP]:
                fps = 10000
                displayMode = "Training"
                dynamicEpsilon = True
            elif keyPressed[pygame.K_DOWN]:
                fps = 30
                displayMode = "Demo"
                dynamicEpsilon = False
                epsilon = demoEpsilon
            elif keyPressed[pygame.K_END]:
                killApp = True

            if not paused:
                reward = 0
                stateID = nextStateID

                # Choose action
                if random.uniform(0, 1) < epsilon:
                    # Unlike classic Q-Learning, exploration prefers an
                    # action that has never been updated for this state;
                    # only if there is none is a random action chosen.
                    for i in range(3):
                        if Q[stateID, i] == 0.:
                            actionID = i
                            break
                    else:
                        actionID = random.randint(0, 2)
                else:
                    actionID = np.argmax(Q[stateID])

                # Change direction (actions are relative to the heading)
                action = actions[actionID]
                if action != "wait":
                    if snake.direction == "up":
                        snake.direction = action
                    else:
                        newDirection = turnTable.get(
                            snake.direction, {}).get(action)
                        if newDirection is not None:
                            snake.direction = newDirection

                snake.move()

                # Check if collected an apple
                if snake.head.X == apple.X and snake.head.Y == apple.Y:
                    snake.grow()
                    score += 1
                    if score > highscore:
                        highscore = score
                    reward += 500
                    deathCountdown = deathAfter
                    # Re-roll the apple until it lands on a cell whose grid
                    # value is 0.
                    apple.generate()
                    while grid[apple.X][apple.Y] != 0:
                        apple.generate()
                    prevDistToApple = distanceToApple(snake, apple)
                else:
                    deathCountdown -= 1
                    # Small shaping reward for approaching the apple,
                    # larger penalty for not getting closer.
                    distToApple = distanceToApple(snake, apple)
                    if distToApple >= prevDistToApple:
                        reward -= 5
                    else:
                        reward += 1
                    prevDistToApple = distToApple

                # Death check (collision or move budget exhausted)
                died = snake.isDead(height, width) or deathCountdown <= 0
                if died:
                    finished = True
                    reward -= 10000
                    # Terminal transition: fixed pessimistic future value.
                    nextMax = -10000
                else:
                    nextStateID = state_dict[getState(snake, apple)]
                    nextMax = np.max(Q[nextStateID])

                # Q-learning update for the action just taken
                oldQ = Q[stateID, actionID]
                Q[stateID, actionID] = (1 - alpha) * oldQ + alpha * (
                    reward + gamma * nextMax)

                if not died:
                    draw(grid, snake, apple, score, episode, highscore,
                         displayMode)

            clock.tick(fps)
            if killApp:
                break

        if killApp:
            break
Exemple #3
0
def run():
    """Play the snake game on a loop using a saved Q-table (demo only).

    The Q-table is loaded from ``saves/QTableDemo.txt``; if that fails an
    all-zero table is used. The agent always takes the highest-valued
    action for the current state and the table is never updated here.
    A new game starts after every death until the application is quit.

    Keyboard controls while running:
        SPACE - pause    RETURN - resume    END - quit (or close the window)

    Relies on module-level ``width``, ``height``, ``actions``,
    ``state_dict``, ``clock`` and the helpers ``init_state_dict``,
    ``getState`` and ``draw``.
    """
    # Kill the snake after this many moves without an apple to prevent it
    # from getting stuck in a cycle.
    deathAfter = 250
    qTableSave = os.path.join(os.sys.path[0], "saves", "QTableDemo.txt")
    fps = 30

    # Relative turns for every heading except "up" (where the action name is
    # already the new heading): heading -> {action -> new heading}.
    turnTable = {
        "down": {"right": "left", "left": "right"},
        "left": {"right": "up", "left": "down"},
        "right": {"right": "down", "left": "up"},
    }

    # Load existing Q-Table
    numberOfStates = init_state_dict()
    Q = np.zeros((numberOfStates, len(actions)))
    try:
        Q = np.loadtxt(qTableSave).reshape(numberOfStates, len(actions))
    except (OSError, ValueError):
        # Missing/unreadable file or a table of the wrong size.
        print("Failed to load the Q-Table")

    highscore = 0
    killApp = False
    while not killApp:
        # Initialise the game grid (grid[x][y]) and the score counter
        score = 0
        grid = [[0] * height for _ in range(width)]

        # Create the snake and the apple
        # NOTE(review): the tail is created at head.Y + 1, i.e. on the same
        # cell as the body; possibly meant to be body.Y + 1 - confirm.
        head = Segment(width / 2, height / 5)
        body = Segment(head.X, head.Y + 1)
        tail = Segment(body.X, head.Y + 1)
        head.setNext(body)
        body.setNext(tail)
        snake = Snake(head, tail)
        apple = Apple(width, height)
        nextStateID = state_dict[getState(snake, apple)]
        deathCountdown = deathAfter

        # Game loop
        finished = False
        paused = False
        while not finished:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    killApp = True

            # Pause/unpause
            keyPressed = pygame.key.get_pressed()
            if keyPressed[pygame.K_SPACE]:
                paused = True
            elif keyPressed[pygame.K_RETURN]:
                paused = False
            elif keyPressed[pygame.K_END]:
                killApp = True

            if not paused:
                stateID = nextStateID

                # Choose action: always the best known one for this state
                actionID = np.argmax(Q[stateID])

                # Change direction (actions are relative to the heading)
                action = actions[actionID]
                if action != "wait":
                    if snake.direction == "up":
                        snake.direction = action
                    else:
                        newDirection = turnTable.get(
                            snake.direction, {}).get(action)
                        if newDirection is not None:
                            snake.direction = newDirection

                snake.move()

                # Check if collected an apple
                if snake.head.X == apple.X and snake.head.Y == apple.Y:
                    snake.grow()
                    score += 1
                    if score > highscore:
                        highscore = score
                    deathCountdown = deathAfter
                    # Re-roll the apple until it lands on a cell whose grid
                    # value is 0.
                    apple.generate()
                    while grid[apple.X][apple.Y] != 0:
                        apple.generate()
                else:
                    deathCountdown -= 1

                # Death check (collision or move budget exhausted); the
                # frame on which the snake dies is not drawn.
                if snake.isDead(height, width) or deathCountdown <= 0:
                    finished = True
                else:
                    nextStateID = state_dict[getState(snake, apple)]
                    draw(grid, snake, apple, score, highscore)

            clock.tick(fps)
            if killApp:
                break