Ejemplo n.º 1
0
def _snake_crashed(snake):
    """Return True when the snake has hit itself or the edge of the field."""
    return any((snake_hit_self(snake), snake_hit_edge(snake)))


def _process_events(snake):
    """Drain the pygame event queue once and apply the events to the game.

    QUIT calls ``terminate()``; ESC shuts pygame down and exits the process.
    Any other key press is handed to ``get_direction`` to update
    ``snake.direction``.

    Returns:
        The snake direction set by the last key press handled in this call,
        or ``None`` when no such key press was seen.
    """
    action = None
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            terminate()
        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_ESCAPE:
                # ESC key pressed.
                pygame.quit()
                sys.exit()
            # Other key pressed: let get_direction decide the new direction.
            snake.direction = get_direction(event.key, snake.direction)
            action = snake.direction
    return action


def _state_snapshot(snake, apple, prev_direction):
    """Build the JSON description of the current game state."""
    return get_state_in_json(player_score=SCORE.player_score,
                             high_score=SCORE.high_score,
                             head_pos=get_snake_new_head(snake),
                             snake_pos=snake.body,
                             apple_pos=apple.apple,
                             prev_direction=prev_direction)


def _head_apple_distance(snake, apple):
    """Return the absolute (x, y) distance between the snake head and the apple."""
    head = get_snake_new_head(snake)
    return abs(head[0] - apple.apple.x), abs(head[1] - apple.apple.y)


def run_game(agent, game_number):
    """Play one game of snake driven by ``agent`` and log every frame.

    Each frame the agent's turn is chosen either at random (with probability
    ``agent.epsilon``) or from the agent's prediction for the current state.
    When ``params['train']`` is set, the transition is fed to
    ``agent.train_short_memory`` and ``agent.remember``. The game stops when
    the snake hits itself or the edge, or after 100 consecutive steps
    without a positive reward.

    Args:
        agent: Callable model; also provides ``epsilon``, ``set_reward``,
            ``train_short_memory`` and ``remember``.
        game_number: Index of this game. Drives the linear epsilon decay and
            is used in the ``STATE`` key ``"Game #<n>"``.

    Side effects:
        Resets ``SCORE.player_score``, stores the per-frame log in ``STATE``
        and, when the game ends with a collision, writes ``STATE`` to disk
        via ``write_state_to_file``.
    """
    # Place the apple at grid column round(WIDTH / 3), row round(HEIGHT / 2).
    # NOTE(review): WINDOW_* / * looks like the cell size in pixels - confirm.
    apple = Apple(
        Cell(WINDOW_WIDTH / WIDTH * round(WIDTH / 3),
             WINDOW_HEIGHT / HEIGHT * round(HEIGHT / 2), DISPLAY))
    # Create the snake from three cells in row round(HEIGHT / 4),
    # columns 4, 3 and 2.
    snake_row = WINDOW_HEIGHT / HEIGHT * round(HEIGHT / 4)
    snake = Snake(
        Cell(WINDOW_WIDTH / WIDTH * 4, snake_row, DISPLAY),
        Cell(WINDOW_WIDTH / WIDTH * 3, snake_row, DISPLAY),
        Cell(WINDOW_WIDTH / WIDTH * 2, snake_row, DISPLAY))

    # Reset player score.
    SCORE.player_score = 0

    # List of tuples: ({"Action <n>": action}, state after the move).
    action_state_list = []
    action_counter = 0

    steps = 0  # steps since the last positive reward
    while not _snake_crashed(snake) and steps < 100:
        action_counter += 1

        # Poll the event queue every frame so that closing the window or
        # pressing ESC works while the game is running, and so the queue is
        # pumped regularly. A key press that changes the direction is logged
        # as this frame's action; otherwise the action is 'none'.
        action = _process_events(snake)
        if action is None:
            action = 'none'

        # Direction before the agent turns; it is one of the state inputs.
        snake_prev_direction = snake.direction

        if not params['train']:
            agent.epsilon = 0.01
        else:
            # agent.epsilon adds randomness to the actions; it shrinks
            # linearly with the game number.
            agent.epsilon = 1 - (game_number * params['epsilon_decay_linear'])

        # Environment state and head-to-apple distance before the move.
        state_old = get_state(
            _state_snapshot(snake, apple, snake_prev_direction))
        head_apple_distance_old_x, head_apple_distance_old_y = \
            _head_apple_distance(snake, apple)

        # Explore with a random turn, or exploit the agent's prediction.
        if random.uniform(0, 1) < agent.epsilon:
            snake.turn(matrix=np.eye(3)[random.randint(0, 2)],
                       prev_direction=snake_prev_direction,
                       move_list=SNAKE_MOVE)
        else:
            # Predict the turn from the old state (12 features, batch of 1);
            # the highest-scoring of the 3 outputs becomes a one-hot row.
            with torch.no_grad():
                state_old_tensor = torch.tensor(
                    state_old.reshape((1, 12)),
                    dtype=torch.float32).to(DEVICE)
                prediction = agent(state_old_tensor)
                best_turn = np.argmax(prediction.detach().cpu().numpy()[0])
                snake.turn(matrix=np.eye(3)[best_turn],
                           prev_direction=snake_prev_direction,
                           move_list=SNAKE_MOVE)

        # Move the snake in the chosen direction.
        snake.move()

        # On eating the apple: grow the snake, spawn a new apple and score.
        if snake_hit_apple(snake, apple):
            snake.grow()
            # Pass the occupied cells so the apple does not spawn on the snake.
            apple.spawn([(block.x, block.y) for block in snake.body])
            SCORE.score()

        # Environment state and head-to-apple distance after the move.
        state_new = get_state(
            _state_snapshot(snake, apple, snake_prev_direction))
        head_apple_distance_new_x, head_apple_distance_new_y = \
            _head_apple_distance(snake, apple)

        # Set reward for the new state.
        reward = agent.set_reward(snake, apple, head_apple_distance_new_x,
                                  head_apple_distance_old_x,
                                  head_apple_distance_new_y,
                                  head_apple_distance_old_y)

        # A positive reward resets the stall counter.
        if reward > 0:
            steps = 0

        if params['train']:
            # Evaluate the terminal flag once and share it between both calls.
            crashed = _snake_crashed(snake)
            # Train short memory on the new action and state.
            agent.train_short_memory(state_old, snake.turn_direction, reward,
                                     state_new, crashed)
            # Store the transition in long-term memory.
            agent.remember(state_old, snake.turn_direction, reward, state_new,
                           crashed)

        # Draw the frame with the current snake and apple.
        if params['display']:
            draw_frame(snake, apple, SCORE)
        FPS_CLOCK.tick(FPS)

        steps += 1

        # Append this frame's action (could be 'none') together with the
        # state after the move to the "Action-State List".
        action_state_list.append(
            ({
                f"Action {action_counter}": action
            }, _state_snapshot(snake, apple, snake_prev_direction)))
        # Attach the "Action-State List" to the current game.
        STATE[f"Game #{game_number}"] = action_state_list

    # The game ended with a collision (edge or own body): write the log to
    # disk exactly once. A game stopped only by the step limit is not
    # written here.
    if _snake_crashed(snake):
        write_state_to_file(STATE, CURRENT_TIME)
Ejemplo n.º 2
0
                    manager.process_events(event)

                snake.move()
                Xdata = get_Xdata(snake, food, steps)
                if Xdata is None:
                    snake.kill()
                if not snake.is_alive:
                    population_list.pop(i, None)
                    scores[i] = [snake.length-2, steps]
                    break
                Ydata = nn_list[i](Xdata)
                
                out = numpy.where(Ydata == numpy.max(Ydata))[0][0]
                
                if out == 0 and snake.direction != 'DOWN':
                    snake.turn('UP')
                elif out == 1 and snake.direction != 'LEFT':
                    snake.turn('RIGHT')
                elif out == 2 and snake.direction != 'UP':
                    snake.turn('DOWN')
                elif out ==  3 and snake.direction != 'RIGHT':
                    snake.turn('LEFT')
              
                food.respawn(food.x, food.y)
                if (steps - snake.last_food) >= 100:
                    snake.kill()
       
                steps += 1
                textsurf = font.render('Generation: {0}'.format(generation, i), False, (0, 0, 0))
                textsurf2 = font.render('Snake: {0}'.format(i), False, (0, 0, 0))
                textsurf3 = font.render('Score: {0}'.format(snake.length-2), False, (0, 0, 0))