def run_game(agent, game_number):
    """Play one game of snake driven by ``agent`` and log every step.

    The game runs until the snake hits itself or the window edge, or until
    100 consecutive steps pass without a positive reward.

    Args:
        agent: The policy. It is called with a ``(1, 12)`` float32 tensor on
            ``DEVICE`` and must expose ``epsilon``, ``set_reward``,
            ``train_short_memory`` and ``remember``.
        game_number: Index of the current game. It scales the linear epsilon
            decay while training and labels this game's entry in ``STATE``.

    Side effects:
        Resets ``SCORE.player_score``, overwrites ``agent.epsilon`` on every
        step, stores the per-step log under ``STATE["Game #<game_number>"]``
        and, when the game ended by a collision, writes ``STATE`` to disk via
        ``write_state_to_file``.
    """
    # Pixel size of one grid cell along each axis.
    cell_width = WINDOW_WIDTH / WIDTH
    cell_height = WINDOW_HEIGHT / HEIGHT

    # The apple starts a third of the way across and halfway down the grid.
    apple = Apple(
        Cell(cell_width * round(WIDTH / 3),
             cell_height * round(HEIGHT / 2), DISPLAY))

    # The snake starts as three cells in grid row round(HEIGHT / 4),
    # columns 4, 3 and 2.
    snake_row = cell_height * round(HEIGHT / 4)
    snake = Snake(
        Cell(cell_width * 4, snake_row, DISPLAY),
        Cell(cell_width * 3, snake_row, DISPLAY),
        Cell(cell_width * 2, snake_row, DISPLAY))

    # Reset player score.
    SCORE.player_score = 0

    # List of tuples: ({"Action N": action}, state snapshot).
    action_state_list = []
    action_counter = 0

    # Handle events that were queued before the game starts. Input is only
    # read here; the step loop below never polls the event queue.
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            terminate()
        if event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE:
            # ESC key pressed.
            pygame.quit()
            sys.exit()
        if event.type == pygame.KEYDOWN:
            # Any other key may change the snake's starting direction.
            snake.direction = get_direction(event.key, snake.direction)

    steps = 0  # steps since the last positive reward
    game_over = _snake_crashed(snake)
    while not game_over and steps < 100:
        action_counter += 1
        # NOTE(review): the logged action is always 'none' because no key
        # presses are read inside this loop.
        action = 'none'

        # Direction before this step's turn; used as a state parameter.
        snake_prev_direction = snake.direction

        if not params['train']:
            agent.epsilon = 0.01
        else:
            # Exploration rate decays linearly with the game number.
            agent.epsilon = 1 - (game_number * params['epsilon_decay_linear'])

        # Environment state and head-to-apple distance before the move.
        state_old = get_state(
            _state_snapshot(snake, apple, snake_prev_direction))
        distance_old_x, distance_old_y = _head_apple_distance(snake, apple)

        # Explore with probability agent.epsilon, otherwise follow the agent.
        if random.uniform(0, 1) < agent.epsilon:
            move = np.eye(3)[random.randint(0, 2)]
        else:
            # Predict the action from the old state without tracking grads.
            with torch.no_grad():
                state_old_tensor = torch.tensor(
                    state_old.reshape((1, 12)),
                    dtype=torch.float32).to(DEVICE)
                prediction = agent(state_old_tensor)
            move = np.eye(3)[np.argmax(prediction.detach().cpu().numpy()[0])]
        snake.turn(matrix=move,
                   prev_direction=snake_prev_direction,
                   move_list=SNAKE_MOVE)

        # Advance the snake one cell in its current direction.
        snake.move()

        # On eating the apple: grow, respawn the apple away from the snake's
        # body, and bump the score.
        if snake_hit_apple(snake, apple):
            snake.grow()
            apple.spawn([(block.x, block.y) for block in snake.body])
            SCORE.score()

        # Environment state and head-to-apple distance after the move.
        state_new = get_state(
            _state_snapshot(snake, apple, snake_prev_direction))
        distance_new_x, distance_new_y = _head_apple_distance(snake, apple)

        # Set reward for the new state.
        reward = agent.set_reward(snake, apple,
                                  distance_new_x, distance_old_x,
                                  distance_new_y, distance_old_y)

        # A positive reward restarts the no-progress step counter.
        if reward > 0:
            steps = 0

        # Evaluate the terminal condition once per step and reuse it for both
        # training calls and for the loop condition.
        game_over = _snake_crashed(snake)

        if params['train']:
            # Train short memory on the new action and state.
            agent.train_short_memory(state_old, snake.turn_direction, reward,
                                     state_new, game_over)
            # Store the transition in long-term memory.
            agent.remember(state_old, snake.turn_direction, reward,
                           state_new, game_over)

        if params['display']:
            draw_frame(snake, apple, SCORE)
            FPS_CLOCK.tick(FPS)

        steps += 1

        # Log this step's action together with the resulting snake state.
        action_state_list.append((
            {f"Action {action_counter}": action},
            _state_snapshot(snake, apple, snake_prev_direction),
        ))

    # Attach the log to the current game.
    STATE[f"Game #{game_number}"] = action_state_list

    # Write the log once when the game ended by a collision. A game stopped
    # by the 100-step limit is not written here.
    if game_over:
        write_state_to_file(STATE, CURRENT_TIME)


def _snake_crashed(snake):
    """Return True when the snake has hit itself or the window edge."""
    # Both checks are always evaluated (no short-circuit).
    return any((snake_hit_self(snake), snake_hit_edge(snake)))


def _state_snapshot(snake, apple, prev_direction):
    """Return the ``get_state_in_json`` snapshot of the current game."""
    return get_state_in_json(player_score=SCORE.player_score,
                             high_score=SCORE.high_score,
                             head_pos=get_snake_new_head(snake),
                             snake_pos=snake.body,
                             apple_pos=apple.apple,
                             prev_direction=prev_direction)


def _head_apple_distance(snake, apple):
    """Return the absolute (x, y) distance from the snake head to the apple."""
    head = get_snake_new_head(snake)
    return abs(head[0] - apple.apple.x), abs(head[1] - apple.apple.y)
manager.process_events(event) snake.move() Xdata = get_Xdata(snake, food, steps) if Xdata is None: snake.kill() if not snake.is_alive: population_list.pop(i, None) scores[i] = [snake.length-2, steps] break Ydata = nn_list[i](Xdata) out = numpy.where(Ydata == numpy.max(Ydata))[0][0] if out == 0 and snake.direction != 'DOWN': snake.turn('UP') elif out == 1 and snake.direction != 'LEFT': snake.turn('RIGHT') elif out == 2 and snake.direction != 'UP': snake.turn('DOWN') elif out == 3 and snake.direction != 'RIGHT': snake.turn('LEFT') food.respawn(food.x, food.y) if (steps - snake.last_food) >= 100: snake.kill() steps += 1 textsurf = font.render('Generation: {0}'.format(generation, i), False, (0, 0, 0)) textsurf2 = font.render('Snake: {0}'.format(i), False, (0, 0, 0)) textsurf3 = font.render('Score: {0}'.format(snake.length-2), False, (0, 0, 0))