def is_direction_blocked(snake: Snake, enemy: Snake, direction):
    """Return True when stepping `snake`'s head one cell along `direction`
    would collide with its own body, the enemy snake, or the border.

    `direction` is a 2-element (dx, dy) offset in grid cells.
    """
    next_cell = np.array(snake.get_head_position()) + np.array(direction)
    cell = next_cell.tolist()
    # Own body, excluding the tail cell (it moves away this step).
    if cell in snake.positions[:-1]:
        return True
    if cell in enemy.positions:
        return True
    # NOTE(review): coordinate 0 is treated as a wall just like SIZE —
    # confirm the playable area really excludes row/column 0. Also confirm
    # `positions` stores lists, since `cell` is a list and `[x, y] in
    # [(x, y)]` would never match tuples.
    return (next_cell[0] == 0 or next_cell[1] == 0
            or next_cell[0] == SIZE or next_cell[1] == SIZE)
def enemyMovement(food: Food, enemy: Snake):
    """Advance the enemy snake one step.

    With probability 0.5 a uniformly random direction (0..3) is taken;
    otherwise the enemy moves greedily toward the food, resolving the
    x-axis difference before the y-axis one. If the head already sits on
    the food cell, a random direction is chosen.
    """
    epsilon = 0.5
    head = enemy.get_head_position()
    food_x, food_y = food.position[0], food.position[1]
    if np.random.random() <= epsilon:
        chosen = random.randint(0, 3)
    elif food_x < head[0]:
        chosen = 2
    elif food_x > head[0]:
        chosen = 3
    elif food_y < head[1]:
        chosen = 0
    elif food_y > head[1]:
        chosen = 1
    else:
        chosen = random.randint(0, 3)
    enemy.action([chosen], "Enemy")
# NOTE(review): this chunk is the tail of the environment step() function
# followed by the top-level Actor-Critic training loop; the function header
# (and the remainder of the while-loop body) lie outside this view.
    # Build the observation in the format the chosen agent expects.
    if obs_type == "Grid":
        obs = getObsGrid(snakes, food, OBS_GRID_SIZE, fullGrid=False)
    if obs_type == "Small":
        obs = getObsSmall(snakes, food)
    # Episode terminates once the player collected the death penalty.
    if player.reward == -DEATH_PENALTY:
        done = True
    info = ""
    # Gym-style (observation, reward, done, info) tuple.
    return obs, player.reward, done, info


score_history = []
score = 0
n_steps_history = []
for i in tqdm(range(num_episodes)):
    player = Snake(0)
    enemy = Snake(1)
    # Enemy spawns on a random cell of the SIZE x SIZE board.
    enemy.positions = [(random.randint(0, SIZE - 1), random.randint(0, SIZE - 1))]
    food = Food([player])
    done = False
    score = 0
    # returns a numpy array of the state we care about
    observation = getObsGrid(snakes=[player], food=food, size=OBS_GRID_SIZE, fullGrid=False)
    #observation = getObsSmall([player, enemy], food)
    n_steps = 0
    # Cap episodes at 100 steps so a looping snake cannot stall training.
    while not done and n_steps < 100:
        n_steps += 1
        # action needs to be either 0,1,2 or 3
        action = agentAC.choose_action(observation)
        observation_, reward, done, info = step(snakes=[player, enemy], food=food, action=action, obs_type=obs_type)
        # Online one-step update (TD) — For Actor-Critic
        agentAC.learn(observation, reward, observation_, done)
# if start_q_table is None: # q_table = {} # for x1 in range(-SIZE+1, SIZE): # for y1 in range(-SIZE+1, SIZE): # q_table[(x1, y1)] = [np.random.uniform(-5, 0) for i in range(4)] if start_q_table is not None: print("Loaded q-table") with open(start_q_table, "rb") as f: q_table = pickle.load(f) episode_rewards = [] episode_scores = [] for episode in range(HM_EPISODES): player = Snake(0) enemy = Snake(1) food = Food([player, enemy]) show = False if episode % SHOW_EVERY == 0: print(f"on #{episode}, epsilon is {epsilon}") print( f"{SHOW_EVERY} ep mean: {np.mean(episode_rewards[-SHOW_EVERY:])}" ) print( f"{SHOW_EVERY} ep mean score: {np.mean(episode_scores[-SHOW_EVERY:])}" ) print(episode_scores) else: show = False
def display():
    """Interactive pygame demo for the Q-learning agent.

    The trained `q_table` drives the player snake while a human steers the
    enemy snake; runs until the player is out of lives, then prints the
    final score.
    """
    pygame.init()
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT), 0, 32)
    surface = pygame.Surface(screen.get_size())
    surface = surface.convert()
    drawGrid(surface)
    player = Snake(0)
    player.lives = 10
    enemy = Snake(1)
    # Enemy spawns on a random cell of the SIZE x SIZE board.
    enemy.positions = [(random.randint(0, SIZE - 1), random.randint(0, SIZE - 1))]
    food = Food([player])
    myfont = pygame.font.SysFont("bahnschrift", 20)
    iterations = 0  # NOTE(review): never incremented or read
    while (player.lives > 0):
        for event in pygame.event.get():
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_1:
                    # NOTE(review): debug branch — tick(0) applies no frame
                    # cap, so this does not actually pause; confirm intent.
                    print("SPace")
                    clock.tick(0)
        #print(get_food_distance(player, food))
        #getObsSmall([player, enemy], food)
        # 1 FPS so the rollout is watchable.
        clock.tick(1)
        drawGrid(surface)
        enemy.handle_keys()
        # Q-table key — presumably the head-to-food offset computed by
        # Snake.__sub__; confirm against the Snake class.
        obs = (player - food)
        # no random action in display phase
        action_space = np.array(q_table[obs]).copy()
        player.action(action_space, "QL")
        enemy.move()
        handleSnakeCollisions(player, enemy)
        handleFoodEating([player, enemy], food)
        # NOTE(review): return value unused — side effects only, if any.
        getObsGrid([player, enemy], food, size=7, fullGrid=True)
        player.draw(surface)
        enemy.draw(surface)
        food.draw(surface)
        screen.blit(surface, (0, 0))
        text1 = myfont.render("Score AI {0}".format(player.score), 1, (250, 250, 250))
        text2 = myfont.render("Score Player {0}".format(enemy.score), 1, (250, 250, 250))
        screen.blit(text1, (5, 10))
        screen.blit(text2, (SCREEN_WIDTH - 175, 10))
        pygame.display.update()
    print("Snake Player Final Score:", player.score)
def render(agent, obs_type: str, board_size: int):
    """Render one evaluation episode with pygame.

    The trained `agent` controls the player snake and the scripted
    `enemyMovement` drives the enemy; the loop ends as soon as either
    snake loses a single life (lives drop from 5 to 4).

    Args:
        agent: object exposing `choose_action(observation)`.
        obs_type: "Grid" or "Small" — selects the observation encoding.
        board_size: side length of the observation grid for "Grid" mode.
    """
    pygame.init()
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT), 0, 32)
    surface = pygame.Surface(screen.get_size())
    surface = surface.convert()
    drawGrid(surface)
    player = Snake(0)
    player.lives = 5
    enemy = Snake(1)
    enemy.lives = 5
    # Fixed enemy spawn (unlike display(), which randomises it).
    enemy.positions = [[5,5]]
    food = Food([player])
    myfont = pygame.font.SysFont("bahnschrift", 20)
    iterations = 0  # NOTE(review): never incremented or read
    while (player.lives > 4 and enemy.lives > 4):
        # 4 FPS so the rollout is watchable.
        clock.tick(4)
        drawGrid(surface)
        enemyMovement(food=food,enemy=enemy)
        # no random action in display phase
        if obs_type == "Grid":
            obs = getObsGrid([player], food, size=board_size, fullGrid=False)
        if obs_type == 'Small':
            obs = getObsSmall([player, enemy], food)
        action = agent.choose_action(obs)
        action_space = [action]
        player.action(action_space, "PG")
        handleSnakeCollisions(player,enemy)
        handleFoodEating([player, enemy], food)
        player.draw(surface)
        enemy.draw(surface)
        food.draw(surface)
        screen.blit(surface, (0, 0))
        text1 = myfont.render("Score AI {0}".format(player.score), 1, (250, 250, 250))
        #text2 = myfont.render("Score Player {0}".format(enemy.score), 1, (250, 250, 250))
        screen.blit(text1, (5, 10))
        #screen.blit(text2, (SCREEN_WIDTH - 175, 10))
        pygame.display.update()
    print("Snake Player Final Score:", player.score)
def get_enemy_direction_vector(snake: Snake, enemy: Snake):
    """Offset from `snake`'s head to `enemy`'s head as a numpy array
    (enemy head minus snake head)."""
    snake_head = np.array(snake.get_head_position())
    enemy_head = np.array(enemy.get_head_position())
    return enemy_head - snake_head
def get_food_direction_vector(snake: Snake, food: Food):
    """Offset from `snake`'s head to the food cell as a numpy array
    (food position minus snake head)."""
    snake_head = np.array(snake.get_head_position())
    return np.array(food.position) - snake_head
# NOTE(review): tail of the step() function followed by the __main__
# evaluation loop; the function header and the end of the while loop lie
# outside this view.
    info = ""
    # Gym-style (observation, reward, done, info) tuple.
    return obs, player.reward, done, info


if __name__ == '__main__':
    if agent_type == "AC":
        agent = ActorCriticAgent(alpha=0.001, input_dims=6, gamma=0.99, layer1_size=128, layer2_size=64, n_actions=4)
        # NOTE(review): torch.load unpickles — only safe on trusted,
        # locally produced checkpoints.
        agent.actor_critic = T.load("model/AC-Small-5000")
    score_history = []
    score = 0
    n_steps_history = []
    for i in tqdm(range(num_episodes)):
        player = Snake(0)
        enemy = Snake(1)
        food = Food([player, enemy])
        done = False
        score = 0
        # returns a numpy array of the state we care about
        if obs_type == "Grid":
            observation = getObsGrid(snakes=[player], food=food, size=OBS_GRID_SIZE, fullGrid=False)
        if obs_type == 'Small':
            observation = getObsSmall([player, enemy], food)
        n_steps = 0
        # Cap evaluation episodes at 1000 steps.
        while player.lives > 0 and enemy.lives > 0 and n_steps < 1000:
            n_steps += 1
            # action needs to be either 0,1,2 or 3
            action = agent.choose_action(observation)
            # NOTE(review): unlike the training loop, this call omits
            # obs_type= — confirm step() defines a matching default.
            observation_, reward, done, info = step(snakes=[player, enemy], food=food, action=action)