def displayRL(player, food, enemy):
    """Replay the greedy Q-table policy for ``player`` in a pygame window.

    Every frame builds the observation ``(player - food, head position)``,
    looks up its row in the module-level ``q_table`` and hands a copy of that
    row to ``player.action(..., "QL")``; no exploration step is added here.
    Collisions and food eating are then resolved and the board, both snakes,
    the food and the player's score are drawn.

    The replay ends as soon as ``player.lives`` is no longer above 4, or when
    the frame counter reaches 50 (so at most 49 frames are drawn). The
    player's final score is printed afterwards.

    Args:
        player: Snake driven by the Q-table.
        food: Food object used for the observation and for food handling.
        enemy: Second snake; it is drawn and passed to the collision and food
            handlers, but this function never calls a move on it.
    """
    pygame.init()
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT), 0, 32)
    surface = pygame.Surface(screen.get_size()).convert()
    drawGrid(surface)
    score_font = pygame.font.SysFont("bahnschrift", 20)

    frame = 0
    while player.lives > 4:
        frame += 1
        if frame == 50:
            break

        # Nothing else in this loop services the event queue; without this the
        # operating system may mark the window as "not responding".
        pygame.event.pump()

        clock.tick(7)
        drawGrid(surface)

        # No random action in the display phase: act on the stored Q-values.
        obs = (player - food, player.get_head_position())
        action_space = np.array(q_table[obs]).copy()
        player.action(action_space, "QL")

        handleSnakeCollisions(player, enemy)
        handleFoodEating([player, enemy], food)

        player.draw(surface)
        enemy.draw(surface)
        food.draw(surface)
        screen.blit(surface, (0, 0))

        score_text = score_font.render(
            "Score Player {0}".format(player.score), 1, (250, 250, 250)
        )
        screen.blit(score_text, (5, 10))
        pygame.display.update()

    print("Snake Player Final Score:", player.score)
def display():
    """Run an interactive match: Q-table snake versus a keyboard-driven snake.

    Creates a window, a Q-table controlled ``player`` (10 lives), an ``enemy``
    snake placed on a random cell and steered through ``enemy.handle_keys()``,
    and a food item. Frames are drawn until ``player.lives`` drops to 0, then
    the player's final score is printed.
    """
    pygame.init()
    timer = pygame.time.Clock()
    window = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT), 0, 32)
    canvas = pygame.Surface(window.get_size()).convert()
    drawGrid(canvas)

    player = Snake(0)
    player.lives = 10
    enemy = Snake(1)
    start_x = random.randint(0, SIZE - 1)
    start_y = random.randint(0, SIZE - 1)
    enemy.positions = [(start_x, start_y)]
    food = Food([player])
    hud_font = pygame.font.SysFont("bahnschrift", 20)
    iterations = 0  # never read below; kept from the original

    white = (250, 250, 250)
    while player.lives > 0:
        # NOTE(review): this drains the event queue before enemy.handle_keys()
        # runs; if handle_keys() also reads the queue it may miss key presses.
        # Confirm against the Snake class.
        for event in pygame.event.get():
            pressed_one = event.type == pygame.KEYDOWN and event.key == pygame.K_1
            if pressed_one:
                print("SPace")
                # NOTE(review): the original indentation was lost; this call is
                # assumed to belong to the key-press branch. Confirm.
                timer.tick(0)

        timer.tick(1)
        drawGrid(canvas)
        enemy.handle_keys()

        # No random action in the display phase: act on the stored Q-values.
        state = player - food
        q_values = np.array(q_table[state]).copy()
        player.action(q_values, "QL")
        enemy.move()

        handleSnakeCollisions(player, enemy)
        handleFoodEating([player, enemy], food)
        # Return value is unused here.
        getObsGrid([player, enemy], food, size=7, fullGrid=True)

        for sprite in (player, enemy, food):
            sprite.draw(canvas)
        window.blit(canvas, (0, 0))

        ai_label = hud_font.render("Score AI {0}".format(player.score), 1, white)
        human_label = hud_font.render(
            "Score Player {0}".format(enemy.score), 1, white
        )
        window.blit(ai_label, (5, 10))
        window.blit(human_label, (SCREEN_WIDTH - 175, 10))
        pygame.display.update()

    print("Snake Player Final Score:", player.score)
def step(snakes: list, food: Food, action):
    """Apply one action for the learning snake and return a gym-style tuple.

    The first entry of ``snakes`` must be the learning snake. Its
    ``action([action], "AC")`` is invoked, food eating is resolved for all
    snakes, and the observation is then taken according to the module-level
    ``obs_type``:

    * ``"Grid"``  -> ``getObsGrid(snakes, food, OBS_GRID_SIZE, fullGrid=False)``
    * ``"Small"`` -> ``getObsSmall(snakes, food)``
    * anything else -> the placeholder string ``"Wrong Input"``

    Returns:
        ``(obs, reward, done, info)`` where ``reward`` is the learning snake's
        ``reward`` attribute read after the move, ``done`` is always ``False``
        and ``info`` is always the empty string.
    """
    learner = snakes[0]
    learner.action([action], "AC")

    # The learner's reward is updated elsewhere whenever food is eaten.
    handleFoodEating(snakes, food)

    # Observe the state as it is after the move has been carried out.
    if obs_type == "Small":
        observation = getObsSmall(snakes, food)
    elif obs_type == "Grid":
        observation = getObsGrid(snakes, food, OBS_GRID_SIZE, fullGrid=False)
    else:
        observation = "Wrong Input"

    return observation, learner.reward, False, ""
def render(agent, obs_type: str, board_size: int):
    """Show a trained agent playing against the scripted enemy snake.

    A player snake and an enemy snake are created with 5 lives each, the enemy
    starting on cell ``[5, 5]``. Each frame the enemy is advanced through
    ``enemyMovement``, an observation is built, ``agent.choose_action(obs)``
    picks the action, and it is applied with ``player.action([action], "PG")``.
    The loop ends as soon as either snake drops below 5 lives; the player's
    final score is then printed.

    Args:
        agent: Object exposing ``choose_action(obs)``.
        obs_type: ``"Grid"`` to observe through ``getObsGrid`` (player only,
            ``size=board_size``) or ``"Small"`` to observe through
            ``getObsSmall`` (player and enemy).
        board_size: Grid size forwarded to ``getObsGrid``; unused for
            ``"Small"``.

    Raises:
        ValueError: If ``obs_type`` is neither ``"Grid"`` nor ``"Small"``.
    """
    # Fail fast: previously an unknown obs_type left `obs` unbound and crashed
    # with UnboundLocalError only after the window had been opened.
    if obs_type not in ("Grid", "Small"):
        raise ValueError(
            "obs_type must be 'Grid' or 'Small', got {0!r}".format(obs_type)
        )

    pygame.init()
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT), 0, 32)
    surface = pygame.Surface(screen.get_size()).convert()
    drawGrid(surface)

    player = Snake(0)
    player.lives = 5
    enemy = Snake(1)
    enemy.lives = 5
    enemy.positions = [[5, 5]]
    food = Food([player])
    score_font = pygame.font.SysFont("bahnschrift", 20)

    while player.lives > 4 and enemy.lives > 4:
        # Nothing else in this loop services the event queue; without this the
        # operating system may mark the window as "not responding".
        pygame.event.pump()

        clock.tick(4)
        drawGrid(surface)
        enemyMovement(food=food, enemy=enemy)

        # No random action in the display phase: the agent decides every move.
        if obs_type == "Grid":
            obs = getObsGrid([player], food, size=board_size, fullGrid=False)
        else:
            obs = getObsSmall([player, enemy], food)
        action = agent.choose_action(obs)
        player.action([action], "PG")

        handleSnakeCollisions(player, enemy)
        handleFoodEating([player, enemy], food)

        player.draw(surface)
        enemy.draw(surface)
        food.draw(surface)
        screen.blit(surface, (0, 0))

        score_text = score_font.render(
            "Score AI {0}".format(player.score), 1, (250, 250, 250)
        )
        screen.blit(score_text, (5, 10))
        pygame.display.update()

    print("Snake Player Final Score:", player.score)
# Take the action! '''Training Rewards handled inside snake class(kinda inefficient)''' '''We want to now the second best option as well, in case the first is running into it's own body''' action_space = [action] player.action(action_space, "QL") # Finding the index corresponding to the action #### MAYBE ### # this could potentially harm the training # enemy.move() # food.move() # what if the food could move? would that be interesting? ############## # need to determine these before handling because snake positions can be reset handleSnakeCollisions(player, enemy) handleFoodEating([player, enemy], food) reward = player.reward # first we need to obs immediately after the move. new_obs = (player - food, player.get_head_position()) #new_obs = (player-food) max_future_q = np.max(q_table[new_obs]) current_q = q_table[obs][action] if reward == FOOD_REWARD: new_q = FOOD_REWARD else: new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * ( reward + DISCOUNT * max_future_q) q_table[obs][action] = new_q episode_reward = player.reward