def displayRL(player, food, enemy):
    """Replay the Q-table-driven snake in a pygame window for a short run.

    Every frame builds the observation ``(player - food, head position)``,
    looks up its row in the module-level ``q_table`` and hands a copy of that
    row to ``player.action(..., "QL")``; no random action is taken here.
    Collisions and food are then resolved and the frame is drawn together
    with the player's score.

    The run ends as soon as ``player.lives`` is no longer above 4, or after
    49 rendered frames, whichever comes first. The final score is printed
    once the loop is over.

    Args:
        player: snake steered from ``q_table``.
        food: food object used for the observation and drawn each frame.
        enemy: second snake; it is drawn and passed to the collision and
            food handlers, but this function never calls a move on it.
    """
    pygame.init()
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT), 0, 32)
    surface = pygame.Surface(screen.get_size()).convert()
    drawGrid(surface)
    score_font = pygame.font.SysFont("bahnschrift", 20)

    # The original counter broke out on reaching 50, i.e. after 49 frames.
    max_frames = 49
    for _ in range(max_frames):
        if not player.lives > 4:
            break

        # Let pygame service the window; without this the OS may flag the
        # window as unresponsive because no event call is ever made.
        pygame.event.pump()

        clock.tick(7)
        drawGrid(surface)

        # No random action in the display phase: act straight from the table.
        obs = (player - food, player.get_head_position())
        action_space = np.array(q_table[obs]).copy()
        player.action(action_space, "QL")

        handleSnakeCollisions(player, enemy)
        handleFoodEating([player, enemy], food)

        player.draw(surface)
        enemy.draw(surface)
        food.draw(surface)
        screen.blit(surface, (0, 0))

        score_text = score_font.render(
            "Score Player {0}".format(player.score), 1, (250, 250, 250)
        )
        screen.blit(score_text, (5, 10))
        pygame.display.update()

    print("Snake Player Final Score:", player.score)
def display():
    """Run an interactive match: Q-table snake versus a keyboard-driven snake.

    Creates a player snake (``Snake(0)``, 10 lives) steered from the
    module-level ``q_table``, an enemy snake (``Snake(1)``) placed on a random
    cell and driven through ``enemy.handle_keys()`` / ``enemy.move()``, and a
    food object. The loop runs at one frame per second until the player has
    no lives left or the window is closed, then prints the player's score.
    """
    pygame.init()
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT), 0, 32)
    surface = pygame.Surface(screen.get_size()).convert()
    drawGrid(surface)

    player = Snake(0)
    player.lives = 10
    enemy = Snake(1)
    enemy.positions = [(random.randint(0, SIZE - 1), random.randint(0, SIZE - 1))]
    food = Food([player])
    score_font = pygame.font.SysFont("bahnschrift", 20)

    while player.lives > 0:
        # pygame.event.get() empties the queue. Previously a QUIT event was
        # fetched here and silently dropped, so closing the window did nothing.
        quit_requested = False
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                quit_requested = True
            elif event.type == pygame.KEYDOWN and event.key == pygame.K_1:
                print("SPace")
                clock.tick(0)
        if quit_requested:
            break

        clock.tick(1)
        drawGrid(surface)

        # NOTE(review): the queue was drained just above; if handle_keys()
        # also reads the event queue it will not see those events - confirm.
        enemy.handle_keys()

        # No random action in the display phase: act straight from the table.
        # NOTE(review): displayRL keys q_table by
        # (player - food, player.get_head_position()); confirm this bare
        # (player - food) key matches the table that is actually loaded.
        obs = player - food
        action_space = np.array(q_table[obs]).copy()
        player.action(action_space, "QL")
        enemy.move()

        handleSnakeCollisions(player, enemy)
        handleFoodEating([player, enemy], food)
        # Return value is unused; call kept in case it has side effects
        # (TODO confirm, otherwise it can be removed).
        getObsGrid([player, enemy], food, size=7, fullGrid=True)

        player.draw(surface)
        enemy.draw(surface)
        food.draw(surface)
        screen.blit(surface, (0, 0))

        ai_text = score_font.render(
            "Score AI {0}".format(player.score), 1, (250, 250, 250)
        )
        human_text = score_font.render(
            "Score Player {0}".format(enemy.score), 1, (250, 250, 250)
        )
        screen.blit(ai_text, (5, 10))
        screen.blit(human_text, (SCREEN_WIDTH - 175, 10))
        pygame.display.update()

    print("Snake Player Final Score:", player.score)
def render(agent, obs_type: str, board_size: int):
    """Show one episode of ``agent`` playing against a scripted enemy snake.

    A player snake and an enemy snake (both with 5 lives) and a food object
    are created. Each frame the enemy is advanced with ``enemyMovement``, an
    observation is built according to ``obs_type``, the agent picks an action
    via ``agent.choose_action(obs)`` and it is applied with
    ``player.action([action], "PG")``. The loop runs at 4 frames per second
    and stops as soon as either snake's ``lives`` is no longer above 4; the
    player's final score is then printed.

    Args:
        agent: object exposing ``choose_action(obs)``.
        obs_type: ``"Grid"`` to observe through ``getObsGrid`` (player only,
            ``fullGrid=False``) or ``"Small"`` to observe through
            ``getObsSmall`` (player and enemy).
        board_size: passed as ``size`` to ``getObsGrid``; unused for
            ``"Small"``.

    Raises:
        ValueError: if ``obs_type`` is neither ``"Grid"`` nor ``"Small"``.
            Previously this surfaced as an unbound ``obs`` on the first frame.
    """
    if obs_type not in ("Grid", "Small"):
        raise ValueError(
            "obs_type must be 'Grid' or 'Small', got {0!r}".format(obs_type)
        )

    pygame.init()
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT), 0, 32)
    surface = pygame.Surface(screen.get_size()).convert()
    drawGrid(surface)

    player = Snake(0)
    player.lives = 5
    enemy = Snake(1)
    enemy.lives = 5
    enemy.positions = [[5, 5]]
    food = Food([player])
    score_font = pygame.font.SysFont("bahnschrift", 20)

    while player.lives > 4 and enemy.lives > 4:
        # Let pygame service the window; without this the OS may flag the
        # window as unresponsive because no event call is ever made.
        pygame.event.pump()

        clock.tick(4)
        drawGrid(surface)
        enemyMovement(food=food, enemy=enemy)

        # No random action in the display phase: the agent decides alone.
        if obs_type == "Grid":
            obs = getObsGrid([player], food, size=board_size, fullGrid=False)
        else:
            obs = getObsSmall([player, enemy], food)
        action = agent.choose_action(obs)
        player.action([action], "PG")

        handleSnakeCollisions(player, enemy)
        handleFoodEating([player, enemy], food)

        player.draw(surface)
        enemy.draw(surface)
        food.draw(surface)
        screen.blit(surface, (0, 0))

        score_text = score_font.render(
            "Score AI {0}".format(player.score), 1, (250, 250, 250)
        )
        screen.blit(score_text, (5, 10))
        pygame.display.update()

    print("Snake Player Final Score:", player.score)
action = np.random.randint(0, 4) # Take the action! '''Training Rewards handled inside snake class(kinda inefficient)''' '''We want to now the second best option as well, in case the first is running into it's own body''' action_space = [action] player.action(action_space, "QL") # Finding the index corresponding to the action #### MAYBE ### # this could potentially harm the training # enemy.move() # food.move() # what if the food could move? would that be interesting? ############## # need to determine these before handling because snake positions can be reset handleSnakeCollisions(player, enemy) handleFoodEating([player, enemy], food) reward = player.reward # first we need to obs immediately after the move. new_obs = (player - food, player.get_head_position()) #new_obs = (player-food) max_future_q = np.max(q_table[new_obs]) current_q = q_table[obs][action] if reward == FOOD_REWARD: new_q = FOOD_REWARD else: new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * ( reward + DISCOUNT * max_future_q) q_table[obs][action] = new_q