def game():
    """Run one keyboard-controlled round of Snake, then call ``gameOver()``.

    Reads the module-level ``width``, ``height``, ``fps`` and ``clock`` and
    updates the module-level ``highscore`` whenever the current score
    exceeds it. The round ends when the window is closed or when
    ``snake.isDead(height, width)`` reports a death.
    """
    global highscore

    # Setup
    score = 0
    grid = [[0] * height for _ in range(width)]

    head = Segment(width / 2, height / 5)
    body = Segment(head.X, head.Y + 1)
    # NOTE(review): the tail is built from head.Y + 1, i.e. the same cell as
    # ``body`` -- confirm whether body.Y + 1 was intended.
    tail = Segment(body.X, head.Y + 1)
    head.setNext(body)
    body.setNext(tail)
    snake = Snake(head, tail)
    apple = Apple(width, height)

    # Game loop
    finished = False
    while not finished:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                finished = True

        # Change direction. The two axis checks are mutually exclusive
        # (``elif``) so that at most one turn happens per frame; otherwise
        # holding two arrow keys could chain two turns and reverse the snake
        # straight into its own body.
        keyPressed = pygame.key.get_pressed()
        if snake.direction in ("up", "down"):
            if keyPressed[pygame.K_RIGHT]:
                snake.direction = "right"
            if keyPressed[pygame.K_LEFT]:
                snake.direction = "left"
        elif snake.direction in ("right", "left"):
            if keyPressed[pygame.K_UP]:
                snake.direction = "up"
            if keyPressed[pygame.K_DOWN]:
                snake.direction = "down"

        snake.move()

        # Check if collected an apple
        if snake.head.X == apple.X and snake.head.Y == apple.Y:
            snake.grow()
            score += 1
            if score > highscore:
                highscore = score
            # Re-roll the apple until it lands on a free grid cell.
            # NOTE(review): this function never writes to ``grid``, so unless
            # something else marks occupied cells the first roll is always
            # accepted -- verify.
            while True:
                apple.generate()
                if grid[apple.X][apple.Y] == 0:
                    break

        # Death check: skip the final draw and leave the loop immediately.
        if snake.isDead(height, width):
            finished = True
            break

        draw(grid, snake, apple, score)
        clock.tick(fps)

    gameOver()
def check_collisions(snake: Snake, screen: pygame.Surface, goal: Goal, field: Field) -> bool:
    """Resolve what the snake's head has run into on the current frame.

    The pixel of ``screen`` at the head position is inspected:

    * not ``WHITE``: nothing was hit, returns ``True``;
    * ``WHITE`` and the head is inside ``goal``: the goal is moved to a
      freshly generated area, the snake grows, returns ``True``;
    * ``WHITE`` and the head is not inside ``goal``: returns ``False``.
    """
    head_position = snake.head()

    # Guard: a non-white pixel means there is nothing to collide with.
    if screen.get_at(head_position) != WHITE:
        return True

    # A white pixel outside the goal is reported to the caller as a failure.
    if head_position not in goal:
        return False

    # The white pixel belongs to the goal: relocate it and lengthen the snake.
    goal.move_to(generate_goal_area(snake=snake, field=field))
    snake.grow()
    return True
def run():
    """Train the Q-learning snake agent, with a switchable demo display.

    A Q-table of shape ``(numberOfStates, len(actions))`` and an episode
    counter are loaded from the ``saves`` directory when available, and are
    written back every 100 episodes. Each episode plays one game of Snake in
    which the agent picks an action per frame and the Q-table is updated
    from the resulting reward.

    Keyboard controls (polled every frame):
        SPACE  pause            RETURN  resume
        UP     training mode    DOWN    demo mode
        END    quit (closing the window also quits)
    """
    alpha = 0.15  # learning rate
    gamma = 0.7  # discount factor
    epsilon = 0  # exploration value
    demoEpsilon = 0  # epsilon value for the demo mode
    # If True, the exploration rate is reduced as more of the state-space is
    # discovered; if False the rate is static.
    dynamicEpsilon = False
    training_episodes = 1000000
    # Kill the snake after this many moves without an apple, to prevent it
    # from getting stuck in a cycle.
    deathAfter = 250
    qTableSave = os.path.join(os.sys.path[0], "saves", "QTable.txt")
    counterSave = os.path.join(os.sys.path[0], "saves", "GameCounter.txt")
    fps = 30
    displayMode = "Demo"

    # Load existing Q-Table and episode counter or create new ones
    numberOfStates = init_state_dict()
    numberOfActions = len(actions)
    Q = np.zeros((numberOfStates, numberOfActions))
    try:
        Q = np.loadtxt(qTableSave).reshape(numberOfStates, numberOfActions)
    except (OSError, ValueError):
        # Missing/unreadable file, or contents that do not fit the table.
        print("Could not load an existing Q-Table")
    try:
        with open(counterSave, "r") as counterFile:
            startEpisode = int(counterFile.read())
    except (OSError, ValueError):
        startEpisode = 0

    highscore = 0
    for episode in range(startEpisode, training_episodes + 1):
        killApp = False

        if episode % 100 == 0:
            # Checkpoint: one value per line, which is what the load above
            # expects before reshaping.
            try:
                with open(qTableSave, "w") as qFile:
                    np.savetxt(qFile, Q.ravel())
                with open(counterSave, "w") as counterFile:
                    counterFile.write(str(episode))
            except OSError:
                print("Save error")

            # A state counts as explored once any of its Q-values is non-zero.
            count = int(np.count_nonzero(np.any(Q != 0, axis=1)))
            exploredPercentage = count / numberOfStates
            if dynamicEpsilon:
                if exploredPercentage < 0.95:
                    # Reduce exploration value as more states get explored
                    epsilon = (1 - exploredPercentage) / 2
                else:
                    epsilon = 0.05
            print(
                str(exploredPercentage * 100) + "%"
                + " of state-space explored")

        # Initialise the game grid and the score counter
        score = 0
        grid = [[0] * height for _ in range(width)]

        # Create the snake and the apple
        head = Segment(width / 2, height / 5)
        body = Segment(head.X, head.Y + 1)
        # NOTE(review): the tail is built from head.Y + 1, i.e. the same cell
        # as ``body`` -- confirm whether body.Y + 1 was intended.
        tail = Segment(body.X, head.Y + 1)
        head.setNext(body)
        body.setNext(tail)
        snake = Snake(head, tail)
        apple = Apple(width, height)

        nextStateID = state_dict[getState(snake, apple)]
        prevDistToApple = distanceToApple(snake, apple)
        deathCountdown = deathAfter

        # Game loop
        finished = False
        paused = False
        while not finished:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    killApp = True

            # Pause/unpause, switch the mode between training and demo
            keyPressed = pygame.key.get_pressed()
            if keyPressed[pygame.K_SPACE]:
                paused = True
            elif keyPressed[pygame.K_RETURN]:
                paused = False
            elif keyPressed[pygame.K_UP]:
                fps = 10000
                displayMode = "Training"
                dynamicEpsilon = True
            elif keyPressed[pygame.K_DOWN]:
                fps = 30
                displayMode = "Demo"
                dynamicEpsilon = False
                epsilon = demoEpsilon
            elif keyPressed[pygame.K_END]:
                killApp = True

            if not paused:
                reward = 0
                stateID = nextStateID

                # Choose action
                if random.uniform(0, 1) < epsilon:
                    # Unlike classic Q-Learning, the algorithm prefers an
                    # action whose Q-value is still zero (not yet tried)
                    # over choosing an action at random.
                    actionID = next(
                        (i for i in range(numberOfActions)
                         if Q[stateID, i] == 0.),
                        None)
                    if actionID is None:
                        # Every action was already tried: pick one at random.
                        actionID = random.randint(0, numberOfActions - 1)
                else:
                    actionID = np.argmax(Q[stateID])

                # Change direction. Actions are relative to the current
                # heading, so "left"/"right" are translated into an absolute
                # direction here.
                action = actions[actionID]
                if action != "wait":
                    if snake.direction == "up":
                        snake.direction = action
                    elif snake.direction == "down":
                        if action == "right":
                            snake.direction = "left"
                        if action == "left":
                            snake.direction = "right"
                    elif snake.direction == "left":
                        if action == "right":
                            snake.direction = "up"
                        if action == "left":
                            snake.direction = "down"
                    elif snake.direction == "right":
                        if action == "right":
                            snake.direction = "down"
                        if action == "left":
                            snake.direction = "up"

                snake.move()

                # Check if collected an apple
                if snake.head.X == apple.X and snake.head.Y == apple.Y:
                    snake.grow()
                    score += 1
                    if score > highscore:
                        highscore = score
                    reward += 500
                    deathCountdown = deathAfter
                    # Re-roll the apple until it lands on a free grid cell.
                    while True:
                        apple.generate()
                        if grid[apple.X][apple.Y] == 0:
                            break
                    prevDistToApple = distanceToApple(snake, apple)
                else:
                    deathCountdown -= 1
                    # Small shaping reward: +1 for getting closer to the
                    # apple, -5 for not getting closer.
                    distToApple = distanceToApple(snake, apple)
                    if distToApple >= prevDistToApple:
                        reward -= 5
                    else:
                        reward += 1
                    prevDistToApple = distToApple

                # Death check
                if snake.isDead(height, width) or deathCountdown <= 0:
                    finished = True
                    reward -= 10000
                    # Terminal step: a fixed penalty stands in for the best
                    # value of the (non-existent) next state.
                    nextMax = -10000
                else:
                    nextStateID = state_dict[getState(snake, apple)]
                    nextMax = np.max(Q[nextStateID])

                # Q-learning update for the action just taken.
                oldQ = Q[stateID, actionID]
                Q[stateID, actionID] = (
                    (1 - alpha) * oldQ + alpha * (reward + gamma * nextMax))

            draw(grid, snake, apple, score, episode, highscore, displayMode)
            clock.tick(fps)
            if killApp:
                break

        if killApp:
            break
def run():
    """Play Snake endlessly using a saved Q-table, without learning.

    The Q-table is loaded from ``saves/QTableDemo.txt``; if that fails an
    all-zero table is used. Every frame the action with the highest Q-value
    for the current state is taken. When the snake dies, or goes
    ``deathAfter`` moves without eating an apple, a new game starts.

    Keyboard controls (polled every frame):
        SPACE  pause    RETURN  resume    END  quit
    Closing the window also quits.
    """
    # Kill the snake after this many moves without an apple, to prevent it
    # from getting stuck in a cycle.
    deathAfter = 250
    qTableSave = os.path.join(os.sys.path[0], "saves", "QTableDemo.txt")
    fps = 30

    # Load existing Q-Table
    numberOfStates = init_state_dict()
    Q = np.zeros((numberOfStates, len(actions)))
    try:
        Q = np.loadtxt(qTableSave).reshape(numberOfStates, len(actions))
    except (OSError, ValueError):
        # Missing/unreadable file, or contents that do not fit the table.
        print("Failed to load the Q-Table")

    highscore = 0
    killApp = False
    while not killApp:
        # Initialise the game grid and the score counter
        score = 0
        grid = [[0] * height for _ in range(width)]

        # Create the snake and the apple
        head = Segment(width / 2, height / 5)
        body = Segment(head.X, head.Y + 1)
        # NOTE(review): the tail is built from head.Y + 1, i.e. the same cell
        # as ``body`` -- confirm whether body.Y + 1 was intended.
        tail = Segment(body.X, head.Y + 1)
        head.setNext(body)
        body.setNext(tail)
        snake = Snake(head, tail)
        apple = Apple(width, height)

        nextStateID = state_dict[getState(snake, apple)]
        deathCountdown = deathAfter

        # Game loop
        finished = False
        paused = False
        while not finished:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    killApp = True

            # Pause/unpause
            keyPressed = pygame.key.get_pressed()
            if keyPressed[pygame.K_SPACE]:
                paused = True
            elif keyPressed[pygame.K_RETURN]:
                paused = False
            elif keyPressed[pygame.K_END]:
                killApp = True

            if not paused:
                stateID = nextStateID

                # Choose action: always the greedy one, no exploration.
                actionID = np.argmax(Q[stateID])

                # Change direction. Actions are relative to the current
                # heading, so "left"/"right" are translated into an absolute
                # direction here.
                action = actions[actionID]
                if action != "wait":
                    if snake.direction == "up":
                        snake.direction = action
                    elif snake.direction == "down":
                        if action == "right":
                            snake.direction = "left"
                        if action == "left":
                            snake.direction = "right"
                    elif snake.direction == "left":
                        if action == "right":
                            snake.direction = "up"
                        if action == "left":
                            snake.direction = "down"
                    elif snake.direction == "right":
                        if action == "right":
                            snake.direction = "down"
                        if action == "left":
                            snake.direction = "up"

                snake.move()

                # Check if collected an apple
                if snake.head.X == apple.X and snake.head.Y == apple.Y:
                    snake.grow()
                    score += 1
                    if score > highscore:
                        highscore = score
                    deathCountdown = deathAfter
                    # Re-roll the apple until it lands on a free grid cell.
                    while True:
                        apple.generate()
                        if grid[apple.X][apple.Y] == 0:
                            break
                else:
                    deathCountdown -= 1

                # Death check
                if snake.isDead(height, width) or deathCountdown <= 0:
                    finished = True
                else:
                    nextStateID = state_dict[getState(snake, apple)]

            draw(grid, snake, apple, score, highscore)
            clock.tick(fps)
            if killApp:
                break