class Agent:
    """Snake-playing agent that chooses each move with a neural network."""

    def __init__(self, shape=(10, 12, 4), size=(20, 20)):
        # shape: layer sizes for the network; size: (rows, cols) of the board.
        self.NeuralNetwork = NeuralNetwork(shape)
        self.row, self.col = size
        self.game = SnakeGame(self.row, self.col)

    # performs a move in the game according to the agent's recommendation
    def move(self):
        """Advance the game one step using the network's top-ranked direction."""
        # Score all directions from the current game state.
        features = self.game.information()
        scores = self.NeuralNetwork.predict(features)
        ranking = np.argsort(scores)  # ascending: best direction is last
        directions = list(Direction)
        chosen = directions[ranking[-1]]
        # A direction exactly opposite the current heading would be an instant
        # 180-degree turn; fall back to the second-best direction instead.
        # (Opposite vectors sum to zero, i.e. chosen == -current.)
        if np.array_equal(np.array(chosen.value),
                          -np.array(self.game.direction.value)):
            chosen = directions[ranking[-2]]
        self.game.direction = chosen
        self.game.step()
def initial_population(self):
    """Play random games and label every (observation, action) pair.

    Labels: -1 when the move ended the game, 1 when it raised the score
    or moved the snake closer to the food, 0 otherwise.
    """
    samples = []
    for _ in range(self.initial_games):
        game = SnakeGame()
        _, start_score, snake, food = game.start()
        last_obs = self.generate_observation(snake, food)
        last_dist = self.get_food_distance(snake, food)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, score, snake, food = game.step(game_action)
            example = self.add_action_to_observation(last_obs, action)
            if done:
                samples.append([example, -1])
                break
            dist = self.get_food_distance(snake, food)
            # NOTE(review): the score is always compared against the score at
            # game start (never updated per step) — verify this is intended.
            improved = score > start_score or dist < last_dist
            samples.append([example, 1 if improved else 0])
            last_obs = self.generate_observation(snake, food)
            last_dist = dist
    return samples
def test_model(self, model):
    """Play self.test_games games using the model and gather statistics.

    For each game, every candidate action offset in -1..1 is scored with
    the model, the best one is played, and the game runs until death or
    self.goal_steps moves.

    Args:
        model: predictor exposing predict() on a (-1, 5, 1)-shaped input.

    Returns:
        (steps_arr, scores_arr): per-game steps survived and final score.
        Fix: the original built both lists and silently discarded them
        (sibling versions report them); returning is backward-compatible.
    """
    steps_arr = []
    scores_arr = []
    for _ in range(self.test_games):
        steps = 0
        game = SnakeGame()
        _, score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        for _ in range(self.goal_steps):
            # Score each candidate action offset (-1, 0, 1) with the model.
            predictions = [
                model.predict(
                    self.add_action_to_observation(
                        prev_observation, candidate).reshape(-1, 5, 1))
                for candidate in range(-1, 2)
            ]
            action = np.argmax(np.array(predictions))
            game_action = self.get_game_action(snake, action - 1)
            done, score, snake, food = game.step(game_action)
            if done:
                break
            prev_observation = self.generate_observation(snake, food)
            steps += 1
        steps_arr.append(steps)
        scores_arr.append(score)
    return steps_arr, scores_arr
def initial_population(self):
    """Generate labelled training data from random games (with obstacles).

    Each sample is [observation+action, label]: -1 when the snake died,
    1 when the move raised the score or got closer to the food, else 0.
    The 1/0 labels are later used as value estimates for each decision.
    """
    training_data = []
    for _ in range(self.initial_games):
        game = SnakeGame()
        _, start_score, snake, food, obstacles = game.start()
        # generate_observation comes from the snake game helpers.
        prev_observation = self.generate_observation(snake, food, obstacles)
        prev_food_distance = self.get_food_distance(snake, food)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, score, snake, food, obstacles = game.step(game_action)
            sample = self.add_action_to_observation(prev_observation, action)
            if done:
                training_data.append([sample, -1])
                break
            food_distance = self.get_food_distance(snake, food)
            # Did this move get us closer to the objective?
            got_closer = (score > start_score
                          or food_distance < prev_food_distance)
            training_data.append([sample, 1 if got_closer else 0])
            prev_observation = self.generate_observation(snake, food, obstacles)
            prev_food_distance = food_distance
    return training_data
def test_model(self, model):
    """Play test games with the model and print step statistics."""
    steps_arr = []
    for _ in range(self.test_games):
        steps = 0
        game_memory = []
        game = SnakeGame()
        _, _, snake, _ = game.start()
        observation = self.generate_observation(snake)
        for _ in range(self.goal_steps):
            # Score each candidate action offset (-1, 0, 1) with the model.
            candidate_scores = [
                model.predict(
                    self.add_action_to_observation(
                        observation, candidate).reshape(-1, 4, 1))
                for candidate in range(-1, 2)
            ]
            action = np.argmax(np.array(candidate_scores))
            game_action = self.get_game_action(snake, action - 1)
            done, _, snake, _ = game.step(game_action)
            game_memory.append([observation, action])
            if done:
                break
            observation = self.generate_observation(snake)
            steps += 1
        steps_arr.append(steps)
    print('Average steps:', mean(steps_arr))
    print(Counter(steps_arr))
def test_model(self, model):
    """Evaluate the model over self.test_games games and print statistics.

    Plays either SnakeGame or MazeGame depending on self.game_type; for
    mazes, a game counts as solved when its final score equals 1. Pickles
    the per-game step counts and scores to 'steps_arr' and 'scores_arr'.

    Args:
        model: predictor exposing predict() on a (-1, 5, 1)-shaped input.
    """
    steps_arr = []
    scores_arr = []
    count = 0
    solved = 0
    print("Testing in progress")
    for i in range(self.test_games):
        steps = 0
        game = SnakeGame()
        if self.game_type == 'maze':
            game = MazeGame()
        _, score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        for _ in range(self.goal_steps):
            # Score each candidate action offset (-1, 0, 1) with the model.
            predictions = [
                model.predict(
                    self.add_action_to_observation(
                        prev_observation, candidate).reshape(-1, 5, 1))
                for candidate in range(-1, 2)
            ]
            action = np.argmax(np.array(predictions))
            game_action = self.get_game_action(snake, action - 1)
            done, score, snake, food = game.step(game_action)
            if done:
                self.progress(i + 1, self.test_games)
                if self.game_type == 'maze' and score == 1:
                    solved += 1
                count += 1
                # Fix: removed an `if False:` debug-print branch here that
                # could never execute (dead code).
                break
            prev_observation = self.generate_observation(snake, food)
            steps += 1
        steps_arr.append(steps)
        scores_arr.append(score)
    print("\n\n")
    print('Average steps:', mean(steps_arr))
    print('Average score:', mean(scores_arr))
    scores_arr.sort()
    print('Lowest score:', scores_arr[0])
    print('Highest score:', scores_arr[-1])
    if self.game_type == 'maze':
        print('Total solved mazes:', solved)
    with open('steps_arr', 'wb') as file:
        pickle.dump(steps_arr, file)
    with open('scores_arr', 'wb') as file:
        pickle.dump(scores_arr, file)
def visualise_game(self, model):
    """Run one GUI game, choosing every move with the model."""
    game = SnakeGame(gui = True)
    _, _, snake, _ = game.start()
    observation = self.generate_observation(snake)
    for _ in range(self.goal_steps):
        # Score each candidate action offset (-1, 0, 1) and play the best.
        candidate_scores = [
            model.predict(
                self.add_action_to_observation(
                    observation, candidate).reshape(-1, 4, 1))
            for candidate in range(-1, 2)
        ]
        best = np.argmax(np.array(candidate_scores))
        game_action = self.get_game_action(snake, best - 1)
        done, _, snake, _ = game.step(game_action)
        if done:
            break
        observation = self.generate_observation(snake)
def initial_population(self):
    """Collect survival-labelled samples from random games.

    A sample is labelled 0 when the move killed the snake, 1 otherwise.
    Prints the total sample count before returning.
    """
    training_data = []
    for _ in range(self.initial_games):
        game = SnakeGame()
        _, _, snake, _ = game.start()
        prev_observation = self.generate_observation(snake)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, _, snake, _ = game.step(game_action)
            sample = self.add_action_to_observation(prev_observation, action)
            training_data.append([sample, 0 if done else 1])
            if done:
                break
            prev_observation = self.generate_observation(snake)
    print(len(training_data))
    return training_data
def test_model(self, model):
    """Play test games with the model (obstacle variant).

    Prints a debug dump (steps, snake, food, observation, predictions)
    whenever a game ends, then overall step/score statistics.
    """
    steps_arr = []
    scores_arr = []
    for _ in range(self.test_games):
        steps = 0
        game_memory = []
        game = SnakeGame()
        _, score, snake, food, obstacles = game.start()
        prev_observation = self.generate_observation(snake, food, obstacles)
        for _ in range(self.goal_steps):
            # Score every possible decision offset (-1, 0, 1).
            predictions = [
                model.predict(
                    self.add_action_to_observation(
                        prev_observation, offset).reshape(-1, 5, 1))
                for offset in range(-1, 2)
            ]
            # Choose the decision with the highest predicted value.
            action = np.argmax(np.array(predictions))
            # Perform the chosen action in the game.
            game_action = self.get_game_action(snake, action - 1)
            done, score, snake, food, obstacles = game.step(game_action)
            game_memory.append([prev_observation, action])
            if done:
                print('-----')
                print(steps)
                print(snake)
                print(food)
                print(prev_observation)
                print(predictions)
                break
            prev_observation = self.generate_observation(snake, food, obstacles)
            steps += 1
        steps_arr.append(steps)
        scores_arr.append(score)
    print('Average steps:', mean(steps_arr))
    print(Counter(steps_arr))
    print('Average score:', mean(scores_arr))
    print(Counter(scores_arr))
def initial_population(self):
    """Generate labelled training data from random games (snake or maze).

    Labels: -1 when the snake died on the move, 1 when the move was
    efficient (score rose or food got closer), 0 otherwise. The data is
    pickled to 'init_pop_gen' before being returned.
    """
    training_data = []
    print("Generating initial games")
    for i in range(self.initial_games):
        self.progress(i + 1, self.initial_games)
        game = SnakeGame()
        if self.game_type == 'maze':
            game = MazeGame()
        _, start_score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        prev_food_distance = self.get_food_distance(snake, food)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, score, snake, food = game.step(game_action)
            sample = self.add_action_to_observation(prev_observation, action)
            if done:
                training_data.append([sample, -1])  # snake is dead
                break
            food_distance = self.get_food_distance(snake, food)
            # 1 = the last move was efficient, 0 = it was not.
            efficient = (score > start_score
                         or food_distance < prev_food_distance)
            training_data.append([sample, 1 if efficient else 0])
            prev_observation = self.generate_observation(snake, food)
            prev_food_distance = food_distance
    with open('init_pop_gen', 'wb') as file:
        pickle.dump(training_data, file)
    return training_data
sock.sendto((json.dumps(SEND_LIST)).encode(), (CLIENT_IP, OUT_PORT)) game.end_game() print("sending ENDGAME message! " + str(SEND_LIST)) else: SEND_LIST = [1,OBSERVATION_LIST] # remember: we talk in [code, [obs1,obs2,obs3....]] form. sock.sendto((json.dumps(SEND_LIST)).encode(), (CLIENT_IP, OUT_PORT)) print("sent message " + str(SEND_LIST)) # we wait for client's response. data, addr = sock.recvfrom(1024) # process the response, perform the action in-game. print ("received message: ", data.decode()) received_json = json.loads(data) if(received_json[0] == 1): game.step(received_json[1]) # input the action into our game and proceed. elif(received_json[0] == 2): game.end_game() #active = false in_progress = false connected = false print("Client is disconnecting. Exiting game.") elif(received_json[0] == 0): print("WARNING: A client is connecting, even though game is in progress!") else: print("An unknown response was received from the client!") print("RECEIVED: " + str(received_json)) except Exception as e: print("An Error has occurred: " + str(e))
game_memory = [] game.reset( True ) win = game.getWindow() alive = True prev_obs = [] new_observation = [] while(True): win.getch() if len(prev_obs)==0: action = game.sample() else: action = np.argmax(model.predict(prev_obs.reshape(-1,len(prev_obs),1))[0]) choices.append(action) new_observation, reward, alive = game.step(action) if not alive: break prev_obs = new_observation game_memory.append([new_observation, action]) score+=reward #break game.close() scores.append(score) print('Average Score:',sum(scores)/len(scores)) print('choice 1:{} choice 0:{}'.format(choices.count(1)/len(choices),choices.count(0)/len(choices))) print(score_requirement) #""" """ game.reset( True )