Esempio n. 1
0
class Agent:
    """Couples a neural network to a snake game and plays by prediction."""

    def __init__(self, shape=(10, 12, 4), size=(20, 20)):
        self.NeuralNetwork = NeuralNetwork(shape)
        self.row, self.col = size
        self.game = SnakeGame(self.row, self.col)

    def move(self):
        """Perform one game step using the network's best non-reversing move."""
        # Score the current state with the neural net.
        features = self.game.information()
        scores = self.NeuralNetwork.predict(features)

        # Indices of moves ordered worst-to-best by predicted score.
        ranking = np.argsort(scores)
        directions = list(Direction)

        chosen = directions[ranking[-1]]
        # If the chosen direction's vector cancels the current heading,
        # it is a 180-degree reversal; fall back to the runner-up.
        cancel = np.array(chosen.value) + np.array(self.game.direction.value)
        if np.linalg.norm(cancel) == 0:
            chosen = directions[ranking[-2]]

        self.game.direction = chosen
        self.game.step()
Esempio n. 2
0
 def initial_population(self):
     """Play random games and label (observation+action) samples for training.

     Labels: -1 when the move killed the snake, 1 when it ate food or moved
     closer to it, 0 otherwise.
     """
     training_data = []
     for _ in range(self.initial_games):
         game = SnakeGame()
         _, prev_score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         prev_food_distance = self.get_food_distance(snake, food)
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(snake)
             done, score, snake, food = game.step(game_action)
             sample = self.add_action_to_observation(prev_observation, action)
             if done:
                 # Dying is the worst outcome.
                 training_data.append([sample, -1])
                 break
             food_distance = self.get_food_distance(snake, food)
             # Reward moves that ate food or closed the gap to it.
             improved = score > prev_score or food_distance < prev_food_distance
             training_data.append([sample, 1 if improved else 0])
             prev_observation = self.generate_observation(snake, food)
             prev_food_distance = food_distance
     return training_data
Esempio n. 3
0
 def test_model(self, model):
     """Run *model* over self.test_games games, recording steps and scores."""
     steps_arr = []
     scores_arr = []
     for _ in range(self.test_games):
         steps = 0
         game_memory = []
         game = SnakeGame()
         _, score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         for _ in range(self.goal_steps):
             # Score each relative turn (-1, 0, 1) with the model.
             predictions = [
                 model.predict(
                     self.add_action_to_observation(
                         prev_observation, turn).reshape(-1, 5, 1))
                 for turn in range(-1, 2)
             ]
             action = np.argmax(np.array(predictions))
             game_action = self.get_game_action(snake, action - 1)
             done, score, snake, food = game.step(game_action)
             game_memory.append([prev_observation, action])
             if done:
                 break
             prev_observation = self.generate_observation(snake, food)
             steps += 1
         steps_arr.append(steps)
         scores_arr.append(score)
Esempio n. 4
0
 def initial_population(self):
     """Generate labelled training samples by playing random games.

     The 1/0 labels are later used to provide value estimates for each
     possible decision; -1 marks a fatal move.
     """
     training_data = []
     for _ in range(self.initial_games):
         game = SnakeGame()
         # game.start() returns generate_observation output from the snake game.
         _, prev_score, snake, food, obstacles = game.start()
         prev_observation = self.generate_observation(snake, food, obstacles)
         prev_food_distance = self.get_food_distance(snake, food)
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(snake)
             done, score, snake, food, obstacles = game.step(game_action)
             sample = self.add_action_to_observation(prev_observation, action)
             if done:
                 training_data.append([sample, -1])
                 break
             food_distance = self.get_food_distance(snake, food)
             # Did you get closer to the objective (or eat food)?
             if score > prev_score or food_distance < prev_food_distance:
                 training_data.append([sample, 1])  # label as good decision
             else:
                 training_data.append([sample, 0])  # label as bad decision
             prev_observation = self.generate_observation(snake, food, obstacles)
             prev_food_distance = food_distance
     return training_data
Esempio n. 5
0
 def test_model(self, model):
     """Measure and print average survival steps of *model* over test games."""
     steps_arr = []
     for _ in range(self.test_games):
         steps = 0
         game_memory = []
         game = SnakeGame()
         _, _, snake, _ = game.start()
         prev_observation = self.generate_observation(snake)
         for _ in range(self.goal_steps):
             # Evaluate all three relative turns and keep the best.
             predictions = [
                 model.predict(
                     self.add_action_to_observation(
                         prev_observation, turn).reshape(-1, 4, 1))
                 for turn in range(-1, 2)
             ]
             action = np.argmax(np.array(predictions))
             game_action = self.get_game_action(snake, action - 1)
             done, _, snake, _ = game.step(game_action)
             game_memory.append([prev_observation, action])
             if done:
                 break
             prev_observation = self.generate_observation(snake)
             steps += 1
         steps_arr.append(steps)
     print('Average steps:', mean(steps_arr))
     print(Counter(steps_arr))
Esempio n. 6
0
 def test_model(self, model):
     """Evaluate *model* on self.test_games games and report statistics.

     Prints average steps/score, lowest/highest score, and (for mazes) the
     number of solved games; pickles the raw step and score arrays to the
     files 'steps_arr' and 'scores_arr'.
     """
     steps_arr = []
     scores_arr = []
     count = 0
     solved = 0
     print("Testing in progress")
     for i in range(self.test_games):
         steps = 0
         game_memory = []
         game = SnakeGame()
         if self.game_type == 'maze':
             game = MazeGame()
         _, score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         for _ in range(self.goal_steps):
             # Score each relative turn (-1, 0, 1) with the model.
             predictions = []
             for action in range(-1, 2):
                 predictions.append(
                     model.predict(
                         self.add_action_to_observation(
                             prev_observation, action).reshape(-1, 5, 1)))
             action = np.argmax(np.array(predictions))
             game_action = self.get_game_action(snake, action - 1)
             done, score, snake, food = game.step(game_action)
             game_memory.append([prev_observation, action])
             if done:
                 self.progress(i + 1, self.test_games)
                 # A maze run counts as solved when it ends with score 1.
                 if self.game_type == 'maze' and score == 1:
                     solved += 1
                 count += 1
                 # NOTE: an unreachable `if False:` debug dump lived here;
                 # removed as dead code.
                 break
             else:
                 prev_observation = self.generate_observation(snake, food)
                 steps += 1
         steps_arr.append(steps)
         scores_arr.append(score)
     print("\n\n")
     print('Average steps:', mean(steps_arr))
     print('Average score:', mean(scores_arr))
     scores_arr.sort()
     print('Lowest score:', scores_arr[0])
     print('Highest score:', scores_arr[-1])
     if self.game_type == 'maze':
         print('Total solved mazes:', solved)
     # Persist raw results for later analysis.
     with open('steps_arr', 'wb') as file:
         pickle.dump(steps_arr, file)
     with open('scores_arr', 'wb') as file:
         pickle.dump(scores_arr, file)
Esempio n. 7
0
 def visualise_game(self, model):
     """Play one GUI game, steering the snake with *model*'s predictions."""
     game = SnakeGame(gui=True)
     _, _, snake, _ = game.start()
     prev_observation = self.generate_observation(snake)
     for _ in range(self.goal_steps):
         # Predict a value for each relative turn and pick the best.
         predictions = [
             model.predict(
                 self.add_action_to_observation(
                     prev_observation, turn).reshape(-1, 4, 1))
             for turn in range(-1, 2)
         ]
         action = np.argmax(np.array(predictions))
         game_action = self.get_game_action(snake, action - 1)
         done, _, snake, _ = game.step(game_action)
         if done:
             break
         prev_observation = self.generate_observation(snake)
Esempio n. 8
0
 def initial_population(self):
     """Collect (observation+action, survived) samples from random play."""
     training_data = []
     for _ in range(self.initial_games):
         game = SnakeGame()
         _, _, snake, _ = game.start()
         prev_observation = self.generate_observation(snake)
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(snake)
             done, _, snake, _ = game.step(game_action)
             sample = self.add_action_to_observation(prev_observation, action)
             # Label 0 when the move killed the snake, 1 when it survived.
             training_data.append([sample, 0 if done else 1])
             if done:
                 break
             prev_observation = self.generate_observation(snake)
     print(len(training_data))
     return training_data
Esempio n. 9
0
 def test_model(self, model):
     """Run self.test_games games with *model* and print step/score stats."""
     steps_arr = []
     scores_arr = []
     for _ in range(self.test_games):
         steps = 0
         game_memory = []
         game = SnakeGame()
         _, score, snake, food, obstacles = game.start()
         prev_observation = self.generate_observation(
             snake, food, obstacles)
         for _ in range(self.goal_steps):
             # Iterate through each possible decision (-1, 0, 1).
             predictions = [
                 model.predict(
                     self.add_action_to_observation(
                         prev_observation, turn).reshape(-1, 5, 1))
                 for turn in range(-1, 2)
             ]
             # Choose the decision with the highest predicted value.
             action = np.argmax(np.array(predictions))
             # Perform the chosen action in the game.
             game_action = self.get_game_action(snake, action - 1)
             done, score, snake, food, obstacles = game.step(game_action)
             game_memory.append([prev_observation, action])
             if done:
                 # Dump the terminal state for debugging.
                 print('-----')
                 print(steps)
                 print(snake)
                 print(food)
                 print(prev_observation)
                 print(predictions)
                 break
             prev_observation = self.generate_observation(
                 snake, food, obstacles)
             steps += 1
         steps_arr.append(steps)
         scores_arr.append(score)
     print('Average steps:', mean(steps_arr))
     print(Counter(steps_arr))
     print('Average score:', mean(scores_arr))
     print(Counter(scores_arr))
Esempio n. 10
0
 def initial_population(self):
     """Generate labelled training data from random games and pickle it.

     Labels: -1 snake died, 1 the move was efficient (food eaten or
     distance to food reduced), 0 otherwise.
     """
     training_data = []
     print("Generating initial games")
     for i in range(self.initial_games):
         self.progress(i + 1, self.initial_games)
         game = SnakeGame()
         if self.game_type == 'maze':
             game = MazeGame()
         _, prev_score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         prev_food_distance = self.get_food_distance(snake, food)
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(snake)
             done, score, snake, food = game.step(game_action)
             sample = self.add_action_to_observation(prev_observation, action)
             if done:
                 training_data.append([sample, -1])  # snake is dead
                 break
             food_distance = self.get_food_distance(snake, food)
             if score > prev_score or food_distance < prev_food_distance:
                 training_data.append([sample, 1])  # last move was efficient
             else:
                 training_data.append([sample, 0])  # last move was not efficient
             prev_observation = self.generate_observation(snake, food)
             prev_food_distance = food_distance
     # Cache the generated population on disk.
     with open('init_pop_gen', 'wb') as file:
         pickle.dump(training_data, file)
     return training_data
Esempio n. 11
0
                sock.sendto((json.dumps(SEND_LIST)).encode(), (CLIENT_IP, OUT_PORT)) 
                game.end_game()
                print("sending ENDGAME message! " + str(SEND_LIST))
            else:
                SEND_LIST = [1,OBSERVATION_LIST] # remember: we talk in [code, [obs1,obs2,obs3....]] form.
                sock.sendto((json.dumps(SEND_LIST)).encode(), (CLIENT_IP, OUT_PORT)) 


            print("sent message " + str(SEND_LIST))
            # we wait for client's response.
            data, addr = sock.recvfrom(1024) 
            # process the response, perform the action in-game.
            print ("received message: ", data.decode())
            received_json = json.loads(data)
            if(received_json[0] == 1):
                game.step(received_json[1]) # input the action into our game and proceed.
            elif(received_json[0] == 2):
                game.end_game()
                #active = false
                in_progress = false
                connected = false
                print("Client is disconnecting. Exiting game.")
                
            elif(received_json[0] == 0):
                print("WARNING: A client is connecting, even though game is in progress!")
            else:
                print("An unknown response was received from the client!")
                print("RECEIVED: " + str(received_json))

        except Exception as e:
            print("An Error has occurred: " + str(e))
Esempio n. 12
0
    game_memory = []
    game.reset( True )
    win = game.getWindow()
    alive = True
    prev_obs = []
    new_observation = []
    while(True):
        win.getch()
        if len(prev_obs)==0:
            action = game.sample()
        else:
            action = np.argmax(model.predict(prev_obs.reshape(-1,len(prev_obs),1))[0])

        choices.append(action)
                
        new_observation, reward, alive = game.step(action)
        if not alive:
            break
        prev_obs = new_observation
        game_memory.append([new_observation, action])
        score+=reward
        #break
    game.close()
    scores.append(score)

print('Average Score:',sum(scores)/len(scores))
print('choice 1:{}  choice 0:{}'.format(choices.count(1)/len(choices),choices.count(0)/len(choices)))
print(score_requirement)
#"""
"""
game.reset( True )