Beispiel #1
0
    def get_action(self, state: GameState) -> GameAction:
        """Pick the action whose *average* heuristic value over the
        sampled opponent responses is highest (unlike the plain greedy
        agent, which maximizes a single value); ties broken uniformly
        at random.
        """
        # Default to every legal move so there is always a candidate,
        # e.g. when nothing scores above -inf.
        candidates = state.get_possible_actions(player_index=self.player_index)
        top_score = -np.inf
        for my_action in state.get_possible_actions(player_index=self.player_index):
            total = 0
            samples = 0
            for joint_moves in state.get_possible_actions_dicts_given_action(
                    my_action, player_index=self.player_index):
                joint_moves[self.player_index] = my_action
                successor = get_next_state(state, joint_moves)
                total += _heuristic_for_tournament(successor, self.player_index)
                samples += 1
                if len(state.opponents_alive) > 2:
                    # Many opponents alive: sample only one joint action
                    # to keep time and memory bounded.
                    break
            mean_score = total / samples
            # Rank actions by the mean heuristic value of their outcomes.
            if mean_score > top_score:
                top_score = mean_score
                candidates = [my_action]
            elif mean_score == top_score:
                candidates.append(my_action)

        return np.random.choice(candidates)
Beispiel #2
0
 def get_action(self, state: GameState) -> GameAction:
     """One-ply greedy choice: maximize the heuristic over successor
     states, maintaining a running average of per-move wall time in
     self.avg_time.
     """
     t0 = time.time()
     # Start with the full action list so a lone agent (no opponents)
     # still has candidates; overridden once any score beats -inf.
     chosen = state.get_possible_actions(player_index=self.player_index)
     top = -np.inf
     for my_move in state.get_possible_actions(player_index=self.player_index):
         for joint in state.get_possible_actions_dicts_given_action(
                 my_move, player_index=self.player_index):
             joint[self.player_index] = my_move
             score = self._heuristic(get_next_state(state, joint))
             if score > top:
                 top = score
                 chosen = [my_move]
             elif score == top:
                 chosen.append(my_move)
             if len(state.opponents_alive) > 2:
                 # Many opponents: evaluate a single joint action only,
                 # to reduce time & memory.
                 break
     t1 = time.time()
     # Incremental running mean over all moves played so far.
     self.counter_steps += 1
     self.avg_time = ((t1 - t0) + self.avg_time *
                      (self.counter_steps - 1)) / self.counter_steps
     return np.random.choice(chosen)
Beispiel #3
0
 def get_action(self, state: GameState) -> GameAction:
     """Run depth-limited minimax from each root action and choose
     uniformly at random among the best-scoring ones.
     """
     winners = state.get_possible_actions(player_index=self.player_index)
     best = -np.inf
     for root_action in state.get_possible_actions(player_index=self.player_index):
         # Wrap the (state, action) pair and search the remaining plies.
         score = self.RB_minimax(self.TurnBasedGameState(state, root_action),
                                 state.depth - 1)
         if score > best:
             best = score
             winners = [root_action]
         elif score == best:
             winners.append(root_action)
     return np.random.choice(winners)
Beispiel #4
0
 def get_action(self, state: GameState) -> GameAction:
     """Depth-2 minimax over the root actions.

     Returns the last action attaining the maximum value (ties go to
     the most recently examined action), or None when there are no
     legal actions.
     """
     # np.NINF was removed in NumPy 2.0; -np.inf is the portable spelling.
     max_value = -np.inf
     best_action = None
     for action in state.get_possible_actions(self.player_index):
         value = self.rb_minimax(self.TurnBasedGameState(state, action), 2)
         # `>=` reproduces the original `max(...)` + equality check:
         # a tie replaces the previous best with the newer action.
         if value >= max_value:
             max_value = value
             best_action = action
     return best_action
Beispiel #5
0
    def get_action(self, state: GameState) -> GameAction:
        """Tournament policy: if a trap is detected, escape it;
        otherwise take the action whose successor maximizes the
        tournament heuristic, with random tie-breaking.
        """
        if self.is_trap(state):
            return self.trap_escape(state)
        # Fall back to the full action list for the case where the agent
        # is alone; replaced once any score beats -inf.
        leaders = state.get_possible_actions(
            player_index=self.player_index)
        top = -np.inf
        for my_move in state.get_possible_actions(
                player_index=self.player_index):
            for joint in state.get_possible_actions_dicts_given_action(
                    my_move, player_index=self.player_index):
                joint[self.player_index] = my_move
                successor = get_next_state(state, joint)
                score = self.tournament_heuristic(successor)
                if score > top:
                    top = score
                    leaders = [my_move]
                elif score == top:
                    leaders.append(my_move)

        return np.random.choice(leaders)
Beispiel #6
0
 def get_action(self, state: GameState) -> GameAction:
     """Alpha-beta search from each root action; keeps the first action
     that strictly improves on the best value seen so far (no random
     tie-breaking).
     """
     # One shared wrapper object, mutated per candidate action.
     shared_state = self.TurnBasedGameState(state, None)
     best_action = GameAction.STRAIGHT  # placeholder until a value beats -inf
     best_value = -np.inf
     for candidate in state.get_possible_actions(player_index=self.player_index):
         shared_state.agent_action = candidate
         score = self._RB_alpha_beta(shared_state, self.Turn.OPPONENTS_TURN,
                                     self.DEPTH, -np.inf, np.inf)
         if score > best_value:
             best_value = score
             best_action = candidate
     return best_action
Beispiel #7
0
 def get_action(self, state: GameState) -> GameAction:
     """Depth-2 minimax root search; accumulates wall-clock timing
     statistics in self.time / self.num_played.

     Bug fix: time.clock() was removed in Python 3.8;
     time.perf_counter() is the documented replacement.
     """
     start = time.perf_counter()
     cur_max = -np.inf
     # Fallback action in case no value beats -inf.
     best_action = GameAction(1)
     for action in state.get_possible_actions(self.player_index):
         state_after_turn = MinimaxAgent.TurnBasedGameState(state, action)
         state_value = self.__RB_Minimax__(state_after_turn, 2)
         if state_value > cur_max:
             cur_max = state_value
             best_action = action
     self.time += time.perf_counter() - start
     self.num_played += 1
     return best_action
 def get_action(self, state: GameState) -> GameAction:
     """Depth-2 minimax from every root action; choose uniformly at
     random among the best-valued ones.
     """
     # Lazily create the cached turn wrapper on the first call.
     if self.curr_turn is None:
         self.curr_turn = self.TurnBasedGameState(state, None)
     # Mark the cached wrapper as being on the opponents' turn.
     self.curr_turn.curr_turn = MinimaxAgent.Turn.OPPONENTS_TURN
     top_value = -np.inf
     top_actions = []
     for candidate in state.get_possible_actions(
             player_index=self.player_index):
         value = self.minimax(self.TurnBasedGameState(state, candidate), 2)
         if value > top_value:
             top_value = value
             top_actions = [candidate]
         elif value == top_value:
             top_actions.append(candidate)
     return np.random.choice(top_actions)
 def get_action(self, state: GameState) -> GameAction:
     """Alpha-beta search (depth 2) from every root action; uniform
     random tie-breaking among the best.
     """
     winners = []
     best = -np.inf
     for candidate in state.get_possible_actions(
             player_index=self.player_index):
         wrapped = MinimaxAgent.TurnBasedGameState(state, candidate)
         value = self.alpha_beta_value(wrapped,
                                       MinimaxAgent.Turn.AGENT_TURN, 2,
                                       float('-inf'), float('inf'))
         if value > best:
             best = value
             winners = [candidate]
         elif value == best:
             winners.append(candidate)
     return np.random.choice(winners)
Beispiel #10
0
    def get_action(self, state: GameState) -> GameAction:
        """Depth-1 minimax over the root actions, keeping a running
        average of per-move decision time in self.avg_time.

        Fixes: removed an unreachable `pass` after the return and dead
        commented-out debug code.
        """
        start = time.time()
        choose_max = -np.inf
        # Default so a valid GameAction is returned even if nothing
        # beats -inf.
        max_action = GameAction.LEFT
        for agent_action in state.get_possible_actions(
                player_index=self.player_index):
            # Root of the search tree for this candidate action.
            head_tree = self.TurnBasedGameState(state, agent_action)
            current_action_max = self.minimax(head_tree, 1)
            if choose_max < current_action_max:
                choose_max = current_action_max
                max_action = agent_action
        end = time.time()
        # Incremental running mean over all moves played so far.
        self.counter_steps += 1
        self.avg_time = ((end - start) + self.avg_time *
                         (self.counter_steps - 1)) / self.counter_steps
        return max_action
Beispiel #11
0
 def get_action(self, state: GameState) -> GameAction:
     """Alpha-beta root search with adaptive depth.

     Tracks the average time per move and shrinks/grows self.dep to
     stay within the per-game time budget (60s over ~500 turns,
     judging by the 60/500 and 45/500 thresholds).

     Bug fix: time.clock() was removed in Python 3.8;
     time.perf_counter() is the documented replacement.
     """
     start_time = time.perf_counter()
     max_value = -np.inf
     # Fallback action in case no value beats -inf.
     maxi_action = GameAction(0)
     for action in state.get_possible_actions(self.player_index):
         curr_value = self.get_action_wrapper(
             MinimaxAgent.TurnBasedGameState(state, action), self.dep,
             -np.inf, np.inf)
         if curr_value > max_value:
             max_value = curr_value
             maxi_action = action
     self.time += time.perf_counter() - start_time
     self.num_played += 1
     avg_turn = self.time / self.num_played
     if avg_turn > 60 / 500 and self.dep > 2:
         # Running too slow on average: search shallower.
         self.dep -= 1
     elif avg_turn < 45 / 500:
         # Spare time: allow one ply more depth.
         self.dep += 1
     return maxi_action
Beispiel #12
0
    def get_action(self, state: GameState) -> GameAction:
        """Alpha-beta search whose depth adapts per root action (2-4
        plies) based on how long the previous search took; random
        tie-breaking among the best actions.
        """
        depth_ladder = [2, 3, 4]
        level = 0
        top_value = -np.inf
        top_actions = []
        for candidate in state.get_possible_actions(
                player_index=self.player_index):
            started = time.time()
            value = self.abminimax(self.TurnBasedGameState(state, candidate),
                                   depth_ladder[level], -np.inf, np.inf)
            took = time.time() - started
            # Fast search -> climb one depth level; slow -> back off.
            if took < 15 and level < 2:
                level += 1
            if took > 20 and level > 0:
                level -= 1
            if value > top_value:
                top_value = value
                top_actions = [candidate]
            elif value == top_value:
                top_actions.append(candidate)
        return np.random.choice(top_actions)