def get_action(self, state: GameState) -> GameAction:
    # Very similar to the greedy agent's get_action, but instead of picking the action with the
    # highest value we pick the action with the highest average value, so on average our snake does well.
    best_actions = state.get_possible_actions(player_index=self.player_index)
    best_value = -np.inf
    for action in state.get_possible_actions(player_index=self.player_index):
        avg_value = 0
        actions_len = 0
        for opponents_actions in state.get_possible_actions_dicts_given_action(
                action, player_index=self.player_index):
            opponents_actions[self.player_index] = action
            next_state = get_next_state(state, opponents_actions)
            h_value = _heuristic_for_tournament(next_state, self.player_index)
            avg_value += h_value
            actions_len += 1
            if len(state.opponents_alive) > 2:
                # consider only one possible dict of opponent actions to reduce time & memory
                break
        avg_value /= actions_len
        # choose the action according to the average value obtained by performing it
        if avg_value > best_value:
            best_value = avg_value
            best_actions = [action]
        elif avg_value == best_value:
            best_actions.append(action)
    return np.random.choice(best_actions)

def get_action(self, state: GameState) -> GameAction:
    # init with all possible actions for the case where the agent is alone; it will (possibly) be overridden later
    start = time.time()
    best_actions = state.get_possible_actions(player_index=self.player_index)
    best_value = -np.inf
    for action in state.get_possible_actions(player_index=self.player_index):
        for opponents_actions in state.get_possible_actions_dicts_given_action(
                action, player_index=self.player_index):
            opponents_actions[self.player_index] = action
            next_state = get_next_state(state, opponents_actions)
            h_value = self._heuristic(next_state)
            if h_value > best_value:
                best_value = h_value
                best_actions = [action]
            elif h_value == best_value:
                best_actions.append(action)
            if len(state.opponents_alive) > 2:
                # consider only one possible dict of opponent actions to reduce time & memory
                break
    end = time.time()
    self.counter_steps += 1
    self.avg_time = ((end - start) + self.avg_time * (self.counter_steps - 1)) / self.counter_steps
    return np.random.choice(best_actions)

def heuristic(state: GameState, player_index: int) -> float:
    """
    Computes the heuristic value for the agent with player_index at the given state
    :param state:
    :param player_index: integer. represents the identity of the player. this is the
        index of the agent's snake in the state.snakes array as well.
    :return:
    """
    if not state.snakes[player_index].alive:
        # we never want our snake to die
        return -500

    # set the weights
    too_long = 8
    fruit_weight = 1.4
    weight_for_length = 500
    board_factor = np.sqrt(state.board_size.width ** 2 + state.board_size.height ** 2)
    snake_length = state.snakes[player_index].length
    turns_left = state.game_duration_in_turns - state.turn_number
    possible_fruits = min(
        len(state.fruits_locations) + sum(s.length for s in state.snakes if s.alive),
        turns_left)
    if possible_fruits > 0:
        bonus_for_length = weight_for_length * snake_length / possible_fruits
    else:
        bonus_for_length = weight_for_length

    # Manhattan distance to our own tail, normalized by the board diagonal
    bonus_for_avoiding_tail = cityblock(
        state.snakes[player_index].head,
        state.snakes[player_index].tail_position) / board_factor
    avoiding_tail_weight = 1 - 1 / snake_length if snake_length > too_long else 0
    bonus_for_avoiding_tail *= avoiding_tail_weight

    # distinguish between the two game modes: eating fruits and surviving
    if len(state.fruits_locations) > 0:
        nearest_fruit_dist = min(
            cityblock(state.snakes[player_index].head, fruit)
            for fruit in state.fruits_locations)
        nearest_fruit_bonus = state.board_size.height + state.board_size.width - nearest_fruit_dist
        nearest_fruit_bonus /= state.board_size.height + state.board_size.width  # normalize
        nearest_fruit_bonus *= fruit_weight
        return nearest_fruit_bonus + bonus_for_length + bonus_for_avoiding_tail
    else:
        weight = 1.8
        distance_from_enemy_bonus = min(
            cityblock(state.snakes[player_index].head, state.snakes[enemy].head)
            for enemy in state.get_opponents_alive(player_index)
        ) if len(state.get_opponents_alive(player_index)) > 0 else 0
        distance_from_enemy_bonus /= board_factor  # normalize
        return bonus_for_length * weight + bonus_for_avoiding_tail * weight + distance_from_enemy_bonus

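# The heuristic above relies on cityblock from SciPy for Manhattan distances;
# a minimal usage sketch, assuming the standard scipy.spatial.distance API:
from scipy.spatial.distance import cityblock

assert cityblock((0, 0), (3, 4)) == 7  # |3 - 0| + |4 - 0|
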
def get_action(self, state: GameState) -> GameAction:
    best_value = -np.inf
    best_actions = state.get_possible_actions(player_index=self.player_index)
    for action in state.get_possible_actions(player_index=self.player_index):
        next_state = self.TurnBasedGameState(state, action)
        # the search depth is an attribute of the agent, not of the game state
        max_value = self.RB_minimax(next_state, self.depth - 1)
        if max_value > best_value:
            best_value = max_value
            best_actions = [action]
        elif max_value == best_value:
            best_actions.append(action)
    return np.random.choice(best_actions)

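# For reference, a minimal sketch of the TurnBasedGameState wrapper the minimax
# agents here rely on. The real class in the course skeleton may differ; the
# field names and the turn property below are assumptions inferred from how the
# wrapper is used in this file.
class TurnBasedGameState:
    def __init__(self, game_state: GameState, agent_action: GameAction):
        self.game_state = game_state
        self.agent_action = agent_action  # None while it is still the agent's turn

    @property
    def turn(self):
        # the agent moves first; once its action is fixed, the opponents respond
        return MinimaxAgent.Turn.AGENT_TURN if self.agent_action is None \
            else MinimaxAgent.Turn.OPPONENTS_TURN
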
def get_action(self, state: GameState) -> GameAction:
    max_value = -np.inf  # np.NINF was removed in NumPy 2.0
    best_action = None
    for action in state.get_possible_actions(self.player_index):
        value = self.rb_minimax(self.TurnBasedGameState(state, action), 2)
        max_value = max(max_value, value)
        best_action = action if max_value == value else best_action
    return best_action

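# A minimal sketch of the rb_minimax recursion assumed by the two get_action
# implementations above: a max node on the agent's turn, a min node over the
# opponents' joint actions otherwise. Attribute names such as tb_state.turn,
# tb_state.agent_action and state.is_terminal_state are assumptions about the
# course skeleton, not a definitive implementation.
def rb_minimax(self, tb_state, depth: int) -> float:
    state = tb_state.game_state
    # leaf: depth exhausted, game over, or our snake is already dead
    if depth == 0 or state.is_terminal_state or not state.snakes[self.player_index].alive:
        return heuristic(state, self.player_index)
    if tb_state.turn == MinimaxAgent.Turn.AGENT_TURN:
        # max node: pick the best action for our snake
        return max(
            self.rb_minimax(self.TurnBasedGameState(state, action), depth)
            for action in state.get_possible_actions(player_index=self.player_index))
    # min node: the opponents pick the joint action that is worst for us
    worst_value = np.inf
    for opponents_actions in state.get_possible_actions_dicts_given_action(
            tb_state.agent_action, player_index=self.player_index):
        opponents_actions[self.player_index] = tb_state.agent_action
        next_state = get_next_state(state, opponents_actions)
        worst_value = min(worst_value,
                          self.rb_minimax(self.TurnBasedGameState(next_state, None), depth - 1))
    return worst_value
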
def get_action(self, state: GameState) -> GameAction:
    if self.is_trap(state):
        return self.trap_escape(state)
    # init with all possible actions for the case where the agent is alone; it will (possibly) be overridden later
    best_actions = state.get_possible_actions(player_index=self.player_index)
    best_value = -np.inf
    for action in state.get_possible_actions(player_index=self.player_index):
        for opponents_actions in state.get_possible_actions_dicts_given_action(
                action, player_index=self.player_index):
            opponents_actions[self.player_index] = action
            next_state = get_next_state(state, opponents_actions)
            h_value = self.tournament_heuristic(next_state)
            if h_value > best_value:
                best_value = h_value
                best_actions = [action]
            elif h_value == best_value:
                best_actions.append(action)
    return np.random.choice(best_actions)

def get_action(self, state: GameState) -> GameAction:
    game_state = self.TurnBasedGameState(state, None)
    actions = state.get_possible_actions(player_index=self.player_index)
    best_action = GameAction.STRAIGHT  # default action; replaced as soon as a better one is found
    best_value = -np.inf
    for action in actions:
        game_state.agent_action = action
        value = self._RB_alpha_beta(game_state, self.Turn.OPPONENTS_TURN, self.DEPTH, -np.inf, np.inf)
        if value > best_value:
            best_action = action
            best_value = value
    return best_action

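# A minimal sketch of the alpha-beta recursion behind _RB_alpha_beta (and the
# similar alpha_beta_value / abminimax helpers below): the same turn-based
# minimax as above, plus the standard alpha/beta cutoffs. Names such as
# tb_state.turn and state.is_terminal_state follow the assumptions stated
# earlier; this is a sketch, not the graded implementation.
def rb_alpha_beta(self, tb_state, depth: int, alpha: float, beta: float) -> float:
    state = tb_state.game_state
    if depth == 0 or state.is_terminal_state or not state.snakes[self.player_index].alive:
        return heuristic(state, self.player_index)
    if tb_state.turn == MinimaxAgent.Turn.AGENT_TURN:
        value = -np.inf
        for action in state.get_possible_actions(player_index=self.player_index):
            value = max(value, self.rb_alpha_beta(
                self.TurnBasedGameState(state, action), depth, alpha, beta))
            alpha = max(alpha, value)
            if value >= beta:  # the min player above will never allow this branch
                break
        return value
    value = np.inf
    for opponents_actions in state.get_possible_actions_dicts_given_action(
            tb_state.agent_action, player_index=self.player_index):
        opponents_actions[self.player_index] = tb_state.agent_action
        next_state = get_next_state(state, opponents_actions)
        value = min(value, self.rb_alpha_beta(
            self.TurnBasedGameState(next_state, None), depth - 1, alpha, beta))
        beta = min(beta, value)
        if value <= alpha:  # the max player above will never allow this branch
            break
    return value
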
def get_action(self, state: GameState) -> GameAction:
    cur_time = time.perf_counter()  # time.clock() was removed in Python 3.8
    cur_max = -np.inf
    best_action = GameAction(1)
    for action in state.get_possible_actions(self.player_index):
        state_after_turn = MinimaxAgent.TurnBasedGameState(state, action)
        state_value = self.__RB_Minimax__(state_after_turn, 2)
        if state_value > cur_max:
            cur_max = state_value
            best_action = action
    end_time = time.perf_counter()
    self.time += end_time - cur_time
    self.num_played += 1
    return best_action

def get_action(self, state: GameState) -> GameAction:
    if self.curr_turn is None:
        self.curr_turn = self.TurnBasedGameState(state, None)
    best_value = -np.inf
    best_actions = []
    self.curr_turn.curr_turn = MinimaxAgent.Turn.OPPONENTS_TURN
    for our_action in state.get_possible_actions(player_index=self.player_index):
        h_value = self.minimax(self.TurnBasedGameState(state, our_action), 2)
        if h_value > best_value:
            best_value = h_value
            best_actions = [our_action]
        elif h_value == best_value:
            best_actions.append(our_action)
    return np.random.choice(best_actions)

def get_action(self, state: GameState) -> GameAction:
    max_actions = []
    best_value = -np.inf
    for action in state.get_possible_actions(player_index=self.player_index):
        turn_next_state = MinimaxAgent.TurnBasedGameState(state, action)
        min_max_value = self.alpha_beta_value(turn_next_state, MinimaxAgent.Turn.AGENT_TURN, 2,
                                              float('-inf'), float('inf'))
        if min_max_value > best_value:
            best_value = min_max_value
            max_actions = [action]
        elif min_max_value == best_value:
            max_actions.append(action)
    return np.random.choice(max_actions)

def get_action(self, state: GameState) -> GameAction:
    start = time.time()
    choose_max = -np.inf
    max_action = GameAction.LEFT
    for agent_action in state.get_possible_actions(player_index=self.player_index):
        # root of the search over the possible opponent actions for this agent action
        head_tree = self.TurnBasedGameState(state, agent_action)
        current_action_max = self.minimax(head_tree, 1)
        if choose_max < current_action_max:
            choose_max = current_action_max
            max_action = agent_action
    end = time.time()
    self.counter_steps += 1
    self.avg_time = ((end - start) + self.avg_time * (self.counter_steps - 1)) / self.counter_steps
    return max_action

def get_action(self, state: GameState) -> GameAction:
    start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
    max_value = -np.inf
    maxi_action = GameAction(0)
    all_actions = state.get_possible_actions(self.player_index)
    for action in all_actions:
        curr_value = self.get_action_wrapper(MinimaxAgent.TurnBasedGameState(state, action),
                                             self.dep, -np.inf, np.inf)
        if curr_value > max_value:
            max_value = curr_value
            maxi_action = action
    stop_time = time.perf_counter()
    self.time += stop_time - start_time
    self.num_played += 1
    avg_turn = self.time / self.num_played
    # adapt the search depth to the 60-seconds-per-500-turns time budget
    if avg_turn > 60 / 500 and self.dep > 2:
        self.dep -= 1
    elif avg_turn < 45 / 500:
        # if we have extra time, we can afford to search deeper
        self.dep += 1
    return maxi_action

def get_action(self, state: GameState) -> GameAction:
    D_arr = [2, 3, 4]  # candidate search depths
    best_value = -np.inf
    best_actions = []
    i = 0
    for our_action in state.get_possible_actions(player_index=self.player_index):
        t = time.time()
        h_value = self.abminimax(self.TurnBasedGameState(state, our_action), D_arr[i], -np.inf, np.inf)
        elapsed = time.time() - t
        # adjust the depth index according to how long the last search took
        if elapsed < 15 and i < 2:
            i += 1
        if elapsed > 20 and i > 0:
            i -= 1
        if h_value > best_value:
            best_value = h_value
            best_actions = [our_action]
        elif h_value == best_value:
            best_actions.append(our_action)
    return np.random.choice(best_actions)

def _get_reachable_area(state: GameState, player_index: int) -> float:
    snake = state.snakes[player_index]
    # get the board with the positions of player_index's snake marked on it
    snake_board = state.get_board(player_index)[0]
    # a DFS flood fill from the head counts the area reachable by the snake
    return _dfs(snake_board, snake.head, True)

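# _dfs itself is not shown here; a minimal sketch of the flood fill it is
# assumed to perform, counting the free cells reachable from the snake's head.
# The board encoding (-1 for an empty cell) and the head cell being marked as
# occupied are assumptions about the course skeleton.
def _dfs(board: np.ndarray, start, is_head: bool = False) -> float:
    rows, cols = board.shape
    start = tuple(start)
    visited = set()
    stack = [start]
    count = 0
    while stack:
        r, c = stack.pop()
        if (r, c) in visited or not (0 <= r < rows and 0 <= c < cols):
            continue
        visited.add((r, c))
        # occupied cells block the fill; the head is occupied but is still expanded
        if board[r, c] != -1 and not (is_head and (r, c) == start):
            continue
        if (r, c) != start:
            count += 1
        stack.extend([(r + 1, c), (r - 1, c), (r, c + 1), (r, c - 1)])
    return float(count)
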
def heuristic(state: GameState, player_index: int) -> float:
    """
    Computes the heuristic value for the agent with player_index at the given state
    :param state:
    :param player_index: integer. represents the identity of the player. this is the
        index of the agent's snake in the state.snakes array as well.
    :return:
    """
    if not state.snakes[player_index].alive:
        return state.snakes[player_index].length

    discount_factor = 0.5
    max_possible_fruits = len(state.fruits_locations) + sum(
        s.length for s in state.snakes if s.index != player_index and s.alive)
    turns_left = state.game_duration_in_turns - state.turn_number
    max_possible_fruits = min(max_possible_fruits, turns_left)
    optimistic_future_reward = discount_factor * (
        1 - discount_factor ** max_possible_fruits) / (1 - discount_factor)
    original_greedy_value = state.snakes[player_index].length + optimistic_future_reward

    best_dist = state.board_size.width + state.board_size.height
    for fruit in state.fruits_locations:
        d_x = abs(state.snakes[player_index].head[0] - fruit[0])
        d_y = abs(state.snakes[player_index].head[1] - fruit[1])
        manhattan_dist = d_x + d_y
        if manhattan_dist < best_dist:
            best_dist = manhattan_dist

    # larger bonus for a smaller best_dist
    if best_dist != 0:
        bonus_dist_fruit = optimistic_future_reward / best_dist
    else:
        bonus_dist_fruit = optimistic_future_reward

    # penalty if the next move could hit a border
    bonus_border = 0
    head_x = state.snakes[player_index].head[0]
    head_y = state.snakes[player_index].head[1]
    radius = 1
    if not state.is_within_grid_boundaries((head_x + radius, head_y)) \
            or not state.is_within_grid_boundaries((head_x, head_y + radius)) \
            or not state.is_within_grid_boundaries((head_x - radius, head_y)) \
            or not state.is_within_grid_boundaries((head_x, head_y - radius)):
        bonus_border -= optimistic_future_reward / (best_dist + radius)

    # penalty if a snake's body occupies an adjacent cell, otherwise no change
    bonus_is_snake_in_cell = 0
    for s in state.snakes:
        if not s.index == player_index:
            if s.is_in_cell((head_x + radius, head_y)) or s.is_in_cell((head_x, head_y + radius)) \
                    or s.is_in_cell((head_x - radius, head_y)) or s.is_in_cell((head_x, head_y - radius)):
                # max(..., 1) guards against division by zero when the head sits on a fruit
                bonus_border -= optimistic_future_reward / max(best_dist, 1)
                break
        elif state.snakes[player_index].is_in_cell((head_x + radius, head_y)) \
                or state.snakes[player_index].is_in_cell((head_x, head_y + radius)) \
                or state.snakes[player_index].is_in_cell((head_x - radius, head_y)) \
                or state.snakes[player_index].is_in_cell((head_x, head_y - radius)):
            bonus_border -= optimistic_future_reward
            break

    return float(original_greedy_value + bonus_dist_fruit + bonus_border + bonus_is_snake_in_cell)

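# A quick sanity check (an illustrative snippet, not part of the agent):
# optimistic_future_reward above is the closed form of the geometric series
# sum_{i=1..n} discount_factor**i, i.e. the best case where each of the n
# remaining fruits contributes a reward discounted by 0.5 per step.
discount_factor = 0.5
for n in (1, 3, 10):
    closed_form = discount_factor * (1 - discount_factor ** n) / (1 - discount_factor)
    explicit_sum = sum(discount_factor ** i for i in range(1, n + 1))
    assert abs(closed_form - explicit_sum) < 1e-12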