def alphabeta(self, state, depth, alpha, beta, max_player, timer):
    # Process the state once and cache the result.
    state_proc = self.proc_state_table.get(state)
    if state_proc is None:
        state_proc = env.AlphaStateProcessor(state)
        state_proc.process()
        self.proc_state_table.put(state, state_proc)
    self.nodes += 1
    if timer.is_over():
        raise Exception('time is over!')
    if depth == 0 or state_proc.is_terminal():
        return -1, state_proc.utility(self.color)

    # Cached move ordering: visit children by the values they received in a
    # previous search of this state, which improves alpha-beta pruning.
    moving_order = self.moving_order.get(state)
    if moving_order is None:
        moving_order = self.get_fresh_dict()

    if max_player:
        max_act = -1
        # Best-first for the maximizer: highest previous value first.
        for a, _ in sorted(moving_order.items(), key=lambda kv: kv[1],
                           reverse=True):
            if len(state[a]) >= 6:  # column is full, move is illegal
                continue
            next_state = env.get_next_state(state, a, self.color)
            _, val = self.alphabeta(next_state, depth - 1, alpha, beta,
                                    False, timer)
            moving_order[a] = val
            if alpha < val:
                alpha = val
                max_act = a
            if alpha >= beta:  # beta cutoff
                break
        self.moving_order.put(state, moving_order)
        return max_act, alpha
    else:
        min_act = -1
        # Best-first for the minimizer: lowest previous value first.
        for a, _ in sorted(moving_order.items(), key=lambda kv: kv[1]):
            if len(state[a]) >= 6:  # column is full, move is illegal
                continue
            next_state = env.get_next_state(
                state, a, env.get_oponent_color(self.color))
            _, val = self.alphabeta(next_state, depth - 1, alpha, beta,
                                    True, timer)
            moving_order[a] = val
            if beta > val:
                beta = val
                min_act = a
            if alpha >= beta:  # alpha cutoff
                break
        self.moving_order.put(state, moving_order)
        return min_act, beta
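# A minimal sketch of an iterative-deepening driver for alphabeta above,
# written under assumptions: the real get_action is not shown here, and the
# timeout Exception, api.Timer, and the (action, value) return convention
# follow their use elsewhere in this file. Each completed iteration refreshes
# self.moving_order, so the next, deeper iteration searches good moves first.
def get_action(self, state, timer):
    best_act = -1
    depth = 1
    try:
        while True:  # the timer, not the depth, bounds this loop
            act, _ = self.alphabeta(state, depth, float('-inf'),
                                    float('inf'), True, timer)
            best_act = act  # only keep results of fully completed iterations
            depth += 1
    except Exception:
        pass  # timer expired mid-search; fall back to the deepest finished act
    return best_act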
def minimax(self, state: TurnBasedGameState, D: int):
    if (D == 0 or not state.game_state.snakes[self.player_index].alive
            or state.game_state.is_terminal_state):
        return heuristic(state.game_state, self.player_index)
    best_value = -np.inf
    worst_value = np.inf
    if state.turn == self.Turn.AGENT_TURN:
        # Max node: the depth is not decremented here; a full ply is the
        # agent's turn followed by the opponents' turn.
        for our_action in state.game_state.get_possible_actions(
                player_index=self.player_index):
            h_value = self.minimax(
                self.TurnBasedGameState(state.game_state, our_action), D)
            if h_value > best_value:
                best_value = h_value
        return best_value
    else:
        # Min node: the opponents jointly pick the worst outcome for us.
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, player_index=self.player_index):
            opponents_actions[self.player_index] = state.agent_action
            next_state = get_next_state(state.game_state, opponents_actions)
            h_value = self.minimax(
                self.TurnBasedGameState(next_state, None), D - 1)
            if h_value < worst_value:
                worst_value = h_value
        return worst_value
def rb_minimax(self, state: TurnBasedGameState, depth: int):
    # End of game: winning is worth the squared snake length, anything else -1.
    if state.game_state.turn_number == state.game_state.game_duration_in_turns:
        if state.game_state.current_winner == self.player_index:
            return state.game_state.snakes[self.player_index].length ** 2
        else:
            return -1
    if len(state.game_state.living_agents) == 0:
        return -1
    if depth == 0:
        return heuristic(state.game_state, self.player_index)
    if state.turn == self.Turn.AGENT_TURN:
        current_max = -np.inf
        for action in state.game_state.get_possible_actions(self.player_index):
            value = self.rb_minimax(
                self.TurnBasedGameState(state.game_state, action), depth)
            current_max = max(current_max, value)
        return current_max
    else:
        current_min = np.inf
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, player_index=self.player_index):
            opponents_actions[self.player_index] = state.agent_action
            next_state = get_next_state(state.game_state, opponents_actions)
            value = self.rb_minimax(
                self.TurnBasedGameState(next_state, None), depth - 1)
            current_min = min(current_min, value)
        return current_min
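# A minimal sketch of a root driver for rb_minimax above. rb_minimax returns
# values only, so an argmax over the agent's own actions is needed at the
# root; this wrapper and the depth bound of 2 are assumptions, not part of
# the original agent.
def get_action(self, state: GameState) -> GameAction:
    best_value = -np.inf
    best_action = None
    for action in state.get_possible_actions(player_index=self.player_index):
        # Fixing our action first sends the search to the opponents' turn
        # of the same ply.
        value = self.rb_minimax(self.TurnBasedGameState(state, action), 2)
        if value > best_value:
            best_value = value
            best_action = action
    return best_action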
def get_action(self, state: GameState) -> GameAction:
    # Very similar to the greedy agent's get_action, but instead of picking
    # the action with the highest value we pick the action with the highest
    # average value over the opponents' replies, so the snake does well on
    # average.
    best_actions = state.get_possible_actions(player_index=self.player_index)
    best_value = -np.inf
    for action in state.get_possible_actions(player_index=self.player_index):
        avg_value = 0
        actions_len = 0
        for opponents_actions in state.get_possible_actions_dicts_given_action(
                action, player_index=self.player_index):
            opponents_actions[self.player_index] = action
            next_state = get_next_state(state, opponents_actions)
            h_value = _heuristic_for_tournament(next_state, self.player_index)
            avg_value += h_value
            actions_len += 1
            if len(state.opponents_alive) > 2:
                # consider only one opponents-actions dict to save time and memory
                break
        avg_value /= actions_len
        # choose the action according to the average value obtained by performing it
        if avg_value > best_value:
            best_value = avg_value
            best_actions = [action]
        elif avg_value == best_value:
            best_actions.append(action)
    return np.random.choice(best_actions)
def abminimax(self, state: TurnBasedGameState, D: int, alpha, beta):
    if (D == 0 or not state.game_state.snakes[self.player_index].alive
            or state.game_state.is_terminal_state):
        return heuristic(state.game_state, self.player_index)
    best_value = -np.inf
    worst_value = np.inf
    if state.agent_action is None:
        for our_action in state.game_state.get_possible_actions(
                player_index=self.player_index):
            h_value = self.abminimax(
                self.TurnBasedGameState(state.game_state, our_action),
                D, alpha, beta)
            best_value = max(h_value, best_value)
            alpha = max(alpha, best_value)
            if best_value >= beta:
                return np.inf
        return best_value
    else:
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, player_index=self.player_index):
            opponents_actions[self.player_index] = state.agent_action
            next_state = get_next_state(state.game_state, opponents_actions)
            h_value = self.abminimax(
                self.TurnBasedGameState(next_state, None), D - 1, alpha, beta)
            worst_value = min(worst_value, h_value)
            beta = min(worst_value, beta)
            if worst_value <= alpha:
                return -np.inf
        return worst_value
def minimax(self, state: MinimaxAgent.TurnBasedGameState, depth: int,
            alpha=-np.inf, beta=np.inf) -> float:
    if (state.game_state.is_terminal_state or depth > 5
            or len(state.game_state.get_possible_actions(
                player_index=self.player_index)) == 0):
        return heuristic(state.game_state, self.player_index)
    if state.turn == self.Turn.OPPONENTS_TURN:
        curr_min = np.inf
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, player_index=self.player_index):
            opponents_actions[self.player_index] = state.agent_action
            next_state = get_next_state(state.game_state, opponents_actions)
            our_turn = self.TurnBasedGameState(next_state, None)
            curr_min = min(self.minimax(our_turn, depth + 1, alpha, beta),
                           curr_min)
            beta = min(beta, curr_min)
            if curr_min <= alpha:  # alpha cutoff
                return -np.inf
        return curr_min
    else:
        curr_max = -np.inf
        for agent_action in state.game_state.get_possible_actions(
                player_index=self.player_index):
            opp_turn = self.TurnBasedGameState(state.game_state, agent_action)
            curr_max = max(self.minimax(opp_turn, depth, alpha, beta),
                           curr_max)
            alpha = max(curr_max, alpha)
            if curr_max >= beta:  # beta cutoff
                return np.inf
        return curr_max
def RB_alphaBeta(self, state: MinimaxAgent.TurnBasedGameState, depth, alpha, beta):
    if state.game_state.is_terminal_state:
        return self.utility(state)
    if depth == 0:
        return heuristic(state.game_state, self.player_index)
    if state.turn == self.Turn.AGENT_TURN:
        cur_max = -np.inf
        for action in state.game_state.get_possible_actions(
                player_index=self.player_index):
            next_state = self.TurnBasedGameState(state.game_state, action)
            v = self.RB_alphaBeta(next_state, depth - 1, alpha, beta)
            cur_max = max(v, cur_max)
            alpha = max(cur_max, alpha)
            if cur_max >= beta:
                return np.inf
        return cur_max
    else:
        cur_min = np.inf
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, player_index=self.player_index):
            opponents_actions[self.player_index] = state.agent_action
            next_state = get_next_state(state.game_state, opponents_actions)
            tb_next_state = self.TurnBasedGameState(next_state, None)
            v = self.RB_alphaBeta(tb_next_state, depth - 1, alpha, beta)
            cur_min = min(v, cur_min)
            beta = min(cur_min, beta)
            if cur_min <= alpha:
                return -np.inf
        return cur_min
def get_action(self, state: GameState) -> GameAction:
    # Init with all possible actions for the case where the agent is alone;
    # it will (possibly) be overridden below.
    start = time.time()
    best_actions = state.get_possible_actions(player_index=self.player_index)
    best_value = -np.inf
    for action in state.get_possible_actions(player_index=self.player_index):
        for opponents_actions in state.get_possible_actions_dicts_given_action(
                action, player_index=self.player_index):
            opponents_actions[self.player_index] = action
            next_state = get_next_state(state, opponents_actions)
            h_value = self._heuristic(next_state)
            if h_value > best_value:
                best_value = h_value
                best_actions = [action]
            elif h_value == best_value:
                best_actions.append(action)
            if len(state.opponents_alive) > 2:
                # consider only one opponents-actions dict to save time and memory
                break
    end = time.time()
    # Maintain a running average of the time spent per step.
    self.counter_steps += 1
    self.avg_time = ((end - start) + self.avg_time *
                     (self.counter_steps - 1)) / self.counter_steps
    return np.random.choice(best_actions)
def alpha_beta_value(self, state: MinimaxAgent.TurnBasedGameState,
                     agent_to_play, depth, alpha, beta):
    if state.game_state.is_terminal_state or depth == 0:
        return heuristic(state.game_state, self.player_index)
    turn = state.turn
    if turn == agent_to_play:
        cur_max = float('-inf')
        for action in state.game_state.get_possible_actions(self.player_index):
            state.agent_action = action
            v = self.alpha_beta_value(state, agent_to_play, depth, alpha, beta)
            cur_max = max(v, cur_max)
            alpha = max(cur_max, alpha)
            if cur_max >= beta:  # beta cutoff
                return float('inf')
        return cur_max
    else:
        cur_min = float('inf')
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, self.player_index):
            opponents_actions[self.player_index] = state.agent_action
            next_state = get_next_state(state.game_state, opponents_actions)
            turn_next_state = MinimaxAgent.TurnBasedGameState(next_state, None)
            v = self.alpha_beta_value(turn_next_state, agent_to_play,
                                      depth - 1, alpha, beta)
            cur_min = min(v, cur_min)
            beta = min(cur_min, beta)  # was missing: tighten beta for later children
            if cur_min <= alpha:  # alpha cutoff
                return float('-inf')
        return cur_min
def __RB_Minimax__(self, state: TurnBasedGameState, depth):
    # Both the terminal case and the depth cutoff fall back to the heuristic.
    if state.game_state.is_terminal_state or depth == 0:
        return heuristic(state.game_state, self.player_index)
    if state.turn == self.Turn.AGENT_TURN:
        cur_max = -np.inf
        for action in state.game_state.get_possible_actions(self.player_index):
            state.agent_action = action
            cur_value = self.__RB_Minimax__(state, depth)
            cur_max = max(cur_max, cur_value)
        return cur_max
    else:
        assert state.turn == self.Turn.OPPONENTS_TURN
        cur_min = np.inf
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, self.player_index):
            next_state = get_next_state(state.game_state, opponents_actions)
            next_state_with_turn = self.TurnBasedGameState(next_state, None)
            cur_min = min(cur_min,
                          self.__RB_Minimax__(next_state_with_turn, depth - 1))
        return cur_min
def get_action_wrapper(self, state: MinimaxAgent.TurnBasedGameState, dep: int,
                       alpha: float, beta: float) -> float:
    if dep == 0 or state.game_state.is_terminal_state:
        return heuristic(state.game_state, self.player_index)
    turn = state.turn
    if turn == MinimaxAgent.Turn.AGENT_TURN:
        curr_max = -np.inf
        all_actions = state.game_state.get_possible_actions(self.player_index)
        for action in all_actions:
            state.agent_action = action
            temp_val = self.get_action_wrapper(state, dep, alpha, beta)
            curr_max = max(curr_max, temp_val)
            alpha = max(curr_max, alpha)
            if curr_max >= beta:  # beta cutoff
                return np.inf
        return curr_max
    else:
        assert MinimaxAgent.Turn.OPPONENTS_TURN == turn
        curr_min = np.inf
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, self.player_index):
            next_state = get_next_state(state.game_state, opponents_actions)
            next_state_with_turn = MinimaxAgent.TurnBasedGameState(next_state, None)
            temp_val = self.get_action_wrapper(next_state_with_turn,
                                               dep - 1, alpha, beta)
            curr_min = min(curr_min, temp_val)
            beta = min(curr_min, beta)
            if curr_min <= alpha:  # alpha cutoff
                return -np.inf
        return curr_min
def _RB_minimax(self, tb_state: TurnBasedGameState, deciding_agent: Turn, d: int):
    if tb_state.game_state.is_terminal_state:
        return self._utility(tb_state.game_state)
    if d == 0:
        return heuristic(tb_state.game_state, self.player_index)
    if deciding_agent == self.Turn.AGENT_TURN:
        actions = tb_state.game_state.get_possible_actions(
            player_index=self.player_index)
        cur_max = -np.inf
        for action in actions:
            tb_state.agent_action = action
            value = self._RB_minimax(tb_state, self.Turn.OPPONENTS_TURN, d)
            cur_max = max(cur_max, value)
        return cur_max
    else:
        cur_min = np.inf
        for opponents_actions in tb_state.game_state.get_possible_actions_dicts_given_action(
                tb_state.agent_action, player_index=self.player_index):
            next_state = get_next_state(tb_state.game_state, opponents_actions)
            new_tb_state = self.TurnBasedGameState(next_state, None)
            value = self._RB_minimax(new_tb_state, self.Turn.AGENT_TURN, d - 1)
            cur_min = min(cur_min, value)
        return cur_min
def get_action(self, state: GameState) -> GameAction:
    if self.is_trap(state):
        return self.trap_escape(state)
    # init with all possible actions for the case where the agent is alone. it will (possibly) be overridden later
    best_actions = state.get_possible_actions(player_index=self.player_index)
    best_value = -np.inf
    for action in state.get_possible_actions(player_index=self.player_index):
        for opponents_actions in state.get_possible_actions_dicts_given_action(
                action, player_index=self.player_index):
            opponents_actions[self.player_index] = action
            next_state = get_next_state(state, opponents_actions)
            h_value = self.tournament_heuristic(next_state)
            if h_value > best_value:
                best_value = h_value
                best_actions = [action]
            elif h_value == best_value:
                best_actions.append(action)
    return np.random.choice(best_actions)
def alphabeta(self, state: MinimaxAgent.TurnBasedGameState, player_index: int,
              depth: int, alpha: float, beta: float):
    # Max node: the agent's turn.
    if state.turn == MinimaxAgent.Turn.AGENT_TURN:
        # If the game is over or our snake is dead, return the snake's length.
        if (state.game_state.turn_number == state.game_state.game_duration_in_turns
                or not state.game_state.snakes[player_index].alive):
            return state.game_state.snakes[player_index].length, state.agent_action
        # Fall back to the heuristic at depth 0.
        if depth == 0:
            return heuristic(state.game_state, player_index), state.agent_action
        best_action = None
        max_value = -np.inf
        for action in state.game_state.get_possible_actions(player_index=player_index):
            turn_state = self.TurnBasedGameState(state.game_state, action)
            # Pass alpha and beta down to the next node.
            next_state_value, _ = self.alphabeta(turn_state, player_index,
                                                 depth, alpha, beta)
            if next_state_value > max_value:
                best_action = action
                max_value = next_state_value
            # Raise alpha to the best value seen so far.
            alpha = max(max_value, alpha)
            if max_value >= beta:  # beta cutoff
                return np.inf, best_action
        return max_value, best_action
    # Min node: the opponents' turn.
    else:
        best_action = None
        min_value = np.inf
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, player_index=self.player_index):
            next_state = get_next_state(state.game_state, opponents_actions)
            turn_state = self.TurnBasedGameState(next_state, None)
            # Pass alpha and beta down to the next node.
            next_state_value, action = self.alphabeta(turn_state, player_index,
                                                      depth - 1, alpha, beta)
            if next_state_value < min_value:
                best_action = action
                min_value = next_state_value
            # Lower beta to the worst value seen so far.
            beta = min(min_value, beta)
            if min_value <= alpha:  # alpha cutoff
                return -np.inf, best_action
        return min_value, best_action
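# A minimal sketch of a root call for the tuple-returning alphabeta above;
# the get_action signature and the depth bound of 2 are assumptions. The
# window starts fully open at (-inf, inf), so the root itself is never cut.
def get_action(self, state: GameState) -> GameAction:
    _, best_action = self.alphabeta(self.TurnBasedGameState(state, None),
                                    self.player_index, 2, -np.inf, np.inf)
    return best_action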
def RB_minimax(self, state: TurnBasedGameState, depth):
    if state.game_state.is_terminal_state:
        return self.utility(state)
    if depth == 0:
        return heuristic(state.game_state, self.player_index)
    if state.turn == self.Turn.AGENT_TURN:
        cur_max = -np.inf
        for action in state.game_state.get_possible_actions(
                player_index=self.player_index):
            next_state = self.TurnBasedGameState(state.game_state, action)
            v = self.RB_minimax(next_state, depth - 1)
            cur_max = max(v, cur_max)
        return cur_max
    else:
        cur_min = np.inf
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, player_index=self.player_index):
            opponents_actions[self.player_index] = state.agent_action
            next_state = get_next_state(state.game_state, opponents_actions)
            tb_next_state = self.TurnBasedGameState(next_state, None)
            v = self.RB_minimax(tb_next_state, depth)
            cur_min = min(v, cur_min)
        return cur_min
def abminimax(self, state: TurnBasedGameState, D: int, alpha, beta):
    if (D == 0 or not state.game_state.snakes[self.player_index].alive
            or state.game_state.is_terminal_state):
        return heuristic(state.game_state, self.player_index)
    best_value = -np.inf
    worst_value = np.inf
    if state.agent_action is None:
        for our_action in state.game_state.get_possible_actions(
                player_index=self.player_index):
            h_value = self.abminimax(
                self.TurnBasedGameState(state.game_state, our_action),
                D, alpha, beta)
            if h_value > best_value:
                best_value = h_value
            alpha = max(alpha, best_value)
            if best_value >= beta:  # beta cutoff
                return np.inf
        return best_value
    else:
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, player_index=self.player_index):
            opponents_actions[self.player_index] = state.agent_action
            next_state = get_next_state(state.game_state, opponents_actions)
            h_value = self.abminimax(
                self.TurnBasedGameState(next_state, None), D - 1, alpha, beta)
            if h_value < worst_value:
                worst_value = h_value
            beta = min(worst_value, beta)
            if worst_value <= alpha:  # alpha cutoff
                return -np.inf
        return worst_value
def minimax(self, state: TurnBasedGameState, depth: int) -> float:
    if (state.game_state.is_terminal_state or depth > 3
            or len(state.game_state.get_possible_actions(
                player_index=self.player_index)) == 0):
        return heuristic(state.game_state, self.player_index)
    if state.turn == self.Turn.OPPONENTS_TURN:
        curr_min = np.inf
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, player_index=self.player_index):
            opponents_actions[self.player_index] = state.agent_action
            next_state = get_next_state(state.game_state, opponents_actions)
            our_turn = self.TurnBasedGameState(next_state, None)
            curr_min = min(self.minimax(our_turn, depth + 1), curr_min)
        return curr_min
    else:
        curr_max = -np.inf
        for agent_action in state.game_state.get_possible_actions(
                player_index=self.player_index):
            opp_turn = self.TurnBasedGameState(state.game_state, agent_action)
            curr_max = max(self.minimax(opp_turn, depth), curr_max)
        return curr_max
def rb_minimax(self, state: TurnBasedGameState, player_index: int, depth: int):
    # Max node: the agent's turn.
    if state.turn == MinimaxAgent.Turn.AGENT_TURN:
        # If the game is over or our snake is dead, return the snake's length.
        if (state.game_state.turn_number == state.game_state.game_duration_in_turns
                or not state.game_state.snakes[player_index].alive):
            return state.game_state.snakes[player_index].length, state.agent_action
        # Fall back to the heuristic at depth 0.
        if depth == 0:
            return heuristic(state.game_state, player_index), state.agent_action
        best_action = None
        max_value = -np.inf
        # Go over our possible actions and return the one with the max value.
        for action in state.game_state.get_possible_actions(player_index=player_index):
            # Create a turn state with the action under consideration.
            turn_state = self.TurnBasedGameState(state.game_state, action)
            next_state_value, _ = self.rb_minimax(turn_state, player_index, depth)
            if next_state_value > max_value:
                best_action = action
                max_value = next_state_value
        return max_value, best_action
    # Min node: the opponents' turn.
    else:
        best_action = None
        min_value = np.inf
        # Go over all the opponents' possible actions and return the minimum value.
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, player_index=self.player_index):
            # Build the next state and evaluate it one ply deeper.
            next_state = get_next_state(state.game_state, opponents_actions)
            turn_state = self.TurnBasedGameState(next_state, None)
            next_state_value, action = self.rb_minimax(turn_state, player_index,
                                                       depth - 1)
            if next_state_value < min_value:
                best_action = action
                min_value = next_state_value
        return min_value, best_action
def minimax_value(self, state: TurnBasedGameState, agent_to_play, depth):
    if state.game_state.is_terminal_state or depth == 0:
        return heuristic(state.game_state, self.player_index)
    turn = state.turn
    if turn == agent_to_play:
        cur_max = float('-inf')
        for action in state.game_state.get_possible_actions(self.player_index):
            state.agent_action = action
            v = self.minimax_value(state, agent_to_play, depth)
            cur_max = max(v, cur_max)
        return cur_max
    else:
        cur_min = float('inf')
        for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                state.agent_action, self.player_index):
            opponents_actions[self.player_index] = state.agent_action
            next_state = get_next_state(state.game_state, opponents_actions)
            turn_next_state = self.TurnBasedGameState(next_state, None)
            v = self.minimax_value(turn_next_state, agent_to_play, depth - 1)
            cur_min = min(v, cur_min)
        return cur_min
player[env.BLACK] = algos[env.BLACK](env.BLACK)

state = env.get_initial_state()
proc = env.SimpleStateProcessor(state)
proc.process()
color = env.WHITE

while not proc.is_terminal():
    if args['print_moves']:
        os.system('clear')
        print('\n\n\n')
        env.print_state(state)
        print("\n{} ({}) IS MOVING...".format(
            'WHITE' if color == env.WHITE else 'BLACK',
            algos[color].__name__))
    a = player[color].get_action(state, timer=api.Timer(args['time']))
    player[env.WHITE].update_move(a)
    player[env.BLACK].update_move(a)
    state = env.get_next_state(state, a, color)
    proc = env.SimpleStateProcessor(state)
    proc.process()
    color = env.get_oponent_color(color)

if args['print_moves']:
    os.system('clear')
    print('\n\n\n')
    env.print_state(state)
if proc.get_winner() == env.WHITE:
    print("WHITE ({}) WON".format(algos[env.WHITE].__name__))
elif proc.get_winner() == env.BLACK:
    print("BLACK ({}) WON".format(algos[env.BLACK].__name__))
else:
    print("DRAW")