def successors(self, state): """successors. The successors function must return (or yield) a list of pairs (a, s) in which a is the action played to reach the state s. :param state: the state for which we want the successors """ next_player = state.get_next_player() is_our_turn = next_player == self.position successors = list() if not is_our_turn and self._already_tracked(state): return list() for action in SeegaRules.get_player_actions(state, next_player): next_state = deepcopy(state) if SeegaRules.act(next_state, action, next_player): successors.append((action, next_state)) # Sort states with their evaulation values (reverse : min/max) successors.sort(key=lambda x: self.evaluate(x[1]), reverse=not is_our_turn) # logging.info(f'Next states for {["oponent", "us"][is_our_turn]} -> {successors}') # Get all not already tracked states if loosing and is our turn if is_our_turn and self._alpha_winning(state) < 0.5: not_tracked = list( filter(lambda elem: not self._already_tracked(elem[1]), successors)) if not_tracked: successors = not_tracked return successors
def evaluate(self, state, details=False):
    """
    The evaluate function returns a value representing the utility function of the board.
    """
    # TODO is it necessary to make the eval function symmetric?
    is_end = SeegaRules.is_end_game(state)
    captured = state.score[self.ME] - state.score[self.OTHER]
    other_is_stuck = state.phase == 2 and SeegaRules.is_player_stuck(
        state, self.OTHER)
    control_center = state.board.get_cell_color((2, 2)) == self.color

    # weights of the linear combination of heuristics
    w_end = 100 * (-1 if captured < 0 else (0 if captured == 0 else 1))
    w_capt = 1
    w_stuck = 1
    w_center = 0.6
    # random is to avoid always taking the first move when there is a draw
    random = .001 * np.random.random()

    if not details:
        return w_capt * captured + \
               w_stuck * (1 if other_is_stuck else 0) + \
               w_end * (1 if is_end else 0) + \
               w_center * (1 if control_center else 0) + \
               random
    else:
        return {
            'captured': captured,
            'other_is_stuck': other_is_stuck,
            'is_end': is_end,
            'control_center': control_center
        }
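# Worked example of the weighted sum above (hypothetical position): with
# captured = 2, the opponent stuck, the game not over, and the centre held,
# the evaluation is 1 * 2 + 1 * 1 + 100 * 0 + 0.6 * 1 + ~0.001 ≈ 3.6.
# Because w_end is 100 * sign(captured), a finished game contributes +100 when
# ahead on captures and -100 when behind, dominating every other term; the
# tiny random term only breaks ties between otherwise equal moves.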
def successors(self, state):
    possible_actions = SeegaRules.get_player_actions(state, self.color.value)
    for action in possible_actions:
        next_state, done = SeegaRules.make_move(deepcopy(state), action,
                                                self.color.value)
        yield action, next_state
def successors(self, state): """successors. The successors function must return (or yield) a list of pairs (a, s) in which a is the action played to reach the state s. :param state: the state for which we want the successors """ for action in SeegaRules.get_player_actions(state, self.position): next_state = deepcopy(state) SeegaRules.act(next_state, action, self.position) yield action, next_state
def successors(self, state):
    possible_actions = SeegaRules.get_player_actions(state, self.color.value)
    print("POSSIBLE MOVES :")
    for i, action in enumerate(possible_actions):
        print(i, action)
    move = int(input("SELECTED MOVE ?"))
    next_state, done = SeegaRules.make_move(deepcopy(state),
                                            possible_actions[move],
                                            self.color.value)
    print(f"SELECTION : {move} - {possible_actions[move]}\n{next_state}")
    yield possible_actions[move], next_state
def defensive_evaluation(self, state):
    defensive_coef = 1 / 8  # how safe the agent plays
    if state.phase == 2:
        score = state.get_player_info(self.position)["score"]
        opp_score = state.get_player_info(self.position * -1)["score"]
        balance = score - opp_score
        if SeegaRules.is_end_game(state) and balance < 0:
            return float('-inf')
        elif SeegaRules.is_end_game(state) and balance > 0:
            return float('inf')
        else:
            return defensive_coef + defensive_coef * self.safety_evaluation(state)
    else:
        return 0
def cutoff(self, state, depth): """cutoff. The cutoff function returns true if the alpha-beta/minimax search has to stop and false otherwise. :param state: the state for which we want to know if we have to apply the cutoff :param depth: the depth of the cutoff """ def timing_cutoff(): return self._running_time > self._max_running_time def depth_cutoff(): return depth > self._max_depth is_cutoff = False # Check if the game is at the end is_cutoff |= SeegaRules.is_end_game(state) # Get the cutoff from the current depth is_cutoff |= depth_cutoff() # Get the current cutoff from the time running the minimax is_cutoff |= timing_cutoff() # Track the maximum depth self._running_depth = max(self._running_depth, depth) return is_cutoff
def cutoff(self, state, depth): """cutoff. The cutoff function returns true if the alpha-beta/minimax search has to stop and false otherwise. :param state: the state for which we want to know if we have to apply the cutoff :param depth: the depth of the cutoff """ return SeegaRules.is_end_game(state) or depth > 0
def cutoff(self, state, depth): """ The cutoff function returns true if the alpha-beta/minimax search has to stop and false otherwise. """ game_over = SeegaRules.is_end_game(state) max_depth = depth == self.max_depth or depth == absolute_max_depth cutoff = game_over or max_depth return cutoff
def make_self_play_move(self, state, fallback_function):
    for action, s in self.successors(state):
        if SeegaRules.is_player_stuck(s, self.OTHER):
            print(" - SELF PLAY MOVE FOUND")
            return action
    print(" - NO SELF PLAY MOVE FOUND, CONTINUE")
    self.repeat_boring_moves = False
    return fallback_function(state)
def play(self, state, remain_time): print("") print(f"Player {self.position} is playing.") print("time remain is ", remain_time, " seconds") if self._total_time is None: self._total_time = remain_time self._remaining_time = remain_time def time_policy(policy="exponential", min_frac=1 / 2000, max_frac=1 / 100): """time_policy. :param policy: the desired policy between ('linear', 'exponential') :param min_frac: the minimum fraction of total time allowed for computing time :param max_frac: the maximum fraction of total time allowed for computing time """ min, max = self._total_time * min_frac, self._total_time * max_frac # Calculate the linear and exponential policy logging.info(f"alpha time : {self._alpha_time}") schedulers = dict( linear=min + self._alpha_time * (max - min), exponential=min + (np.exp(self._alpha_time * math.log(2)) - 1) * (max - min), ) return schedulers[policy] self._max_running_time = time_policy("exponential") print(self._max_running_time) # Begining the search self._start_minimax = perf_counter() best_action = self.iterative_deepening(state) tracked_state = deepcopy(state) SeegaRules.act(tracked_state, best_action, self.position) self._track_state(tracked_state) return best_action
def successors(self, state: SeegaState):
    """
    The successors function must return (or yield) a list of pairs (a, s)
    in which a is the action played to reach the state s.
    """
    if state in self.cache_successors:
        self.cache_successors['hits'] += 1
        return self.cache_successors[state]
    next_player = state.get_next_player()
    possible_actions = SeegaRules.get_player_actions(state, next_player)
    succ = []
    for action in possible_actions:
        next_state, done = SeegaRules.make_move(deepcopy(state), action, next_player)
        succ.append((action, next_state))
    self.cache_successors['misses'] += 1
    self.cache_successors[state] = succ
    return succ
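# The cache above stores successor lists keyed by state and keeps its own
# hit/miss counters in the same dict, which is why the play() method below
# reports currsize as len(self.cache_successors) - 2. A minimal initialisation
# sketch (assumed to live in the agent's __init__, and assuming SeegaState is
# hashable):
#
#     self.cache_successors = {'hits': 0, 'misses': 0}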
def successors(self, state):
    # player = state._next_player
    # actions = SeegaRules.get_player_actions(state, self.color.value)
    # SeegaRules.act(s, a, self.color.value)
    next_player = state._next_player
    actions = SeegaRules.get_player_actions(state, next_player)
    successors = list()
    for a in actions:
        s = deepcopy(state)
        possible_states = SeegaRules.act(s, a, next_player)
        if possible_states:
            successors.append((a, possible_states[0]))
    if state.phase == 2:
        successors.sort(key=lambda t: self.evaluate(t[1]),
                        reverse=next_player != self.position)
    return successors
def play(self, state, remaining_time):
    self.move_nb += 1
    state.__class__ = State
    print(
        f"\nPlayer {self.ME} is playing with {remaining_time} seconds remaining for move #{self.move_nb}"
    )
    print(f"CacheInfo : "
          f"hits={self.cache_successors['hits']}, "
          f"misses={self.cache_successors['misses']}, "
          f"currsize={len(self.cache_successors) - 2}")
    print(f"{state} evaluation={self.evaluate(state):.2f}\n")

    # TODO remove, obsolete since the stuck-player fix
    # if self.repeat_boring_moves:  # fast-forward to save time
    #     assert state.get_latest_player() == self.ME, \
    #         " - ERROR : May not repeat boring moves, latest player isn't self"
    #     print(" - PLAYING BOREDOM")
    #     return self.reverse_last_move(state)

    if self.max_time is None:
        self.max_time = remaining_time
        self.typical_time = remaining_time / self.max_nb_moves
    self.remaining_time = remaining_time

    possible_actions = SeegaRules.get_player_actions(state, self.color.value)
    if len(possible_actions) == 1:
        best_action = possible_actions[0]
    elif state.phase == 1:
        best_action = SeegaRules.random_play(state, self.ME)  # TODO play smart during phase 1
    else:  # phase == 2
        # TODO remove, obsolete since the stuck-player fix
        # if self.can_start_self_play(state):
        #     best_action = self.make_self_play_move(state, fallback_function=self.iterative_deepening)
        best_action = self.iterative_deepening(state)

    print(f" - SELECTED ACTION : {best_action}")
    self.last_action = best_action
    return best_action
def successors(self, state: SeegaState):
    """
    The successors function must return (or yield) a list of pairs (a, s)
    in which a is the action played to reach the state s.
    """
    next_player = state.get_next_player()
    possible_actions = get_possible_actions(state, next_player)
    succ = []
    for action in possible_actions:
        next_state, done = SeegaRules.make_move(deepcopy(state), action, next_player)
        succ.append((action, next_state))
    return succ
def successors(self, state): """successors. The successors function must return (or yield) a list of pairs (a, s) in which a is the action played to reach the state s. :param state: the state for which we want the successors """ next_player = state.get_next_player() is_our_turn = next_player == self.position successors = list() if not is_our_turn and self._already_tracked(state): return list() actions = SeegaRules.get_player_actions(state, next_player) if state.phase == 2: nn_actions = self._agent._state_to_actions(state, reverse=not is_our_turn) actions = list( filter( lambda x: any([self._action_eq(x, a) for a in actions]), nn_actions, )) for action in actions: next_state = deepcopy(state) if SeegaRules.act(next_state, action, next_player): successors.append((action, next_state)) # Get all not already tracked states if loosing and is our turn if is_our_turn and self._alpha_winning(state) < 0.5: not_tracked = list( filter(lambda elem: not self._already_tracked(elem[1]), successors)) if not_tracked: successors = not_tracked return successors
def cutoff(self, state, depth):
    if state in self.state_dict and self.state_dict[state] == [
            state.score[-1], state.score[1]
    ]:  # Redundant state
        return True
    else:
        self.state_dict[state] = [state.score[-1], state.score[1]]
    if SeegaRules.is_end_game(state):
        return True
    else:
        if state.phase == 1 and depth > 0:
            return True
        if depth > self.depth:
            return True
        else:
            if time.time() - self.start_time > self.remaining_time:
                return True
            else:
                return False
def opponent_captures(self, state):
    dimension = state.board.board_shape
    square = state.board.get_board_state()
    opp_color = self.position * -1
    player_color = self.position
    opp_max_cap = 0
    opp_possible_cap = 0
    opp_pieces_on_board = state.board.get_player_pieces_on_board(
        Color(self.position * -1))
    for piece in opp_pieces_on_board:
        moves = SeegaRules.get_effective_cell_moves(state, piece)
        if len(moves) > 0:
            move_threat = 0
            for move in moves:
                # map the two-step positions
                gaps = [(move[0] + a[0], move[1] + a[1])
                        for a in [(0, 2), (0, -2), (2, 0), (-2, 0)]
                        if (0 <= move[0] + a[0] < dimension[0]) and (
                            0 <= move[1] + a[1] < dimension[1])]
                # map the one-step positions (excluding the central cell)
                neig = [(move[0] + a[0], move[1] + a[1])
                        for a in [(0, 1), (0, -1), (1, 0), (-1, 0)]
                        if ((0 <= move[0] + a[0] < dimension[0]) and (
                            0 <= move[1] + a[1] < dimension[1]) and (
                            (move[0] + a[0], move[1] + a[1]) !=
                            (dimension[0] // 2, dimension[1] // 2)))]
                for i in range(len(moves)):
                    if i < len(gaps) and i < len(neig):
                        if square[gaps[i][0]][gaps[i][1]] == opp_color and \
                                square[neig[i][0]][neig[i][1]] == player_color:
                            move_threat += 1
                            opp_possible_cap += 1
            if move_threat > opp_max_cap:
                opp_max_cap = move_threat
    return opp_possible_cap, opp_max_cap
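# Hypothetical usage sketch (not from the original agent): the pair returned
# above could feed a defensive penalty in an evaluation function, where
# opp_possible_cap counts every capture the opponent could set up and
# opp_max_cap is the worst threat from a single piece. The name
# threat_penalty and the weights are made up for illustration.

def threat_penalty(self, state):
    possible_cap, max_cap = self.opponent_captures(state)
    return -(0.25 * possible_cap + 0.5 * max_cap)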
def can_start_self_play(self, state):
    """
    If state is winnable by repeating boring moves,
    set repeat_boring_moves to True and return True
    """
    # TODO do not do self_play if game can be won by exploring the whole search tree till the end (and win)
    other_actions = SeegaRules.get_player_all_cases_actions(state, self.OTHER)
    # TODO optimize (no need to compute the whole eval)
    pieces_captured = self.evaluate(state, details=True)['captured']
    if pieces_captured == state.MAX_SCORE - 1:  # other has only one piece left
        print("OTHER HAS ONLY ONE PIECE LEFT")
        self.repeat_boring_moves = True
        return True
    if len(other_actions) == 0 and pieces_captured > 0:  # opponent is blocked and has fewer captured pieces
        print("OTHER IS BLOCKED AND SELF HAS ADVANTAGE")
        self.repeat_boring_moves = True
        return True
    return False
def evaluate(self, state): """evaluate. The evaluate function must return an integer value representing the utility function of the board. :param state: the state for which we want the evaluation scalar """ cell_groups = dict( center=(2, 2), star_center=[(2, 1), (2, 3), (1, 2), (3, 2)], square_center=[(1, 1), (1, 3), (3, 1), (3, 3)], star_ext=[(2, 0), (2, 4), (0, 2), (4, 2)], square_ext=[(0, 0), (0, 4), (4, 0), (4, 4)], ) def player_wins(player): return state.score[player] == state.MAX_SCORE def evaluate_cells(color): def is_player_cell(cell, color): return state.board.get_cell_color(cell) == color def direct_center_score(): score = 0.0 for base_cell in cell_groups["star_center"]: for oponent_cell, ext_cell in zip( cell_groups["star_center"], cell_groups["star_ext"]): if (is_player_cell(base_cell, color) and is_player_cell(oponent_cell, -color) and is_player_cell(ext_cell, color)): score += 1 return score score = 0.0 if state.phase == 1: score += direct_center_score() else: score += is_player_cell(cell_groups["center"], color) return score score = 0.0 if state.phase == 1: score += evaluate_cells(self.position) score -= evaluate_cells(-self.position) elif state.phase == 2: # Self score score += state.score[self.position] score -= state.score[-self.position] score += evaluate_cells(self.color) score -= evaluate_cells(self.oponent) # Winning state if SeegaRules.is_end_game(state): score += 100 if self._alpha_winning(state) > 0.5 else -100 return score
def get_possible_actions(state, player_id):
    return SeegaRules.get_player_all_cases_actions(state, player_id)
def is_end_game(state):
    return SeegaRules.is_end_game(state)
def is_player_stuck(state, player_id):
    return SeegaRules.is_player_stuck(state, player_id)
def cutoff(self, state, depth):
    game_over = SeegaRules.is_end_game(state)
    return game_over or depth == 1  # greedy search
def get_opponent_neighbours(board, cell, player_id):
    return SeegaRules._get_opponent_neighbours(board, cell, player_id)
def play(self, state, remain_time): print(f"\nPlayer {self.position} is playing.") print("time remain is ", remain_time, " seconds") print(state) return SeegaRules.random_play(state, self.position)