class CheckersEnvironmentWrapper:
    """Single-agent episode wrapper around ``CheckerBoard``.

    The training agent must be the black player (``step`` asserts this).
    After each agent move that hands the turn to white, the wrapper plays
    white's reply by picking uniformly at random among the legal moves.
    Actions are addressed by integer index into ``self.actions``, which is
    built once from ``board.get_all_moves()``.
    """

    def __init__(self):
        # env initialization
        self.observation = []
        self.reward = 0
        self.done = False
        self.last_action_idx = 0

        # initialize the board
        self.board = CheckerBoard()
        self.width = len(self.board.get_state_vector())
        self.height = 1

        self.win_reward = 100
        self.defeat_reward = -100

        self.game_turns = 0
        self.score = 0

        # When True, non-terminal steps are rewarded with the difference
        # between white and black pieces lost during the step.
        self.enable_capturing_reward = False

        # index -> move lookup table covering every move the board reports
        self.actions = dict(enumerate(self.board.get_all_moves()))
        print("total actions: ", len(self.actions))
        self.action_space_size = len(self.actions)

        self.reset()

    def update_game_info(self):
        """Refresh ``self.observation`` from the board's state vector."""
        self.observation = self.board.get_state_vector()

    def restart_environment_episode(self):
        """Replace the board with a fresh one and return the new observation."""
        self.board = CheckerBoard()
        self.update_game_info()
        return self.observation

    def _idx_to_action(self, action_idx):
        """Return the move stored under ``action_idx``."""
        return self.actions[action_idx]

    def get_valid_idx_actions(self):
        """Return the action indices whose move is currently legal."""
        legal_moves = self.board.get_legal_moves()
        return [idx for idx, move in self.actions.items() if move in legal_moves]

    def _count_pieces(self):
        """Return ``(white_total, white_kings, black_total, black_kings)``.

        Each total is ``get_<colour>_num() + get_<colour>_kings_num()``.
        """
        white_kings = self.board.get_white_kings_num()
        black_kings = self.board.get_black_kings_num()
        return (
            self.board.get_white_num() + white_kings,
            white_kings,
            self.board.get_black_num() + black_kings,
            black_kings,
        )

    def step(self, action_idx):
        """Play the agent's move, let white reply, and score the step.

        Args:
            action_idx: key into ``self.actions`` of the move to play.

        Returns:
            ``(observation, reward, done)``. When the game is over the reward
            is ``win_reward`` if the board reports black as winner and
            ``defeat_reward`` otherwise. While the game continues the reward
            is ``captured_whites - captured_blacks`` for this step if
            ``enable_capturing_reward`` is set, else ``0``.

        Raises:
            AssertionError: if it is not black's turn.
            KeyError: if ``action_idx`` is not a known action index.
        """
        assert self.board.get_current_player() == self.board.BLACK_PLAYER, \
            "Training player should be black!"
        self.last_action_idx = action_idx
        action = self.actions[action_idx]

        # Only the totals are needed to measure captures made during the step.
        white_pieces_before, _, black_pieces_before, _ = self._count_pieces()

        self.board.make_move(action)
        # The turn stays with black when the agent has to continue moving;
        # white only replies once the turn was handed over and play goes on.
        if (self.board.get_current_player() == self.board.WHITE_PLAYER
                and not self.board.is_over()):
            self.opponent_move()

        self.update_game_info()

        (white_pieces, white_kings_pieces,
         black_pieces, black_kings_pieces) = self._count_pieces()

        self.done = self.board.is_over()
        if self.done:
            print("black: p. %d, k. %d, white: p. %d, k. %d" %
                  (black_pieces, black_kings_pieces,
                   white_pieces, white_kings_pieces))
            if self.board.get_winner() == self.board.BLACK_PLAYER:
                print("black wins")
                self.reward = self.win_reward
            else:
                # Every outcome other than a black win is scored as a defeat.
                print("white wins")
                self.reward = self.defeat_reward
        elif self.enable_capturing_reward:
            captured_whites = white_pieces_before - white_pieces
            captured_black = black_pieces_before - black_pieces
            self.reward = captured_whites - captured_black
        else:
            self.reward = 0

        self.score += self.reward
        self.game_turns += 1
        return self.observation, self.reward, self.done

    def opponent_move(self):
        """Play random legal moves for the side to move until the turn passes.

        The loop repeats while the board keeps the same player on turn after
        a move (the opponent continues with a jump).
        """
        mover = self.board.get_current_player()
        while True:
            self.board.make_move(random.choice(self.board.get_legal_moves()))
            if self.board.get_current_player() != mover:
                break

    def reset(self):
        """Start a new episode and return ``(observation, reward, done)``."""
        self.restart_environment_episode()
        self.done = False
        self.reward = 0.0
        self.last_action_idx = 0
        self.game_turns = 0
        self.score = 0
        return self.observation, self.reward, self.done
class CheckersGame(Game):
    """``Game`` adapter around ``CheckerBoard`` with history and repetition tracking.

    Players are exposed as ``0`` (black) and ``1`` (white) and mapped to the
    board's own constants through ``player_mapping``. Actions are integer
    indices into ``self.actions``, built from ``board_impl.get_all_moves()``.

    For every player the last ``history_n`` "own" and "enemy" state matrices
    are kept in bounded deques and stacked into the observation. Board states
    seen since the last capturing move are counted in ``states_history`` so
    that moves leading to an already twice-seen state can be masked out.
    """

    def __init__(self, n, history_n=7, cloned=False):
        """Create a game on a board of size ``n``.

        Args:
            n: board size passed to ``CheckerBoard``.
            history_n: number of past positions kept per history queue.
            cloned: when True, skip board and action-table setup because
                ``clone`` fills these attributes in itself.
        """
        self.n = n
        self.history_n = history_n

        self.player_mapping = {
            0: BLACK_PLAYER,
            1: WHITE_PLAYER,
        }

        self.actions = {}
        self.states_history = None
        self.black_own_history_queue = None
        self.black_enemy_history_queue = None
        self.white_own_history_queue = None
        self.white_enemy_history_queue = None
        # Always define these so a cloned shell has a complete attribute set
        # even before ``clone`` overwrites them.
        self.board_impl = None
        self.has_repeated_states = False

        if not cloned:
            self.reset()
            self.actions = dict(enumerate(self.board_impl.get_all_moves()))

    def _push_history(self, repeat=1):
        """Append the current own/enemy matrices to all four queues ``repeat`` times."""
        frames = (
            (self.black_own_history_queue,
             self.board_impl.get_state_matrix_own(BLACK_PLAYER)),
            (self.black_enemy_history_queue,
             self.board_impl.get_state_matrix_enemy(BLACK_PLAYER)),
            (self.white_own_history_queue,
             self.board_impl.get_state_matrix_own(WHITE_PLAYER)),
            (self.white_enemy_history_queue,
             self.board_impl.get_state_matrix_enemy(WHITE_PLAYER)),
        )
        for _ in range(repeat):
            for queue, frame in frames:
                queue.append(frame)

    def reset(self):
        """Start from a fresh board and pre-fill the history with the initial position."""
        self.board_impl = CheckerBoard(self.n)
        self.states_history = {}

        self.black_own_history_queue = deque([], maxlen=self.history_n)
        self.black_enemy_history_queue = deque([], maxlen=self.history_n)
        self.white_own_history_queue = deque([], maxlen=self.history_n)
        self.white_enemy_history_queue = deque([], maxlen=self.history_n)

        # Fill every queue to capacity so observations have a fixed shape
        # from the very first move.
        self._push_history(repeat=self.history_n)

        self.has_repeated_states = False

    def clone(self):
        """Return a copy that can be advanced independently of this game.

        The board is cloned; the history containers are shallow-copied; the
        action table is shared with the original.
        """
        obj = CheckersGame(self.n, history_n=self.history_n, cloned=True)
        obj.board_impl = self.board_impl.clone()
        obj.states_history = copy.copy(self.states_history)
        obj.black_own_history_queue = copy.copy(self.black_own_history_queue)
        obj.black_enemy_history_queue = copy.copy(self.black_enemy_history_queue)
        obj.white_own_history_queue = copy.copy(self.white_own_history_queue)
        obj.white_enemy_history_queue = copy.copy(self.white_enemy_history_queue)
        obj.has_repeated_states = self.has_repeated_states
        obj.actions = self.actions
        return obj

    def get_cur_player(self):
        """Return ``0`` if black is to move, otherwise ``1``."""
        is_black = (self.board_impl.get_current_player()
                    == self.board_impl.BLACK_PLAYER)
        return 0 if is_black else 1

    def get_players_num(self):
        """Return the number of players (always 2)."""
        return 2

    def get_action_size(self):
        """Return the size of the action table."""
        return len(self.actions)

    def get_observation_size(self):
        """Return ``(history_n * 2, n, n)``, or ``(n, n)`` when history is disabled."""
        if self.history_n != 0:
            return (self.history_n * 2, self.n, self.n)
        return (self.n, self.n)

    def make_move(self, action_idx):
        """Apply the action and update history and repetition bookkeeping.

        Args:
            action_idx: index into the action table.

        Returns:
            ``(score, next_player)``: the score of the player who moved, as
            given by ``get_score``, and the player now on turn.

        Raises:
            AssertionError: if ``action_idx`` is outside the action table.
        """
        player = self.get_cur_player()
        assert 0 <= action_idx < len(self.actions), "Invalid action index"

        is_capturing_move = self.board_impl.make_move(self.actions[action_idx])

        # tobytes() replaces the deprecated ndarray.tostring() alias and
        # yields the same bytes.
        state_hash = self._get_state().tobytes()

        self._push_history()

        if is_capturing_move:
            # clear states history for repeated states
            # since we don't need to check for the states
            # which cannot be repeated due to changed
            # num of pieces on the board
            self.states_history = {}
            self.has_repeated_states = False

        if state_hash in self.states_history:
            self.states_history[state_hash] += 1
            self.has_repeated_states = True
        else:
            self.states_history[state_hash] = 1

        return self.get_score(player), self.get_cur_player()

    def _leads_to_repeated_state(self, inner_player, action):
        """Return True if ``action`` reaches a state already recorded at least twice.

        The move is simulated on a clone of the board with ``inner_player``
        set as the player to move.
        """
        board_clone = self.board_impl.clone()
        board_clone.set_current_player(inner_player)
        board_clone.make_move(action)
        state_hash = np.array(board_clone.get_true_state()).tobytes()
        return self.states_history.get(state_hash, 0) >= 2

    def get_valid_moves(self, player):
        """Return a 0/1 array over the action table for ``player``.

        An entry is 1 when the action is legal for the player. Once a
        repeated state has been observed, legal actions that would reach a
        state already seen twice are forced back to 0.
        """
        possible_idx_actions = [0] * self.get_action_size()

        inner_player = self.player_mapping[player]
        possible_moves = self.board_impl.get_legal_moves(player=inner_player)

        for idx, action in self.actions.items():
            if action not in possible_moves:
                continue
            possible_idx_actions[idx] = 1
            # forbid repeated states
            if (self.has_repeated_states
                    and self._leads_to_repeated_state(inner_player, action)):
                # Action forbidden due to the potential draw situation
                possible_idx_actions[idx] = 0

        return np.array(possible_idx_actions)

    def is_ended(self):
        """Return a truthy value when the game is drawn or either player has no valid move."""
        return (self.is_draw()
                or np.sum(self.get_valid_moves(0)) == 0
                or np.sum(self.get_valid_moves(1)) == 0)

    def is_draw(self):
        """Return the board's own draw verdict."""
        return self.board_impl.is_draw()

    def get_score(self, player):
        """Return the score of ``player``.

        ``0`` while the game is running. Once it has ended: ``-1`` on a draw
        or when ``player`` has no valid moves, ``1`` otherwise.
        """
        if not self.is_ended():
            return 0
        if self.is_draw():
            return -1
        if np.sum(self.get_valid_moves(player)) == 0:
            return -1
        return 1

    def get_observation(self, player):
        """Return the observation array for ``player``.

        With ``history_n == 0`` this is the board's own observation.
        Otherwise it is the player's own-history matrices followed by the
        enemy-history matrices, each ordered newest first.
        """
        inner_player = self.player_mapping[player]

        if self.history_n == 0:
            return np.array(self.board_impl.get_observation(inner_player))

        if inner_player == BLACK_PLAYER:
            own_queue = self.black_own_history_queue
            enemy_queue = self.black_enemy_history_queue
        else:
            own_queue = self.white_own_history_queue
            enemy_queue = self.white_enemy_history_queue

        return np.array([*reversed(own_queue), *reversed(enemy_queue)])

    def get_observation_str(self, observation):
        """Return the raw bytes of ``observation``.

        Uses ``tobytes()``, the replacement for the deprecated
        ``ndarray.tostring()`` alias; the returned bytes are the same.
        """
        return observation.tobytes()

    def get_display_str(self):
        """Return the board's true-state string for display."""
        return self.board_impl.get_true_state_str()

    def reset_unknown_states(self, player):
        """Do nothing; this game keeps no per-player unknown state to reset."""
        pass

    def _get_state(self):
        """Return the board's true state as a NumPy array."""
        return np.array(self.board_impl.get_true_state())

    def get_custom_score(self, player):
        """Return the material balance for ``player``, counting kings twice."""
        own_pieces, own_kings, enemy_pieces, enemy_kings = self.get_pieces(player)
        return own_pieces + 2 * own_kings - (enemy_pieces + 2 * enemy_kings)

    def get_pieces(self, player):
        """Return the board's piece counts for ``player``.

        ``get_custom_score`` unpacks the result as
        ``(own_pieces, own_kings, enemy_pieces, enemy_kings)``.
        """
        inner_player = self.player_mapping[player]
        return self.board_impl.get_pieces(inner_player)