    def run_single_game(self):
        board = CheckerBoard()
        turn = 0
        unresolved = False
        while not board.is_over():
            turn += 1
            log_file.write("#### Turn %3d\n" % turn)
            log_file.write(str(board))
            log_file.flush()
            if turn % 100 == 0:
                print("Over %d turns played" % turn)
            for player, agent in self.players.items():
                while not board.is_over() and board.active == player:
                    print("Player %d is making a decision" % player)
                    start_time = time.time()
                    move = agent.best_move(board)
                    self.stats["thinking_time"][player].append(time.time() - start_time)
                    board.update(move)
            if turn > 200:
                unresolved = True
                break
        self.stats["score"].append(board.winner if not unresolved else -1)
        self.stats["played_rounds"] += turn
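# A minimal sketch of the state run_single_game relies on. The GameRunner
# name is an assumption for illustration; only the players mapping and the
# stats keys are taken from the method above. log_file is assumed to be an
# open module-level file handle, and each agent must expose best_move(board).
import time

class GameRunner:
    def __init__(self, players):
        self.players = players  # {player_id: agent}
        self.stats = {
            "thinking_time": {p: [] for p in players},
            "score": [],
            "played_rounds": 0,
        }

log_file = open("game.log", "w")  # assumed global used by run_single_game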
class CheckersEnvironmentWrapper:
    def __init__(self):
        # env initialization
        self.actions = {}
        self.observation = []
        self.reward = 0
        self.done = False
        self.last_action_idx = 0
        # initialize the board
        self.board = CheckerBoard()
        self.width = len(self.board.get_state_vector())
        self.height = 1
        self.win_reward = 100
        self.defeat_reward = -100
        self.game_turns = 0
        self.score = 0
        self.enable_capturing_reward = False
        for idx, move in enumerate(self.board.get_all_moves()):
            self.actions[idx] = move
        print("total actions: ", len(self.actions))
        self.action_space_size = len(self.actions)
        self.reset()

    def update_game_info(self):
        self.observation = self.board.get_state_vector()

    def restart_environment_episode(self):
        self.board = CheckerBoard()
        self.update_game_info()
        return self.observation

    def _idx_to_action(self, action_idx):
        return self.actions[action_idx]

    def get_valid_idx_actions(self):
        possible_idx_actions = []
        possible_moves = self.board.get_legal_moves()
        for idx, action in self.actions.items():
            if action in possible_moves:
                possible_idx_actions.append(idx)
        return possible_idx_actions

    def step(self, action_idx):
        assert self.board.get_current_player() == self.board.BLACK_PLAYER, \
            "Training player should be black!"
        self.last_action_idx = action_idx
        action = self.actions[action_idx]
        # print("take action ", action_idx, " : ", action)
        white_pieces_before = self.board.get_white_num() + self.board.get_white_kings_num()
        white_kings_pieces_before = self.board.get_white_kings_num()
        black_pieces_before = self.board.get_black_num() + self.board.get_black_kings_num()
        black_kings_pieces_before = self.board.get_black_kings_num()
        self.board.make_move(action)
        if self.board.get_current_player() == self.board.WHITE_PLAYER:
            if not self.board.is_over():
                # make AI opponent move
                self.opponent_move()
        self.update_game_info()
        white_pieces = self.board.get_white_num() + self.board.get_white_kings_num()
        white_kings_pieces = self.board.get_white_kings_num()
        black_pieces = self.board.get_black_num() + self.board.get_black_kings_num()
        black_kings_pieces = self.board.get_black_kings_num()
        if self.board.is_over():
            print("black: p. %d, k. %d, white: p. %d, k. %d" %
                  (black_pieces, black_kings_pieces, white_pieces, white_kings_pieces))
            if self.board.get_winner() == self.board.BLACK_PLAYER:
                # black wins
                print("black wins")
                self.reward = self.win_reward
            else:
                print("white wins")
                self.reward = self.defeat_reward
        else:
            if self.enable_capturing_reward:
                captured_whites = white_pieces_before - white_pieces
                captured_black = black_pieces_before - black_pieces
                self.reward = captured_whites - captured_black
            else:
                self.reward = 0
        self.score += self.reward
        self.game_turns += 1
        self.done = self.board.is_over()
        return self.observation, self.reward, self.done

    def opponent_move(self):
        current_player = self.board.get_current_player()
        moves = self.board.get_legal_moves()
        action = random.choice(moves)
        # print("opponent takes action ", action)
        self.board.make_move(action)
        if self.board.get_current_player() == current_player:
            # print("opponent takes a jump")
            self.opponent_move()

    def reset(self):
        self.restart_environment_episode()
        self.done = False
        self.reward = 0.0
        self.last_action_idx = 0
        self.game_turns = 0
        self.score = 0
        return self.observation, self.reward, self.done
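# A short usage sketch for the wrapper, assuming only the Gym-like API
# defined above: mask invalid actions, pick a random legal one, and step
# until the episode ends. The random policy is illustrative only.
import random

env = CheckersEnvironmentWrapper()
observation, reward, done = env.reset()
while not done:
    valid_actions = env.get_valid_idx_actions()  # indices into env.actions
    action_idx = random.choice(valid_actions)    # stand-in for a learned policy
    observation, reward, done = env.step(action_idx)
print("episode score:", env.score, "turns:", env.game_turns)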
        self.repaint_board()

    def move_ai_piece(self):
        self.board.flip_board_nocopy()  # flip it for use
        best_move = get_moves_computer(self.board)
        self.board.move_piece(best_move.oldLoc, best_move.newLoc)
        self.board.flip_board_nocopy()  # flip it back

    def deselect_piece(self):
        self.selected = None
        self.repaint_board()


matrixChecker = CheckerBoard()
c = CheckerBoardGUI(matrixChecker)
root = Tk()
b = Button(root, text="Deselect Piece", command=c.deselect_piece)
b.pack()
canvas = Canvas(root, width=400, height=400)
canvas.pack()
c.draw_checkerboard()
canvas.bind("<Button-1>", c.click_piece)
canvas.bind("<Button-2>", c.move_selected_piece)
c.repaint_board()
root.mainloop()
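# Tkinter passes an Event object to handlers bound with canvas.bind, so
# click_piece and move_selected_piece must accept one. The body below is an
# assumed sketch of that shape, not the project's actual implementation; the
# 50-pixel square size is derived from the 400x400 canvas over an 8x8 board.
def click_piece(self, event):
    col, row = event.x // 50, event.y // 50  # map click pixels to a board square
    self.selected = (row, col)
    self.repaint_board()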
class CheckersGame(Game):
    def __init__(self, n, history_n=7, cloned=False):
        self.n = n
        self.history_n = history_n
        self.player_mapping = {
            0: BLACK_PLAYER,
            1: WHITE_PLAYER
        }
        self.actions = {}
        self.states_history = None
        self.black_own_history_queue = None
        self.black_enemy_history_queue = None
        self.white_own_history_queue = None
        self.white_enemy_history_queue = None
        if not cloned:
            self.reset()
            for idx, move in enumerate(self.board_impl.get_all_moves()):
                self.actions[idx] = move

    def reset(self):
        self.board_impl = CheckerBoard(self.n)
        self.states_history = {}
        self.black_own_history_queue = deque([], maxlen=self.history_n)
        self.black_enemy_history_queue = deque([], maxlen=self.history_n)
        self.white_own_history_queue = deque([], maxlen=self.history_n)
        self.white_enemy_history_queue = deque([], maxlen=self.history_n)
        initial_state = np.array(self.board_impl.get_true_state())
        initial_state_black_own_history = self.board_impl.get_state_matrix_own(BLACK_PLAYER)
        initial_state_black_enemy_history = self.board_impl.get_state_matrix_enemy(BLACK_PLAYER)
        initial_state_white_own_history = self.board_impl.get_state_matrix_own(WHITE_PLAYER)
        initial_state_white_enemy_history = self.board_impl.get_state_matrix_enemy(WHITE_PLAYER)
        for _ in range(self.history_n):
            self.black_own_history_queue.append(initial_state_black_own_history)
            self.black_enemy_history_queue.append(initial_state_black_enemy_history)
            self.white_own_history_queue.append(initial_state_white_own_history)
            self.white_enemy_history_queue.append(initial_state_white_enemy_history)
        self.has_repeated_states = False

    def clone(self):
        obj = CheckersGame(self.n, history_n=self.history_n, cloned=True)
        obj.board_impl = self.board_impl.clone()
        obj.states_history = copy.copy(self.states_history)
        obj.black_own_history_queue = copy.copy(self.black_own_history_queue)
        obj.black_enemy_history_queue = copy.copy(self.black_enemy_history_queue)
        obj.white_own_history_queue = copy.copy(self.white_own_history_queue)
        obj.white_enemy_history_queue = copy.copy(self.white_enemy_history_queue)
        obj.has_repeated_states = self.has_repeated_states
        obj.actions = self.actions
        return obj

    def get_cur_player(self):
        cur_player = self.board_impl.get_current_player()
        if cur_player == self.board_impl.BLACK_PLAYER:
            return 0
        return 1

    def get_players_num(self):
        return 2

    def get_action_size(self):
        return len(self.actions)

    def get_observation_size(self):
        if self.history_n != 0:
            return (self.history_n * 2, self.n, self.n)
        return (self.n, self.n)

    def make_move(self, action_idx):
        player = self.get_cur_player()
        assert 0 <= action_idx < len(self.actions), "Invalid action index"
        action = self.actions[action_idx]
        is_capturing_move = self.board_impl.make_move(action)
        state = np.array(self.board_impl.get_true_state())
        state_hash = state.tobytes()
        self.black_own_history_queue.append(self.board_impl.get_state_matrix_own(BLACK_PLAYER))
        self.black_enemy_history_queue.append(self.board_impl.get_state_matrix_enemy(BLACK_PLAYER))
        self.white_own_history_queue.append(self.board_impl.get_state_matrix_own(WHITE_PLAYER))
        self.white_enemy_history_queue.append(self.board_impl.get_state_matrix_enemy(WHITE_PLAYER))
        if is_capturing_move:
            # a capture changes the piece count, so no earlier position can
            # recur; clear the repeated-states bookkeeping
            self.states_history = {}
            self.has_repeated_states = False
        if state_hash in self.states_history:
            self.states_history[state_hash] += 1
            self.has_repeated_states = True
        else:
            self.states_history[state_hash] = 1
        return self.get_score(player), self.get_cur_player()

    def get_valid_moves(self, player):
        possible_idx_actions = [0] * self.get_action_size()
        inner_player = self.player_mapping[player]
        possible_moves = self.board_impl.get_legal_moves(player=inner_player)
        # forbid moves leading to repeated states
        for idx, action in self.actions.items():
            if action in possible_moves:
                possible_idx_actions[idx] = 1
                if self.has_repeated_states:
                    # simulate move
                    board_clone = self.board_impl.clone()
                    board_clone.set_current_player(inner_player)
                    board_clone.make_move(action)
                    state = np.array(board_clone.get_true_state())
                    state_hash = state.tobytes()
                    if state_hash in self.states_history:
                        if self.states_history[state_hash] >= 2:
                            # action forbidden due to the potential draw situation
                            possible_idx_actions[idx] = 0
        return np.array(possible_idx_actions)

    def is_ended(self):
        return (self.is_draw()
                or np.sum(self.get_valid_moves(0)) == 0
                or np.sum(self.get_valid_moves(1)) == 0)

    def is_draw(self):
        return self.board_impl.is_draw()

    def get_score(self, player):
        if self.is_ended():
            if self.is_draw():
                return -1
            if np.sum(self.get_valid_moves(player)) == 0:
                return -1
            return 1
        return 0

    def get_observation(self, player):
        inner_player = self.player_mapping[player]
        if self.history_n == 0:
            observation = np.array(self.board_impl.get_observation(inner_player))
        else:
            if inner_player == BLACK_PLAYER:
                own_history = list(reversed(self.black_own_history_queue))
                enemy_history = list(reversed(self.black_enemy_history_queue))
            else:
                own_history = list(reversed(self.white_own_history_queue))
                enemy_history = list(reversed(self.white_enemy_history_queue))
            observation = []
            observation.extend(own_history)
            observation.extend(enemy_history)
            observation = np.array(observation)
        return observation

    def get_observation_str(self, observation):
        return observation.tobytes()

    def get_display_str(self):
        # return self.board_impl.get_state_str()
        return self.board_impl.get_true_state_str()

    def reset_unknown_states(self, player):
        pass

    def _get_state(self):
        return np.array(self.board_impl.get_true_state())

    def get_custom_score(self, player):
        own_pieces, own_kings, enemy_pieces, enemy_kings = self.get_pieces(player)
        return own_pieces + 2 * own_kings - (enemy_pieces + 2 * enemy_kings)

    def get_pieces(self, player):
        inner_player = self.player_mapping[player]
        return self.board_impl.get_pieces(inner_player)
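# A minimal self-play sketch against the CheckersGame interface above: read
# the valid-move mask, pick a random legal action index, and apply it until
# the game ends. The random policy and n=8 (a standard 8x8 board) are
# illustrative assumptions.
import numpy as np

game = CheckersGame(n=8)
player = game.get_cur_player()
while not game.is_ended():
    mask = game.get_valid_moves(player)          # 1 marks a legal action index
    action_idx = np.random.choice(np.flatnonzero(mask))
    score, player = game.make_move(action_idx)   # returns (score, next player)
print("final score for player 0:", game.get_score(0))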
import random
from collections import namedtuple

from checkers import CheckerBoard
from neuralnet import max_index, thing_1, thing_2

Move = namedtuple('Move', ['oldLoc', 'newLoc'])
TrainingData = namedtuple('TrainingData', ['stateVec', 'whoWins'])

game = CheckerBoard()
HUMAN_PLAYING = True


def get_moves_human():
    print("Your pieces are", game.get_team_locs(-1))
    piece_index = int(input("Select an index: "))
    piece = game.get_team_locs(-1)[piece_index]
    possible_moves = game.get_possible_moves(piece[0], piece[1])
    if len(possible_moves) == 0:
        print("No moves possible. Try again.")
        return get_moves_human()
    print("You can move that piece to", possible_moves)
    if len(possible_moves) == 1:
        move = possible_moves[0]
    else:
        move_index = int(input("Select an index: "))
        try:
            move = possible_moves[move_index]
        except (IndexError, KeyError):
            print("Invalid index. Try again.")
            return get_moves_human()
    return Move(piece, move)
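# A hypothetical game-loop sketch showing how get_moves_human might be
# consumed, mirroring how get_moves_computer is used in the GUI snippet
# (move.oldLoc / move.newLoc). is_game_over()[0], move_piece, and
# flip_board_nocopy all appear in the surrounding snippets.
while HUMAN_PLAYING and not game.is_game_over()[0]:
    move = get_moves_human()
    game.move_piece(move.oldLoc, move.newLoc)
    game.flip_board_nocopy()  # hand the turn to the other side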
            max_val = n
            max_i = i
    return max_i


games_per_itr = 50

if __name__ == '__main__':
    try:
        for super_itr in range(100):
            print("Iteration:", super_itr)
            thing1_data = []
            thing2_data = []
            for i in range(games_per_itr):
                # play multiple games
                should_display_game_results = i == games_per_itr - 1
                game = CheckerBoard()
                game.scramble()
                thing1_state_vecs = []
                thing2_state_vecs = []
                # by flipping the board we switch teams but actually stay
                # on team 1 according to the game object
                team_for_real = 1
                moves_taken = 0
                while not game.is_game_over()[0]:
                    # play 1 game
                    moves = game.get_all_possible_moves()
                    possible_moves_lst = []
                    for piece, possible_piece_moves in moves.items():
                        for possible_piece_move in possible_piece_moves:
                            possible_moves_lst.append(Move(piece, possible_piece_move))