Example no. 1
    def __init__(self):
        # env initialization
        self.actions = {}
        self.observation = []
        self.reward = 0
        self.done = False
        self.last_action_idx = 0

        # initialize the board
        self.board = CheckerBoard()

        self.width = len(self.board.get_state_vector())
        self.height = 1

        self.win_reward = 100
        self.defeat_reward = -100

        self.game_turns = 0
        self.score = 0

        self.enable_capturing_reward = False

        for idx, move in enumerate(self.board.get_all_moves()):
            self.actions[idx] = move

        print("total actions: ", len(self.actions))

        self.action_space_size = len(self.actions)

        self.reset()
Example no. 2
    def run_single_game(self):
        board = CheckerBoard()
        turn = 0
        unresolved = False

        while not board.is_over():
            turn += 1

            log_file.write("#### Turn %3d\n" % turn)
            log_file.write(str(board))
            log_file.flush()

            if turn % 100 == 0:
                print("Over %d turns played" % turn)

            for player, agent in self.players.items():
                while not board.is_over() and board.active == player:
                    print("Player %d is making a decision" % player)
                    start_time = time.time()
                    move = agent.best_move(board)
                    self.stats["thinking_time"][player].append(time.time() -
                                                               start_time)
                    board.update(move)

            if turn > 200:
                unresolved = True
                break

        self.stats["score"].append(board.winner if not unresolved else -1)
        self.stats["played_rounds"] += turn
Example no. 3
    def reset(self):
        self.board_impl = CheckerBoard(self.n)

        self.states_history = {}

        self.black_own_history_queue = deque([], maxlen=self.history_n)
        self.black_enemy_history_queue = deque([], maxlen=self.history_n)
        self.white_own_history_queue = deque([], maxlen=self.history_n)
        self.white_enemy_history_queue = deque([], maxlen=self.history_n)

        initial_state = np.array(self.board_impl.get_true_state())

        initial_state_black_own_history = self.board_impl.get_state_matrix_own(BLACK_PLAYER)
        initial_state_black_enemy_history = self.board_impl.get_state_matrix_enemy(BLACK_PLAYER)
        initial_state_white_own_history = self.board_impl.get_state_matrix_own(WHITE_PLAYER)
        initial_state_white_enemy_history = self.board_impl.get_state_matrix_enemy(WHITE_PLAYER)

        for idx in range(self.history_n):
            self.black_own_history_queue.append(initial_state_black_own_history)
            self.black_enemy_history_queue.append(initial_state_black_enemy_history)
            self.white_own_history_queue.append(initial_state_white_own_history)
            self.white_enemy_history_queue.append(initial_state_white_enemy_history)

        self.has_repeated_states = False
Example no. 4
    def restart_environment_episode(self):
        self.board = CheckerBoard()

        self.update_game_info()

        return self.observation
Example no. 5
class CheckersEnvironmentWrapper:
    def __init__(self):
        # env initialization
        self.actions = {}
        self.observation = []
        self.reward = 0
        self.done = False
        self.last_action_idx = 0

        # initialize the board
        self.board = CheckerBoard()

        self.width = len(self.board.get_state_vector())
        self.height = 1

        self.win_reward = 100
        self.defeat_reward = -100

        self.game_turns = 0
        self.score = 0

        self.enable_capturing_reward = False

        for idx, move in enumerate(self.board.get_all_moves()):
            self.actions[idx] = move

        print("total actions: ", len(self.actions))

        self.action_space_size = len(self.actions)

        self.reset()

    def update_game_info(self):
        self.observation = self.board.get_state_vector()

    def restart_environment_episode(self):
        self.board = CheckerBoard()

        self.update_game_info()

        return self.observation

    def _idx_to_action(self, action_idx):
        return self.actions[action_idx]

    def get_valid_idx_actions(self):
        possible_idx_actions = []

        possible_moves = self.board.get_legal_moves()

        for idx, action in self.actions.items():
            if action in possible_moves:
                possible_idx_actions.append(idx)

        return possible_idx_actions

    def step(self, action_idx):
        assert self.board.get_current_player() == self.board.BLACK_PLAYER, \
            "Training player should be black!"

        self.last_action_idx = action_idx

        action = self.actions[action_idx]

        # print("take action ", action_idx, " : ", action)

        white_pieces_before = (self.board.get_white_num() +
                               self.board.get_white_kings_num())
        white_kings_pieces_before = self.board.get_white_kings_num()
        black_pieces_before = (self.board.get_black_num() +
                               self.board.get_black_kings_num())
        black_kings_pieces_before = self.board.get_black_kings_num()

        self.board.make_move(action)

        if self.board.get_current_player() == self.board.WHITE_PLAYER:
            if not self.board.is_over():
                # make AI opponent move
                self.opponent_move()

        self.update_game_info()

        white_pieces = (self.board.get_white_num() +
                        self.board.get_white_kings_num())
        white_kings_pieces = self.board.get_white_kings_num()
        black_pieces = (self.board.get_black_num() +
                        self.board.get_black_kings_num())
        black_kings_pieces = self.board.get_black_kings_num()

        if self.board.is_over():
            print("black: p. %d, k. %d, white: p. %d, k. %d" %
                  (black_pieces, black_kings_pieces, white_pieces,
                   white_kings_pieces))

            if self.board.get_winner() == self.board.BLACK_PLAYER:
                # black wins
                print("black wins")
                self.reward = self.win_reward
            else:
                print("white wins")
                self.reward = self.defeat_reward
        else:
            if self.enable_capturing_reward:
                captured_whites = white_pieces_before - white_pieces
                captured_black = black_pieces_before - black_pieces

                self.reward = captured_whites - captured_black
            else:
                self.reward = 0

        self.score += self.reward
        self.game_turns += 1

        self.done = self.board.is_over()

        return self.observation, self.reward, self.done

    def opponent_move(self):
        current_player = self.board.get_current_player()

        moves = self.board.get_legal_moves()
        action = random.choice(moves)

        # print("opponent takes action ", action)

        self.board.make_move(action)

        if self.board.get_current_player() == current_player:
            # print("opponent takes a jump")
            self.opponent_move()

    def reset(self):
        self.restart_environment_episode()
        self.done = False
        self.reward = 0.0
        self.last_action_idx = 0

        self.game_turns = 0
        self.score = 0

        return self.observation, self.reward, self.done
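
# The wrapper above exposes a small gym-style API: reset() returns an initial
# (observation, reward, done) triple, get_valid_idx_actions() lists the legal
# indices into the fixed action table, and step() advances the game and lets
# the built-in random opponent reply. A minimal rollout loop, as a sketch
# rather than part of the original code:
import random

env = CheckersEnvironmentWrapper()
observation, reward, done = env.reset()

while not done:
    # sample uniformly among the indices that map to currently legal moves
    action_idx = random.choice(env.get_valid_idx_actions())
    observation, reward, done = env.step(action_idx)

print("episode over after %d turns, score %d" % (env.game_turns, env.score))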
Example no. 6

    def move_ai_piece(self):
        self.board.flip_board_nocopy()  # flip it for use

        best_move = get_moves_computer(self.board)

        self.board.move_piece(best_move.oldLoc, best_move.newLoc)

        self.board.flip_board_nocopy()  # flip it back

    def deselect_piece(self):
        self.selected = None
        self.repaint_board()


from tkinter import Tk, Button, Canvas  # assumed import; the fragment omits the file header

matrixChecker = CheckerBoard()
c = CheckerBoardGUI(matrixChecker)
root = Tk()
b = Button(root, text="Deselect Piece", command=c.deselect_piece)
b.pack()
canvas = Canvas(root, width=400, height=400)
canvas.pack()
c.draw_checkerboard()
canvas.bind("<Button-1>", c.click_piece)
canvas.bind("<Button-2>", c.move_selected_piece)
c.repaint_board()
root.mainloop()
Example no. 7
class CheckersGame(Game):
    def __init__(self, n, history_n=7, cloned=False):
        self.n = n
        self.history_n = history_n

        self.player_mapping = {
            0: BLACK_PLAYER,
            1: WHITE_PLAYER
        }

        self.actions = {}
        self.states_history = None

        self.black_own_history_queue = None
        self.black_enemy_history_queue = None
        self.white_own_history_queue = None
        self.white_enemy_history_queue = None

        if not cloned:
            self.reset()

            for idx, move in enumerate(self.board_impl.get_all_moves()):
                self.actions[idx] = move

    def reset(self):
        self.board_impl = CheckerBoard(self.n)

        self.states_history = {}

        self.black_own_history_queue = deque([], maxlen=self.history_n)
        self.black_enemy_history_queue = deque([], maxlen=self.history_n)
        self.white_own_history_queue = deque([], maxlen=self.history_n)
        self.white_enemy_history_queue = deque([], maxlen=self.history_n)

        initial_state = np.array(self.board_impl.get_true_state())

        initial_state_black_own_history = self.board_impl.get_state_matrix_own(BLACK_PLAYER)
        initial_state_black_enemy_history = self.board_impl.get_state_matrix_enemy(BLACK_PLAYER)
        initial_state_white_own_history = self.board_impl.get_state_matrix_own(WHITE_PLAYER)
        initial_state_white_enemy_history = self.board_impl.get_state_matrix_enemy(WHITE_PLAYER)

        for idx in range(self.history_n):
            self.black_own_history_queue.append(initial_state_black_own_history)
            self.black_enemy_history_queue.append(initial_state_black_enemy_history)
            self.white_own_history_queue.append(initial_state_white_own_history)
            self.white_enemy_history_queue.append(initial_state_white_enemy_history)

        self.has_repeated_states = False

    def clone(self):
        obj = CheckersGame(self.n, history_n=self.history_n, cloned=True)

        obj.board_impl = self.board_impl.clone()
        obj.states_history = copy.copy(self.states_history)

        obj.black_own_history_queue = copy.copy(self.black_own_history_queue)
        obj.black_enemy_history_queue = copy.copy(self.black_enemy_history_queue)
        obj.white_own_history_queue = copy.copy(self.white_own_history_queue)
        obj.white_enemy_history_queue = copy.copy(self.white_enemy_history_queue)

        obj.has_repeated_states = self.has_repeated_states
        obj.actions = self.actions

        return obj

    def get_cur_player(self):
        cur_player = self.board_impl.get_current_player()
        if cur_player == self.board_impl.BLACK_PLAYER:
            return 0
        else:
            return 1

    def get_players_num(self):
        return 2

    def get_action_size(self):
        return len(self.actions)

    def get_observation_size(self):
        if self.history_n != 0:
            return (self.history_n * 2, self.n, self.n)
        else:
            return (self.n, self.n)

    def make_move(self, action_idx):

        player = self.get_cur_player()

        assert 0 <= action_idx < len(self.actions), "Invalid action index"

        action = self.actions[action_idx]

        is_capturing_move = self.board_impl.make_move(action)

        state = np.array(self.board_impl.get_true_state())
        state_hash = state.tobytes()

        self.black_own_history_queue.append(self.board_impl.get_state_matrix_own(BLACK_PLAYER))
        self.black_enemy_history_queue.append(self.board_impl.get_state_matrix_enemy(BLACK_PLAYER))

        self.white_own_history_queue.append(self.board_impl.get_state_matrix_own(WHITE_PLAYER))
        self.white_enemy_history_queue.append(self.board_impl.get_state_matrix_enemy(WHITE_PLAYER))

        if is_capturing_move:
            # clear states history for repeated states
            # since we don't need to check for the states
            # which cannot be repeated due to changed
            # num of pieces on the board
            self.states_history = {}
            self.has_repeated_states = False

        if state_hash in self.states_history:
            repeated_states = self.states_history[state_hash]
            self.states_history[state_hash] = repeated_states + 1
            self.has_repeated_states = True
        else:
            self.states_history[state_hash] = 1

        return self.get_score(player), self.get_cur_player()

    def get_valid_moves(self, player):
        possible_idx_actions = [0] * self.get_action_size()

        inner_player = self.player_mapping[player]

        possible_moves = self.board_impl.get_legal_moves(player=inner_player)

        # forbid repeated states
        for idx, action in self.actions.items():
            if action in possible_moves:
                possible_idx_actions[idx] = 1

                if self.has_repeated_states:
                    # simulate move
                    board_clone = self.board_impl.clone()
                    board_clone.set_current_player(inner_player)
                    board_clone.make_move(action)

                    state = np.array(board_clone.get_true_state())
                    state_hash = state.tobytes()

                    if state_hash in self.states_history:
                        repeated_states = self.states_history[state_hash]
                        if repeated_states >= 2:
                            # Action forbidden due to the potential draw situation
                            possible_idx_actions[idx] = 0

        return np.array(possible_idx_actions)

    def is_ended(self):
        return self.is_draw() or np.sum(self.get_valid_moves(0)) == 0 or np.sum(self.get_valid_moves(1)) == 0

    def is_draw(self):
        return self.board_impl.is_draw()

    def get_score(self, player):
        if self.is_ended():
            if self.is_draw():
                return -1

            if np.sum(self.get_valid_moves(player)) == 0:
                return -1
            else:
                return 1

        return 0

    def get_observation(self, player):
        inner_player = self.player_mapping[player]

        if self.history_n == 0:
            observation = np.array(self.board_impl.get_observation(inner_player))
        else:
            if inner_player == BLACK_PLAYER:
                own_history = list(reversed(self.black_own_history_queue))
                enemy_history = list(reversed(self.black_enemy_history_queue))
            else:
                own_history = list(reversed(self.white_own_history_queue))
                enemy_history = list(reversed(self.white_enemy_history_queue))

            observation = []
            observation.extend(own_history)
            observation.extend(enemy_history)

            observation = np.array(observation)

        return observation

    def get_observation_str(self, observation):
        return observation.tobytes()

    def get_display_str(self):
        # return self.board_impl.get_state_str()
        return self.board_impl.get_true_state_str()

    def reset_unknown_states(self, player):
        pass

    def _get_state(self):
        return np.array(self.board_impl.get_true_state())

    def get_custom_score(self, player):
        own_pieces, own_kings, enemy_pieces, enemy_kings = self.get_pieces(player)

        return own_pieces + 2 * own_kings - (enemy_pieces + 2 * enemy_kings)

    def get_pieces(self, player):
        inner_player = self.player_mapping[player]

        return self.board_impl.get_pieces(inner_player)
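
# make_move and get_valid_moves together implement a threefold-repetition
# guard: each reached position's hash is counted in states_history, the
# counter is wiped whenever a capture changes the piece count (such positions
# can never recur), and any move that would reach a position already seen
# twice is masked out. The same bookkeeping in isolation, as a toy sketch
# with strings standing in for board-state hashes:
from collections import Counter

states_history = Counter()


def record(state_hash, was_capture):
    # captures reset the history, mirroring make_move
    global states_history
    if was_capture:
        states_history = Counter()
    states_history[state_hash] += 1


def is_forbidden(next_state_hash):
    # mirrors get_valid_moves: block a move reaching a twice-seen position
    return states_history[next_state_hash] >= 2


record("pos-A", was_capture=False)
record("pos-B", was_capture=False)
record("pos-A", was_capture=False)  # "pos-A" has now been seen twice
assert is_forbidden("pos-A")        # a third occurrence would force a draw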
Example no. 8
import random
from collections import namedtuple

from checkers import CheckerBoard
from neuralnet import max_index, thing_1, thing_2

Move = namedtuple('Move', ['oldLoc', 'newLoc'])
TrainingData = namedtuple('TrainingData', ['stateVec', 'whoWins'])

game = CheckerBoard()

HUMAN_PLAYING = True


def get_moves_human():
    print("Your pieces are", game.get_team_locs(-1))
    piece_index = int(input("Select an index: "))
    piece = game.get_team_locs(-1)[piece_index]

    possible_moves = game.get_possible_moves(piece[0], piece[1])
    if len(possible_moves) == 0:
        print("No moves possible. Try again.")
        return get_moves_human()
    print("You can move that piece to", possible_moves)
    if len(possible_moves) == 1:
        move = possible_moves[0]
    else:
        move_index = int(input("Select an index: "))
        try:
            move = possible_moves[move_index]
        except (IndexError, KeyError):
            # invalid selection: restart the prompt
            print("Invalid index. Try again.")
            return get_moves_human()
    # assumed return shape: the Move namedtuple defined above
    return Move(oldLoc=piece, newLoc=move)

games_per_itr = 50
if __name__ == '__main__':
    try:
        for super_itr in range(100):

            print("Iteration:", super_itr)
            thing1_data = []
            thing2_data = []

            for i in range(games_per_itr):  # play multiple games
                should_display_game_results = i == games_per_itr - 1
                game = CheckerBoard()
                game.scramble()
                thing1_state_vecs = []
                thing2_state_vecs = []

                # flipping the board switches sides while the game object
                # still sees us as team 1
                team_for_real = 1
                moves_taken = 0
                while not game.is_game_over()[0]:  # play 1 game

                    moves = game.get_all_possible_moves()

                    possible_moves_lst = []

                    for piece, possible_piece_moves in moves.items():
                        for possible_piece_move in possible_piece_moves: