예제 #1
0
파일: tests.py 프로젝트: travcunn/checkers
    def test_move(self):
        """Check plain one-step moves and the moves that must be rejected.

        Uses the dedicated unittest assertions (assertIsNone, assertIs, ...)
        so that a failure reports the offending value instead of just
        "False is not true".
        """
        c = Checkers()

        # COMMENTS ARE FROM THE PLAYER'S PERSPECTIVE
        # Coordinates passed to move() are (x, y) pairs, whereas the board is
        # indexed as board[y][x] -- see how each move is paired with its
        # assertion below.

        # move red forward left
        self.assertIsNone(c.board[3][2])
        c.move(src=(1, 2), dest=(2, 3))
        self.assertIsNotNone(c.board[3][2])

        # move red forward right
        self.assertIsNone(c.board[4][1])
        c.move(src=(2, 3), dest=(1, 4))
        self.assertIsNotNone(c.board[4][1])

        # try moving red where an enemy is; the enemy piece must be untouched
        self.assertIs(c.board[5][0].color, Checkers.colors['black'])
        with self.assertRaises(MoveError):
            c.move(src=(1, 4), dest=(0, 5))
        self.assertIs(c.board[5][0].color, Checkers.colors['black'])

        # try moving red backwards as a regular piece
        self.assertIsNone(c.board[3][2])
        with self.assertRaises(MoveError):
            c.move(src=(1, 4), dest=(2, 3))

        # try moving a piece that doesn't exist
        self.assertIsNone(c.board[3][1])
        with self.assertRaises(KeyError):
            c.move(src=(1, 3), dest=(2, 2))
예제 #2
0
파일: tests.py 프로젝트: travcunn/checkers
    def test_single_jump(self):
        """Check that a black piece can capture a red piece by jumping it.

        Uses assertIsNone / assertIs rather than assertTrue(... is ...) so a
        failure shows the actual value found on the board.
        """
        c = Checkers()

        # move a red piece in a position where black can jump
        c.move(src=(1, 2), dest=(2, 3))
        c.move(src=(2, 3), dest=(3, 4))

        # make sure the destination is empty
        # (move() takes (x, y); the board is indexed board[y][x])
        self.assertIsNone(c.board[3][4])
        # jump a black piece over the red piece
        c.move(src=(2, 5), dest=(4, 3))
        # make sure the piece is at the destination
        self.assertIs(c.board[3][4].color, Checkers.colors['black'])
        # make sure the jumped piece is removed from the board
        self.assertIsNone(c.board[4][3])
예제 #3
0
def eval(agent:Agent, env: Checkers, color:str, n_games=100):
    """Play ``n_games`` games and return the fraction won by ``color``.

    ``agent`` plays ``color``; the other side is played by a freshly built
    ``Agent`` with ``epsilon=1`` and ``lr=0`` (presumably a purely random,
    non-learning player -- confirm against ``Agent``). Every game starts from
    the state ``env`` is in when this function is called.

    NOTE: the function name shadows the ``eval`` builtin; it is kept because
    callers import it under this name.

    Args:
        agent: Player under evaluation. Its network is put in eval mode while
            the games run and switched to train mode before returning.
        env: Checkers environment. It is rewound to its entry state at the
            start of every game and is left wherever the last game ended.
        color: Side played by ``agent``; must be a key of the score table
            (``'black'`` or ``'white'``).
        n_games: Number of games to play.

    Returns:
        ``score[color] / n_games``, i.e. the share of games won by ``color``.
    """
    agent.net.eval()
    opponent = Agent(gamma=agent.gamma,
                     epsilon=1,
                     lr=0,
                     input_dims=[8*8 + 1],
                     batch_size=agent.batch_size,
                     action_space=agent.action_space,
                     eps_dec=0,
                     max_mem_size=0
                     )
    opponent.net.eval()
    initial_state = env.save_state()
    score = {'black': 0, 'white': 0}

    def observe(turn):
        # Network input: the flattened board followed by a one-element turn
        # flag (1 when black is to move, 0 otherwise).
        board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
        turn_flag = torch.tensor([1. if turn == 'black' else 0.])
        return torch.cat([board_tensor, turn_flag])

    for _ in tqdm(range(n_games)):
        env.restore_state(initial_state)
        winner = None
        moves = torch.tensor(env.legal_moves())
        _, turn, _ = env.save_state()
        brain = agent if turn == color else opponent
        observation = observe(turn)
        while not winner:
            # Re-sample until the chosen action is one of the legal moves.
            action = brain.choose_action(observation)
            while not action_is_legal(action, moves):
                action = brain.choose_action(observation)
            # Rebind `turn` to the side that moves next. Previously the value
            # returned by env.move() was discarded, so the same brain kept
            # playing both sides and the turn flag never changed.
            _, turn, _, moves, winner = env.move(*action.tolist())
            moves = torch.tensor(moves)
            observation = observe(turn)
            brain = agent if turn == color else opponent
        score[winner] += 1
    agent.net.train()
    return score[color] / n_games
예제 #4
0
파일: board.py 프로젝트: Bulkin/pycheckers
class BoardView(gtk.DrawingArea):
    """Drawing area that paints a checkers board and reacts to left clicks.

    The widget owns a ``Checkers`` game. On every expose it paints the tiled
    background, the pieces, a red frame around the selected tile and green
    frames around the tiles returned after it by ``getSelected()``. A left
    click either performs a move (when the clicked tile is one of those green
    targets) or changes the selection.
    """

    # Extent passed to draw_arc() for every disc. GDK arc angles are expressed
    # in 1/64ths of a degree, so this value covers the whole circle.
    _ARC_EXTENT = 65*360

    def __init__(self):
        gtk.DrawingArea.__init__(self)

        self.tile_size = 100
        self.tile_count = 8
        self.w = self.h = self.tile_count * self.tile_size

        cmap = self.get_colormap()
        self.color_black = cmap.alloc_color('#000000')
        self.color_white = cmap.alloc_color('#FFFFFF')
        self.color_red   = cmap.alloc_color('#BB3333')
        self.color_green = cmap.alloc_color('#33DD33')
        self.set_size_request(320, 320)

        self.connect("expose_event", self.exposeEvent)
        self.connect("size-allocate", self.__sizeAllocate)
        self.connect("button_press_event", self.buttonPressEvent)
        self.set_events(gtk.gdk.EXPOSURE_MASK | gtk.gdk.BUTTON_PRESS_MASK)

        #TODO: use settings
        self.tile_w = gtk.gdk.pixbuf_new_from_file("images/tile_w.jpg")
        self.tile_b = gtk.gdk.pixbuf_new_from_file("images/tile_b.jpg")
        self.selector = gtk.gdk.pixbuf_new_from_file("images/selector.png")

        self.checkers = Checkers()
        self.checkers.setSelected((2,5))

    def __sizeAllocate(self, widget, rect):
        """Fit the square board into the shorter side of the new allocation."""
        self.tile_size = min(rect.width, rect.height) / self.tile_count
        self.w = self.h = self.tile_count * self.tile_size

    def exposeEvent(self, widget, event):
        """Repaint the whole board; both arguments are ignored."""
        window = self.window
        gc = window.new_gc()

        rect = gtk.gdk.Rectangle(0, 0, self.w, self.h)

        window.begin_paint_rect(rect)
        self.__drawBoard(window, gc, rect)
        window.end_paint()

    def __drawBackground(self, window, gc, rect):
        """Tile the board with the two background pixbufs in a checker pattern."""
        # Take a tile-sized window from the centre of the source image. The
        # offsets are derived from tile_w and reused for tile_b, as before.
        src_x = (self.tile_w.get_width() - self.tile_size) / 2
        src_y = (self.tile_w.get_height() - self.tile_size) / 2
        for i in range(self.tile_count):
            for ii in range(self.tile_count):
                # Tiles whose coordinates sum to an even number use tile_w;
                # __drawPieces puts pieces on the odd-sum (tile_b) squares.
                tile = self.tile_b if (i + ii) % 2 else self.tile_w
                window.draw_pixbuf(gc, tile, src_x, src_y,
                                   i * self.tile_size, ii * self.tile_size,
                                   self.tile_size, self.tile_size)

    def __drawPiece(self, window, gc, rect, type, x, y):
        """Draw the piece encoded by ``type`` on tile (x, y), if any.

        ``abs(type)`` selects the colour (``Checkers.WH`` or ``Checkers.BL``);
        any other value draws nothing. A negative ``type`` additionally gets a
        smaller disc of the opposite colour on top (presumably marking a
        king -- confirm against ``Checkers``).
        """
        kind = abs(type)
        if kind == Checkers.WH:
            outer, inner = self.color_white, self.color_black
        elif kind == Checkers.BL:
            outer, inner = self.color_black, self.color_white
        else:
            return

        s = self.tile_size
        gc.set_foreground(outer)
        window.draw_arc(gc, True, s*x+s/10, s*y+s/10, s - s/5, s - s/5,
                        0, self._ARC_EXTENT)
        if type < 0:
            gc.set_foreground(inner)
            window.draw_arc(gc, True, s*x+s/4, s*y+s/4, s/2, s/2,
                            0, self._ARC_EXTENT)

    def __drawPieces(self, window, gc, rect):
        """Draw whatever ``checkTile`` reports for every odd-sum square."""
        for i in range(self.tile_count):
            for ii in range(self.tile_count):
                if (i+ii)%2:
                    self.__drawPiece(window, gc, rect, self.checkers.checkTile(ii,i), ii, i)

    def __drawOutline(self, window, gc, color, tile):
        """Frame the tile at board coordinates ``tile`` with ``color``."""
        gc.set_line_attributes(self.tile_size / 20,
                               gtk.gdk.LINE_SOLID,
                               gtk.gdk.CAP_NOT_LAST,
                               gtk.gdk.JOIN_MITER)
        gc.set_foreground(color)
        window.draw_rectangle(gc, False,
                              tile[0] * self.tile_size, tile[1] * self.tile_size,
                              self.tile_size, self.tile_size)

    def __drawSelector(self, window, gc):
        """Frame the selected tile (first entry of ``getSelected()``) in red."""
        a = self.checkers.getSelected()[0]
        if a:
            self.__drawOutline(window, gc, self.color_red, a)

    def __drawMoves(self, window, gc):
        """Frame every entry after the first in ``getSelected()`` in green."""
        for i in self.checkers.getSelected()[1:]:
            self.__drawOutline(window, gc, self.color_green, i)

    def __drawBoard(self, window, gc, rect):
        """Paint all layers, back to front."""
        self.__drawBackground(window, gc, rect)
        self.__drawPieces(window, gc, rect)
        self.__drawSelector(window, gc)
        self.__drawMoves(window, gc)

    def __getCoords(self, x, y):
        """Convert widget pixel coordinates to a board tile.

        Returns an ``(x, y)`` tuple of tile indices, or ``None`` when the point
        lies outside the board.
        """
        # Reject negatives before int() truncation can fold them onto tile 0.
        if x < 0 or y < 0:
            return None
        x = int(x / self.tile_size)
        y = int(y / self.tile_size)
        if x >= self.tile_count or y >= self.tile_count:
            return None
        return (x, y)

    def buttonPressEvent(self, widget, event):
        """Handle a left click: move to a highlighted tile or reselect."""
        if event.button != 1:
            return
        target = self.__getCoords(event.x, event.y)
        if not target:
            return
        m = self.checkers.getSelected()
        if target in m[1:]:
            self.checkers.move(m, target)
        else:
            self.checkers.setSelected(target)
        # Repaint immediately so the new selection / position is visible.
        self.exposeEvent(None, None)

    def reset(self):
        """Replace the current game with a fresh ``Checkers`` instance."""
        self.checkers = Checkers()
예제 #5
0
 # Play one game starting from `initial_state`. On each step the player whose
 # turn it is picks an action, the move is scored, and that player stores the
 # transition and runs a learning step.
 env.restore_state(initial_state)
 winner = None
 moves = torch.tensor(env.legal_moves())
 board, turn, last_moved_piece = env.save_state()
 brain = players[turn]
 # Observation = flattened board followed by a one-element turn flag
 # (1 when black is to move, 0 otherwise).
 board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
 encoded_turn = torch.tensor([1.]) if turn == 'black' else torch.tensor(
     [0.])
 observation = torch.cat([board_tensor, encoded_turn])
 while not winner:
     action = brain.choose_action(observation)
     if not action_is_legal(action, moves):
         # Illegal choice: heavy penalty, nothing is played and the same
         # player stays on move.
         reward = -1000000
         new_turn = turn
     else:
         new_board, new_turn, _, moves, winner = env.move(
             *action.tolist())
         moves = torch.tensor(moves)
         # Score change for the mover and for the side to move next; the
         # reward is the mover's change minus the next mover's change.
         # NOTE(review): `board` is only assigned before the loop in the
         # visible code, so both differences are taken against the board
         # the game started from -- confirm this is intended.
         turn_score, new_turn_score = (get_score(new_board, player) -
                                       get_score(board, player)
                                       for player in [turn, new_turn])
         reward = turn_score - new_turn_score
     score[turn] += reward
     board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
     # NOTE(review): the flag below is built from `turn`, which at this point
     # is still the player who just moved; `new_turn` may have been meant.
     encoded_turn = torch.tensor([1. if turn == 'black' else 0.])
     new_observation = torch.cat([board_tensor, encoded_turn])
     brain.store_transition(observation, action, reward,
                            new_observation, bool(winner))
     # learn() receives True when the observation changed in any element.
     brain.learn((observation != new_observation).any().item())
     observation = new_observation
     turn = new_turn
     brain = players[turn]
예제 #6
0
파일: tests.py 프로젝트: travcunn/checkers
    def test_invalid_jump(self):
        """A jump over a piece of the mover's own colour must be refused."""
        game = Checkers()
        own_colour_jump = {'src': (2, 1), 'dest': (0, 3)}

        # The hop from (2, 1) to (0, 3) would pass over a same-coloured piece.
        with self.assertRaises(MoveError):
            game.move(**own_colour_jump)
예제 #7
0
class CheckersGame(Game):
    """Checkers exposed through the ``Game`` interface with a fixed action set.

    Every action is a ``(from_sq, to_sq)`` pair of square indices: first all
    simple moves to a neighbouring square, then all two-step jumps that stay
    on the board. The class also maintains mappings between the absolute
    board view and the first player's ego-centric view (the board reflected
    through its centre), for both squares and action indices.

    Conventions:
    - The black player moves first.
    - Ego-centric views assume the king row is at the top, i.e. the player
      starts at the bottom. The second player's view equals the absolute view.
    """

    def __init__(self, history=()):
        """Build the action tables and replay ``history``.

        Args:
            history: Iterable of action indices applied in order to reach the
                desired position. The default is an immutable empty tuple so
                that no mutable object is shared between calls.
        """
        # Rollout statistics
        self.child_visits = []
        # Terminal values for the first player
        # 1 for win
        # 0 for draw
        # -1 for loss
        # None for incomplete
        self.game_value = None

        self.ch = Checkers()

        # Action space
        # Simple moves
        self.actions = [
            (from_sq, to_sq)
            for from_sq in range(self.ch.n_positions)
            for to_sq in self.ch.neighbors[from_sq]
            if to_sq is not None
        ]
        assert 98 == len(self.actions), 'There should be 98 simple moves.'

        # Jumps: two steps along each direction, kept only when on the board
        for from_sq in range(self.ch.n_positions):
            row, col = self.ch.sq2pos(from_sq)
            for drow, dcol in Checkers.dir2del:
                next_row, next_col = row + 2 * drow, col + 2 * dcol
                if 0 <= next_row < self.ch.size and 0 <= next_col < self.ch.size:
                    to_sq = self.ch.pos2sq(next_row, next_col)
                    self.actions.append((from_sq, to_sq))
        self.num_actions = len(self.actions)
        assert 98 + 72 == self.num_actions, 'There should be 98 simple moves and 72 jumps.'

        # Inverse dictionary
        self.action2ind = {
            action: ind
            for ind, action in enumerate(self.actions)
        }

        # Square mapping from absolute to first player's ego-centric
        # (reflect through the center)
        self.abs2ego_sq = {}
        for sq in range(self.ch.n_positions):
            row, col = self.ch.sq2pos(sq)
            re_row, re_col = -row + self.ch.size - 1, -col + self.ch.size - 1
            self.abs2ego_sq[sq] = self.ch.pos2sq(re_row, re_col)
        # Inverse
        self.ego2abs_sq = {re_sq: sq for sq, re_sq in self.abs2ego_sq.items()}

        # Move mapping from absolute to first player's ego-centric
        self.abs2ego_ac = {}
        for ac, (from_sq, to_sq) in enumerate(self.actions):
            ego_move = (self.abs2ego_sq[from_sq], self.abs2ego_sq[to_sq])
            self.abs2ego_ac[ac] = self.action2ind[ego_move]
        # Inverse
        self.ego2abs_ac = {
            ego_ac: ac
            for ac, ego_ac in self.abs2ego_ac.items()
        }

        # Fast forward to the last state by taking actions from history
        self.history = []
        for action in history:
            self.apply(action)

    def clone(self):
        """Return a new game whose board state is copied from this one.

        Only the state from ``Checkers.save_state()`` is transferred; the
        copy's ``history``, ``child_visits`` and ``game_value`` start fresh.
        """
        game = CheckersGame()
        game.ch.restore_state(self.ch.save_state())
        return game

    def apply(self, action_index):
        """Play the action with index ``action_index`` and record it.

        Sets ``game_value`` to 1 when black wins and -1 when white wins.
        """
        from_sq, to_sq = self.actions[action_index]
        # Only the winner is needed from the values returned by move().
        _, _, _, _, winner = self.ch.move(from_sq, to_sq)

        # Terminate when one player wins
        if winner == 'black':
            self.game_value = 1
        elif winner == 'white':
            self.game_value = -1

        self.history.append(action_index)

    def legal_actions(self):
        """Return the set of action indices that are legal in this position."""
        return {self.action2ind[move] for move in self.ch.legal_moves()}

    def is_first_player_turn(self):
        """Return True when black (the first player) is to move."""
        return self.ch.turn == 'black'

    def ego_board_representation(self):
        """Return a ``(size, size, 5)`` array from the mover's point of view.

        Channels:
            0 my men
            1 my kings
            2 opponent's men
            3 opponent's kings
            4 my last moved piece

        When white is to move the absolute board is used as is; otherwise
        every square is first mapped through ``abs2ego_sq``.
        """
        # QUESTION: try indicating the king row and skipping ego transform?
        rep = np.zeros((self.ch.size, self.ch.size, 5))
        # White's view is the absolute view; any other turn needs the flip.
        flip = self.ch.turn != 'white'
        mine, theirs = ('black', 'white') if flip else ('white', 'black')

        def mark(sq, channel):
            if flip:
                sq = self.abs2ego_sq[sq]
            row, col = self.ch.sq2pos(sq)
            rep[row, col, channel] = 1

        layers = (
            (mine, 'men'),
            (mine, 'kings'),
            (theirs, 'men'),
            (theirs, 'kings'),
        )
        for channel, (side, kind) in enumerate(layers):
            for sq in self.ch.board[side][kind]:
                mark(sq, channel)
        if self.ch._last_moved_piece is not None:
            mark(self.ch._last_moved_piece, 4)
        return rep

    def ego_sample(self, state_index: int):
        """Build an ego-centric sample for position ``state_index``.

        The position is rebuilt by replaying the first ``state_index`` actions
        of ``history`` in a fresh game.

        Returns:
            A ``(rep, ego_val, visits)`` tuple: the board representation, the
            game value from the mover's side (``game_value`` for the first
            player, its negation otherwise) and the visit statistics recorded
            for that position, re-indexed to ego-centric actions when the
            first player is to move.

        Raises:
            TypeError: when the second player is to move and ``game_value`` is
                still ``None``, because the value cannot be negated.
        """
        # Fast forward
        game = CheckersGame(list(self.history[:state_index]))
        first_player = game.is_first_player_turn()
        # Ego-centric views of the current player
        rep = game.ego_board_representation()
        # Zero-sum game
        ego_val = self.game_value if first_player else (0 - self.game_value)
        # Ego-centric actions
        if first_player:
            # Invert actions for the first player
            visits = np.zeros(self.num_actions)
            for i in range(self.num_actions):
                visits[self.abs2ego_ac[i]] = self.child_visits[state_index][i]
        else:
            visits = np.asarray(self.child_visits[state_index])
        return rep, ego_val, visits

    def ego2abs_policy(self, is_first_player, ego_policy):
        """Re-index an ego-centric policy to absolute action indices.

        For the first player a new array is returned; for the second player
        ``ego_policy`` is returned unchanged (same object).
        """
        if not is_first_player:
            return ego_policy
        policy = np.zeros(self.num_actions)
        for ego_ac, pr in enumerate(ego_policy):
            policy[self.ego2abs_ac[ego_ac]] = pr
        return policy