예제 #1
0
def eval(agent:Agent, env: Checkers, color:str, n_games=100):
    """Play ``n_games`` evaluation games and return the agent's win rate.

    ``agent`` plays the side named by ``color`` ('black' or 'white'); the
    other side is played by a freshly built ``Agent`` configured with
    ``epsilon=1`` and ``lr=0`` (presumably a purely random player -- confirm
    against ``Agent.choose_action``).  Every game starts from the state
    ``env`` is in when this function is called.

    Args:
        agent: The agent under evaluation.  Its network is put in eval mode
            for the duration of the call and switched to train mode afterwards.
        env: The checkers environment; its state is saved once and restored
            before each game.
        color: Side played by ``agent``; also the key used to read the score.
        n_games: Number of games to play.

    Returns:
        Fraction of the ``n_games`` games won by ``color``.

    Raises:
        ZeroDivisionError: If ``n_games`` is 0.
    """

    def observe(side_to_move):
        # Observation = flattened board followed by a single turn flag
        # (1.0 when black is to move, 0.0 otherwise).
        board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
        turn_flag = torch.tensor([1. if side_to_move == 'black' else 0.])
        return torch.cat([board_tensor, turn_flag])

    agent.net.eval()
    opponent = Agent(gamma=agent.gamma,
                     epsilon=1,
                     lr=0,
                     input_dims=[8*8 + 1],
                     batch_size=agent.batch_size,
                     action_space=agent.action_space,
                     eps_dec=0,
                     max_mem_size=0
                     )
    opponent.net.eval()
    initial_state = env.save_state()
    score = {'black': 0, 'white': 0}

    try:
        for _ in tqdm(range(n_games)):
            env.restore_state(initial_state)
            winner = None
            moves = torch.tensor(env.legal_moves())
            _, turn, _ = env.save_state()
            brain = agent if turn == color else opponent
            observation = observe(turn)
            while not winner:
                # Re-sample until the chosen action is one of the legal moves.
                action = brain.choose_action(observation)
                while not action_is_legal(action, moves):
                    action = brain.choose_action(observation)
                _, turn, _, moves, winner = env.move(*action.tolist())
                moves = torch.tensor(moves)
                # ``turn`` now holds the side to move as reported by the
                # environment, so both the turn flag and the acting player
                # follow the game instead of staying on the opening side.
                observation = observe(turn)
                brain = agent if turn == color else opponent
            score[winner] += 1
    finally:
        # Always hand the network back in training mode, even if a game fails.
        agent.net.train()
    return score[color] / n_games
예제 #2
0
    }
    env = Checkers()
    initial_state = env.save_state()
    eps_history = []
    score = {'black': 0, 'white': 0}

    os.makedirs(args.checkpoints_dir, exist_ok=True)

    for i in range(args.games):
        print(
            f"episode={i}, score={score}, black_eps:{players['black'].epsilon}, white_eps:{players['white'].epsilon}"
        )
        score = {'black': 0, 'white': 0}
        env.restore_state(initial_state)
        winner = None
        moves = torch.tensor(env.legal_moves())
        board, turn, last_moved_piece = env.save_state()
        brain = players[turn]
        board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
        encoded_turn = torch.tensor([1.]) if turn == 'black' else torch.tensor(
            [0.])
        observation = torch.cat([board_tensor, encoded_turn])
        while not winner:
            action = brain.choose_action(observation)
            if not action_is_legal(action, moves):
                reward = -1000000
                new_turn = turn
            else:
                new_board, new_turn, _, moves, winner = env.move(
                    *action.tolist())
                moves = torch.tensor(moves)
예제 #3
0
class SavedGame(object):
    """A checkers game that can be written to and read from a text file.

    The file handled by ``read``/``write`` consists of three sections, each
    introduced by a tag line: ``<description>`` (free text), ``<setup>``
    (side to move, player count, board flip flag and piece positions) and
    ``<moves>`` (one ``start-dest;annotation`` entry per line).
    """

    def __init__(self):
        """Create an empty saved game backed by a fresh ``Checkers`` model."""
        self._model = Checkers()
        self.to_move = None
        # Stored most-recent-first: _parse_moves reverses the list after
        # loading and _write_moves walks it in reverse to emit file order.
        self.moves = []
        self.description = ''
        self.black_men = []
        self.white_men = []
        self.black_kings = []
        self.white_kings = []
        self.flip_board = False
        self.num_players = 1
        # Flags recording which required <setup> items have been seen.
        self._move_check = False
        self._bm_check = False
        self._bk_check = False
        self._wm_check = False
        self._wk_check = False

    def _write_positions(self, f, prefix, positions):
        """Write ``prefix`` followed by the sorted ``positions`` on one line.

        Every number, including the last, is followed by a single space.
        """
        numbers = ''.join('%d ' % p for p in sorted(positions))
        f.write('%s %s\n' % (prefix, numbers))

    def _write_moves(self, f):
        """Write the ``<moves>`` section, one ``start-dest;annotation`` per line."""
        f.write('<moves>\n')
        for move in reversed(self.moves):
            start = keymap[move.affected_squares[FIRST][0]]
            dest = keymap[move.affected_squares[LAST][0]]
            movestr = '%d-%d' % (start, dest)
            annotation = move.annotation
            # The move text is re-attached on load, so do not store it twice.
            if annotation.startswith(movestr):
                annotation = annotation.replace(movestr, '', 1).rstrip()
            f.write('%s;%s\n' % (movestr, annotation))

    def write(self, filename):
        """Write the game to ``filename``.

        Raises:
            ValueError: If ``to_move`` is neither WHITE nor BLACK, or
                ``num_players`` is outside 0..2.
        """
        with open(filename, 'w') as f:
            f.write('<description>\n')
            for line in self.description.splitlines():
                # numbered lists or hyperlinks are not word wrapped.
                if line.startswith('# ') or '[[' in line:
                    f.write(line + '\n')
                else:
                    f.write(textwrap.fill(line, 80) + '\n')
            f.write('<setup>\n')
            if self.to_move == WHITE:
                f.write('white_first\n')
            elif self.to_move == BLACK:
                f.write('black_first\n')
            else:
                raise ValueError("Unknown value for to_move variable")
            if 0 <= self.num_players <= 2:
                f.write('%d_player_game\n' % self.num_players)
            else:
                raise ValueError("Unknown value for num_players variable")
            f.write('flip_board 1\n' if self.flip_board else 'flip_board 0\n')
            self._write_positions(f, 'black_men', self.black_men)
            self._write_positions(f, 'black_kings', self.black_kings)
            self._write_positions(f, 'white_men', self.white_men)
            self._write_positions(f, 'white_kings', self.white_kings)
            self._write_moves(f)

    def read(self, filename):
        """Load the game stored in ``filename`` into this object.

        Raises:
            IOError: If a line outside any section is not a known section
                tag, or a section's content cannot be parsed.
        """
        with open(filename, 'r') as f:
            lines = f.readlines()

        linelen = len(lines)
        i = 0
        while i < linelen:
            line = lines[i].strip()
            if line.startswith('<description>'):
                # Everything up to the <setup> tag is description text.
                i += 1
                first = i
                while i < linelen and not lines[i].startswith('<setup>'):
                    i += 1
                self.description = ''.join(lines[first:i])
            elif line.startswith('<setup>'):
                i = self._parse_setup(lines, i, linelen)
            elif line.startswith('<moves>'):
                i = self._parse_moves(lines, i, linelen)
            else:
                raise IOError('Unrecognized section in file, line %d' % (i+1))

    def _parse_items(self, line):
        """Return the integers following the first word of ``line`` as a list."""
        # A real list (not a lazy ``map`` object) so the positions can be
        # iterated more than once: once to fill the board, later by write().
        return [int(token) for token in line.split()[1:]]

    def _add_men_to_board(self, locations, val, line_no=None):
        """Place ``val`` on every board square named in ``locations``.

        Args:
            locations: Square numbers, used as keys into ``squaremap``.
            val: Piece value stored in the model's ``squares``.
            line_no: Optional 1-based file line number for the error message.

        Raises:
            IOError: If a location cannot be mapped to a board square.
        """
        squares = self._model.curr_state.squares
        try:
            for loc in locations:
                squares[squaremap[loc]] = val
        except (ValueError, LookupError):
            # LookupError covers KeyError/IndexError, which is what a failed
            # ``squaremap[loc]`` / ``squares[idx]`` access raises.
            if line_no is None:
                raise IOError('Checker location not valid')
            raise IOError('Checker location not valid, line %d' % line_no)

    def _parse_setup(self, lines, idx, linelen):
        """Parse the ``<setup>`` section that starts at ``lines[idx]``.

        Returns:
            Index of the ``<moves>`` line, or ``linelen`` if there is none.

        Raises:
            IOError: If any required item (side to move and the four piece
                lists) is missing, or a piece location is invalid.
        """
        curr_state = self._model.curr_state
        curr_state.clear()
        # Start from a clean slate so flags left over from an earlier read
        # cannot hide items missing from this section.
        self._move_check = False
        self._bm_check = self._bk_check = False
        self._wm_check = self._wk_check = False
        # (line prefix and attribute name, piece value, "seen" flag name)
        piece_rows = (
            ('black_men', BLACK | MAN, '_bm_check'),
            ('white_men', WHITE | MAN, '_wm_check'),
            ('black_kings', BLACK | KING, '_bk_check'),
            ('white_kings', WHITE | KING, '_wk_check'),
        )
        idx += 1
        while idx < linelen and '<moves>' not in lines[idx]:
            line = lines[idx].strip().lower()
            if line == 'white_first':
                self.to_move = curr_state.to_move = WHITE
                self._move_check = True
            elif line == 'black_first':
                self.to_move = curr_state.to_move = BLACK
                self._move_check = True
            elif line.endswith('player_game'):
                self.num_players = int(line.split('_', 1)[0])
            elif line.startswith('flip_board'):
                _, setting = line.split()
                self.flip_board = bool(int(setting))
            else:
                for prefix, piece, flag in piece_rows:
                    if line.startswith(prefix):
                        items = self._parse_items(line)
                        setattr(self, prefix, items)
                        self._add_men_to_board(items, piece, idx + 1)
                        setattr(self, flag, True)
                        break
            idx += 1
        required = (self._move_check, self._bm_check, self._wm_check,
                    self._bk_check, self._wk_check)
        if not all(required):
            raise IOError('Error in <setup> section: not all required items found')
        return idx

    def _is_move(self, delta):
        """Return True if ``delta`` is one of the offsets listed in KING_IDX."""
        return delta in KING_IDX

    def _is_jump(self, delta):
        """Return True if ``delta`` is not one of the offsets in KING_IDX."""
        return delta not in KING_IDX

    def _apply_matching_move(self, start, dest, state_copy, annotation):
        """Play the legal move going from ``start`` to ``dest``, if any.

        For a multi-jump, ``start`` is the source of the first jump and
        ``dest`` the endpoint of the last one.  Returns True when a matching
        move was found, applied to ``state_copy`` and recorded.
        """
        startsq, destsq = squaremap[start], squaremap[dest]
        for move in self._model.legal_moves(state_copy):
            if (startsq == move.affected_squares[FIRST][0] and
                    destsq == move.affected_squares[LAST][0]):
                self._model.make_move(move, state_copy, False, False)
                move.annotation = annotation
                self.moves.append(move)
                return True
        return False

    def _try_move(self, idx, start, dest, state_copy, annotation):
        """Apply the plain move ``start``-``dest`` read from line ``idx``.

        Raises:
            IOError: If the move is not legal in ``state_copy``.
        """
        if not self._apply_matching_move(start, dest, state_copy, annotation):
            raise IOError('Illegal move found in file, line %d' % (idx+1))

    def _try_jump(self, idx, start, dest, state_copy, annotation):
        """Apply the jump ``start``-``dest``; return False if it is not legal."""
        if not self._model.captures_available(state_copy):
            return False
        return self._apply_matching_move(start, dest, state_copy, annotation)

    def _parse_moves(self, lines, idx, linelen):
        """ Each move in the file lists the beginning and ending square, along
        with an optional annotation string (in Creole format) that describes it.
        Since the move listing in the file contains less information than
        we need inside our Checkerboard model, I make sure that each move works
        on a copy of the model before I commit to using it inside the code.

        Returns the index one past the last line consumed.  Raises IOError,
        quoting the 1-based file line, for malformed or illegal moves. """
        state_copy = copy.deepcopy(self._model.curr_state)
        idx += 1
        while idx < linelen:
            line = lines[idx].strip()
            if line == "":
                idx += 1
                continue # ignore blank lines

            try:
                movestr, annotation = line.split(';', 1)
            except ValueError:
                raise IOError('Unrecognized section in file, line %d' % (idx+1))

            # move is always part of the annotation; I just don't want to
            # have to repeat it explicitly in the file.
            annotation = movestr + annotation

            # analyze affected squares to perform a move or jump.
            try:
                start, dest = [int(x) for x in movestr.split('-')]
                delta = squaremap[start] - squaremap[dest]
            except (ValueError, LookupError):
                raise IOError('Bad move format in file, line %d' % (idx+1))
            if self._is_move(delta):
                self._try_move(idx, start, dest, state_copy, annotation)
            elif not self._try_jump(idx, start, dest, state_copy, annotation):
                raise IOError('Bad move format in file, line %d' % (idx+1))
            idx += 1
        self.moves.reverse()
        return idx
예제 #4
0
class CheckersGame(Game):
    """Checkers wrapped behind an indexed action space with ego-centric views.

    Conventions:
    - Black is the first player and moves first.
    - Ego-centric views place the mover's king row at the top.  The second
      player (white) already sees the absolute board that way; the first
      player's view is the absolute board reflected through its center.
    - An action is a ``(from_sq, to_sq)`` pair; ``self.actions`` lists the
      simple moves followed by the jumps, and an action index is a position
      in that list.
    """

    def __init__(self, history=None):
        """Build the action tables and replay ``history``.

        Args:
            history: Optional iterable of action indices to apply, in order,
                starting from a fresh ``Checkers`` game.
        """
        # Rollout statistics
        self.child_visits = []
        # Terminal value for the first player:
        # 1 for win, 0 for draw, -1 for loss, None for incomplete.
        self.game_value = None

        self.ch = Checkers()

        # Action space: simple moves first ...
        self.actions = []
        for from_sq in range(self.ch.n_positions):
            for to_sq in self.ch.neighbors[from_sq]:
                if to_sq is not None:
                    self.actions.append((from_sq, to_sq))
        assert 98 == len(self.actions), 'There should be 98 simple moves.'

        # ... then jumps: two steps along each direction, if still on board.
        for from_sq in range(self.ch.n_positions):
            row, col = self.ch.sq2pos(from_sq)
            for drow, dcol in Checkers.dir2del:
                next_row, next_col = row + 2 * drow, col + 2 * dcol
                if 0 <= next_row < self.ch.size and 0 <= next_col < self.ch.size:
                    self.actions.append(
                        (from_sq, self.ch.pos2sq(next_row, next_col)))
        self.num_actions = len(self.actions)
        assert 98 + 72 == self.num_actions, 'There should be 98 simple moves and 72 jumps.'

        # Inverse dictionary: (from_sq, to_sq) -> action index
        self.action2ind = {
            action: ind
            for ind, action in enumerate(self.actions)
        }

        # Square mapping from absolute to first player's ego-centric view
        # (reflect through the center), and its inverse.
        self.abs2ego_sq = {}
        for sq in range(self.ch.n_positions):
            row, col = self.ch.sq2pos(sq)
            re_row, re_col = self.ch.size - 1 - row, self.ch.size - 1 - col
            self.abs2ego_sq[sq] = self.ch.pos2sq(re_row, re_col)
        self.ego2abs_sq = {re_sq: sq for sq, re_sq in self.abs2ego_sq.items()}

        # Action-index mapping from absolute to first player's ego-centric
        # view, and its inverse.
        self.abs2ego_ac = {}
        for ac, (from_sq, to_sq) in enumerate(self.actions):
            ego_move = (self.abs2ego_sq[from_sq], self.abs2ego_sq[to_sq])
            self.abs2ego_ac[ac] = self.action2ind[ego_move]
        self.ego2abs_ac = {
            ego_ac: ac
            for ac, ego_ac in self.abs2ego_ac.items()
        }

        # Fast forward to the last state by taking actions from history.
        # ``None`` replaces a shared mutable ``[]`` default.
        self.history = []
        for action in (history if history is not None else ()):
            self.apply(action)

    def clone(self):
        """Return an independent copy of this game.

        The board is copied through the engine's save/restore; the action
        history, terminal value and visit statistics are carried over so the
        copy describes the same game rather than only the same position.
        """
        game = CheckersGame()
        game.ch.restore_state(self.ch.save_state())
        game.history = list(self.history)
        game.game_value = self.game_value
        game.child_visits = list(self.child_visits)
        return game

    def apply(self, action_index):
        """Play the action with index ``action_index`` and record it.

        Sets ``game_value`` to 1 when black wins and -1 when white wins.
        """
        from_sq, to_sq = self.actions[action_index]
        _, _, _, _, winner = self.ch.move(from_sq, to_sq)

        # Terminate when one player wins
        if winner == 'black':
            self.game_value = 1
        elif winner == 'white':
            self.game_value = -1

        self.history.append(action_index)

    def legal_actions(self):
        """Return the set of action indices legal in the current position."""
        # tuple() keeps the lookup working if the engine hands back moves as
        # lists, which are not hashable.
        return {self.action2ind[tuple(move)] for move in self.ch.legal_moves()}

    def is_first_player_turn(self):
        """Return True when black (the first player) is to move."""
        return self.ch.turn == 'black'

    def ego_board_representation(self):
        """Return the board as seen by the player to move.

        The result is a ``(size, size, 5)`` array of zeros and ones with
        channels:
          0 my men, 1 my kings, 2 opponent's men, 3 opponent's kings,
          4 the engine's last moved piece (if any).
        When white is to move the absolute board is used as is; otherwise
        every square is first reflected through the board center.
        """
        # QUESTION: try indicating the king row and skipping ego transform?
        rep = np.zeros((self.ch.size, self.ch.size, 5))
        if self.ch.turn == 'white':
            # Same as the absolute view
            me, foe, flip = 'white', 'black', False
        else:
            # Need to invert the board
            me, foe, flip = 'black', 'white', True

        def mark(sq, channel):
            if flip:
                sq = self.abs2ego_sq[sq]
            row, col = self.ch.sq2pos(sq)
            rep[row, col, channel] = 1

        planes = (
            (self.ch.board[me]['men'], 0),
            (self.ch.board[me]['kings'], 1),
            (self.ch.board[foe]['men'], 2),
            (self.ch.board[foe]['kings'], 3),
        )
        for squares, channel in planes:
            for sq in squares:
                mark(sq, channel)
        if self.ch._last_moved_piece is not None:
            mark(self.ch._last_moved_piece, 4)
        return rep

    def ego_sample(self, state_index: int):
        """Return an ego-centric ``(board, value, visits)`` sample.

        The position is rebuilt by replaying the first ``state_index``
        actions of ``history``.  ``value`` is ``game_value`` seen from the
        player to move there (negated for the second player), and ``visits``
        is ``child_visits[state_index]`` re-indexed into that player's
        ego-centric action order.  ``game_value`` is expected to be set
        before sampling.
        """
        # Fast forward
        game = CheckersGame(list(self.history[:state_index]))
        first_player = game.is_first_player_turn()
        # Ego-centric views of the current player
        rep = game.ego_board_representation()
        # Zero-sum game
        ego_val = self.game_value if first_player else (0 - self.game_value)
        # Ego-centric actions
        if first_player:
            # Invert actions for the first player
            visits = np.zeros(self.num_actions)
            recorded = self.child_visits[state_index]
            for i in range(self.num_actions):
                visits[self.abs2ego_ac[i]] = recorded[i]
        else:
            visits = np.asarray(self.child_visits[state_index])
        return rep, ego_val, visits

    def ego2abs_policy(self, is_first_player, ego_policy):
        """Convert an ego-centric policy vector into absolute action order.

        For the first player a new array is built with every entry moved to
        its absolute action index; for the second player ``ego_policy`` is
        returned unchanged (same object).
        """
        if not is_first_player:
            return ego_policy
        policy = np.zeros(self.num_actions)
        for ego_ac, pr in enumerate(ego_policy):
            policy[self.ego2abs_ac[ego_ac]] = pr
        return policy