import torch
from tqdm import tqdm

# Agent, Checkers and action_is_legal are defined elsewhere in the project.


def eval(agent: Agent, env: Checkers, color: str, n_games=100):
    """Play n_games against a random opponent and return the agent's win rate
    for the given color."""
    agent.net.eval()
    # epsilon=1 with no decay makes the opponent purely random.
    opponent = Agent(gamma=agent.gamma,
                     epsilon=1,
                     lr=0,
                     input_dims=[8 * 8 + 1],
                     batch_size=agent.batch_size,
                     action_space=agent.action_space,
                     eps_dec=0,
                     max_mem_size=0)
    opponent.net.eval()
    initial_state = env.save_state()
    score = {'black': 0, 'white': 0}
    for i in tqdm(range(n_games)):
        env.restore_state(initial_state)
        winner = None
        moves = torch.tensor(env.legal_moves())
        board, turn, last_moved_piece = env.save_state()
        brain = agent if turn == color else opponent
        # Observation: flattened board plus a flag for the side to move.
        board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
        encoded_turn = torch.tensor([1.]) if turn == 'black' else torch.tensor([0.])
        observation = torch.cat([board_tensor, encoded_turn])
        while not winner:
            # Resample until the chosen action is legal.
            action = brain.choose_action(observation)
            while not action_is_legal(action, moves):
                action = brain.choose_action(observation)
            new_board, turn, _, moves, winner = env.move(*action.tolist())
            moves = torch.tensor(moves)
            board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
            encoded_turn = torch.tensor([1. if turn == 'black' else 0.])
            observation = torch.cat([board_tensor, encoded_turn])
            # Hand control to whichever brain owns the new turn.
            brain = agent if turn == color else opponent
        score[winner] += 1
    agent.net.train()
    return score[color] / n_games
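# A minimal usage sketch of eval(), assuming agent.net is a torch.nn.Module.
# `trained_agent` stands for an Agent produced by the training loop below;
# the checkpoint path and game count are made up for illustration.
env = Checkers()
trained_agent.net.load_state_dict(torch.load('checkpoints/black_final.pt'))
win_rate = eval(trained_agent, env, color='black', n_games=200)
print(f'win rate vs. a random opponent: {win_rate:.2%}')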
# `players` maps 'black'/'white' to their Agent instances; its definition
# precedes this fragment.
}
env = Checkers()
initial_state = env.save_state()
eps_history = []
score = {'black': 0, 'white': 0}
os.makedirs(args.checkpoints_dir, exist_ok=True)

for i in range(args.games):
    print(
        f"episode={i}, score={score}, black_eps:{players['black'].epsilon}, white_eps:{players['white'].epsilon}"
    )
    score = {'black': 0, 'white': 0}
    env.restore_state(initial_state)
    winner = None
    moves = torch.tensor(env.legal_moves())
    board, turn, last_moved_piece = env.save_state()
    brain = players[turn]
    board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
    encoded_turn = torch.tensor([1.]) if turn == 'black' else torch.tensor([0.])
    observation = torch.cat([board_tensor, encoded_turn])
    while not winner:
        action = brain.choose_action(observation)
        if not action_is_legal(action, moves):
            # Punish illegal moves heavily; the same player moves again.
            reward = -1000000
            new_turn = turn
        else:
            new_board, new_turn, _, moves, winner = env.move(*action.tolist())
            moves = torch.tensor(moves)
            # ... (remainder of the training loop not shown)
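# action_is_legal is used above but not defined in this listing. A plausible
# sketch, assuming `action` is a 1-D tensor holding (from_sq, to_sq) and
# `moves` is the tensor of legal (from_sq, to_sq) pairs:
def action_is_legal(action, moves):
    # True if the proposed move matches any row of the legal-move tensor.
    if moves.numel() == 0:
        return False
    return bool((moves == action.view(1, -1)).all(dim=1).any())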
import copy
import textwrap

# The board constants (BLACK, WHITE, MAN, KING, FIRST, LAST), the
# keymap/squaremap tables and KING_IDX come from the surrounding project.


class SavedGame(object):
    def __init__(self):
        self._model = Checkers()
        self.to_move = None
        self.moves = []
        self.description = ''
        self.black_men = []
        self.white_men = []
        self.black_kings = []
        self.white_kings = []
        self.flip_board = False
        self.num_players = 1
        self._move_check = False
        self._bm_check = False
        self._bk_check = False
        self._wm_check = False
        self._wk_check = False

    def _write_positions(self, f, prefix, positions):
        f.write(prefix + ' ')
        for p in sorted(positions):
            f.write('%d ' % p)
        f.write('\n')

    def _write_moves(self, f):
        f.write('<moves>\n')
        for move in reversed(self.moves):
            start = keymap[move.affected_squares[FIRST][0]]
            dest = keymap[move.affected_squares[LAST][0]]
            movestr = '%d-%d' % (start, dest)
            annotation = move.annotation
            if annotation.startswith(movestr):
                annotation = annotation.replace(movestr, '', 1).rstrip()
            f.write('%s;%s\n' % (movestr, annotation))

    def write(self, filename):
        with open(filename, 'w') as f:
            f.write('<description>\n')
            for line in self.description.splitlines():
                # Numbered lists and hyperlinks are not word wrapped.
                if line.startswith('# ') or '[[' in line:
                    f.write(line + '\n')
                    continue
                else:
                    f.write(textwrap.fill(line, 80) + '\n')
            f.write('<setup>\n')
            if self.to_move == WHITE:
                f.write('white_first\n')
            elif self.to_move == BLACK:
                f.write('black_first\n')
            else:
                raise ValueError("Unknown value for to_move variable")
            if 0 <= self.num_players <= 2:
                f.write('%d_player_game\n' % self.num_players)
            else:
                raise ValueError("Unknown value for num_players variable")
            if self.flip_board:
                f.write('flip_board 1\n')
            else:
                f.write('flip_board 0\n')
            self._write_positions(f, 'black_men', self.black_men)
            self._write_positions(f, 'black_kings', self.black_kings)
            self._write_positions(f, 'white_men', self.white_men)
            self._write_positions(f, 'white_kings', self.white_kings)
            self._write_moves(f)

    def read(self, filename):
        with open(filename, 'r') as f:
            lines = f.readlines()
        linelen = len(lines)
        i = 0
        while True:
            if i >= linelen:
                break
            line = lines[i].strip()
            if line.startswith('<description>'):
                self.description = ''
                i += 1
                while i < linelen and not lines[i].startswith('<setup>'):
                    self.description += lines[i]
                    i += 1
            elif line.startswith('<setup>'):
                i = self._parse_setup(lines, i, linelen)
            elif line.startswith('<moves>'):
                i = self._parse_moves(lines, i, linelen)
            else:
                raise IOError('Unrecognized section in file, line %d' % (i + 1))

    def _parse_items(self, line):
        men = line.split()[1:]
        return list(map(int, men))

    def _add_men_to_board(self, locations, val):
        squares = self._model.curr_state.squares
        try:
            for loc in locations:
                idx = squaremap[loc]
                squares[idx] = val
        except (KeyError, ValueError):
            raise IOError('Checker location not valid: %d' % loc)

    def _parse_setup(self, lines, idx, linelen):
        curr_state = self._model.curr_state
        curr_state.clear()
        idx += 1
        while idx < linelen and '<moves>' not in lines[idx]:
            line = lines[idx].strip().lower()
            if line == 'white_first':
                self.to_move = curr_state.to_move = WHITE
                self._move_check = True
            elif line == 'black_first':
                self.to_move = curr_state.to_move = BLACK
                self._move_check = True
            elif line.endswith('player_game'):
                numstr, _ = line.split('_', 1)
                self.num_players = int(numstr)
            elif line.startswith('flip_board'):
                _, setting = line.split()
                self.flip_board = bool(int(setting))
            elif line.startswith('black_men'):
                self.black_men = self._parse_items(line)
                self._add_men_to_board(self.black_men, BLACK | MAN)
                self._bm_check = True
            elif line.startswith('white_men'):
                self.white_men = self._parse_items(line)
                self._add_men_to_board(self.white_men, WHITE | MAN)
                self._wm_check = True
            elif line.startswith('black_kings'):
                self.black_kings = self._parse_items(line)
                self._add_men_to_board(self.black_kings, BLACK | KING)
                self._bk_check = True
            elif line.startswith('white_kings'):
                self.white_kings = self._parse_items(line)
                self._add_men_to_board(self.white_kings, WHITE | KING)
                self._wk_check = True
            idx += 1
        if not (self._move_check and self._bm_check and self._wm_check
                and self._bk_check and self._wk_check):
            raise IOError('Error in <setup> section: not all required items found')
        return idx

    def _is_move(self, delta):
        return delta in KING_IDX

    def _is_jump(self, delta):
        return delta not in KING_IDX

    def _try_move(self, idx, start, dest, state_copy, annotation):
        legal_moves = self._model.legal_moves(state_copy)
        # Match the move from the file with the available moves on the board.
        found = False
        startsq, destsq = squaremap[start], squaremap[dest]
        for move in legal_moves:
            if (startsq == move.affected_squares[FIRST][0]
                    and destsq == move.affected_squares[LAST][0]):
                self._model.make_move(move, state_copy, False, False)
                move.annotation = annotation
                self.moves.append(move)
                found = True
                break
        if not found:
            raise IOError('Illegal move found in file, line %d' % (idx + 1))

    def _try_jump(self, idx, start, dest, state_copy, annotation):
        if not self._model.captures_available(state_copy):
            return False
        legal_moves = self._model.legal_moves(state_copy)
        # Match the jump from the file with the available jumps on the board.
        startsq, destsq = squaremap[start], squaremap[dest]
        found = False
        for move in legal_moves:
            # A valid jump may contain either a single jump or multiple jumps.
            # In the multiple-jump case, startsq is the source of the first
            # jump and destsq is the endpoint of the last jump.
            if (startsq == move.affected_squares[FIRST][0]
                    and destsq == move.affected_squares[LAST][0]):
                self._model.make_move(move, state_copy, False, False)
                move.annotation = annotation
                self.moves.append(move)
                found = True
                break
        return found

    def _parse_moves(self, lines, idx, linelen):
        """Each move in the file lists the beginning and ending square, along
        with an optional annotation string (in Creole format) that describes
        it. Since the move listing in the file contains less information than
        we need inside our Checkerboard model, I make sure that each move
        works on a copy of the model before I commit to using it inside the
        code."""
        state_copy = copy.deepcopy(self._model.curr_state)
        idx += 1
        while idx < linelen:
            line = lines[idx].strip()
            if line == "":
                idx += 1
                continue  # ignore blank lines
            try:
                movestr, annotation = line.split(';', 1)
            except ValueError:
                raise IOError('Unrecognized section in file, line %d' % (idx + 1))
            # The move is always part of the annotation; I just don't want to
            # have to repeat it explicitly in the file.
            annotation = movestr + annotation
            # Analyze the affected squares to perform a move or a jump.
            try:
                start, dest = [int(x) for x in movestr.split('-')]
            except ValueError:
                raise IOError('Bad move format in file, line %d' % (idx + 1))
            delta = squaremap[start] - squaremap[dest]
            if self._is_move(delta):
                self._try_move(idx, start, dest, state_copy, annotation)
            else:
                jumped = self._try_jump(idx, start, dest, state_copy, annotation)
                if not jumped:
                    raise IOError('Bad move format in file, line %d' % (idx + 1))
            idx += 1
        self.moves.reverse()
        return idx
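# A hypothetical round-trip with SavedGame, assuming the project's BLACK
# constant and the standard 1-32 square numbering of the opening position;
# the file name is made up for illustration.
game = SavedGame()
game.description = 'Two-player game saved for later analysis.'
game.to_move = BLACK
game.num_players = 2
game.black_men = list(range(1, 13))
game.white_men = list(range(21, 33))
game.write('opening_position.txt')

replay = SavedGame()
replay.read('opening_position.txt')
print(replay.to_move, len(replay.black_men), len(replay.white_men))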
import numpy as np

# Checkers is the project's game engine; Game is the base class this
# self-play wrapper implements.


class CheckersGame(Game):
    def __init__(self, history=()):
        # Rollout statistics
        self.child_visits = []
        # Terminal value for the first player:
        #   1 for a win, 0 for a draw, -1 for a loss, None for an unfinished game.
        self.game_value = None
        # XXX Conventions:
        # - The black player moves first.
        # - Ego-centric views assume the opponent's king row is at the top,
        #   i.e. the current player starts at the bottom (the second player's
        #   view coincides with the absolute view).
        self.ch = Checkers()
        # Action space
        self.actions = []
        # Simple moves
        for from_sq in range(self.ch.n_positions):
            for to_sq in self.ch.neighbors[from_sq]:
                if to_sq is not None:
                    simple_move = (from_sq, to_sq)
                    self.actions.append(simple_move)
        assert 98 == len(self.actions), 'There should be 98 simple moves.'
        # Jumps
        for from_sq in range(self.ch.n_positions):
            row, col = self.ch.sq2pos(from_sq)
            # For each direction
            for drow, dcol in Checkers.dir2del:
                next_row, next_col = row + 2 * drow, col + 2 * dcol
                if 0 <= next_row < self.ch.size and 0 <= next_col < self.ch.size:
                    # Within bounds
                    to_sq = self.ch.pos2sq(next_row, next_col)
                    jump = (from_sq, to_sq)
                    self.actions.append(jump)
        self.num_actions = len(self.actions)
        assert 98 + 72 == self.num_actions, \
            'There should be 98 simple moves and 72 jumps.'
        # Inverse dictionary
        self.action2ind = {action: ind for ind, action in enumerate(self.actions)}
        # Square mapping from absolute to the first player's ego-centric view
        # (reflection through the center of the board).
        self.abs2ego_sq = {}
        for sq in range(self.ch.n_positions):
            row, col = self.ch.sq2pos(sq)
            re_row, re_col = -row + self.ch.size - 1, -col + self.ch.size - 1
            re_sq = self.ch.pos2sq(re_row, re_col)
            self.abs2ego_sq[sq] = re_sq
        # Inverse
        self.ego2abs_sq = {re_sq: sq for sq, re_sq in self.abs2ego_sq.items()}
        # Move mapping from absolute to the first player's ego-centric view
        self.abs2ego_ac = {}
        for ac, (from_sq, to_sq) in enumerate(self.actions):
            ego_move = (self.abs2ego_sq[from_sq], self.abs2ego_sq[to_sq])
            ego_ac = self.action2ind[ego_move]
            self.abs2ego_ac[ac] = ego_ac
        # Inverse
        self.ego2abs_ac = {ego_ac: ac for ac, ego_ac in self.abs2ego_ac.items()}
        # Fast forward to the last state by taking actions from the history.
        self.history = []
        for action in history:
            self.apply(action)

    def clone(self):
        game = CheckersGame()
        state = self.ch.save_state()
        game.ch.restore_state(state)
        return game

    def apply(self, action_index):
        from_sq, to_sq = self.actions[action_index]
        board, turn, last_moved_piece, all_next_moves, winner = self.ch.move(
            from_sq, to_sq)
        # Terminate when one player wins.
        if winner == 'black':
            self.game_value = 1
        elif winner == 'white':
            self.game_value = -1
        self.history.append(action_index)

    def legal_actions(self):
        moves = self.ch.legal_moves()
        action_indices = {self.action2ind[move] for move in moves}
        return action_indices

    def is_first_player_turn(self):
        return self.ch.turn == 'black'

    def ego_board_representation(self):
        # XXX Channels:
        # 0 my men
        # 1 my kings
        # 2 opponent's men
        # 3 opponent's kings
        # 4 my last moved piece
        # QUESTION: try indicating the king row and skipping the ego transform?
        rep = np.zeros((self.ch.size, self.ch.size, 5))
        if self.ch.turn == 'white':
            # Same as the absolute view
            for sq in self.ch.board['white']['men']:
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 0] = 1
            for sq in self.ch.board['white']['kings']:
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 1] = 1
            for sq in self.ch.board['black']['men']:
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 2] = 1
            for sq in self.ch.board['black']['kings']:
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 3] = 1
            if self.ch._last_moved_piece is not None:
                row, col = self.ch.sq2pos(self.ch._last_moved_piece)
                rep[row, col, 4] = 1
        else:
            # Need to invert the board
            for sq in self.ch.board['black']['men']:
                sq = self.abs2ego_sq[sq]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 0] = 1
            for sq in self.ch.board['black']['kings']:
                sq = self.abs2ego_sq[sq]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 1] = 1
            for sq in self.ch.board['white']['men']:
                sq = self.abs2ego_sq[sq]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 2] = 1
            for sq in self.ch.board['white']['kings']:
                sq = self.abs2ego_sq[sq]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 3] = 1
            if self.ch._last_moved_piece is not None:
                sq = self.abs2ego_sq[self.ch._last_moved_piece]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 4] = 1
        return rep

    def ego_sample(self, state_index: int):
        # Fast forward to the requested position.
        game = CheckersGame(list(self.history[:state_index]))
        # Ego-centric view of the current player.
        rep = game.ego_board_representation()
        # Zero-sum game: negate the value for the second player.
        ego_val = self.game_value if game.is_first_player_turn() else -self.game_value
        # Ego-centric actions
        if game.is_first_player_turn():
            # Invert the actions for the first player.
            visits = np.zeros(self.num_actions)
            for i in range(self.num_actions):
                visits[self.abs2ego_ac[i]] = self.child_visits[state_index][i]
        else:
            visits = np.asarray(self.child_visits[state_index])
        return rep, ego_val, visits

    def ego2abs_policy(self, is_first_player, ego_policy):
        if is_first_player:
            policy = np.zeros(self.num_actions)
            for ego_ac, pr in enumerate(ego_policy):
                policy[self.ego2abs_ac[ego_ac]] = pr
        else:
            policy = ego_policy
        return policy
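# Hypothetical smoke test: play random legal moves with CheckersGame until
# someone wins (or a move cap is reached), then build one ego-centric sample.
# The uniform child-visit counts below stand in for real MCTS statistics.
import random

game = CheckersGame()
for _ in range(200):
    actions = list(game.legal_actions())
    if game.game_value is not None or not actions:
        break
    # Record fake visit counts for the position before the move.
    game.child_visits.append(
        [1 if a in actions else 0 for a in range(game.num_actions)])
    game.apply(random.choice(actions))

if game.game_value is not None:
    rep, value, visits = game.ego_sample(0)
    print(rep.shape, value, visits.sum())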