def test_move(self):
    """Simple (non-jump) moves: legal forward moves succeed, illegal moves raise.

    Uses the specialized unittest assertions (assertIsNone/assertIs) instead of
    assertTrue(x is None) so failures report the offending value.
    """
    c = Checkers()
    # COMMENTS ARE FROM THE PLAYER'S PERSPECTIVE
    # Move red forward-left; destination becomes occupied.
    self.assertIsNone(c.board[3][2])
    c.move(src=(1, 2), dest=(2, 3))
    self.assertIsNotNone(c.board[3][2])
    # Move red forward-right.
    self.assertIsNone(c.board[4][1])
    c.move(src=(2, 3), dest=(1, 4))
    self.assertIsNotNone(c.board[4][1])
    # Moving red onto an enemy-occupied square must fail and must leave
    # the enemy piece untouched.
    self.assertIs(c.board[5][0].color, Checkers.colors['black'])
    with self.assertRaises(MoveError):
        c.move(src=(1, 4), dest=(0, 5))
    self.assertIs(c.board[5][0].color, Checkers.colors['black'])
    # A regular (non-king) red piece cannot move backwards.
    self.assertIsNone(c.board[3][2])
    with self.assertRaises(MoveError):
        c.move(src=(1, 4), dest=(2, 3))
    # Moving from an empty square raises KeyError.
    self.assertIsNone(c.board[3][1])
    with self.assertRaises(KeyError):
        c.move(src=(1, 3), dest=(2, 2))
def test_single_jump(self):
    """A black piece can jump an adjacent red piece and capture it.

    Idiom fix: assertIsNone/assertIs instead of assertTrue(x is None) for
    informative failure output.
    """
    c = Checkers()
    # Advance a red piece into a position where black can jump it.
    c.move(src=(1, 2), dest=(2, 3))
    c.move(src=(2, 3), dest=(3, 4))
    # The landing square must be empty before the jump.
    self.assertIsNone(c.board[3][4])
    # Jump the black piece over the red piece.
    c.move(src=(2, 5), dest=(4, 3))
    # The jumping piece landed on the destination square...
    self.assertIs(c.board[3][4].color, Checkers.colors['black'])
    # ...and the jumped piece was removed from the board.
    self.assertIsNone(c.board[4][3])
def eval(agent: Agent, env: Checkers, color: str, n_games=100):
    """Evaluate `agent` playing `color` against a uniformly random opponent.

    Plays `n_games` games, restoring the environment to its current saved
    state before each one, and returns the fraction of games won by `agent`.
    The agent's network is put in eval mode for the duration and restored to
    train mode afterwards.
    """
    agent.net.eval()
    # epsilon=1, lr=0, eps_dec=0, max_mem_size=0 -> a purely random,
    # non-learning opponent with the same action space as the agent.
    opponent = Agent(gamma=agent.gamma, epsilon=1, lr=0,
                     input_dims=[8 * 8 + 1], batch_size=agent.batch_size,
                     action_space=agent.action_space, eps_dec=0,
                     max_mem_size=0)
    opponent.net.eval()
    initial_state = env.save_state()
    score = {'black': 0, 'white': 0}
    for _ in tqdm(range(n_games)):
        env.restore_state(initial_state)
        winner = None
        moves = torch.tensor(env.legal_moves())
        board, turn, last_moved_piece = env.save_state()
        brain = agent if turn == color else opponent
        board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
        # Observation = flattened board + turn flag (1.0 black / 0.0 white).
        encoded_turn = torch.tensor([1.]) if turn == 'black' else torch.tensor([0.])
        observation = torch.cat([board_tensor, encoded_turn])
        while not winner:
            # Rejection-sample until the network proposes a legal move.
            action = brain.choose_action(observation)
            while not action_is_legal(action, moves):
                action = brain.choose_action(observation)
            new_board, new_turn, _, moves, winner = env.move(*action.tolist())
            moves = torch.tensor(moves)
            # BUG FIX: `turn` was never advanced, so the same brain played
            # every move and the observation's turn flag went stale.
            # Advance it before rebuilding the observation so both the
            # encoding and the brain selection track the player to move.
            turn = new_turn
            board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
            encoded_turn = torch.tensor([1. if turn == 'black' else 0.])
            observation = torch.cat([board_tensor, encoded_turn])
            brain = agent if turn == color else opponent
        score[winner] += 1
    agent.net.train()
    return score[color] / n_games
class BoardView(gtk.DrawingArea):
    """PyGTK 2 widget that renders a checkers board and forwards clicks.

    Board state, selection, and legal-move highlighting come from a
    `Checkers` instance (`checkTile`, `getSelected`, `setSelected`, `move`).
    """

    def __init__(self):
        gtk.DrawingArea.__init__(self)
        self.tile_size = 100
        self.tile_count = 8
        self.w = self.h = self.tile_count * self.tile_size
        cmap = self.get_colormap()
        self.color_black = cmap.alloc_color('#000000')
        self.color_white = cmap.alloc_color('#FFFFFF')
        self.color_red = cmap.alloc_color('#BB3333')
        self.color_green = cmap.alloc_color('#33DD33')
        self.set_size_request(320, 320)
        self.connect("expose_event", self.exposeEvent)
        self.connect("size-allocate", self.__sizeAllocate)
        self.connect("button_press_event", self.buttonPressEvent)
        self.set_events(gtk.gdk.EXPOSURE_MASK | gtk.gdk.BUTTON_PRESS_MASK)
        # TODO: use settings
        self.tile_w = gtk.gdk.pixbuf_new_from_file("images/tile_w.jpg")
        self.tile_b = gtk.gdk.pixbuf_new_from_file("images/tile_b.jpg")
        self.selector = gtk.gdk.pixbuf_new_from_file("images/selector.png")
        self.checkers = Checkers()
        self.checkers.setSelected((2, 5))

    def __sizeAllocate(self, widget, rect):
        # Fit square tiles into the smaller dimension of the allocation.
        if rect.width < rect.height:
            self.tile_size = rect.width / self.tile_count
        else:
            self.tile_size = rect.height / self.tile_count
        self.w = self.h = self.tile_count * self.tile_size

    def exposeEvent(self, widget, event):
        window = self.window
        gc = window.new_gc()
        rect = gtk.gdk.Rectangle(0, 0, self.w, self.h)
        window.begin_paint_rect(rect)
        self.__drawBoard(window, gc, rect)
        window.end_paint()

    def __drawBackground(self, window, gc, rect):
        # Tile images are centered within each board square; `mod`
        # alternates light/dark per square and flips per column.
        bg_w = self.tile_w.get_width()
        bg_h = self.tile_w.get_height()
        mod = True
        for i in range(self.tile_count):
            for ii in range(self.tile_count):
                if mod:
                    window.draw_pixbuf(gc, self.tile_w,
                                       (bg_w - self.tile_size) / 2,
                                       (bg_h - self.tile_size) / 2,
                                       i * self.tile_size, ii * self.tile_size,
                                       self.tile_size, self.tile_size)
                else:
                    window.draw_pixbuf(gc, self.tile_b,
                                       (bg_w - self.tile_size) / 2,
                                       (bg_h - self.tile_size) / 2,
                                       i * self.tile_size, ii * self.tile_size,
                                       self.tile_size, self.tile_size)
                mod = not mod
            mod = not mod

    def __drawPiece(self, window, gc, rect, type, x, y):
        # BUG FIX: removed a stray no-op `gc` expression statement that was
        # left at the top of this method.
        # GDK arc angles are in 1/64ths of a degree, so a full circle is
        # 64*360 (the original said 65*360, which overshoots a full turn).
        # A negative `type` marks a king: draw a smaller disc of the
        # opposite color on top of the piece.
        s = self.tile_size
        if abs(type) == Checkers.WH:
            gc.set_foreground(self.color_white)
            window.draw_arc(gc, True, s*x+s/10, s*y+s/10, s - s/5, s - s/5, 0, 64*360)
            if type < 0:
                gc.set_foreground(self.color_black)
                window.draw_arc(gc, True, s*x+s/4, s*y+s/4, s/2, s/2, 0, 64*360)
        elif abs(type) == Checkers.BL:
            gc.set_foreground(self.color_black)
            window.draw_arc(gc, True, s*x+s/10, s*y+s/10, s - s/5, s - s/5, 0, 64*360)
            if type < 0:
                gc.set_foreground(self.color_white)
                window.draw_arc(gc, True, s*x+s/4, s*y+s/4, s/2, s/2, 0, 64*360)

    def __drawPieces(self, window, gc, rect):
        # Only the dark squares ((i+ii) odd) can hold pieces.
        for i in range(8):
            for ii in range(8):
                if (i + ii) % 2:
                    self.__drawPiece(window, gc, rect,
                                     self.checkers.checkTile(ii, i), ii, i)

    def __drawSelector(self, window, gc):
        # getSelected()[0] is the currently selected square (or falsy).
        a = self.checkers.getSelected()[0]
        if a:
            gc.set_line_attributes(self.tile_size / 20, gtk.gdk.LINE_SOLID,
                                   gtk.gdk.CAP_NOT_LAST, gtk.gdk.JOIN_MITER)
            gc.set_foreground(self.color_red)
            window.draw_rectangle(gc, False, a[0] * self.tile_size,
                                  a[1] * self.tile_size,
                                  self.tile_size, self.tile_size)

    def __drawMoves(self, window, gc):
        # getSelected()[1:] holds the legal destination squares.
        a = self.checkers.getSelected()[1:]
        for i in a:
            gc.set_line_attributes(self.tile_size / 20, gtk.gdk.LINE_SOLID,
                                   gtk.gdk.CAP_NOT_LAST, gtk.gdk.JOIN_MITER)
            gc.set_foreground(self.color_green)
            window.draw_rectangle(gc, False, i[0] * self.tile_size,
                                  i[1] * self.tile_size,
                                  self.tile_size, self.tile_size)

    def __drawBoard(self, window, gc, rect):
        self.__drawBackground(window, gc, rect)
        self.__drawPieces(window, gc, rect)
        self.__drawSelector(window, gc)
        self.__drawMoves(window, gc)

    def __getCoords(self, x, y):
        # Pixel -> board coordinates; None if the click falls off the board.
        x = int(x / self.tile_size)
        y = int(y / self.tile_size)
        if x > 7 or y > 7:
            return None
        else:
            return (x, y)

    def buttonPressEvent(self, widget, event):
        if event.button != 1:
            return
        target = self.__getCoords(event.x, event.y)
        if not target:
            return
        m = self.checkers.getSelected()
        if target in m[1:]:
            # NOTE(review): the whole selection list `m` is passed as the
            # move source — confirm Checkers.move() expects that shape.
            self.checkers.move(m, target)
        else:
            self.checkers.setSelected(target)
        self.exposeEvent(None, None)

    def reset(self):
        self.checkers = Checkers()
# One self-play training episode: reset the environment, then alternate the
# two `players` agents until there is a winner, storing transitions and
# learning after every move. (`players`, `score`, `get_score`,
# `action_is_legal` and `initial_state` are defined by the surrounding code.)
env.restore_state(initial_state)
winner = None
moves = torch.tensor(env.legal_moves())
board, turn, last_moved_piece = env.save_state()
brain = players[turn]
board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
# Observation = flattened board + turn flag (1.0 for black, 0.0 for white).
encoded_turn = torch.tensor([1.]) if turn == 'black' else torch.tensor(
    [0.])
observation = torch.cat([board_tensor, encoded_turn])
while not winner:
    action = brain.choose_action(observation)
    if not action_is_legal(action, moves):
        # Illegal proposal: large penalty, and the same player keeps the turn.
        reward = -1000000
        new_turn = turn
    else:
        new_board, new_turn, _, moves, winner = env.move(
            *action.tolist())
        moves = torch.tensor(moves)
        # Score delta for the mover minus the delta for the opponent.
        # NOTE(review): `board` is captured once before the loop and never
        # refreshed, so these deltas are measured against the episode's
        # starting position, not the previous move — confirm intended.
        turn_score, new_turn_score = (get_score(new_board, player) -
            get_score(board, player) for player in [turn, new_turn])
        reward = turn_score - new_turn_score
        score[turn] += reward
    board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
    # NOTE(review): this encodes `turn` (the player who just moved), not
    # `new_turn` (the player to move in the new state) — verify intended.
    encoded_turn = torch.tensor([1. if turn == 'black' else 0.])
    new_observation = torch.cat([board_tensor, encoded_turn])
    # Terminal flag is truthy winner; learn only when the state changed.
    brain.store_transition(observation, action, reward, new_observation,
                           bool(winner))
    brain.learn((observation != new_observation).any().item())
    observation = new_observation
    turn = new_turn
    brain = players[turn]
def test_invalid_jump(self):
    """Attempting to jump over a piece of the same color raises MoveError."""
    game = Checkers()
    # (2, 1) -> (0, 3) would leap over a friendly piece.
    with self.assertRaises(MoveError):
        game.move(src=(2, 1), dest=(0, 3))
class CheckersGame(Game):
    """Checkers wrapped in the `Game` interface used for self-play training.

    Builds the fixed action space (98 simple moves + 72 jumps), the mappings
    between absolute and first-player ego-centric squares/actions, and tracks
    the action history, per-state child-visit statistics, and the terminal
    value for the first player.
    """

    def __init__(self, history=None):
        """Create a game, optionally fast-forwarded through `history`.

        BUG FIX: the default used to be the mutable `history=[]`, a shared
        default-argument object; `None` is used as the sentinel instead.
        """
        # Rollout statistics
        self.child_visits = []
        # Terminal values for the first player:
        #   1 for win, 0 for draw, -1 for loss, None for incomplete
        self.game_value = None
        # XXX Conventions:
        # - Black player moves first
        # - Ego-centric views assume the king row are at the top, i.e. starts
        #   at the bottom (Second player has the same view as absolute)
        self.ch = Checkers()
        # Action space
        self.actions = []
        # Simple moves
        for from_sq in range(self.ch.n_positions):
            for to_sq in self.ch.neighbors[from_sq]:
                if to_sq is not None:
                    simple_move = (from_sq, to_sq)
                    self.actions.append(simple_move)
        assert 98 == len(self.actions), 'There should be 98 simple moves.'
        # Jumps
        for from_sq in range(self.ch.n_positions):
            row, col = self.ch.sq2pos(from_sq)
            # For each direction
            for di, (drow, dcol) in enumerate(Checkers.dir2del):
                next_row, next_col = row + 2 * drow, col + 2 * dcol
                if 0 <= next_row < self.ch.size and 0 <= next_col < self.ch.size:
                    # Within bound
                    to_sq = self.ch.pos2sq(next_row, next_col)
                    jump = (from_sq, to_sq)
                    self.actions.append(jump)
        self.num_actions = len(self.actions)
        assert 98 + 72 == self.num_actions, 'There should be 98 simple moves and 72 jumps.'
        # Inverse dictionary
        self.action2ind = {
            action: ind
            for ind, action in enumerate(self.actions)
        }
        # Square mapping from absolute to first player's ego-centric
        # (reflect through the center)
        self.abs2ego_sq = {}
        for sq in range(self.ch.n_positions):
            row, col = self.ch.sq2pos(sq)
            re_row, re_col = -row + self.ch.size - 1, -col + self.ch.size - 1
            re_sq = self.ch.pos2sq(re_row, re_col)
            self.abs2ego_sq[sq] = re_sq
        # Inverse
        self.ego2abs_sq = {re_sq: sq for sq, re_sq in self.abs2ego_sq.items()}
        # Move mapping from absolute to first player's ego-centric
        self.abs2ego_ac = {}
        for ac, (from_sq, to_sq) in enumerate(self.actions):
            ego_move = (self.abs2ego_sq[from_sq], self.abs2ego_sq[to_sq])
            ego_ac = self.action2ind[ego_move]
            self.abs2ego_ac[ac] = ego_ac
        # Inverse
        self.ego2abs_ac = {
            ego_ac: ac
            for ac, ego_ac in self.abs2ego_ac.items()
        }
        # Fast forward to the last state by taking actions from history
        self.history = []
        for action in (history if history is not None else []):
            self.apply(action)

    def clone(self):
        """Return a new game holding a copy of the current board state.

        NOTE(review): only the Checkers state is copied — `history`,
        `child_visits`, and `game_value` start fresh; confirm callers
        expect that.
        """
        game = CheckersGame()
        state = self.ch.save_state()
        game.ch.restore_state(state)
        return game

    def apply(self, action_index):
        """Play the action with the given index and record it in history."""
        from_sq, to_sq = self.actions[action_index]
        board, turn, last_moved_piece, all_next_moves, winner = self.ch.move(
            from_sq, to_sq)
        # Terminate when one player wins (value from the first player's view)
        if winner == 'black':
            self.game_value = 1
        elif winner == 'white':
            self.game_value = -1
        self.history.append(action_index)

    def legal_actions(self):
        """Return the set of legal action indices for the current position."""
        moves = self.ch.legal_moves()
        action_indices = {self.action2ind[move] for move in moves}
        return action_indices

    def is_first_player_turn(self):
        """True when it is black's (the first player's) turn."""
        return self.ch.turn == 'black'

    def ego_board_representation(self):
        """Return an (size, size, 5) array from the current player's view.

        Channels:
          0 my men, 1 my kings, 2 opponent's men, 3 opponent's kings,
          4 my last moved piece
        QUESTION: try indicating the king row and skipping ego transform?
        """
        rep = np.zeros((self.ch.size, self.ch.size, 5))
        if self.ch.turn == 'white':
            # Same as the absolute view
            for sq in self.ch.board['white']['men']:
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 0] = 1
            for sq in self.ch.board['white']['kings']:
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 1] = 1
            for sq in self.ch.board['black']['men']:
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 2] = 1
            for sq in self.ch.board['black']['kings']:
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 3] = 1
            if self.ch._last_moved_piece is not None:
                row, col = self.ch.sq2pos(self.ch._last_moved_piece)
                rep[row, col, 4] = 1
        else:
            # Need to invert the board
            for sq in self.ch.board['black']['men']:
                sq = self.abs2ego_sq[sq]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 0] = 1
            for sq in self.ch.board['black']['kings']:
                sq = self.abs2ego_sq[sq]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 1] = 1
            for sq in self.ch.board['white']['men']:
                sq = self.abs2ego_sq[sq]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 2] = 1
            for sq in self.ch.board['white']['kings']:
                sq = self.abs2ego_sq[sq]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 3] = 1
            if self.ch._last_moved_piece is not None:
                sq = self.abs2ego_sq[self.ch._last_moved_piece]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 4] = 1
        return rep

    def ego_sample(self, state_index: int):
        """Return (representation, value, visit counts) for a past state,
        all expressed in the to-move player's ego-centric frame.

        NOTE(review): assumes the game is complete — `game_value` must not
        be None when this is called.
        """
        # Fast forward
        game = CheckersGame(list(self.history[:state_index]))
        # Ego-centric views of the current player
        rep = game.ego_board_representation()
        # Zero-sum game
        ego_val = self.game_value if game.is_first_player_turn() else (
            0 - self.game_value)
        # Ego-centric actions
        if game.is_first_player_turn():
            # Invert actions for the first player
            visits = np.zeros(self.num_actions)
            for i in range(self.num_actions):
                visits[self.abs2ego_ac[i]] = self.child_visits[state_index][i]
        else:
            visits = np.asarray(self.child_visits[state_index])
        return rep, ego_val, visits

    def ego2abs_policy(self, is_first_player, ego_policy):
        """Map an ego-centric policy vector back to absolute action indices.

        Only the first player's view is reflected; the second player's view
        coincides with the absolute frame.
        """
        if is_first_player:
            policy = np.zeros(self.num_actions)
            for ego_ac, pr in enumerate(ego_policy):
                policy[self.ego2abs_ac[ego_ac]] = pr
        else:
            policy = ego_policy
        return policy