def select_move(self, gs):
    num_moves = self.encoder.num_points()
    board_tensor = self.encoder.encode(gs)
    X = np.array([board_tensor])
    actions, values = self.model.predict(X)
    move_probs = actions[0]
    estimated_value = values[0][0]
    eps = 1e-6
    # TODO: how to use temperature here?
    # TODO: move this into a function that does the clipping
    move_probs = np.clip(move_probs, eps, 1 - eps)
    move_probs = move_probs / np.sum(move_probs)
    # sample every move index without replacement, so the loop below visits
    # each candidate once, high-probability moves first on average
    candidates = np.arange(num_moves)
    ranked_moves = np.random.choice(
        candidates, num_moves, replace=False, p=move_probs)
    for point_idx in ranked_moves:
        point = self.encoder.decode_point_index(point_idx)
        move = Move.play(point)
        is_valid = gs.is_valid_move(move)
        is_eye = is_point_an_eye(gs.board, point, gs.nplayer)
        if is_valid and not is_eye:
            if self.collector is not None:
                self.collector.record_decision(
                    state=board_tensor,
                    action=point_idx,
                    estimated_value=estimated_value)
            return move
    return Move.pass_turn()
def test_encode(self):
    encoder = get_encoder_by_name('oneplane', 9)
    gs = GameState.new_game(9)
    gs = gs.apply_move(Move.play(Point(5, 5)))
    gs = gs.apply_move(Move.play(Point(4, 5)))
    code = encoder.encode(gs)
    # after one black and one white move, black is next again: black's stone
    # at (5, 5) encodes as 1, white's at (4, 5) as -1
    self.assertEqual(1, code[0][4][4])
    self.assertEqual(-1, code[0][3][4])
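# A possible oneplane encode() matching the assertions in the test above:
# the next player's stones become 1, the opponent's -1. A minimal sketch,
# assuming the encoder keeps the board size in self.sz, that shape() returns
# (1, sz, sz), and the same board helpers used by the multi-plane encoders
# later in this listing:
def encode(self, game_state):
    board_tensor = np.zeros(self.shape())
    next_player = game_state.nplayer
    for r in range(self.sz):
        for c in range(self.sz):
            p = Point(r + 1, c + 1)
            go_string = game_state.board.get_go_string_(p)
            if go_string is None:
                continue
            if go_string.color == next_player:
                board_tensor[0][r][c] = 1
            else:
                board_tensor[0][r][c] = -1
    return board_tensor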
def main():
    args = parse_args()
    agent = load_agent(args)
    board_size = args.board_size
    game = GameState.new_game(board_size)
    if args.playas == 'black':
        human_play = Player.black
    elif args.playas == 'white':
        human_play = Player.white
    else:
        raise ValueError('Unknown option for playas: {}'.format(args.playas))
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == human_play:
            human_move = input('-- ')
            if len(human_move) > 1:
                point = point_from_coord(human_move.strip())
                move = Move.play(point)
            else:
                # anything shorter than a coordinate counts as a pass
                move = Move.pass_turn()
        else:
            move = agent.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Black wins")
    else:
        print("White wins")
def select_move(self, gs):
    num_moves = self.encoder.num_points()
    board_tensor = self.encoder.encode(gs)
    X = np.array([board_tensor])
    # epsilon-greedy exploration: with probability self.temperature sample
    # uniformly over all points, otherwise follow the policy network
    if np.random.random() < self.temperature:
        move_probs = np.ones(num_moves) / num_moves
    else:
        move_probs = self.model.predict(X)[0]
    move_probs = self.clip_probs_(move_probs)
    candidates = np.arange(num_moves)
    ranked_moves = np.random.choice(
        candidates, num_moves, replace=False, p=move_probs)
    for pt_idx in ranked_moves:
        point = self.encoder.decode_point_index(pt_idx)
        move = Move.play(point)
        is_valid = gs.is_valid_move(move)
        is_eye = is_point_an_eye(gs.board, point, gs.nplayer)
        if is_valid and not is_eye:
            if self.collector is not None:
                self.collector.record_decision(
                    state=board_tensor, action=pt_idx)
            return move
    # this policy-gradient agent does not learn from passing
    return Move.pass_turn()
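# clip_probs_ is called above but not defined in this listing; a minimal
# sketch, assuming the same clip-and-renormalize step the other select_move
# variants here perform inline:
def clip_probs_(self, probs):
    eps = 1e-6
    clipped = np.clip(probs, eps, 1 - eps)
    return clipped / np.sum(clipped)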
def main():
    args = parse_args()
    board_size = args.size
    game = GameState.new_game(board_size)
    # bot = RandomAgent()
    bot = MCTSAgent(100, 1., 64)
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == Player.black:
            human_move = input('-- ')
            if len(human_move) > 1:
                point = point_from_coord(human_move.strip())
                move = Move.play(point)
            else:
                move = Move.pass_turn()
        else:
            move = bot.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Black wins")
    else:
        print("White wins")
def select_move(self, gs):
    board_tensor = self.encoder.encode(gs)
    moves = []
    board_tensors = []
    for move in gs.legal_moves():
        if not move.is_play:
            continue
        moves.append(self.encoder.encode_point(move.pt))
        board_tensors.append(board_tensor)
    if not moves:
        return Move.pass_turn()
    num_moves = len(moves)
    board_tensors = np.array(board_tensors)
    # one-hot encode each candidate action
    move_vectors = np.zeros((num_moves, self.encoder.num_points()))
    for i, move in enumerate(moves):
        move_vectors[i][move] = 1.
    # Q-learning uses two input tensors: the states and the actions
    values = self.model.predict([board_tensors, move_vectors])
    values = values.reshape(len(moves))
    ranked_moves = self.rank_move_eps_greedy(values)
    for move_idx in ranked_moves:
        point = self.encoder.decode_point_index(moves[move_idx])
        if not is_point_an_eye(gs.board, point, gs.nplayer):
            if self.collector is not None:
                self.collector.record_decision(
                    state=board_tensor, action=moves[move_idx])
            return Move.play(point)
    return Move.pass_turn()
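# rank_move_eps_greedy is used above but not defined in this listing; a
# minimal sketch, assuming epsilon-greedy ranking with the same
# self.temperature knob the policy agent uses: with probability temperature
# the Q-values are replaced by random noise, then moves are ordered from
# highest to lowest value:
def rank_move_eps_greedy(self, values):
    if np.random.random() < self.temperature:
        values = np.random.random(values.shape)
    ranked_moves = np.argsort(values)  # ascending by value
    return ranked_moves[::-1]          # best move first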
def select_move(self, gs):
    eps = 1e-6
    num_actions = self.encoder.num_points()
    move_probs = self.predict(gs)
    # cube, clip, and re-normalize the move probabilities to sharpen the
    # distribution, then sample all moves from the rescaled distribution
    move_probs = move_probs ** 3
    move_probs = np.clip(move_probs, eps, 1 - eps)
    move_probs = move_probs / np.sum(move_probs)
    candidates = np.arange(num_actions)
    actions = np.random.choice(
        candidates, num_actions, replace=False, p=move_probs)
    for point_idx in actions:
        point = self.encoder.decode_point_index(point_idx)
        move = Move.play(point)
        if gs.is_valid_move(move) and not is_point_an_eye(
                gs.board, point, gs.nplayer):
            return move
    return Move.pass_turn()
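# predict is called above but not shown; a minimal sketch, assuming it
# encodes the state, wraps it in a batch of one, and returns the first row
# of the model output, as the other agents in this listing do inline:
def predict(self, gs):
    encoded_state = self.encoder.encode(gs)
    return self.model.predict(np.array([encoded_state]))[0]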
def random_move_(self, game_state):
    # visit the cached board points in random order and play the first legal
    # move that does not fill one of our own eyes
    idxes = np.arange(len(self.cache))
    np.random.shuffle(idxes)
    for idx in idxes:
        m = Move.play(self.cache[idx])
        if (game_state.is_valid_move(m) and
                not is_point_an_eye(game_state.board, m.pt,
                                    game_state.nplayer)):
            return m
    return Move.pass_turn()
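# self.cache is assumed to hold every point on the board, precomputed once
# so the bot does not rebuild the candidate list on every move; a minimal
# sketch with a hypothetical helper name:
def build_cache_(self, board_size):
    self.cache = [Point(r, c)
                  for r in range(1, board_size + 1)
                  for c in range(1, board_size + 1)]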
def legal_moves(self):
    if self.is_over():
        return []
    ret = [Move.pass_turn(), Move.resign()]
    for ri in range(1, self.board.sz + 1):
        for ci in range(1, self.board.sz + 1):
            m = Move.play(Point(ri, ci))
            if self.is_valid_move(m):
                ret.append(m)
    return ret
def test_encode(self):
    encoder = get_encoder_by_name('sevenplane', 9)
    gs = GameState.new_game(9)
    gs = gs.apply_move(Move.play(Point(2, 7)))
    gs = gs.apply_move(Move.play(Point(7, 2)))
    gs = gs.apply_move(Move.play(Point(3, 6)))
    gs = gs.apply_move(Move.play(Point(6, 3)))
    gs = gs.apply_move(Move.play(Point(3, 7)))
    gs = gs.apply_move(Move.play(Point(2, 6)))
    gs = gs.apply_move(Move.play(Point(2, 5)))
    code = encoder.encode(gs)
    # white is next to move and white's stone at (2, 6) has exactly one
    # liberty, so it appears on plane 0 (next player, one liberty)
    self.assertEqual(1., code[0][1][5])
def encode(self, game_state):
    board_tensor = np.zeros(self.shape())
    # planes 0-2: next player's stones with 1, 2, 3+ liberties;
    # planes 3-5: the opponent's; plane 6: points that would violate ko
    base_plane = {game_state.nplayer: 0,
                  game_state.nplayer.other: 3}
    for row in range(self.sz):
        for col in range(self.sz):
            p = Point(r=row + 1, c=col + 1)
            gostring = game_state.board.get_go_string_(p)
            if gostring is None:
                if game_state.does_move_violate_ko_(
                        game_state.nplayer, Move.play(p)):
                    board_tensor[6][row][col] = 1
            else:
                liberty_plane = min(3, gostring.num_liberties) - 1
                liberty_plane += base_plane[gostring.color]
                board_tensor[liberty_plane][row][col] = 1
    return board_tensor
def main():
    args = parse_args()
    with open(args.file) as fd:
        data = fd.read()
    sgf = sgf_game.from_string(data)
    gs, first_move_done = new_game_from_handicap(sgf)
    print_board(gs.board)
    for item in sgf.main_sequence_iter():
        color, move_tuple = item.get_move()
        if color is not None:
            if move_tuple is not None:
                # SGF coordinates are zero-based; board points are one-based
                row, col = move_tuple
                point = Point(row + 1, col + 1)
                move = Move.play(point)
                print('Move ({},{})'.format(row + 1, col + 1))
            else:
                move = Move.pass_turn()
            gs = gs.apply_move(move)
            print_board(gs.board)
def encode(self, game_state):
    board_tensor = np.zeros(self.shape())
    # planes 0-3: next player's stones with 1, 2, 3, 4+ liberties;
    # planes 4-7: the opponent's; planes 8/9: who moves next; plane 10: ko
    base_plane = {game_state.nplayer: 0,
                  game_state.nplayer.other: 4}
    next_player = game_state.nplayer
    if next_player == Player.white:
        board_tensor[8] = 1
    else:
        board_tensor[9] = 1
    for r in range(self.sz):
        for c in range(self.sz):
            p = Point(r + 1, c + 1)
            string = game_state.board.get_go_string_(p)
            if string is None:
                if game_state.does_move_violate_ko_(
                        next_player, Move.play(p)):
                    board_tensor[10][r][c] = 1
            else:
                liberty_plane = min(4, string.num_liberties) - 1
                liberty_plane += base_plane[string.color]
                board_tensor[liberty_plane][r][c] = 1
    return board_tensor
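# shape() is referenced by both encoders above but not shown; a minimal
# sketch, assuming each encoder stores its plane count (7 and 11 for the two
# encoders above) in a num_planes attribute:
def shape(self):
    return self.num_planes, self.sz, self.sz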
def main():
    board_size = 3
    game = GameState.new_game(board_size)
    # bot = MinimaxAgent(5, None)
    bot = MCTSAgent(362000, 0.1, 64)
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == Player.black:
            human_move = input('-- ')
            point = point_from_coord(human_move.strip())
            move = Move.play(point)
        else:
            move = bot.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    print_board(game.board)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Human won")
    else:
        print("Bot won")
def gtp_position_to_coord(gtp_position):
    # GTP positions look like 'C3': a column letter followed by a row number
    col_str, row_str = gtp_position[0], gtp_position[1:]
    point = Point(int(row_str), COLS.find(col_str.upper()) + 1)
    return Move.play(point)
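# point_from_coord is used by the CLI loops above but never defined in this
# listing; a minimal sketch, assuming input like 'C3' and the same COLS
# string used by gtp_position_to_coord (conventionally 'ABCDEFGHJKLMNOPQRST',
# with the letter I skipped):
def point_from_coord(coord):
    col = COLS.find(coord[0].upper()) + 1
    row = int(coord[1:])
    return Point(row, col)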
def decode_move_index(self, index):
    # the last index is reserved for the pass move
    if index == self.sz * self.sz:
        return Move.pass_turn()
    row = index // self.sz
    col = index % self.sz
    return Move.play(Point(row + 1, col + 1))
def legal_moves(self):
    if self.is_over():
        return []
    ret = [Move.play(pt) for pt in self.empty_positions_()]
    ret.append(Move.pass_turn())
    return ret
def encode_point(self, pt):
    return self.encode_move(Move.play(pt))
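# encode_move is referenced by encode_point above but not shown; a minimal
# sketch mirroring decode_move_index: play moves map to row-major point
# indices and the pass move maps to sz * sz. It assumes Point exposes r and c
# fields (as the keyword construction in the sevenplane encoder suggests) and
# that Move has an is_pass flag alongside the is_play flag used elsewhere in
# this listing:
def encode_move(self, move):
    if move.is_play:
        return self.sz * (move.pt.r - 1) + (move.pt.c - 1)
    if move.is_pass:
        return self.sz * self.sz
    raise ValueError('cannot encode a resign move')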