def main():
    args = parse_args()
    agent = load_agent(args)
    board_size = args.board_size
    game = GameState.new_game(board_size)
    if args.playas == 'black':
        human_play = Player.black
    elif args.playas == 'white':
        human_play = Player.white
    else:
        raise ValueError('Unknown option for playas: {}'.format(args.playas))
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == human_play:
            human_move = input('-- ')
            if len(human_move) > 1:
                point = point_from_coord(human_move.strip())
                move = Move.play(point)
            else:
                move = Move.pass_turn()
        else:
            move = agent.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Black win")
    else:
        print("White win")
def select_move(self, gs):
    if self.strategy.should_pass(gs):
        return Move.pass_turn()
    elif self.strategy.should_resign(gs):
        return Move.resign()
    else:
        return self.agent.select_move(gs)
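# A minimal sketch of a termination strategy this wrapper could delegate to;
# the class name and its logic are hypothetical, and only the assumed interface
# of should_pass(game_state) / should_resign(game_state) is taken from the
# wrapper above. It passes only when the previous move was a pass (ending the
# game) and never resigns.
class PassWhenOpponentPasses:
    def should_pass(self, game_state):
        # game_state.pmove is the previous move, as used elsewhere in this code
        if game_state.pmove is not None:
            return game_state.pmove.is_pass
        return False

    def should_resign(self, game_state):
        return False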
def main():
    args = parse_args()
    board_size = args.size
    game = GameState.new_game(board_size)
    # bot = RandomAgent()
    bot = MCTSAgent(100, 1., 64)
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == Player.black:
            human_move = input('-- ')
            if len(human_move) > 1:
                point = point_from_coord(human_move.strip())
                move = Move.play(point)
            else:
                move = Move.pass_turn()
        else:
            move = bot.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Black win")
    else:
        print("White win")
def select_move(self, gs):
    num_moves = self.encoder.num_points()
    board_tensor = self.encoder.encode(gs)
    X = np.array([board_tensor])
    # epsilon-greedy exploration: with probability `temperature`, sample
    # uniformly over all points instead of following the policy network
    if np.random.random() < self.temperature:
        move_probs = np.ones(num_moves) / num_moves
    else:
        move_probs = self.model.predict(X)[0]
        move_probs = self.clip_probs_(move_probs)
    candidates = np.arange(num_moves)
    ranked_moves = np.random.choice(candidates, num_moves, replace=False, p=move_probs)
    for pt_idx in ranked_moves:
        point = self.encoder.decode_point_index(pt_idx)
        move = Move.play(point)
        is_valid = gs.is_valid_move(move)
        is_eye = is_point_an_eye(gs.board, point, gs.nplayer)
        if is_valid and not is_eye:
            if self.collector is not None:
                self.collector.record_decision(state=board_tensor, action=pt_idx)
            return move
    # this policy-gradient agent does not learn from passing turns
    return Move.pass_turn()
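# A minimal sketch of what the clip_probs_ helper presumably does; its body is
# not shown above, so this is an assumption that follows the same pattern used
# by the actor-critic and policy agents elsewhere in this code: keep every
# probability strictly inside (0, 1) so np.random.choice never sees an exact
# zero, then re-normalize so the vector sums to 1.
def clip_probs_(self, move_probs, eps=1e-6):
    move_probs = np.clip(move_probs, eps, 1 - eps)
    return move_probs / np.sum(move_probs)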
def select_move(self, gs):
    board_tensor = self.encoder.encode(gs)
    moves = []
    board_tensors = []
    for move in gs.legal_moves():
        if not move.is_play:
            continue
        moves.append(self.encoder.encode_point(move.pt))
        board_tensors.append(board_tensor)
    if not moves:
        return Move.pass_turn()
    num_moves = len(moves)
    board_tensors = np.array(board_tensors)
    move_vectors = np.zeros((num_moves, self.encoder.num_points()))
    for i, move in enumerate(moves):
        move_vectors[i][move] = 1.
    # Q-learning uses 2 input tensors: the states and the actions
    values = self.model.predict([board_tensors, move_vectors])
    values = values.reshape(len(moves))
    ranked_moves = self.rank_move_eps_greedy(values)
    for move_idx in ranked_moves:
        point = self.encoder.decode_point_index(moves[move_idx])
        if not is_point_an_eye(gs.board, point, gs.nplayer):
            if self.collector is not None:
                self.collector.record_decision(state=board_tensor, action=moves[move_idx])
            return Move.play(point)
    return Move.pass_turn()
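# A minimal sketch of the rank_move_eps_greedy helper used above; the actual
# implementation is not shown, so this is an assumption. With probability
# `temperature` the candidate moves are ranked randomly (exploration);
# otherwise they are ranked from highest to lowest predicted Q-value.
def rank_move_eps_greedy(self, values):
    if np.random.random() < self.temperature:
        values = np.random.random(values.shape)
    # argsort sorts ascending, so reverse to put the best move first
    ranked_moves = np.argsort(values)
    return ranked_moves[::-1]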
def select_move(self, gs):
    num_moves = self.encoder.num_points()
    board_tensor = self.encoder.encode(gs)
    X = np.array([board_tensor])
    # the actor-critic model has two outputs: move probabilities and a value estimate
    actions, values = self.model.predict(X)
    move_probs = actions[0]
    estimated_value = values[0][0]
    eps = 1e-6
    # TODO: how to use temperature here?
    # TODO: move this clipping and re-normalizing into a shared helper
    move_probs = np.clip(move_probs, eps, 1 - eps)
    move_probs = move_probs / np.sum(move_probs)
    candidates = np.arange(num_moves)
    ranked_moves = np.random.choice(candidates, num_moves, replace=False, p=move_probs)
    for point_idx in ranked_moves:
        point = self.encoder.decode_point_index(point_idx)
        move = Move.play(point)
        is_move_valid = gs.is_valid_move(move)
        is_eye = is_point_an_eye(gs.board, point, gs.nplayer)
        if is_move_valid and not is_eye:
            if self.collector is not None:
                self.collector.record_decision(
                    state=board_tensor,
                    action=point_idx,
                    estimated_value=estimated_value)
            return move
    return Move.pass_turn()
def test_encode(self):
    encoder = get_encoder_by_name('oneplane', 9)
    gs = GameState.new_game(9)
    gs = gs.apply_move(Move.play(Point(5, 5)))
    gs = gs.apply_move(Move.play(Point(4, 5)))
    code = encoder.encode(gs)
    self.assertEqual(1, code[0][4][4])
    self.assertEqual(-1, code[0][3][4])
def random_move_(self, game_state):
    idxes = np.arange(len(self.cache))
    np.random.shuffle(idxes)
    for idx in idxes:
        m = Move.play(self.cache[idx])
        if (game_state.is_valid_move(m) and
                not is_point_an_eye(game_state.board, m.pt, game_state.nplayer)):
            return m
    return Move.pass_turn()
def handle_play(self, color, move):
    if move.lower() == 'pass':
        self.game_state = self.game_state.apply_move(Move.pass_turn())
    elif move.lower() == 'resign':
        self.game_state = self.game_state.apply_move(Move.resign())
    else:
        self.game_state = self.game_state.apply_move(gtp_position_to_coord(move))
    return response.success()
def legal_moves(self):
    if self.is_over():
        return list()
    ret = [Move.pass_turn(), Move.resign()]
    for ri in range(1, self.board.sz + 1):
        for ci in range(1, self.board.sz + 1):
            m = Move.play(Point(ri, ci))
            if self.is_valid_move(m):
                ret.append(m)
    return ret
def select_move(self, gs):
    eps = 1e-6
    num_actions = self.encoder.num_points()
    move_probs = self.predict(gs)
    # scale, clip and re-normalize the move probabilities to sharpen the
    # distribution, then sample moves from the rescaled probabilities
    move_probs = move_probs ** 3
    move_probs = np.clip(move_probs, eps, 1 - eps)
    move_probs = move_probs / np.sum(move_probs)
    candidates = np.arange(num_actions)
    actions = np.random.choice(candidates, num_actions, replace=False, p=move_probs)
    for point_idx in actions:
        point = self.encoder.decode_point_index(point_idx)
        if gs.is_valid_move(Move.play(point)) and \
                not is_point_an_eye(gs.board, point, gs.nplayer):
            return Move.play(point)
    return Move.pass_turn()
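# A small worked example (not part of the agent) of why raising the
# probabilities to the third power sharpens the distribution before sampling:
# strong moves gain relative weight and weak moves shrink toward zero.
import numpy as np

probs = np.array([0.5, 0.3, 0.2])
cubed = probs ** 3                 # [0.125, 0.027, 0.008]
cubed = cubed / np.sum(cubed)      # approximately [0.781, 0.169, 0.050]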
def recursive_minimax_search_(self, game_state, depth, eval_fn):
    if game_state.is_over():
        winner = game_state.winner()
        if winner == Player.black:
            return (MAX_SCORE, None)
        elif winner is None:
            return (TIE_SCORE, None)
        else:
            return (MIN_SCORE, None)
    if depth == 0:
        return (eval_fn(game_state), None)
    best_moves = list()
    if game_state.nplayer == Player.black:
        best_score = MIN_SCORE
    else:
        best_score = MAX_SCORE
    for m in game_state.legal_moves():
        nstate = game_state.apply_move(m)
        ndepth = depth
        if nstate.nplayer == Player.black:
            ndepth -= 1
        (score, _) = self.recursive_minimax_search_(nstate, ndepth, eval_fn)
        if self.is_improvement_(score, best_score, game_state.nplayer):
            best_score = score
            best_moves = [m]
        elif score == best_score:
            best_moves.append(m)
    if len(best_moves) > 0:
        return (best_score, random.choice(best_moves))
    else:
        return (best_score, Move.pass_turn())
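# A minimal sketch of the is_improvement_ helper assumed by the minimax and
# alpha-beta searches (its body is not shown above): black maximizes the score,
# white minimizes it, and equal scores do not count as an improvement, so they
# are collected separately and a random choice is made among equally good moves.
def is_improvement_(self, score, best_score, player):
    if player == Player.black:
        return score > best_score
    return score < best_score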
def play_their_move(self):
    their_name = self.their_color.name
    their_letter = their_name[0].upper()
    pos = self.command_and_response('genmove {}\n'.format(their_name))
    if pos.lower() == 'resign':
        self.game_state = self.game_state.apply_move(Move.resign())
        self.stopped = True
    elif pos.lower() == 'pass':
        self.game_state = self.game_state.apply_move(Move.pass_turn())
        self.sgf.append(';{}[]\n'.format(their_letter))
        if self.game_state.pmove.is_pass:
            self.stopped = True
    else:
        move = gtp_position_to_coord(pos)
        self.game_state = self.game_state.apply_move(move)
        self.sgf.append(';{}[{}]\n'.format(their_letter, self.sgf.coordinates(move)))
def select_move(self, game_state): """choose a random valid move that preserves its own eyes.""" candidates = list() for move in game_state.legal_moves(): if not move.is_pass and not move.is_resign: if not is_point_an_eye(game_state.board, move.pt, game_state.nplayer): candidates.append(move) if not candidates: return Move.pass_turn() return random.choice(candidates)
def main():
    args = parse_args()
    with open(args.file) as fd:
        data = fd.read()
    sgf = sgf_game.from_string(data)
    gs, first_move_done = new_game_from_handicap(sgf)
    print_board(gs.board)
    for item in sgf.main_sequence_iter():
        color, move_tuple = item.get_move()
        point = None
        if color is not None:
            if move_tuple is not None:
                row, col = move_tuple
                point = Point(row + 1, col + 1)
                move = Move.play(point)
                print('Move ({},{})'.format(row + 1, col + 1))
            else:
                move = Move.pass_turn()
            gs = gs.apply_move(move)
            print_board(gs.board)
def recursive_alpha_beta_minimax_search_(self, game_state, depth, eval_fn,
                                         alpha, beta):
    """Alpha-beta pruning: introduce two new parameters, alpha and beta.

    alpha is the best value available to the maximizer from the parent up to the root.
    beta is the best value available to the minimizer from the parent up to the root.
    We use those two values to prune tree segments downstream. The idea is: if the
    current node is a maximizer, and the beta it has received from its parent (a
    minimizer) is lower than the maximum value found by the current node, we no
    longer need to explore downstream. If the current node is a minimizer, and the
    alpha it has received from its parent (a maximizer) is higher than the minimum
    value found by the current node, we no longer need to explore downstream.
    In this implementation, we don't prune on equality.
    """
    if game_state.is_over():
        winner = game_state.winner()
        if winner == Player.black:
            return (MAX_SCORE, None)
        elif winner is None:
            return (TIE_SCORE, None)
        else:
            return (MIN_SCORE, None)
    if depth == 0:
        return (eval_fn(game_state), None)
    best_moves = list()
    if game_state.nplayer == Player.black:
        best_score = MIN_SCORE
    else:
        best_score = MAX_SCORE
    for m in game_state.legal_moves():
        if self.should_prune_(best_score, alpha, beta, game_state.nplayer):
            break
        nstate = game_state.apply_move(m)
        ndepth = depth
        if nstate.nplayer == Player.black:
            ndepth -= 1
        (score, _) = self.recursive_alpha_beta_minimax_search_(
            nstate, ndepth, eval_fn, alpha, beta)
        if self.is_improvement_(score, best_score, game_state.nplayer):
            best_score = score
            best_moves = [m]
            if game_state.nplayer == Player.black:
                if best_score > alpha:
                    alpha = best_score
            else:
                if best_score < beta:
                    beta = best_score
        elif score == best_score:
            best_moves.append(m)
    if len(best_moves) > 0:
        return (best_score, random.choice(best_moves))
    else:
        return (best_score, Move.pass_turn())
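# A minimal sketch of the should_prune_ helper described by the docstring above
# (the actual implementation is not shown, so this is an assumption): the
# maximizer (black) stops expanding once its best score already exceeds the beta
# handed down by a minimizing ancestor, and the minimizer (white) stops once its
# best score falls below the alpha handed down by a maximizing ancestor.
# Equality does not trigger pruning, matching the docstring.
def should_prune_(self, best_score, alpha, beta, player):
    if player == Player.black:
        return best_score > beta
    return best_score < alpha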
def test_encode(self):
    encoder = get_encoder_by_name('sevenplane', 9)
    gs = GameState.new_game(9)
    gs = gs.apply_move(Move.play(Point(2, 7)))
    gs = gs.apply_move(Move.play(Point(7, 2)))
    gs = gs.apply_move(Move.play(Point(3, 6)))
    gs = gs.apply_move(Move.play(Point(6, 3)))
    gs = gs.apply_move(Move.play(Point(3, 7)))
    gs = gs.apply_move(Move.play(Point(2, 6)))
    gs = gs.apply_move(Move.play(Point(2, 5)))
    code = encoder.encode(gs)
    self.assertEqual(1., code[0][1][5])
def encode(self, game_state):
    board_tensor = np.zeros(self.shape())
    base_plane = {game_state.nplayer: 0,
                  game_state.nplayer.other: 3}
    for row in range(self.sz):
        for col in range(self.sz):
            p = Point(r=row + 1, c=col + 1)
            gostring = game_state.board.get_go_string_(p)
            if gostring is None:
                if game_state.does_move_violate_ko_(game_state.nplayer, Move.play(p)):
                    board_tensor[6][row][col] = 1
            else:
                liberty_plane = min(3, gostring.num_liberties) - 1
                liberty_plane += base_plane[gostring.color]
                board_tensor[liberty_plane][row][col] = 1
    return board_tensor
def encode(self, game_state):
    board_tensor = np.zeros(self.shape())
    # planes 0-3: next player's stones with 1, 2, 3, 4+ liberties
    # planes 4-7: opponent's stones with 1, 2, 3, 4+ liberties
    # planes 8/9: set to 1 if white (8) or black (9) moves next
    # plane 10: points where playing would violate ko
    base_plane = {game_state.nplayer: 0,
                  game_state.nplayer.other: 4}
    next_player = game_state.nplayer
    if next_player == Player.white:
        board_tensor[8] = 1
    else:
        board_tensor[9] = 1
    for r in range(self.sz):
        for c in range(self.sz):
            p = Point(r + 1, c + 1)
            string = game_state.board.get_go_string_(p)
            if string is None:
                if game_state.does_move_violate_ko_(next_player, Move.play(p)):
                    board_tensor[10][r][c] = 1
            else:
                liberty_plane = min(4, string.num_liberties) - 1
                liberty_plane += base_plane[string.color]
                board_tensor[liberty_plane][r][c] = 1
    return board_tensor
def main():
    board_size = 3
    game = GameState.new_game(board_size)
    # bot = MinimaxAgent(5, None)
    bot = MCTSAgent(362000, 0.1, 64)
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == Player.black:
            human_move = input('-- ')
            point = point_from_coord(human_move.strip())
            move = Move.play(point)
        else:
            move = bot.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    print_board(game.board)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Human won")
    else:
        print("Bot won")
def select_move(self, game_state):
    if len(self.cache) != game_state.board.sz * game_state.board.sz:
        self.cache = self.init_cache_(game_state.board.sz)
    root = MCTSNode(game_state)
    for _ in range(self.expandmax):
        new_node = self.recursive_uct_(root)
        if new_node is None:
            break
        qvalue = self.mc_play_(new_node.game_state)
        new_node.update(qvalue, self.mc_trials)
    best_nodes = list()
    best_score = self.init_best_score_(root.game_state.nplayer)
    for child in root.children:
        score = child.qvalue / child.ncount
        if self.is_improvement_(score, best_score, root.game_state.nplayer):
            best_score = score
            best_nodes = [child]
        elif score == best_score:
            best_nodes.append(child)
    if best_nodes:
        return random.choice(best_nodes).pmove
    else:
        return Move.pass_turn()
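# A minimal sketch of the UCT score that recursive_uct_ presumably maximizes
# while descending the tree; the selection code itself is not shown, and the
# use of the agent's temperature parameter as the exploration constant is an
# assumption. The exploitation term is the child's average simulation result
# (from the maximizing player's point of view), and the exploration term favors
# children that have been visited less often.
import math

def uct_score(parent_count, child_count, child_qvalue, temperature):
    exploitation = child_qvalue / child_count
    exploration = math.sqrt(math.log(parent_count) / child_count)
    return exploitation + temperature * exploration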
def decode_move_index(self, index):
    if index == self.sz * self.sz:
        return Move.pass_turn()
    row = index // self.sz
    col = index % self.sz
    return Move.play(Point(row + 1, col + 1))
def legal_moves(self):
    if self.is_over():
        return list()
    ret = [Move.play(pt) for pt in self.empty_positions_()]
    ret.append(Move.pass_turn())
    return ret
def encode_point(self, pt):
    return self.encode_move(Move.play(pt))
def gtp_position_to_coord(gtp_position):
    col_str, row_str = gtp_position[0], gtp_position[1:]
    point = Point(int(row_str), COLS.find(col_str.upper()) + 1)
    return Move.play(point)
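# A small usage example, assuming COLS is the standard GTP column alphabet
# 'ABCDEFGHJKLMNOPQRST' (the letter I is skipped by convention); under that
# assumption, column 'D' maps to 4.
move = gtp_position_to_coord('D4')
assert move.pt == Point(4, 4)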