def get_handicap(sgf):
    go_board = Board(19)
    first_move_done = False
    move = None
    game_state = GameState.new_game(19)
    if sgf.get_handicap() is not None and sgf.get_handicap() != 0:
        # Place the handicap setup stones for black before normal play starts.
        for setup in sgf.get_root().get_setup_stones():
            for move in setup:
                row, col = move
                go_board.place_stone(Player.black, Point(row + 1, col + 1))
        first_move_done = True
        game_state = GameState(go_board, Player.white, None, move)
    return game_state, first_move_done
def new_game_from_handicap(sgf):
    board = Board(19)
    first_move_done = False
    move = None
    gs = GameState.new_game(19)
    if sgf.get_handicap() is not None and sgf.get_handicap() != 0:
        print('Handicap detected')
        # Place the handicap setup stones for black before normal play starts.
        for setup in sgf.get_root().get_setup_stones():
            for move in setup:
                row, col = move
                board.place_stone(Player.black, Point(row + 1, col + 1))
        first_move_done = True
        gs = GameState(board, Player.white, None, move)
    return gs, first_move_done
def main():
    args = parse_args()
    agent = load_agent(args)
    board_size = args.board_size
    game = GameState.new_game(board_size)
    if args.playas == 'black':
        human_play = Player.black
    elif args.playas == 'white':
        human_play = Player.white
    else:
        raise ValueError('Unknown option for playas: {}'.format(args.playas))
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == human_play:
            human_move = input('-- ')
            if len(human_move) > 1:
                point = point_from_coord(human_move.strip())
                move = Move.play(point)
            else:
                move = Move.pass_turn()
        else:
            move = agent.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Black win")
    else:
        print("White win")
def main():
    board_size = 9
    game = GameState.new_game(board_size)
    bots = {
        # Player.black: RandomAgent(),
        # Player.white: RandomAgent(),
        Player.black: FastRandomAgent(board_size),
        Player.white: FastRandomAgent(board_size),
    }
    while not game.is_over():
        time.sleep(0.1)  # slow down so we can observe
        # print(chr(27) + "[2J")  # clear screen
        print_board(game.board)
        bot_move = bots[game.nplayer].select_move(game)
        print_move(game.nplayer, bot_move)
        game = game.apply_move(bot_move)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Black win")
    else:
        print("White win")
def main():
    args = parse_args()
    board_size = args.size
    game = GameState.new_game(board_size)
    # bot = RandomAgent()
    bot = MCTSAgent(100, 1., 64)
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == Player.black:
            human_move = input('-- ')
            if len(human_move) > 1:
                point = point_from_coord(human_move.strip())
                move = Move.play(point)
            else:
                move = Move.pass_turn()
        else:
            move = bot.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Black win")
    else:
        print("White win")
def test_new_game(self):
    start = GameState.new_game(19)
    next_state = start.apply_move(Move.play(Point(16, 16)))
    self.assertEqual(start, next_state.prev)
    self.assertEqual(Player.white, next_state.nplayer)
    self.assertEqual(Player.black, next_state.board.get(Point(16, 16)))
def test_encode(self):
    encoder = get_encoder_by_name('oneplane', 9)
    gs = GameState.new_game(9)
    gs = gs.apply_move(Move.play(Point(5, 5)))
    gs = gs.apply_move(Move.play(Point(4, 5)))
    code = encoder.encode(gs)
    self.assertEqual(1, code[0][4][4])
    self.assertEqual(-1, code[0][3][4])
def eval_new_agent(args, new_agent, old_agent, rounds=100, acceptability=0.53):
    # Remember each agent's own temperature so it can be restored afterwards,
    # then play the evaluation games greedily.
    new_temp = new_agent.temperature
    old_temp = old_agent.temperature
    new_agent.set_temperature(0.)
    old_agent.set_temperature(0.)
    players = {
        Player.black: new_agent,
        Player.white: old_agent,
    }
    win_count = 0
    for _ in range(rounds // 2):
        game = GameState.new_game(args.board_size)
        while not game.is_over():
            move = players[game.nplayer].select_move(game)
            game = game.apply_move(move)
        winner = game.winner()
        if winner == Player.black:
            win_count += 1
    # Swap colors for the second half of the evaluation games.
    players = {
        Player.black: old_agent,
        Player.white: new_agent,
    }
    for _ in range(rounds // 2):
        game = GameState.new_game(args.board_size)
        while not game.is_over():
            move = players[game.nplayer].select_move(game)
            game = game.apply_move(move)
        winner = game.winner()
        if winner == Player.white:
            win_count += 1
    new_agent.set_temperature(new_temp)
    old_agent.set_temperature(old_temp)
    print('win count: {}'.format(win_count))
    return win_count > rounds * acceptability
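# A minimal promotion sketch, not from the original script: gate overwriting the
# saved "best" model on the outcome of eval_new_agent. The names maybe_promote,
# candidate_agent, best_agent, and best_path are hypothetical, for illustration only.
def maybe_promote(args, candidate_agent, best_agent, best_path):
    # Only persist the candidate when it clears the acceptability threshold.
    if eval_new_agent(args, candidate_agent, best_agent, rounds=100):
        candidate_agent.serialize(h5py.File(best_path, 'w'))
        return True
    return False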
def test_encode(self):
    encoder = get_encoder_by_name('sevenplane', 9)
    gs = GameState.new_game(9)
    gs = gs.apply_move(Move.play(Point(2, 7)))
    gs = gs.apply_move(Move.play(Point(7, 2)))
    gs = gs.apply_move(Move.play(Point(3, 6)))
    gs = gs.apply_move(Move.play(Point(6, 3)))
    gs = gs.apply_move(Move.play(Point(3, 7)))
    gs = gs.apply_move(Move.play(Point(2, 6)))
    gs = gs.apply_move(Move.play(Point(2, 5)))
    code = encoder.encode(gs)
    self.assertEqual(1., code[0][1][5])
def main():
    args = parse_args()
    temperature = 0.3
    agent_path = args.data_dir + '/agents/pg_' + args.model_size + '_' + args.encoder_name + '_' + str(args.board_size) + '.h5'
    print('Agent path: {}'.format(agent_path))
    agent1, collector1 = setup_agent(agent_path, args)
    agent2, collector2 = setup_agent(agent_path, args)
    players = {
        Player.black: agent1,
        Player.white: agent2,
    }
    for i in range(args.rounds):
        round_no = i + 1
        temperature = temperature_decay(temperature, round_no)
        if i % 10 == 0:
            print('Begin round {} selfplay. temperature {}'.format(round_no, temperature))
        agent1.set_temperature(temperature)
        agent2.set_temperature(temperature)
        collector1.begin_episode()
        collector2.begin_episode()
        game = GameState.new_game(args.board_size)
        while not game.is_over():
            move = players[game.nplayer].select_move(game)
            game = game.apply_move(move)
        winner = game.winner()
        if winner == Player.black:
            collector1.complete_episode(1.)
            collector2.complete_episode(-1.)
        else:
            collector1.complete_episode(-1.)
            collector2.complete_episode(1.)
        # Learn from the accumulated experience every 1000 rounds.
        if round_no % 1000 == 0:
            print('Begin round {} training'.format(round_no))
            exp = combine_experience([collector1, collector2])
            agent1.train(exp, args.learning_rate, batchsize=args.batchsize)
            print('Training complete.')
            # Not evaluating for improvement here: policy gradients have high
            # variance, so the agent is difficult to improve in the beginning.
            agent1.serialize(h5py.File(agent_path, 'w'))
            agent2 = load_policy_agent(h5py.File(agent_path, 'r'))
            agent2.set_collector(collector2)
            # Point the white seat at the freshly reloaded agent.
            players[Player.white] = agent2
            # TODO: is it correct to clear experience?
            collector1.clear()
            collector2.clear()
    agent1.serialize(h5py.File(agent_path, 'w'))
    print('PG Selfplay complete. agent is in {}'.format(agent_path))
def __init__(self, termination_agent, termination=None):
    self.agent = termination_agent
    self.game_state = GameState.new_game(19)
    self.input = sys.stdin
    self.output = sys.stdout
    self.stopped = False
    # Map GTP command names to their handler methods.
    self.handlers = {
        'boardsize': self.handle_boardsize,
        'clear_board': self.handle_clear_board,
        'fixed_handicap': self.handle_fixed_handicap,
        'genmove': self.handle_genmove,
        'known_command': self.handle_known_command,
        'showboard': self.handle_showboard,
        'time_settings': self.handle_time_settings,
        'time_left': self.handle_time_left,
        'play': self.handle_play,
        'protocol_version': self.handle_protocol_version,
        'quit': self.handle_quit,
    }
def __init__(self, gobot, termination=None, handicap=0, opponent='gnugo',
             output_sgf='out.sgf', our_color='b'):
    self.bot = gobot
    self.handicap = handicap
    self.stopped = False
    self.game_state = GameState.new_game(19)
    self.sgf = SGFWriter(output_sgf)
    self.our_color = Player.black if our_color == 'b' else Player.white
    self.their_color = self.our_color.other
    cmd = self.opponent_cmd(opponent)
    pipe = subprocess.PIPE
    # Allow read/write access to the GTP stream through the command line.
    self.gtp_stream = subprocess.Popen(cmd, stdin=pipe, stdout=pipe)
def main():
    args = parse_args()
    agent_path = args.data_dir + '/agents/zero_' + args.model_size + '_' + args.encoder_name + '_' + str(args.board_size) + '.h5'
    print('Agent path: {}'.format(agent_path))
    agent1, collector1 = setup_agent(agent_path, args)
    agent2, collector2 = setup_agent(agent_path, args)
    players = {
        Player.black: agent1,
        Player.white: agent2,
    }
    for i in range(args.rounds):
        round_no = i + 1
        print('Begin round {} selfplay.'.format(round_no))
        collector1.begin_episode()
        collector2.begin_episode()
        game = GameState.new_game(args.board_size)
        while not game.is_over():
            move = players[game.nplayer].select_move(game)
            game = game.apply_move(move)
        winner = game.winner()
        if winner == Player.black:
            collector1.complete_episode(1.)
            collector2.complete_episode(-1.)
        else:
            collector1.complete_episode(-1.)
            collector2.complete_episode(1.)
        # Learn from the accumulated experience every 100 rounds.
        if round_no % 100 == 0:
            print('Begin round {} training'.format(round_no))
            exp = combine_experience([collector1, collector2])
            agent1.train(exp, args.learning_rate, batch_size=args.batchsize)
            print('Training complete.')
            # Not evaluating for improvement yet.
            agent1.serialize(h5py.File(agent_path, 'w'))
            agent2 = load_zero_agent(h5py.File(agent_path, 'r'))
            agent2.set_collector(collector2)
            # Point the white seat at the freshly reloaded agent.
            players[Player.white] = agent2
            # TODO: is it correct to clear experience?
            collector1.clear()
            collector2.clear()
    agent1.serialize(h5py.File(agent_path, 'w'))
    print('AlphaGo Zero Selfplay complete. agent is in {}'.format(agent_path))
def generate_game(board_size, rounds, max_moves, temperature):
    boards, moves = [], []
    encoder = get_encoder_by_name('oneplane', board_size)
    game = GameState.new_game(board_size)
    bot = MCTSAgent(100, 1., 1)
    num_moves = 0
    while not game.is_over():
        print_board(game.board)
        move = bot.select_move(game)
        if move.is_play:
            # Record the encoded board and a one-hot vector of the chosen point.
            boards.append(encoder.encode(game))
            move_one_hot = np.zeros(encoder.num_points())
            move_one_hot[encoder.encode_point(move.pt)] = 1
            moves.append(move_one_hot)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
        num_moves += 1
        if num_moves > max_moves:
            break
    return np.array(boards), np.array(moves)
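# A minimal usage sketch, assuming NumPy is available as np (as in generate_game):
# collect a few self-play games and save the encoded boards and one-hot move targets.
# The helper name and the file names features.npy / labels.npy are hypothetical.
def save_generated_games(board_size=9, num_games=5, max_moves=60):
    all_boards, all_moves = [], []
    for _ in range(num_games):
        boards, moves = generate_game(board_size, rounds=100, max_moves=max_moves, temperature=1.0)
        all_boards.append(boards)
        all_moves.append(moves)
    np.save('features.npy', np.concatenate(all_boards))
    np.save('labels.npy', np.concatenate(all_moves))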
def main():
    args = parse_args()
    board_size = args.board_size
    agent = load_agent(args)
    random = FastRandomAgent(args.board_size)
    if args.playas == 'black':
        random_play = Player.black
    elif args.playas == 'white':
        random_play = Player.white
    else:
        raise ValueError('Unknown option for playas: {}'.format(args.playas))
    win_count = 0
    for _ in range(1000):
        game = GameState.new_game(board_size)
        while not game.is_over():
            if game.nplayer == random_play:
                move = random.select_move(game)
            else:
                move = agent.select_move(game)
            game = game.apply_move(move)
        winner = game.winner()
        if winner is not None and winner != random_play:
            win_count += 1
    print('Model won: {}/1000'.format(win_count))
def handle_clear_board(self):
    self.game_state = GameState.new_game(19)
    return response.success()