def _init_bot(bot_type, game, player_id): """Initializes a bot by type.""" rng = np.random.RandomState(FLAGS.seed) if bot_type == "mcts": evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count, rng) return mcts.MCTSBot( game, FLAGS.uct_c, FLAGS.max_simulations, evaluator, random_state=rng, solve=FLAGS.solve, verbose=FLAGS.verbose) if bot_type == "az": model = az_model.Model.from_checkpoint(FLAGS.az_path) evaluator = az_evaluator.AlphaZeroEvaluator(game, model) return mcts.MCTSBot( game, FLAGS.uct_c, FLAGS.max_simulations, evaluator, random_state=rng, child_selection_fn=mcts.SearchNode.puct_value, solve=FLAGS.solve, verbose=FLAGS.verbose) if bot_type == "random": return uniform_random.UniformRandomBot(player_id, rng) if bot_type == "human": return human.HumanBot() if bot_type == "gtp": bot = gtp.GTPBot(game, FLAGS.gtp_path) for cmd in FLAGS.gtp_cmd: bot.gtp_cmd(cmd) return bot raise ValueError("Invalid bot type: %s" % bot_type)
def test_can_play_tic_tac_toe(self): game = pyspiel.load_game("tic_tac_toe") max_simulations = 100 evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20) bots = [ mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), ] v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) self.assertEqual(v[0] + v[1], 0)
def test_can_play_three_player_stochastic_games(self):
  game = pyspiel.load_game("pig(players=3,winscore=20,horizon=30)")
  max_simulations = 100
  evaluator = mcts.RandomRolloutEvaluator(n_rollouts=5)
  bots = [
      mcts.MCTSBot(game, UCT_C, max_simulations, evaluator),
      mcts.MCTSBot(game, UCT_C, max_simulations, evaluator),
      mcts.MCTSBot(game, UCT_C, max_simulations, evaluator),
  ]
  v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random)
  self.assertEqual(sum(v), 0)
def test_can_play_tic_tac_toe(self): game = pyspiel.load_game("tic_tac_toe") uct_c = math.sqrt(2) max_search_nodes = 100 evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20) bots = [ mcts.MCTSBot(game, 0, uct_c, max_search_nodes, evaluator), mcts.MCTSBot(game, 1, uct_c, max_search_nodes, evaluator), ] v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) self.assertEqual(v[0] + v[1], 0)
def test_can_play_three_player_game(self):
  game = pyspiel.load_game("pig(players=3,winscore=20,horizon=30)")
  uct_c = math.sqrt(2)
  max_search_nodes = 100
  evaluator = mcts.RandomRolloutEvaluator(n_rollouts=5)
  bots = [
      mcts.MCTSBot(game, 0, uct_c, max_search_nodes, evaluator),
      mcts.MCTSBot(game, 1, uct_c, max_search_nodes, evaluator),
      mcts.MCTSBot(game, 2, uct_c, max_search_nodes, evaluator),
  ]
  v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random)
  self.assertEqual(sum(v), 0)
def test_zero_vs_mcts(policy_fn, max_search_nodes, game_name, **kwargs):
  game = pyspiel.load_game(game_name)
  # AlphaZero first
  zero_bot = AlphaZeroBot(
      game, 0, policy_fn=policy_fn, use_dirichlet=False, **kwargs)
  mcts_bot = mcts.MCTSBot(game, 1, 1, max_search_nodes,
                          mcts.RandomRolloutEvaluator(1))
  score1 = play_game(game, zero_bot, mcts_bot)
  # MCTS bot first
  zero_bot = AlphaZeroBot(
      game, 1, policy_fn=policy_fn, use_dirichlet=False, **kwargs)
  mcts_bot = mcts.MCTSBot(game, 0, 1, max_search_nodes,
                          mcts.RandomRolloutEvaluator(1))
  score2 = -play_game(game, mcts_bot, zero_bot)
  return score1, score2, None
def test_net_vs_mcts(policy_fn, max_search_nodes, game_name, **kwargs):
  game = pyspiel.load_game(game_name)
  # AlphaZero first
  zero_bot = NeuralNetBot(game, 0, policy_fn)
  mcts_bot = mcts.MCTSBot(game, 1, 1, max_search_nodes,
                          mcts.RandomRolloutEvaluator(1))
  score1 = play_game(game, zero_bot, mcts_bot)
  # MCTS bot first
  zero_bot = NeuralNetBot(game, 1, policy_fn)
  mcts_bot = mcts.MCTSBot(game, 0, 1, max_search_nodes,
                          mcts.RandomRolloutEvaluator(1))
  score2 = -play_game(game, mcts_bot, zero_bot)
  return score1, score2, None
def test_can_play_single_player(self):
  game = pyspiel.load_game("catch")
  max_simulations = 100
  evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20)
  bots = [mcts.MCTSBot(game, UCT_C, max_simulations, evaluator)]
  v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random)
  self.assertGreater(v[0], 0)
def new_mcts_bot(game, max_sims, evaluator):
  if max_sims < 2:
    raise RuntimeError(
        'max_sims must be > 1 ... I think the implementation is broken')
  return mcts.MCTSBot(
      game,
      uct_c=math.sqrt(2),
      max_simulations=max_sims,
      evaluator=evaluator)
def test_works_with_mcts(self):
  game = pyspiel.load_game("tic_tac_toe")
  model = build_model(game)
  evaluator = evaluator_lib.AlphaZeroEvaluator(game, model)
  bot = mcts.MCTSBot(
      game, 1., 20, evaluator, solve=False, dirichlet_noise=(0.25, 1.))
  root = bot.mcts_search(game.new_initial_state())
  self.assertEqual(root.explore_count, 20)
def test_throws_on_simultaneous_game(self):
  game = pyspiel.load_game("matrix_mp")
  evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20)
  with self.assertRaises(ValueError):
    mcts.MCTSBot(game, UCT_C, max_simulations=100, evaluator=evaluator)
def main(_): game = pyspiel.load_game("tic_tac_toe") # 1. Define a model model = model_lib.Model( FLAGS.nn_model, game.observation_tensor_shape(), game.num_distinct_actions(), nn_width=FLAGS.nn_width, nn_depth=FLAGS.nn_depth, weight_decay=1e-4, learning_rate=0.01, path=None) print("Model type: {}({}, {}), size: {} variables".format( FLAGS.nn_model, FLAGS.nn_width, FLAGS.nn_depth, model.num_trainable_variables)) # 2. Create an MCTS bot using the model evaluator = evaluator_lib.AlphaZeroEvaluator(game, model) bot = mcts.MCTSBot(game, 1., 20, evaluator, solve=False, dirichlet_noise=(0.25, 1.)) # 3. Build an AlphaZero instance a0 = alpha_zero.AlphaZero(game, bot, model, replay_buffer_capacity=FLAGS.replay_buffer_capacity, action_selection_transition=4) # 4. Create a bot using min-max search. It can never lose tic-tac-toe, so # a success condition for our AlphaZero bot is to draw all games with it. minimax_bot = MinimaxBot(game) # 5. Run training loop for num_round in range(FLAGS.num_rounds): logging.info("------------- Starting round %s out of %s -------------", num_round, FLAGS.num_rounds) if num_round % FLAGS.evaluation_frequency == 0: num_evaluations = 50 logging.info("Playing %s games against the minimax player.", num_evaluations) (_, losses, draws) = bot_evaluation(game, [minimax_bot, a0.bot], num_evaluations=50) logging.info("Result against Minimax player: %s losses and %s draws.", losses, draws) logging.info("Running %s games of self play", FLAGS.num_self_play_games) a0.self_play(num_self_play_games=FLAGS.num_self_play_games) logging.info("Training the net for %s epochs.", FLAGS.num_training_epochs) a0.update(FLAGS.num_training_epochs, batch_size=FLAGS.batch_size, verbose=True) logging.info("Cache: %s", evaluator.cache_info()) evaluator.clear_cache()
def main(unused_argv):
  uct_c = 2

  game = pyspiel.load_game(FLAGS.game)
  state = game.new_initial_state()
  print("Initial state: ", str(state))

  # Check that the game satisfies the conditions for the implemented MCTS
  # algorithm.
  if game.num_players() not in (1, 2):
    raise ValueError("Game must be a 1-player game or 2-player zero-sum game")
  if (game.num_players() == 2 and
      game.get_type().utility != pyspiel.GameType.Utility.ZERO_SUM):
    raise ValueError("Game must be a 1-player game or 2-player zero-sum game")

  # Create MCTS bot
  evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count)
  mcts_bot = mcts.MCTSBot(game, FLAGS.mcts_player, uct_c,
                          FLAGS.max_search_nodes, evaluator)

  # Create random bot
  random_bot = pyspiel.make_uniform_random_bot(game, 1 - FLAGS.mcts_player,
                                               123)

  if FLAGS.mcts_player == 0:
    bots = [mcts_bot, random_bot]
  else:
    bots = [random_bot, mcts_bot]

  while not state.is_terminal():
    # The state can be three different types: chance node,
    # simultaneous node, or decision node
    if state.is_chance_node():
      # Chance node: sample an outcome
      outcomes = state.chance_outcomes()
      num_actions = len(outcomes)
      print("Chance node, got " + str(num_actions) + " outcomes")
      action_list, prob_list = zip(*outcomes)
      action = np.random.choice(action_list, p=prob_list)
      print("Sampled outcome: ",
            state.action_to_string(state.current_player(), action))
      state.apply_action(action)
    elif state.is_simultaneous_node():
      raise ValueError("Game cannot have simultaneous nodes.")
    else:
      # Decision node: let the current player's bot choose an action
      _, action = bots[state.current_player()].step(state)
      print("Player ", state.current_player(), ", chose action: ",
            state.action_to_string(state.current_player(), action))
      state.apply_action(action)

    print("Next state: ", str(state))

  # Game is now done. Print return for each player
  returns = state.returns()
  for pid in range(game.num_players()):
    print("Return for player {} is {}".format(pid, returns[pid]))
def _init_bot(bot_type, game, player_id): """Initializes a bot by type.""" if bot_type == "mcts": evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count) return mcts.MCTSBot(game, player_id, FLAGS.uct_c, FLAGS.max_search_nodes, evaluator) if bot_type == "random": return uniform_random.UniformRandomBot(game, player_id, np.random) if bot_type == "human": return human.HumanBot(game, player_id) raise ValueError("Invalid bot type: %s" % bot_type)
def local_random_vs_mcts():
  print("local_random_vs_mcts")
  game = pyspiel.load_game("tic_tac_toe")
  b1 = uniform_random.UniformRandomBot(0, np.random.RandomState())
  b2 = mcts.MCTSBot(
      game,
      uct_c=math.sqrt(2),
      # starts beating the random bot at ~3 sims, 1 rollout
      max_simulations=3,
      evaluator=mcts.RandomRolloutEvaluator(n_rollouts=2))
  print_games_per_second(game, b1, b2, time_limit_s=3)
def _init_bot(config, game, evaluator_, evaluation):
  """Initializes a bot."""
  noise = None if evaluation else (config.policy_epsilon, config.policy_alpha)
  return mcts.MCTSBot(
      game,
      config.uct_c,
      config.max_simulations,
      evaluator_,
      solve=False,
      dirichlet_noise=noise,
      child_selection_fn=mcts.SearchNode.puct_value,
      verbose=False)
def __init__(self, rollout_count=5, max_simulations=4000):
  rng = np.random.RandomState(None)
  evaluator = mcts.RandomRolloutEvaluator(rollout_count, rng)
  self.game = pyspiel.load_game("geodesic_y")
  self.state = self.game.new_initial_state()
  self.agent = mcts.MCTSBot(
      self.game,
      2,
      max_simulations,
      evaluator,
      random_state=rng,
      solve=True,
      verbose=False)
def search_tic_tac_toe_state(initial_actions): game = pyspiel.load_game("tic_tac_toe") state = game.new_initial_state() for action_str in initial_actions.split(" "): state.apply_action(_get_action(state, action_str)) bot = mcts.MCTSBot(game, player=state.current_player(), uct_c=math.sqrt(2), max_simulations=10000, solve=True, evaluator=mcts.RandomRolloutEvaluator(n_rollouts=20)) return bot.mcts_search(state), state
def search_tic_tac_toe_state(initial_actions): game = pyspiel.load_game("tic_tac_toe") state = game.new_initial_state() for action_str in initial_actions.split(" "): state.apply_action(_get_action(state, action_str)) rng = np.random.RandomState(42) bot = mcts.MCTSBot(game, UCT_C, max_simulations=10000, solve=True, random_state=rng, evaluator=mcts.RandomRolloutEvaluator(n_rollouts=20, random_state=rng)) return bot.mcts_search(state), state
def test_mcts_vs_random_game(self):
  server = self._start_game_server("tcp://*:5555")
  game = NetworkGame("tcp://localhost:5555")
  mcts_bot = mcts.MCTSBot(
      game,
      uct_c=math.sqrt(2),
      max_simulations=2,
      evaluator=mcts.RandomRolloutEvaluator(n_rollouts=1))
  random_bot = uniform_random.UniformRandomBot(0, np.random.RandomState())
  self._play_one_game(game, mcts_bot, random_bot)
  game.exit()
  server.join()
def random_vs_remote_mcts():
  print("random_vs_remote_mcts")
  server = start_game_server("tcp://*:5555")
  game = NetworkGame("tcp://localhost:5555")
  random_bot = uniform_random.UniformRandomBot(0, np.random.RandomState())
  mcts_bot = mcts.MCTSBot(
      game,
      uct_c=math.sqrt(2),
      # starts beating the random bot at ~3 sims, 1 rollout
      max_simulations=3,
      evaluator=mcts.RandomRolloutEvaluator(n_rollouts=2))
  print_games_per_second(game, random_bot, mcts_bot, time_limit_s=3)
  game.exit()
  server.join()
def main(): game = NetworkGame("tcp://localhost:5555") # bot = uniform_random.UniformRandomBot(0, np.random.RandomState()) bot = mcts.MCTSBot(game, uct_c=math.sqrt(2), max_simulations=3, evaluator=mcts.RandomRolloutEvaluator(n_rollouts=2)) state = game.new_initial_state() while not state.is_terminal(): action = bot.step(state) print('bot action:', action) state.apply_action(action) game.exit() print("done") print(state)
def evaluator(*, game, config, logger, checkpoint, queue):
  """A process that plays the latest checkpoint vs standard MCTS."""
  results = Buffer(config.evaluation_window)
  logger.print("Initializing model")
  # Load a fresh model if there's no checkpoint, otherwise load the checkpoint.
  if checkpoint is None:
    model = _init_model_from_config(config)
  else:
    model = _init_model_from_checkpoint(checkpoint, config.path)
  logger.print("Initializing bots")
  az_evaluator = evaluator_lib.AlphaZeroEvaluator(game, model)
  random_evaluator = mcts.RandomRolloutEvaluator()

  for game_num in itertools.count():
    if not update_checkpoint(logger, queue, model, az_evaluator):
      return

    az_player = game_num % 2
    difficulty = (game_num // 2) % config.eval_levels
    max_simulations = int(config.max_simulations * (10**(difficulty / 2)))
    bots = [
        _init_bot(config, game, az_evaluator, True),
        mcts.MCTSBot(
            game,
            config.uct_c,
            max_simulations,
            random_evaluator,
            solve=True,
            verbose=False)
    ]
    if az_player == 1:
      bots = list(reversed(bots))

    trajectory = _play_game(logger, game_num, game, bots, temperature=1,
                            temperature_drop=0)
    results.append(trajectory.returns[az_player])
    queue.put((difficulty, trajectory.returns[az_player]))

    logger.print("AZ: {}, MCTS: {}, AZ avg/{}: {:.3f}".format(
        trajectory.returns[az_player],
        trajectory.returns[1 - az_player],
        len(results), np.mean(results.data)))
def _init_bot(bot_type, game, player_id): """Initializes a bot by type.""" rng = np.random.RandomState(FLAGS.seed) if bot_type == "mcts": evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count, rng) return mcts.MCTSBot( game, FLAGS.uct_c, FLAGS.max_simulations, evaluator, random_state=rng, solve=FLAGS.solve, verbose=FLAGS.verbose) if bot_type == "random": return uniform_random.UniformRandomBot(player_id, rng) if bot_type == "human": return human.HumanBot() raise ValueError("Invalid bot type: %s" % bot_type)
def evaluator(*, game, config, logger, num, queue):
  """A process that plays the latest checkpoint vs standard MCTS."""
  max_simulations = config.max_simulations * (3**num)
  logger.print("Running MCTS with", max_simulations, "simulations")
  results = Buffer(config.evaluation_window)
  logger.print("Initializing model")
  model = _init_model_from_config(config)
  logger.print("Initializing bots")
  az_evaluator = evaluator_lib.AlphaZeroEvaluator(game, model)
  random_evaluator = mcts.RandomRolloutEvaluator()
  az_player = 0
  bots = [
      _init_bot(config, game, az_evaluator, True),
      mcts.MCTSBot(
          game,
          config.uct_c,
          max_simulations,
          random_evaluator,
          solve=True,
          verbose=False)
  ]

  for game_num in itertools.count():
    if not update_checkpoint(logger, queue, model, az_evaluator):
      return

    trajectory = _play_game(logger, game_num, game, bots, temperature=1,
                            temperature_drop=0)
    results.append(trajectory.returns[az_player])
    logger.print("AZ: {}, MCTS: {}, AZ avg/{}: {:.3f}".format(
        trajectory.returns[az_player],
        trajectory.returns[1 - az_player],
        len(results), np.mean(results.data)))

    # Swap players for the next game
    bots = list(reversed(bots))
    az_player = 1 - az_player
def main(_): game = pyspiel.load_game("tic_tac_toe") num_actions = game.num_distinct_actions() observation_shape = game.observation_tensor_shape() # 1. Define a keras net if FLAGS.net_type == "resnet": net = alpha_zero.keras_resnet( observation_shape, num_actions, num_residual_blocks=1, num_filters=256, data_format="channels_first") elif FLAGS.net_type == "mlp": net = alpha_zero.keras_mlp( observation_shape, num_actions, num_layers=2, num_hidden=64) else: raise ValueError(("Invalid value for 'net_type'. Must be either 'mlp' or " "'resnet', but was %s") % FLAGS.net_type) model = alpha_zero.Model( net, l2_regularization=1e-4, learning_rate=0.01, device=FLAGS.device) # 2. Create an MCTS bot using the previous keras net evaluator = alpha_zero.AlphaZeroKerasEvaluator(game, model) bot = mcts.MCTSBot(game, 1., 20, evaluator, solve=False, dirichlet_noise=(0.25, 1.)) # 3. Build an AlphaZero instance a0 = alpha_zero.AlphaZero(game, bot, model, replay_buffer_capacity=FLAGS.replay_buffer_capacity, action_selection_transition=4) # 4. Create a bot using min-max search. It can never lose tic-tac-toe, so # a success condition for our AlphaZero bot is to draw all games with it. minimax_bot = MinimaxBot(game) # 5. Run training loop for num_round in range(FLAGS.num_rounds): logging.info("------------- Starting round %s out of %s -------------", num_round, FLAGS.num_rounds) if num_round % FLAGS.evaluation_frequency == 0: num_evaluations = 50 logging.info("Playing %s games against the minimax player.", num_evaluations) (_, losses, draws) = bot_evaluation(game, [minimax_bot, a0.bot], num_evaluations=50) logging.info("Result against Minimax player: %s losses and %s draws.", losses, draws) logging.info("Running %s games of self play", FLAGS.num_self_play_games) a0.self_play(num_self_play_games=FLAGS.num_self_play_games) logging.info("Training the net for %s epochs.", FLAGS.num_training_epochs) a0.update(FLAGS.num_training_epochs, batch_size=FLAGS.batch_size, verbose=True) evaluator.value_and_prior.cache_clear()
def main(_): game = pyspiel.load_game("tic_tac_toe") num_actions = game.num_distinct_actions() observation_shape = game.observation_tensor_shape() # 1. Define a keras net if FLAGS.net_type == "resnet": feature_extractor = None net = alpha_zero.keras_resnet(observation_shape, num_actions=num_actions, num_residual_blocks=1, num_filters=256, data_format="channels_first") elif FLAGS.net_type == "mlp": # The full length-27 observation_tensor could be trained on. But this # demonstrates the use of a custom feature extractor, and the training # can be faster with this smaller feature representation. feature_extractor = mlp_feature_extractor net = alpha_zero.keras_mlp(9, num_actions, num_layers=2, num_hidden=64) else: raise ValueError( ("Invalid value for 'net_type'. Must be either 'mlp' or " "'resnet', but was %s") % FLAGS.net_type) # 2. Create an MCTS bot using the previous keras net evaluator = alpha_zero.AlphaZeroKerasEvaluator( net, optimizer=tf.train.AdamOptimizer(learning_rate=0.01), device=FLAGS.device, feature_extractor=feature_extractor) bot = mcts.MCTSBot(game, 1., 20, evaluator, solve=False, dirichlet_noise=(0.25, 1.)) # 3. Build an AlphaZero instance a0 = alpha_zero.AlphaZero( game, bot, replay_buffer_capacity=FLAGS.replay_buffer_capacity, action_selection_transition=4) # 4. Create a bot using min-max search. It can never lose tic-tac-toe, so # a success condition for our AlphaZero bot is to draw all games with it. minimax_bot = MinimaxBot(game) # 5. Run training loop for num_round in range(FLAGS.num_rounds): logging.info("------------- Starting round %s out of %s -------------", num_round, FLAGS.num_rounds) if num_round % FLAGS.evaluation_frequency == 0: num_evaluations = 50 logging.info("Playing %s games against the minimax player.", num_evaluations) (_, losses, draws) = bot_evaluation(game, [minimax_bot, a0.bot], num_evaluations=50) logging.info( "Result against Minimax player: %s losses and %s draws.", losses, draws) logging.info("Running %s games of self play", FLAGS.num_self_play_games) a0.self_play(num_self_play_games=FLAGS.num_self_play_games) logging.info("Training the net for %s epochs.", FLAGS.num_training_epochs) a0.update(FLAGS.num_training_epochs, batch_size=FLAGS.batch_size, verbose=True)
def __init__(self, idx, max_simulations=500):
  super().__init__(idx)
  game = pyspiel.load_game("quoridor")
  self.evaluator = RandomRolloutEvaluator(2)
  self.bot = mcts.MCTSBot(game, UCT_C, max_simulations, self.evaluator)