def test_random_vs_random(self):
    """Plays one game between two uniform random bots over a NetworkGame.

    A game server is started on tcp://*:5555 and a NetworkGame client
    connects to it on localhost. The connection and the server are always
    released, even when the game itself raises.
    """
    server = self._start_game_server("tcp://*:5555")
    game = NetworkGame("tcp://localhost:5555")
    try:
        # Each bot gets its own player id (0 and 1), matching the order in
        # which they are handed to _play_one_game.
        # NOTE(review): assumes _play_one_game seats bots in argument order.
        bot1 = uniform_random.UniformRandomBot(0, np.random.RandomState())
        bot2 = uniform_random.UniformRandomBot(1, np.random.RandomState())
        self._play_one_game(game, bot1, bot2)
    finally:
        # Always close the client and wait for the server, so a failing game
        # does not skip cleanup and leave the server running.
        game.exit()
        server.join()
def remote_random_vs_random():
    """Measures games per second for two random bots playing over the network.

    Starts a game server on tcp://*:5555, connects a NetworkGame client to it
    and runs print_games_per_second with time_limit_s=3. The client and the
    server are released even if the benchmark raises.
    """
    print("remote_random_vs_random")
    server = start_game_server("tcp://*:5555")
    game = NetworkGame("tcp://localhost:5555")
    try:
        b1 = uniform_random.UniformRandomBot(0, np.random.RandomState())
        b2 = uniform_random.UniformRandomBot(1, np.random.RandomState())
        print_games_per_second(game, b1, b2, time_limit_s=3)
    finally:
        # Run cleanup unconditionally so an error in the benchmark does not
        # leave the server alive on port 5555.
        game.exit()
        server.join()
def _init_bot(bot_type, game, player_id):
    """Builds the bot selected by `bot_type`.

    Args:
        bot_type: one of "mcts", "az", "random", "human" or "gtp".
        game: the game object handed to the search-based and GTP bots.
        player_id: player index, used only by the random bot.

    Returns:
        The constructed bot.

    Raises:
        ValueError: if `bot_type` is not one of the supported names.
    """
    # One generator, seeded from the flags, shared by whatever gets built.
    random_state = np.random.RandomState(FLAGS.seed)

    if bot_type == "random":
        return uniform_random.UniformRandomBot(player_id, random_state)

    if bot_type == "human":
        return human.HumanBot()

    if bot_type == "mcts":
        rollout_evaluator = mcts.RandomRolloutEvaluator(
            FLAGS.rollout_count, random_state)
        return mcts.MCTSBot(
            game,
            FLAGS.uct_c,
            FLAGS.max_simulations,
            rollout_evaluator,
            random_state=random_state,
            solve=FLAGS.solve,
            verbose=FLAGS.verbose)

    if bot_type == "az":
        # AlphaZero: the evaluator wraps a checkpointed model and the search
        # uses PUCT child selection.
        checkpoint_model = az_model.Model.from_checkpoint(FLAGS.az_path)
        az_eval = az_evaluator.AlphaZeroEvaluator(game, checkpoint_model)
        return mcts.MCTSBot(
            game,
            FLAGS.uct_c,
            FLAGS.max_simulations,
            az_eval,
            random_state=random_state,
            child_selection_fn=mcts.SearchNode.puct_value,
            solve=FLAGS.solve,
            verbose=FLAGS.verbose)

    if bot_type == "gtp":
        gtp_bot = gtp.GTPBot(game, FLAGS.gtp_path)
        # Forward every configured command to the bot before handing it out.
        for command in FLAGS.gtp_cmd:
            gtp_bot.gtp_cmd(command)
        return gtp_bot

    raise ValueError("Invalid bot type: %s" % bot_type)
def main():
    """Plays one tic-tac-toe game: an MCTS bot (player 0) vs a random bot (player 1).

    Prints the state before every move, each chosen action, the final state
    and the outcome of the game.

    Raises:
        RuntimeError: if a chance node is encountered.
    """
    game = pyspiel.load_game("tic_tac_toe")
    state = game.new_initial_state()
    mcts_bot = new_mcts_bot(game)
    random_bot = uniform_random.UniformRandomBot(1, np.random.RandomState())
    # List position doubles as the player index used by the state.
    players = [mcts_bot, random_bot]
    player_labels = ['mcts', 'random']

    while not state.is_terminal():
        print('current state:')
        print(state)
        if state.is_chance_node():
            raise RuntimeError("didn't expect a chance node!")
        current_player_idx = state.current_player()
        current_player = players[current_player_idx]
        action = current_player.step(state)
        action_str = state.action_to_string(current_player_idx, action)
        print(
            f"Player {player_labels[current_player_idx]} action: {action_str}")
        state.apply_action(action)
        print()

    # Decide the outcome from player 0's return. Previously both branches
    # reported player_labels[0], so the random bot could never be named the
    # winner and draws were credited to the MCTS bot.
    # NOTE(review): treats a non-positive, non-negative return as a draw.
    first_player_return = state.returns()[0]
    if first_player_return > 0:
        winner = player_labels[0]
    elif first_player_return < 0:
        winner = player_labels[1]
    else:
        winner = 'draw'

    print('final state:')
    print(state)
    print(f'winner: {winner}')
def test_random_vs_stateful(self):
    """Smoke test: a stateful random bot against a Python random bot.

    Runs 1000 tic-tac-toe evaluations and only checks that none of them
    raises; no result is asserted.
    """
    game = pyspiel.load_game("tic_tac_toe")
    stateful_bot = pyspiel.make_stateful_random_bot(game, 0, 1234)
    python_bot = uniform_random.UniformRandomBot(
        1, np.random.RandomState(4321))
    bots = [stateful_bot, python_bot]

    episodes_left = 1000
    while episodes_left > 0:
        evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random)
        episodes_left -= 1
def main(argv):
    """Loads the game named by FLAGS.game_name and plays random bots in scenarios.

    One uniform random bot is created per player and the list is handed to
    scenarios.play_bot_in_scenarios.
    """
    del argv  # Unused.
    game = pyspiel.load_game(FLAGS.game_name)

    # TODO(author1): Add support for bots from neural networks.
    bots = []
    for player in range(game.num_players()):
        bots.append(uniform_random.UniformRandomBot(game, player, random))

    scenarios.play_bot_in_scenarios(game, bots)
def _init_bot(bot_type, game, player_id):
    """Creates the bot named by `bot_type` for `player_id` in `game`.

    Args:
        bot_type: one of "mcts", "random" or "human".
        game: the game object passed to every bot constructor.
        player_id: the player index the bot plays as.

    Returns:
        The constructed bot.

    Raises:
        ValueError: if `bot_type` is not a supported name.
    """
    if bot_type == "random":
        return uniform_random.UniformRandomBot(game, player_id, np.random)

    if bot_type == "human":
        return human.HumanBot(game, player_id)

    if bot_type == "mcts":
        rollout_evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count)
        return mcts.MCTSBot(
            game,
            player_id,
            FLAGS.uct_c,
            FLAGS.max_search_nodes,
            rollout_evaluator)

    raise ValueError("Invalid bot type: %s" % bot_type)
def local_random_vs_mcts():
    """Measures games per second for a random bot vs an MCTS bot, in-process.

    Uses a locally loaded tic_tac_toe game and runs print_games_per_second
    with time_limit_s=3.
    """
    print("local_random_vs_mcts")
    game = pyspiel.load_game("tic_tac_toe")
    random_player = uniform_random.UniformRandomBot(
        0, np.random.RandomState())

    # starts beating random bot at ~ 3 sims, 1 rollout
    rollout_evaluator = mcts.RandomRolloutEvaluator(n_rollouts=2)
    mcts_player = mcts.MCTSBot(
        game,
        uct_c=math.sqrt(2),
        max_simulations=3,
        evaluator=rollout_evaluator)

    print_games_per_second(game, random_player, mcts_player, time_limit_s=3)
def test_policy_is_uniform(self):
    """Checks that the random bot's policy is uniform over the legal actions."""
    game = pyspiel.load_game("leduc_poker")
    bots = [
        uniform_random.UniformRandomBot(game, player, random)
        for player in (0, 1)
    ]

    # Deal one card to each player.
    state = game.new_initial_state()
    for deal_action in (2, 4):
        state.apply_action(deal_action)

    # p0 starts: uniform from [check, bet]
    first_policy, _ = bots[0].step(state)
    self.assertCountEqual(first_policy, [(1, 0.5), (2, 0.5)])

    # After p0 bets, p1 chooses from [fold, call, raise]
    state.apply_action(2)
    second_policy, _ = bots[1].step(state)
    expected = [(0, 1 / 3), (1, 1 / 3), (2, 1 / 3)]
    self.assertCountEqual(second_policy, expected)
def test_python_and_cpp_bot(self):
    """Evaluates a pyspiel random bot against the Python random bot.

    Runs 10000 kuhn_poker evaluations, passing the iteration index as the
    third argument of pyspiel.evaluate_bots, and checks the mean returns are
    within 0.1 of [0.125, -0.125].
    """
    game = pyspiel.load_game("kuhn_poker")
    cpp_bot = pyspiel.make_uniform_random_bot(0, 1234)
    python_bot = uniform_random.UniformRandomBot(
        1, np.random.RandomState(4321))
    bots = [cpp_bot, python_bot]

    per_game_returns = []
    for iteration in range(10000):
        per_game_returns.append(
            pyspiel.evaluate_bots(game.new_initial_state(), bots, iteration))
    results = np.array(per_game_returns)

    average_results = results.mean(axis=0)
    np.testing.assert_allclose(average_results, [0.125, -0.125], atol=0.1)
def test_no_legal_actions(self):
    """Checks the bot's answer when asked to step after deal, deal, bet, fold.

    The bot must return an empty policy and pyspiel.INVALID_ACTION.
    """
    game = pyspiel.load_game("kuhn_poker")
    bot = uniform_random.UniformRandomBot(game, 0, random)

    state = game.new_initial_state()
    # Action sequence: deal, deal, bet, fold.
    for played_action in (2, 1, 1, 0):
        state.apply_action(played_action)

    bot.restart(state)
    returned_policy, returned_action = bot.step(state)

    self.assertEqual(returned_policy, [])
    self.assertEqual(returned_action, pyspiel.INVALID_ACTION)
def LoadAgent(agent_type, game, player_id, rng):
    """Return a bot based on the agent type.

    Args:
        agent_type: "random", "human", "check_call" or "fold".
        game: game object, used by the policy-based bots.
        player_id: player index the bot plays as.
        rng: random state handed to the uniform random bot.

    Raises:
        RuntimeError: if `agent_type` is not recognized.
    """
    if agent_type == "random":
        return uniform_random.UniformRandomBot(player_id, rng)

    if agent_type == "human":
        return human.HumanBot()

    # Both policy bots differ only in the action list given to
    # PreferredActionPolicy.
    preferred_actions = {
        "check_call": [1, 0],
        "fold": [0, 1],
    }
    if agent_type in preferred_actions:
        policy = pyspiel.PreferredActionPolicy(preferred_actions[agent_type])
        return pyspiel.make_policy_bot(game, player_id, FLAGS.seed, policy)

    raise RuntimeError("Unrecognized agent type: {}".format(agent_type))
def test_mcts_vs_random_game(self):
    """Plays one game of an MCTS bot against a random bot over a NetworkGame.

    A game server is started on tcp://*:5555 and a NetworkGame client
    connects to it on localhost. The connection and the server are always
    released, even when the game itself raises.
    """
    server = self._start_game_server("tcp://*:5555")
    game = NetworkGame("tcp://localhost:5555")
    try:
        mcts_bot = mcts.MCTSBot(
            game,
            uct_c=math.sqrt(2),
            max_simulations=2,
            evaluator=mcts.RandomRolloutEvaluator(n_rollouts=1))
        # The random bot is passed second, so it is given player id 1
        # (it was previously built with id 0).
        # NOTE(review): assumes _play_one_game seats bots in argument order.
        random_bot = uniform_random.UniformRandomBot(
            1, np.random.RandomState())
        self._play_one_game(game, mcts_bot, random_bot)
    finally:
        # Always close the client and wait for the server, so a failing game
        # does not skip cleanup and leave the server running.
        game.exit()
        server.join()
def main():
    """Runs one random-vs-random tic-tac-toe game against a local game server.

    A GameServer is started in a separate process on tcp://*:5555 and a
    NetworkGame client connects to it. Each turn prints the state, the
    current player and the action chosen by that player's bot. The client
    and the server process are released even if the game loop raises.
    """
    server = GameServer("tcp://*:5555", pyspiel.load_game("tic_tac_toe"))
    server_process = Process(target=server.run)
    server_process.start()

    bot1 = uniform_random.UniformRandomBot(0, np.random.RandomState())
    bot2 = uniform_random.UniformRandomBot(1, np.random.RandomState())
    # List position doubles as the player index reported by the state.
    players = [bot1, bot2]

    game = NetworkGame("tcp://localhost:5555")
    try:
        state = game.new_initial_state()
        while not state.is_terminal():
            print('state:')
            print(state)
            current_player = players[state.current_player()]
            print('current player', state.current_player())
            action = current_player.step(state)
            print('bot action:', action)
            state.apply_action(action)
    finally:
        # Run cleanup unconditionally; otherwise an exception in the loop
        # would leave the server process running.
        game.exit()
        server_process.join()
def random_vs_remote_mcts():
    """Measures games per second for a random bot vs an MCTS bot over the network.

    Starts a game server on tcp://*:5555, connects a NetworkGame client to it
    and runs print_games_per_second with time_limit_s=3. The client and the
    server are released even if the benchmark raises.
    """
    print("random_vs_remote_mcts")
    server = start_game_server("tcp://*:5555")
    game = NetworkGame("tcp://localhost:5555")
    try:
        random_bot = uniform_random.UniformRandomBot(
            0, np.random.RandomState())
        mcts_bot = mcts.MCTSBot(
            game,
            uct_c=math.sqrt(2),
            # starts beating random bot at ~ 3 sims, 1 rollout
            max_simulations=3,
            evaluator=mcts.RandomRolloutEvaluator(n_rollouts=2))
        print_games_per_second(game, random_bot, mcts_bot, time_limit_s=3)
    finally:
        # Run cleanup unconditionally so an error in the benchmark does not
        # leave the server alive on port 5555.
        game.exit()
        server.join()
def main(unused_argv):
    """Plays FLAGS.game between a human bot and a uniform random bot.

    The human takes seat FLAGS.human_player and the random bot the other
    seat. Chance nodes are sampled with np.random.choice according to the
    outcome probabilities. The state is printed after every step and each
    player's return is printed once the game is over.

    Raises:
        ValueError: if the game reaches a simultaneous node.
    """
    game = pyspiel.load_game(FLAGS.game)
    state = game.new_initial_state()
    print("Initial state: ")
    print(str(state))

    # One human, one random opponent in the remaining seat.
    human_bot = human.HumanBot(game, FLAGS.human_player)
    random_bot = uniform_random.UniformRandomBot(
        game, 1 - FLAGS.human_player, np.random)
    bots = ([human_bot, random_bot]
            if FLAGS.human_player == 0 else [random_bot, human_bot])

    while not state.is_terminal():
        # A state is a chance node, a simultaneous node or a decision node.
        if state.is_chance_node():
            # Chance node: draw an outcome according to its probability.
            outcomes = state.chance_outcomes()
            print("Chance node, got " + str(len(outcomes)) + " outcomes")
            action_list, prob_list = zip(*outcomes)
            action = np.random.choice(action_list, p=prob_list)
            print("Sampled outcome: ",
                  state.action_to_string(state.current_player(), action))
            state.apply_action(action)
        elif state.is_simultaneous_node():
            raise ValueError("Game cannot have simultaneous nodes.")
        else:
            # Decision node: ask the bot seated at the current player.
            mover = state.current_player()
            _, action = bots[mover].step(state)
            print("Player ", mover, ", chose action: ",
                  state.action_to_string(mover, action))
            state.apply_action(action)

        print(str(state))

    # Game over: report every player's return.
    returns = state.returns()
    for pid in range(game.num_players()):
        print("Return for player {} is {}".format(pid, returns[pid]))
def _init_bot(bot_type, game, player_id):
    """Builds the bot selected by `bot_type`.

    Args:
        bot_type: one of "mcts", "random" or "human".
        game: the game object handed to the MCTS bot.
        player_id: player index, used only by the random bot.

    Returns:
        The constructed bot.

    Raises:
        ValueError: if `bot_type` is not a supported name.
    """
    # One generator, seeded from the flags, shared by whatever gets built.
    random_state = np.random.RandomState(FLAGS.seed)

    if bot_type == "random":
        return uniform_random.UniformRandomBot(player_id, random_state)

    if bot_type == "human":
        return human.HumanBot()

    if bot_type == "mcts":
        rollout_evaluator = mcts.RandomRolloutEvaluator(
            FLAGS.rollout_count, random_state)
        return mcts.MCTSBot(
            game,
            FLAGS.uct_c,
            FLAGS.max_simulations,
            rollout_evaluator,
            random_state=random_state,
            solve=FLAGS.solve,
            verbose=FLAGS.verbose)

    raise ValueError("Invalid bot type: %s" % bot_type)
def mcts_incremental_vs_rando():
    """Plays MCTS against a random bot over a grid of search settings.

    Simulations run from 2 to 10 and rollouts from 1 to 10; one game is
    played per combination, printing the elapsed time and the result from
    the MCTS bot's point of view. Takes ages!
    """
    for num_sims in range(2, 11):
        for num_rollouts in range(1, 11):
            start = datetime.now()
            print(f'sims: {num_sims}, rollouts: {num_rollouts}')

            evaluator = mcts.RandomRolloutEvaluator(n_rollouts=num_rollouts)
            mcts_player = new_mcts_bot(game, num_sims, evaluator)
            random_player = uniform_random.UniformRandomBot(
                1, np.random.RandomState())
            state = play_one_game([mcts_player, random_player])

            # The MCTS bot sits at index 0; a None winner index is a draw.
            is_draw = winner_idx(state) is None
            mcts_won = winner_idx(state) == 0
            if is_draw:
                result = 'draw'
            elif mcts_won:
                result = 'mcts: win'
            else:
                result = 'mcts: lose'

            print(f'game over in {datetime.now() - start}, result: {result}',
                  flush=True)
class EvaluateBotsTest(parameterized.TestCase):
    """Tests for evaluate_bots.evaluate_bots with mixed bot implementations."""

    @parameterized.parameters([
        (
            [
                pyspiel.make_uniform_random_bot(0, 1234),
                uniform_random.UniformRandomBot(
                    1, np.random.RandomState(4321)),
            ],
        ),
        (policy_bots(),),
    ])
    def test_cpp_vs_python(self, bots):
        """Mean returns over 10000 games on GAME are near [0.125, -0.125]."""
        per_game_returns = []
        for _ in range(10000):
            per_game_returns.append(
                evaluate_bots.evaluate_bots(
                    GAME.new_initial_state(), bots, np.random))
        results = np.array(per_game_returns)

        average_results = results.mean(axis=0)
        np.testing.assert_allclose(
            average_results, [0.125, -0.125], atol=0.1)

    def test_random_vs_stateful(self):
        """Smoke test: 1000 tic-tac-toe evaluations must run without raising."""
        game = pyspiel.load_game("tic_tac_toe")
        stateful_bot = pyspiel.make_stateful_random_bot(game, 0, 1234)
        python_bot = uniform_random.UniformRandomBot(
            1, np.random.RandomState(4321))
        bots = [stateful_bot, python_bot]

        episodes_left = 1000
        while episodes_left > 0:
            evaluate_bots.evaluate_bots(
                game.new_initial_state(), bots, np.random)
            episodes_left -= 1
def mcts_vs_random():
    """Plays one game of MCTS (2 simulations, 1 rollout) vs a random bot and prints the results."""
    rollout_evaluator = mcts.RandomRolloutEvaluator(n_rollouts=1)
    mcts_player = new_mcts_bot(game, 2, rollout_evaluator)
    random_player = uniform_random.UniformRandomBot(
        1, np.random.RandomState())
    play_one_game_and_print_results([mcts_player, random_player])
def mcts_andoma_vs_random():
    """Plays one game of MCTS with the Andoma-values evaluator vs a random bot and prints the results."""
    andoma_evaluator = AndomaValuesRolloutEvaluator()
    mcts_player = new_mcts_bot(game, 2, andoma_evaluator)
    random_player = uniform_random.UniformRandomBot(
        1, np.random.RandomState())
    play_one_game_and_print_results([mcts_player, random_player])
def andoma_vs_random():
    """Plays one game of the Andoma bot (search_depth=1) vs a random bot and prints the results."""
    andoma_player = andoma.AndomaBot(search_depth=1)
    random_player = uniform_random.UniformRandomBot(
        1, np.random.RandomState())
    play_one_game_and_print_results([andoma_player, random_player])
def local_random_vs_random():
    """Measures games per second for two random bots playing in-process.

    Uses a locally loaded tic_tac_toe game and runs print_games_per_second
    with time_limit_s=3.
    """
    print("local_random_vs_random")
    game = pyspiel.load_game("tic_tac_toe")
    first_bot, second_bot = (
        uniform_random.UniformRandomBot(seat, np.random.RandomState())
        for seat in (0, 1)
    )
    print_games_per_second(game, first_bot, second_bot, time_limit_s=3)