from absl import app
from absl import flags

import haiku as hk
import jax
from jax import numpy as jnp
from jax.experimental import optix
import numpy as np

import pyspiel

# Opaque aliases for optimizer state and network parameters.
# NOTE(review): `Any` is not imported in the visible import block — confirm
# that `from typing import Any` exists, otherwise these lines raise NameError.
OptState = Any
Params = Any

FLAGS = flags.FLAGS
# The game object is loaded once, at import time.
GAME = pyspiel.load_game('hearts')
NUM_CARDS = 52
# The action count is defined to be equal to the card count.
NUM_ACTIONS = NUM_CARDS
NUM_PLAYERS = 4
TOP_K_ACTIONS = 5  # How many alternative actions to display
DEFAULT_LAYER_SIZES = [1024, 1024, 1024, 1024]

# Command-line flags; each call registers a flag name, its default value and
# its help string.
flags.DEFINE_integer('iterations', 100000, 'Number of iterations')
flags.DEFINE_string('data_path', None, 'Location for data')
flags.DEFINE_integer('eval_every', 10000, 'How often to evaluate the policy')
flags.DEFINE_integer('num_examples', 3,
                     'How many examples to print per evaluation')
flags.DEFINE_integer('train_batch', 128, 'Batch size for training step')
flags.DEFINE_integer('eval_batch', 10000, 'Batch size when evaluating')
flags.DEFINE_float('step_size', 1e-4, 'Step size for training')
flags.DEFINE_list('hidden_layer_sizes', None,
Example #2
0
 def test_child_function_failure_behavior_for_sim_game(self):
     """Checks that `policy.child` asserts on a simultaneous-game state."""
     sim_game = pyspiel.load_game("python_iterated_prisoners_dilemma")
     root = sim_game.new_initial_state()
     # Callable form of assertRaises: invokes policy.child(root, 0) and
     # expects it to raise AssertionError.
     self.assertRaises(AssertionError, policy.child, root, 0)
Example #3
0
 def test_tic_tac_toe(self):
   """Checks basic properties of the tic-tac-toe initial state."""
   tic_tac_toe = pyspiel.load_game("tic_tac_toe")
   opening = tic_tac_toe.new_initial_state()
   for predicate in (opening.is_chance_node, opening.is_terminal):
     self.assertFalse(predicate())
   # Actions 0 through 8 are all expected to be legal at the start.
   self.assertEqual(opening.legal_actions(), list(range(9)))
Example #4
0
  def test_evaluator_caching(self):
    """Checks the hit/miss accounting and staleness of the evaluator cache.

    The test evaluates the same state repeatedly, trains the model, and
    verifies that cached results are returned until `clear_cache` is called.
    """
    game = pyspiel.load_game("tic_tac_toe")
    model = build_model(game)
    evaluator = evaluator_lib.AlphaZeroEvaluator(game, model)

    # Build a single training example from the initial state: a one-hot policy
    # on the first legal action with target value 1.
    state = game.new_initial_state()
    obs = state.observation_tensor()
    act_mask = state.legal_actions_mask()
    action = state.legal_actions()[0]
    policy = np.zeros(len(act_mask), dtype=float)
    policy[action] = 1
    train_inputs = [model_lib.TrainInput(obs, act_mask, policy, value=1)]

    # The two entries of the returned value are expected to be negatives of
    # each other.
    value = evaluator.evaluate(state)
    self.assertEqual(value[0], -value[1])
    value = value[0]

    value2 = evaluator.evaluate(state)[0]
    self.assertEqual(value, value2)

    prior = evaluator.prior(state)
    prior2 = evaluator.prior(state)
    np.testing.assert_array_equal(prior, prior2)

    # Four lookups so far (two evaluate, two prior) on the same state: the test
    # expects one miss followed by three hits.
    info = evaluator.cache_info()
    self.assertEqual(info.misses, 1)
    self.assertEqual(info.hits, 3)

    # Train the model so that a fresh (uncached) inference would differ.
    for _ in range(20):
      model.update(train_inputs)

    # Still equal due to not clearing the cache
    value3 = evaluator.evaluate(state)[0]
    self.assertEqual(value, value3)

    info = evaluator.cache_info()
    self.assertEqual(info.misses, 1)
    self.assertEqual(info.hits, 4)

    evaluator.clear_cache()

    # Clearing the cache is expected to reset the counters as well.
    info = evaluator.cache_info()
    self.assertEqual(info.misses, 0)
    self.assertEqual(info.hits, 0)

    # Now they differ from before
    value4 = evaluator.evaluate(state)[0]
    value5 = evaluator.evaluate(state)[0]
    self.assertNotEqual(value, value4)
    self.assertEqual(value4, value5)

    info = evaluator.cache_info()
    self.assertEqual(info.misses, 1)
    self.assertEqual(info.hits, 1)

    # A newly created initial state is expected to hit the same cache entry
    # (hits goes up, misses does not).
    value6 = evaluator.evaluate(game.new_initial_state())[0]
    self.assertEqual(value4, value6)

    info = evaluator.cache_info()
    self.assertEqual(info.misses, 1)
    self.assertEqual(info.hits, 2)
Example #5
0
 def setUpClass(cls):
     """Loads tic-tac-toe once and stores it with a tabular policy on the class."""
     super(TabularTicTacToePolicyTest, cls).setUpClass()
     tic_tac_toe = pyspiel.load_game("tic_tac_toe")
     cls.game = tic_tac_toe
     cls.tabular_policy = policy.TabularPolicy(tic_tac_toe)
Example #6
0
 def test_breakthrough(self):
   """Simulates breakthrough on a reduced board."""
   # A 6x6 board is requested through the game string parameters.
   small_board_game = pyspiel.load_game("breakthrough(rows=6,columns=6)")
   self.sim_game(small_board_game)
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl.testing import absltest

import tensorflow.compat.v1 as tf

from open_spiel.python.algorithms import neurd
import pyspiel

# Eager execution is switched on at import time, before any test runs.
tf.enable_eager_execution()

# Kuhn poker is loaded once and shared by the helpers in this module.
_GAME = pyspiel.load_game('kuhn_poker')


def _new_model():
    """Build a fresh `neurd.DeepNeurdModel` for the module-level `_GAME`."""
    model_kwargs = {
        "num_hidden_layers": 1,
        "num_hidden_units": 13,
        "num_hidden_factors": 1,
        "use_skip_connections": True,
        "autoencode": True,
    }
    return neurd.DeepNeurdModel(_GAME, **model_kwargs)


class NeurdTest(tf.test.TestCase):
    """TensorFlow test case; only `setUp` is visible in this excerpt."""

    def setUp(self):
        """Run the base-class setup, then set the TensorFlow random seed to 42."""
        super(NeurdTest, self).setUp()
        tf.set_random_seed(42)
 def test_contract_names(self):
     """Checks `contract_string` for two contract indices."""
     bridge = pyspiel.load_game('bridge(use_double_dummy_result=false)')
     expected_names = ((0, 'Passed Out'), (38, '1SX N'))
     for contract_index, contract_name in expected_names:
         self.assertEqual(bridge.contract_string(contract_index),
                          contract_name)
Example #9
0
def main(_):
    """Load the game named by FLAGS.game and print its Nash equilibria.

    The game is converted to matrix form when necessary, optionally reduced
    by iterated dominance, and then handed to the solver selected by
    FLAGS.solver. FLAGS.mode ("all", "one" or "pure") controls which
    equilibria are reported. All results are written to stdout.

    Args:
      _: Unused positional argument.

    Raises:
      ValueError: If the linear solver is requested for a game that is not
        constant-sum (within FLAGS.tol) or with a mode other than "one".
    """
    game = pyspiel.load_game(FLAGS.game)
    print("loaded game")

    # convert game to matrix form if it isn't already a matrix game
    if not isinstance(game, pyspiel.MatrixGame):
        game = pyspiel.extensive_to_matrix_game(game)
        num_rows, num_cols = game.num_rows(), game.num_cols()
        print("converted to matrix form with shape (%d, %d)" %
              (num_rows, num_cols))

    # use iterated dominance to reduce the space unless the solver is LP (fast)
    if FLAGS.solver != "linear":
        if FLAGS.mode == "all":
            game, _ = lp_solver.iterated_dominance(
                game, tol=FLAGS.tol, mode=lp_solver.DOMINANCE_STRICT)
            num_rows, num_cols = game.num_rows(), game.num_cols()
            print(
                "discarded strictly dominated actions yielding shape (%d, %d)"
                % (num_rows, num_cols))
        if FLAGS.mode == "one":
            game, _ = lp_solver.iterated_dominance(
                game, tol=FLAGS.tol, mode=lp_solver.DOMINANCE_VERY_WEAK)
            num_rows, num_cols = game.num_rows(), game.num_cols()
            print(
                "discarded very weakly dominated actions yielding shape (%d, %d)"
                % (num_rows, num_cols))

    # game is now finalized
    num_rows, num_cols = game.num_rows(), game.num_cols()
    row_actions = [game.row_action_name(row) for row in range(num_rows)]
    col_actions = [game.col_action_name(col) for col in range(num_cols)]
    row_payoffs, col_payoffs = utils.game_payoffs_array(game)
    # A cell (row, col) is kept when the row payoff is within tol of the best
    # payoff in its column (max over axis 0) and the column payoff is within
    # tol of the best payoff in its row (max over axis 1); nonzero() + zip
    # turns the boolean mask into a list of (row, col) index pairs.
    pure_nash = list(
        zip(*((row_payoffs >= row_payoffs.max(0, keepdims=True) - FLAGS.tol)
              & (col_payoffs >= col_payoffs.max(1, keepdims=True) - FLAGS.tol)
              ).nonzero()))
    if pure_nash:
        print("found %d pure equilibria" % len(pure_nash))
    # Mode "pure": report every pure equilibrium found above and stop.
    if FLAGS.mode == "pure":
        if not pure_nash:
            print("found no pure equilibria")
            return
        print("pure equilibria:")
        for row, col in pure_nash:
            print("payoffs %f, %f:" %
                  (row_payoffs[row, col], col_payoffs[row, col]))
            print("row action:")
            print(row_actions[row])
            print("col action:")
            print(col_actions[col])
            print("")
        return
    # Mode "one": a pure equilibrium, if any exists, is enough; report the
    # first one and skip the solvers entirely.
    if FLAGS.mode == "one" and pure_nash:
        print("pure equilibrium:")
        row, col = pure_nash[0]
        print("payoffs %f, %f:" %
              (row_payoffs[row, col], col_payoffs[row, col]))
        print("row action:")
        print(row_actions[row])
        print("col action:")
        print(col_actions[col])
        print("")
        return
    # Print an index -> action-name legend so the mixtures printed below can
    # be interpreted.
    for row, action in enumerate(row_actions):
        print("row action %s:" % row)
        print(action)
    print("--")
    for col, action in enumerate(col_actions):
        print("col action %s:" % col)
        print(action)
    print("--")
    # Select the source of equilibria.
    # NOTE(review): there is no final `else`; if FLAGS.solver matches none of
    # the branches (and neither dimension is 1), `equilibria` is unbound when
    # it is used below — confirm the flag is restricted to these values.
    if num_rows == 1 or num_cols == 1:
        # One player has a single action: pair up the one-hot rows of the
        # identity matrices for each side.
        equilibria = itertools.product(np.eye(num_rows), np.eye(num_cols))
    elif FLAGS.solver == "linear":
        # The LP solver is only accepted for mode "one" and when the sum of
        # both payoff matrices varies by at most tol across cells.
        if FLAGS.mode != "one" or (row_payoffs + col_payoffs).max() > (
                row_payoffs + col_payoffs).min() + FLAGS.tol:
            raise ValueError(
                "can't use linear solver for non-constant-sum game or "
                "for finding all optima!")
        print("using linear solver")

        def gen():
            """Yield the single LP solution as a (row, col) mixture pair."""
            # The payoff differences are passed so that the two matrices
            # handed to the solver are exact negatives of each other.
            p0_sol, p1_sol, _, _ = lp_solver.solve_zero_sum_matrix_game(
                pyspiel.create_matrix_game(row_payoffs - col_payoffs,
                                           col_payoffs - row_payoffs))
            yield (np.squeeze(p0_sol, 1), np.squeeze(p1_sol, 1))

        equilibria = gen()
    elif FLAGS.solver == "lrsnash":
        print("using lrsnash solver")
        equilibria = lrs_solve(row_payoffs, col_payoffs)
    elif FLAGS.solver == "nashpy":
        if FLAGS.mode == "all":
            print("using nashpy vertex enumeration")
            equilibria = nashpy.Game(row_payoffs,
                                     col_payoffs).vertex_enumeration()
        else:
            print("using nashpy Lemke-Howson solver")
            equilibria = lemke_howson_solve(row_payoffs, col_payoffs)
    print("equilibria:" if FLAGS.mode == "all" else "an equilibrium:")
    equilibria = iter(equilibria)
    # check that there's at least one equilibrium
    # The first element is pulled eagerly and chained back in front so the
    # loop below still sees it. If the iterator is empty, "not found!" is
    # printed and the loop below runs over the exhausted iterator (no output).
    try:
        equilibria = itertools.chain([next(equilibria)], equilibria)
    except StopIteration:
        print("not found!")
    for row_mixture, col_mixture in equilibria:
        # Expected payoffs are the bilinear forms x^T A y and x^T B y.
        print("payoffs %f, %f for %s, %s" %
              (row_mixture.dot(row_payoffs.dot(col_mixture)),
               row_mixture.dot(
                   col_payoffs.dot(col_mixture)), row_mixture, col_mixture))
        # In mode "one" only the first equilibrium is reported.
        if FLAGS.mode == "one":
            return
Example #10
0
 def test_discounted_cfr_runs_against_leduc(self):
     """Smoke test: ten DCFR iterations on Leduc poker, then average policy."""
     leduc = pyspiel.load_game("leduc_poker")
     dcfr = discounted_cfr.DCFRSolver(leduc)
     num_iterations = 10
     for _ in range(num_iterations):
         dcfr.evaluate_and_update_policy()
     # No assertion on the result; the call only has to complete.
     dcfr.average_policy()
Example #11
0
 def test_private_observation(self):
     """Applies a fixed 52-action sequence and checks player 0's private tensor."""
     game = pyspiel.load_game('bridge(use_double_dummy_result=false)')
     state = game.new_initial_state()
     # The layout sketched in the original test:
     #         S T3
     #         H QT42
     #         D A82
     #         C A632
     # S KJ5           S Q7
     # H A965          H KJ8
     # D Q43           D KJT5
     # C T87           C Q954
     #         S A98642
     #         H 73
     #         D 976
     #         C KJ
     action_sequence = [
         49, 45, 31, 5, 10, 40, 27, 47, 35, 38, 17, 14, 0, 33, 21, 39,
         34, 12, 22, 41, 1, 13, 36, 9, 4, 46, 11, 32, 2, 37, 29, 30, 7,
         8, 19, 24, 16, 43, 51, 15, 48, 23, 6, 20, 42, 26, 44, 50, 25,
         28, 3, 18,
     ]
     for action in action_sequence:
         state.apply_action(action)

     # Four entries per row; the trailing labels name the cards that are set
     # in that row.
     expected_obs = [
         1.0, 1.0, 1.0, 0.0,  # C2, D2, H2
         1.0, 0.0, 0.0, 1.0,  # C3, S3
         0.0, 0.0, 1.0, 0.0,  # H4
         0.0, 0.0, 0.0, 0.0,  # No 5s
         1.0, 0.0, 0.0, 0.0,  # C6
         0.0, 0.0, 0.0, 0.0,  # No 7s
         0.0, 1.0, 0.0, 0.0,  # D8
         0.0, 0.0, 0.0, 0.0,  # No 9s
         0.0, 0.0, 1.0, 1.0,  # H10, S10
         0.0, 0.0, 0.0, 0.0,  # No Jacks
         0.0, 0.0, 1.0, 0.0,  # HQ
         0.0, 0.0, 0.0, 0.0,  # No kings
         1.0, 1.0, 0.0, 0.0,  # CA, DA
     ]

     obs = state.private_observation_tensor(0)
     self.assertLen(obs, game.private_observation_tensor_size())
     self.assertEqual(obs, expected_obs)
 def test_contract_names(self):
     """Checks `contract_string` for two contract indices."""
     bridge = pyspiel.load_game('bridge')
     expected_names = ((0, 'Passed Out'), (38, '1SX N'))
     for contract_index, contract_name in expected_names:
         self.assertEqual(bridge.contract_string(contract_index),
                          contract_name)
def main(_):
  """Print the exploitability of the uniform random policy on FLAGS.game."""
  loaded_game = pyspiel.load_game(FLAGS.game)
  uniform_policy = policy.UniformRandomPolicy(loaded_game)
  result = exploitability.exploitability(loaded_game, uniform_policy)
  print("Exploitability: {}".format(result))
Example #14
0
 def test_kuhn_poker(self):
   """Checks the sequence-form LP game values for Kuhn poker."""
   kuhn = pyspiel.load_game("kuhn_poker")
   solution = sequence_form_lp.solve_zero_sum_game(kuhn)
   # Only the first two of the four returned items (the values) are checked.
   first_value, second_value, _, _ = solution
   # value from Kuhn 1950 or https://en.wikipedia.org/wiki/Kuhn_poker
   self.assertAlmostEqual(first_value, -1 / 18)
   self.assertAlmostEqual(second_value, +1 / 18)
Example #15
0
 def test_game_sim(self, game_info):
   """Loads the game by short name, checks its player count, and simulates it."""
   loaded = pyspiel.load_game(game_info.short_name)
   # The player count must lie within the bounds advertised by game_info.
   self.assertLessEqual(game_info.min_num_players, loaded.num_players())
   self.assertLessEqual(loaded.num_players(), game_info.max_num_players)
   self.sim_game(loaded)
Example #16
0
 def test_othello(self):
     """Checks basic properties of the othello initial state."""
     othello = pyspiel.load_game("othello")
     opening = othello.new_initial_state()
     for predicate in (opening.is_chance_node, opening.is_terminal):
         self.assertFalse(predicate())
     expected_moves = [19, 26, 37, 44]
     self.assertEqual(opening.legal_actions(), expected_moves)
Example #17
0
 def test_multiplayer_game(self, game_info, num_players):
   """Loads the game with an explicit player count and simulates it."""
   game_params = {"players": num_players}
   multiplayer_game = pyspiel.load_game(game_info.short_name, game_params)
   self.sim_game(multiplayer_game)
Example #18
0
    def test_can_create_cpp_tabular_policy(self):
        """Converts a Python tabular policy to a pyspiel policy for three games."""
        for name in ("kuhn_poker", "leduc_poker", "liars_dice"):
            loaded = pyspiel.load_game(name)
            tabular = policy.TabularPolicy(loaded)

            # Only successful construction is checked; the converted policy
            # is discarded.
            policy.python_policy_to_pyspiel_policy(tabular)
Example #19
0
 def test_pig(self):
   """Simulates two-player pig with a reduced winning score."""
   # A win score of 15 is requested through the game string parameters.
   short_pig = pyspiel.load_game("pig(players=2,winscore=15)")
   self.sim_game(short_pig)
Example #20
0
 def test_joint_action_probabilities_failure_on_seq_game(self):
   """Checks that joint_action_probabilities asserts on a sequential game."""
   kuhn = pyspiel.load_game("kuhn_poker")
   with self.assertRaises(AssertionError):
     root = kuhn.new_initial_state()
     uniform = policy.UniformRandomPolicy(kuhn)
     # list() forces the result to be consumed in case it is lazy.
     list(policy.joint_action_probabilities(root, uniform))
Example #21
0
  def test_observations_are_consistent_with_info_states(self, game_name):
    """Checks that action-observation histories match information states.

    Random rollouts are played for each player until a time budget is used
    up. For every visited non-chance state the test records the player's
    action-observation history (AOH) and an information state string, and
    asserts that the mapping between the two is one-to-one.

    Args:
      game_name: Name of the game to load with `pyspiel.load_game`.
    """
    print(f"Testing observation <-> info_state consistency for '{game_name}'")
    game = pyspiel.load_game(game_name)
    game_type = game.get_type()

    # Both string representations are needed; otherwise there is nothing to
    # compare and the test returns early.
    if not game_type.provides_information_state_string \
      or not game_type.provides_observation_string:
      print(f"Skipping test for '{game_name}', as it doesn't provide both "
            "information_state_string and observation_string")
      return

    if game_type.dynamics == pyspiel.GameType.Dynamics.SIMULTANEOUS:
      logging.warning(
          "'%s' is not turn-based. Trying to reload game as turn-based.",
          game_name)
      game = pyspiel.load_game_as_turn_based(game_name)

    # Idea of the test: make rollouts in the game, and collect both
    # Action-Observation histories (AOH) and InformationState for different
    # ground states. Check that there is a unique bijection between them.
    #
    # Of course, this test does not exclude the possibility the game might
    # have a bug! But it is a fast way to discover a possible inconsistency
    # in a new implementation.
    aoh_is = dict()  # aoh -> info_state
    is_aoh = dict()  # info_state -> aoh
    aoh_histories = collections.defaultdict(set)  # aoh -> states
    is_histories = collections.defaultdict(set)  # info_states -> states

    # Some games have very long play-throughs.
    give_up_after = 100  # actions

    # Show a helpful error message for debugging the observations in a game.
    def show_error(histories, player, dump_collections=True):
      """Builds the failure message for a set of conflicting histories.

      Each history is replayed from a fresh initial state to rebuild the
      player's AOH, information state string and state description.

      Args:
        histories: Iterable of action sequences to replay.
        player: Player whose observations and info states are reported.
        dump_collections: Whether to append the full contents of the four
          bookkeeping collections to the message.

      Returns:
        The formatted message string.
      """
      aohs = list()
      info_states = list()
      descriptions = list()
      # Emulate the histories to collect relevant lists.
      for history in histories:
        state = game.new_initial_state()
        aoh = [("obs", state.observation_string(player))]
        for action in history:
          state.apply_action(action)
          if state.current_player() == player:
            aoh.append(("action", action))
          aoh.append(("obs", state.observation_string(player)))
        aohs.append(aoh)
        info_states.append(state.information_state_string(player))
        descriptions.append(str(state))

      histories_str = "\n".join([str(history) for history in histories])
      descriptions_str = "\n".join(descriptions)
      aohs_str = "\n".join([str(aoh) for aoh in aohs])
      info_states_str = "\n".join([str(s) for s in info_states])

      if dump_collections:
        def format_dump(xs):
          """Formats a mapping as one 'key  ->  value' line per item."""
          return "\n".join([f"{str(key)}  ->  {str(value)}"
                            for key, value in xs.items()])
        # pylint: disable=g-backslash-continuation
        extras = "Dumping colections:\n" \
                 f"aoh -> info_state:\n{format_dump(aoh_is)}\n\n" \
                 f"info_state -> aoh:\n{format_dump(is_aoh)}\n\n" \
                 f"aoh -> histories:\n{format_dump(aoh_histories)}\n\n" \
                 f"info_state -> histories:\n{format_dump(is_histories)}\n\n"
      else:
        # pylint: disable=g-backslash-continuation
        extras = "Rerun this test with dump_collections=True " \
                 "for extra information."

      # pylint: disable=g-backslash-continuation
      msg = \
        f"\n\n" \
        f"The action-observation histories (AOH) are not consistent with " \
        f"information states for player {player}.\n\n" \
        f"The conflicting set of states (histories) is:\n{histories_str}\n\n" \
        f"Their domain-specific descriptions are:\n{descriptions_str}\n\n" \
        f"The corresponding AOH are:\n{aohs_str}\n\n" \
        f"The corresponding info states are:\n{info_states_str}\n\n" \
        f"{extras}\n" \
        f"What to do to fix this? Consult the documentation to " \
        f"State::InformationStateString and State::ObservationString."
      return msg

    def collect_and_test_rollouts(player):
      """Plays one random rollout and checks AOH/info-state consistency.

      Args:
        player: Player whose observations build the AOH.
      """
      # NOTE(review): the generator is reseeded with the same value on every
      # call, so every rollout for a given player draws the same
      # `random.choice` sequence — confirm this is intended.
      random.seed(0)
      # The four collections are only mutated here, never rebound.
      nonlocal aoh_is, is_aoh, aoh_histories, is_histories
      state = game.new_initial_state()
      aoh = [("obs", state.observation_string(player))]

      # TODO(author13): we want to check terminals for consistency too, but info
      # state string is not defined there and neither are observations by
      # design.
      while not state.is_terminal():
        if len(state.history()) > give_up_after:
          break

        # Do not collect over chance nodes.
        if not state.is_chance_node():
          # NOTE(review): called without a player argument, whereas the AOH
          # and show_error use `player` — confirm which player's information
          # state is intended here.
          info_state = state.information_state_string()
          aoh_histories[str(aoh)].add(tuple(state.history()))
          is_histories[info_state].add(tuple(state.history()))

          # Gather every history related to this AOH / info state so a
          # failure message can show all of them.
          states = {tuple(state.history())}
          states = states.union(aoh_histories[str(aoh)])
          states = states.union(is_histories[info_state])
          # Direction AOH -> info state: a known AOH must map to the same
          # info state as before.
          if str(aoh) in aoh_is:
            states = states.union(is_histories[aoh_is[str(aoh)]])
            self.assertEqual(aoh_is[str(aoh)], info_state,
                             show_error(states, player))
          else:
            aoh_is[str(aoh)] = info_state
          # Direction info state -> AOH: a known info state must map to the
          # same AOH as before.
          if info_state in is_aoh:
            states = states.union(aoh_histories[str(is_aoh[info_state])])
            self.assertEqual(is_aoh[info_state], str(aoh),
                             show_error(states, player))
          else:
            is_aoh[info_state] = str(aoh)

        # Make random actions.
        action = random.choice(state.legal_actions(state.current_player()))
        # The action is recorded only when the tracked player is the one
        # acting; the resulting observation is always recorded.
        if state.current_player() == player:
          aoh.append(("action", action))
        state.apply_action(action)
        aoh.append(("obs", state.observation_string(player)))

    # Run (very roughly!) for this many seconds. This very much depends on the
    # machine the test runs on, as some games take a long time to produce
    # a single rollout.
    time_limit = TIMEABLE_TEST_RUNTIME / game.num_players()
    start = time.time()
    # `start` is never reset between players, so the limit is measured from
    # the beginning of the whole test.
    is_time_out = lambda: time.time() - start > time_limit

    rollouts = 0
    for player in range(game.num_players()):
      # Bookkeeping is per player: start each player with empty collections.
      aoh_is.clear()
      is_aoh.clear()
      aoh_histories.clear()
      is_histories.clear()
      while not is_time_out():
        collect_and_test_rollouts(player)
        rollouts += 1

    print(f"Test for {game_name} took {time.time()-start} seconds "
          f"to make {rollouts} rollouts.")
Example #22
0
 def get_move_map(self):
     """Returns the legal actions at the initial state of Go with board_size 9."""
     go_params = {"board_size": pyspiel.GameParameter(9)}
     go_game = pyspiel.load_game("go", go_params)
     initial_state = go_game.new_initial_state()
     return initial_state.legal_actions()
Example #23
0
                action,
                legal_actions,
                msg="The action {} is present in the policy but is not a legal "
                "actions (these are {})\n"
                "Legal actions missing from policy: {}\n"
                "Illegal actions present in policy: {}".format(
                    action, legal_actions, actions_missing, illegal_actions))

        sum_ = 0
        for prob in action_probabilities.values():
            sum_ += prob
            self.assertGreaterEqual(prob, 0)
        self.assertAlmostEqual(1, sum_)


# Leduc poker is loaded once at import time and reused wherever this
# module-level name is referenced.
_LEDUC_POKER = pyspiel.load_game("leduc_poker")


class CommonTest(parameterized.TestCase):
    @parameterized.parameters([
        policy.TabularPolicy(_LEDUC_POKER),
        policy.UniformRandomPolicy(_LEDUC_POKER),
        policy.FirstActionPolicy(_LEDUC_POKER),
    ])
    def test_policy_on_leduc(self, policy_object):
        test_policy_on_game(self, _LEDUC_POKER, policy_object)

    @parameterized.named_parameters([
        ("pyspiel.UniformRandom", pyspiel.UniformRandomPolicy(_LEDUC_POKER)),
    ])
    def test_cpp_policies_on_leduc(self, policy_object):
Example #24
0
def psro_measure_exploitability_nonlstm(
        br_checkpoint_path_tuple_list: List[Tuple[str, str]],
        metanash_weights: List[Tuple[float, float]],
        set_policy_weights_fn: Callable,
        rllib_policies: List[Policy],
        poker_game_version: str,
        open_spiel_env_config: dict = None):
    """Measure exploitability of a weighted mixture of checkpointed policies.

    Each checkpoint is loaded into the matching RLlib policy, converted to an
    OpenSpiel policy, and the per-player policies are averaged with
    `metanash_weights` before exploitability is computed.

    Args:
        br_checkpoint_path_tuple_list: One tuple of checkpoint paths per
            mixture component, indexed by player.
        metanash_weights: Weights passed to
            `tabular_policies_from_weighted_policies`.
        set_policy_weights_fn: Called as
            `fn(rllib_policy, checkpoint_path=path)` to load weights.
        rllib_policies: One RLlib policy per player; weights are overwritten
            in place while iterating.
        poker_game_version: OpenSpiel game name to load.
        open_spiel_env_config: Game parameters. When None, `{"players": 2}`
            is used for kuhn_poker / leduc_poker and `{}` otherwise.

    Returns:
        The value returned by `exploitability` for the joint averaged policy.
    """
    if open_spiel_env_config is None:
        two_player_defaults = ["kuhn_poker", "leduc_poker"]
        open_spiel_env_config = (
            {"players": pyspiel.GameParameter(2)}
            if poker_game_version in two_player_defaults else {})

    # Wrap every raw value in a GameParameter; already-wrapped values are
    # kept as they are.
    wrapped_config = {}
    for param_name, param_value in open_spiel_env_config.items():
        if isinstance(param_value, pyspiel.GameParameter):
            wrapped_config[param_name] = param_value
        else:
            wrapped_config[param_name] = pyspiel.GameParameter(param_value)
    open_spiel_env_config = wrapped_config

    openspiel_game = pyspiel.load_game(poker_game_version,
                                       open_spiel_env_config)
    if poker_game_version == "oshi_zumo":
        openspiel_game = pyspiel.convert_to_turn_based(openspiel_game)

    def policy_iterable():
        """Yield, per checkpoint tuple, the list of per-player policies."""
        for path_tuple in br_checkpoint_path_tuple_list:
            per_player_policies = []
            for player, rllib_policy in enumerate(rllib_policies):
                path = path_tuple[player]
                if path in _psro_tabular_policies_cache:
                    converted = _psro_tabular_policies_cache[path]
                else:
                    set_policy_weights_fn(rllib_policy, checkpoint_path=path)
                    converted = openspiel_policy_from_nonlstm_rllib_policy(
                        openspiel_game=openspiel_game,
                        rllib_policy=rllib_policy,
                        game_version=poker_game_version,
                        game_parameters=open_spiel_env_config,
                    )
                    # Converted policies are stored only when caching is
                    # enabled module-wide.
                    if CACHE_PSRO_TABULAR_POLICIES:
                        _psro_tabular_policies_cache[path] = converted
                per_player_policies.append(converted)
            yield per_player_policies

    avg_policies = tabular_policies_from_weighted_policies(
        game=openspiel_game,
        policy_iterable=policy_iterable(),
        weights=metanash_weights)
    joint_player_policy = JointPlayerPolicy(game=openspiel_game,
                                            policies=avg_policies)

    # Exploitability is NashConv / num_players
    if poker_game_version == "universal_poker":
        print(
            "Measuring exploitability for universal_poker policy. This will take a while..."
        )
    return exploitability(game=openspiel_game, policy=joint_player_policy)
Example #25
0
 def test_policy_attributes(self):
     """Checks `player_ids` of a uniform random policy on tiny_bridge_4p."""
     tiny_bridge = pyspiel.load_game("tiny_bridge_4p")
     uniform = policy.UniformRandomPolicy(tiny_bridge)
     expected_ids = [0, 1, 2, 3]
     self.assertEqual(uniform.player_ids, expected_ids)
Example #26
0
 def test_can_play_game(self):
     """Checks that "uniform_random" is listed among the bots for Kuhn poker."""
     kuhn = pyspiel.load_game("kuhn_poker")
     available_bots = pyspiel.bots_that_can_play_game(kuhn)
     self.assertIn("uniform_random", available_bots)
Example #27
0
 def test_create_game(self):
   """Checks the information type and player count reported for Kuhn poker."""
   kuhn = pyspiel.load_game("kuhn_poker")
   kuhn_type = kuhn.get_type()
   imperfect = pyspiel.GameType.Information.IMPERFECT_INFORMATION
   self.assertEqual(kuhn_type.information, imperfect)
   self.assertEqual(kuhn.num_players(), 2)
Example #28
0
def main(_):
    """Play one random game of FLAGS.game, printing every state on the way.

    The game is loaded (optionally with FLAGS.players), the start state is
    either fresh or deserialized from the file named by FLAGS.load_state, and
    random actions are applied until the state is terminal. Final utilities
    are printed for each player.

    Args:
      _: Unused positional argument.
    """
    registered = pyspiel.registered_games()
    print("Registered games:")
    print(registered)

    print("Creating game: " + FLAGS.game)
    if FLAGS.players is None:
        game = pyspiel.load_game(FLAGS.game)
    else:
        player_params = {"players": pyspiel.GameParameter(FLAGS.players)}
        game = pyspiel.load_game(FLAGS.game, player_params)

    # Get a new state
    if FLAGS.load_state is None:
        state = game.new_initial_state()
    else:
        # Load a specific state: the whole file, minus trailing whitespace.
        with open(FLAGS.load_state, encoding="utf-8") as input_file:
            state_string = input_file.read().rstrip()
        print("Loading state:")
        print(state_string)
        print("")
        state = game.deserialize_state(state_string)

    # Print the initial state
    print(str(state))

    while not state.is_terminal():
        # The state can be three different types: chance node,
        # simultaneous node, or decision node
        if state.is_chance_node():
            # Chance node: sample an outcome according to its probability.
            outcomes = state.chance_outcomes()
            print("Chance node, got " + str(len(outcomes)) + " outcomes")
            action_list, prob_list = zip(*outcomes)
            action = np.random.choice(action_list, p=prob_list)
            print("Sampled outcome: ",
                  state.action_to_string(state.current_player(), action))
            state.apply_action(action)

        elif state.is_simultaneous_node():
            # Simultaneous node: sample one action per player, in player
            # order.
            chosen_actions = []
            for pid in range(game.num_players()):
                chosen_actions.append(random.choice(state.legal_actions(pid)))
            readable_actions = []
            for pid, picked in enumerate(chosen_actions):
                readable_actions.append(state.action_to_string(pid, picked))
            print("Chosen actions: ", readable_actions)
            state.apply_actions(chosen_actions)

        else:
            # Decision node: sample action for the single current player
            acting_player = state.current_player()
            action = random.choice(state.legal_actions(acting_player))
            action_string = state.action_to_string(acting_player, action)
            print("Player ", acting_player,
                  ", randomly sampled action: ", action_string)
            state.apply_action(action)

        print(str(state))

    # Game is now done. Print utilities for each player
    returns = state.returns()
    for pid in range(game.num_players()):
        print("Utility for player {} is {}".format(pid, returns[pid]))
Example #29
0
 def test_error_handling(self):
   """Checks that loading an unknown game raises RuntimeError with its name."""
   expected_message = "Unknown game 'invalid_game_name'"
   with six.assertRaisesRegex(self, RuntimeError, expected_message):
     # The return value is irrelevant; the call is expected to raise.
     pyspiel.load_game("invalid_game_name")
 def test_leduc_poker_uniform_random(self):
     """Checks NashConv of the uniform random policy on Leduc poker."""
     leduc = pyspiel.load_game("leduc_poker")
     uniform = policy.UniformRandomPolicy(leduc)
     # NashConv taken from independent implementations
     expected_nash_conv = 4.747222222222222
     self.assertAlmostEqual(exploitability.nash_conv(leduc, uniform),
                            expected_nash_conv)