Example #1
0
def main(_):
    """Plays one episode of the "fcpa" game and prints every step of it.

    Two agents are created with `LoadAgent` from `FLAGS.player0` and
    `FLAGS.player1`. Chance nodes are resolved by sampling one of the state's
    chance outcomes with a `RandomState` seeded from `FLAGS.seed`; at every
    other node the agent of the current player is asked for an action. The
    state is printed after each applied action, and the per-player returns are
    printed once the state is terminal.

    Args:
      _: Unused positional argument.
    """
    rng = np.random.RandomState(FLAGS.seed)

    # Poker needs an optional dependency: the ACPC poker code. To make sure it
    # is compiled into the library, prepend both the install.sh and the build
    # commands with OPEN_SPIEL_BUILD_WITH_ACPC=ON. More on optional
    # dependencies:
    # https://github.com/deepmind/open_spiel/blob/master/docs/install.md#configuration-conditional-dependencies
    assert "universal_poker" in pyspiel.registered_names()

    game_string = pyspiel.hunl_game_string("fcpa")
    print("Creating game: {}".format(game_string))
    game = pyspiel.load_game(game_string)

    # One agent per seat, in seat order.
    agent_specs = (FLAGS.player0, FLAGS.player1)
    agents = [
        LoadAgent(agent_spec, game, seat, rng)
        for seat, agent_spec in enumerate(agent_specs)
    ]

    state = game.new_initial_state()
    print("INITIAL STATE")
    print(str(state))

    while not state.is_terminal():
        # Only chance nodes and single-player decision nodes are handled here.
        mover = state.current_player()
        if state.is_chance_node():
            # Chance node: draw one outcome according to its probability.
            outcomes = state.chance_outcomes()
            print("Chance node with " + str(len(outcomes)) + " outcomes")
            chance_actions, chance_probs = zip(*outcomes)
            action = rng.choice(chance_actions, p=chance_probs)
            print("Sampled outcome: ",
                  state.action_to_string(mover, action))
        else:
            # Decision node: list the legal actions, then let the agent pick.
            for legal_action in state.legal_actions():
                print("Legal action: {} ({})".format(
                    state.action_to_string(mover, legal_action), legal_action))
            action = agents[mover].step(state)
            print("Player ", mover, ", chose action: ",
                  state.action_to_string(mover, action))
        state.apply_action(action)

        print("")
        print("NEXT STATE:")
        print(str(state))

    # The episode is over: report the utility of every player.
    final_returns = state.returns()
    for pid in range(game.num_players()):
        print("Utility for player {} is {}".format(pid, final_returns[pid]))
Example #2
0
  def test_registered_names(self):
    """Checks the registered game names against the expected names.

    The optional games "hanabi" and "universal_poker" are only expected when
    the matching BUILD_WITH_* environment variable is set to "ON".
    """
    registered = pyspiel.registered_names()

    # Names are listed alphabetically to keep the test easy to read.
    expected = {
        "backgammon",
        "blotto",
        "breakthrough",
        "bridge",
        "bridge_uncontested_bidding",
        "catch",
        "chess",
        "cliff_walking",
        "coin_game",
        "connect_four",
        "coop_box_pushing",
        "coop_to_1p",
        "deep_sea",
        "first_sealed_auction",
        "go",
        "goofspiel",
        "havannah",
        "hex",
        "kuhn_poker",
        "laser_tag",
        "leduc_poker",
        "liars_dice",
        "markov_soccer",
        "matching_pennies_3p",
        "matrix_cd",
        "matrix_coordination",
        "matrix_mp",
        "matrix_pd",
        "matrix_rps",
        "matrix_rpsw",
        "matrix_sh",
        "matrix_shapleys_game",
        "misere",
        "negotiation",
        "normal_form_extensive_game",
        "oshi_zumo",
        "oware",
        "pentago",
        "phantom_ttt",
        "pig",
        "quoridor",
        "tic_tac_toe",
        "tiny_bridge_2p",
        "tiny_bridge_4p",
        "tiny_hanabi",
        "turn_based_simultaneous_game",
        "y",
    }

    # (environment variable, optional game expected when it is "ON").
    optional_games = (
        ("BUILD_WITH_HANABI", "hanabi"),
        ("BUILD_WITH_ACPC", "universal_poker"),
    )
    for env_flag, optional_name in optional_games:
      if os.environ.get(env_flag, "OFF") == "ON":
        expected.add(optional_name)

    self.assertCountEqual(registered, sorted(expected))
    def test_registered_names(self):
        """Checks the registered game names against `EXPECTED_GAMES`.

        The optional games "hanabi" and "universal_poker" are added to the
        expectation only when the matching BUILD_WITH_* environment variable
        is set to "ON".
        """
        game_names = pyspiel.registered_names()

        # Work on a private copy. `EXPECTED_GAMES` is defined outside this
        # method (presumably a shared module-level set - confirm), so adding
        # the optional games to it in place would leak into every other user
        # of the constant, and would fail outright on an immutable collection.
        expected = set(EXPECTED_GAMES)
        if os.environ.get("BUILD_WITH_HANABI", "OFF") == "ON":
            expected.add("hanabi")
        if os.environ.get("BUILD_WITH_ACPC", "OFF") == "ON":
            expected.add("universal_poker")

        # Sorted only so that a failure message lists the names in a stable
        # order; assertCountEqual itself ignores ordering.
        self.assertCountEqual(game_names, sorted(expected))
Example #4
0
    def test_registered_names(self):
        """Checks that the registered games are exactly the expected ones."""
        registered = pyspiel.registered_names()

        # Alphabetical order, one initial letter per row where it fits, so a
        # missing or extra name is easy to spot.
        expected_names = [
            "backgammon", "blotto", "breakthrough",
            "bridge_uncontested_bidding",
            "catch", "chess", "coin_game", "connect_four",
            "coop_box_pushing",
            "first_sealed_auction",
            "go", "goofspiel",
            "havannah", "hex",
            "kuhn_poker",
            "laser_tag", "leduc_poker", "liars_dice",
            "markov_soccer", "matching_pennies_3p", "matrix_cd",
            "matrix_coordination", "matrix_mp", "matrix_pd", "matrix_rps",
            "matrix_rpsw", "matrix_sh", "matrix_shapleys_game", "misere",
            "negotiation",
            "oshi_zumo", "oware",
            "pentago", "phantom_ttt", "pig",
            "quoridor",
            "tic_tac_toe", "tiny_bridge_2p", "tiny_bridge_4p", "tiny_hanabi",
            "turn_based_simultaneous_game",
            "y",
        ]
        self.assertCountEqual(registered, expected_names)
    def test_run_hanabi(self):
        """Plays one small Hanabi episode with policy-gradient agents.

        Returns early, without running anything, when "hanabi" is not among
        the registered games.
        """
        game = "hanabi"
        # Hanabi is an optional game: nothing to run when it is not registered.
        if game not in pyspiel.registered_names():
            return

        num_players = 3
        hanabi_settings = {
            "players": num_players,
            "max_life_tokens": 1,
            "colors": 2,
            "ranks": 3,
            "hand_size": 2,
            "max_information_tokens": 3,
            "discount": 0.
        }
        env = rl_environment.Environment(game, **hanabi_settings)
        info_state_size = env.observation_spec()["info_state"][0]
        num_actions = env.action_spec()["num_actions"]

        with self.session() as sess:

            def build_agent(player_id):
                """Creates the policy-gradient agent for one player."""
                return policy_gradient.PolicyGradient(
                    sess,
                    player_id=player_id,
                    info_state_size=info_state_size,
                    num_actions=num_actions,
                    hidden_layers_sizes=[8, 8],
                    batch_size=16,
                    entropy_cost=0.001,
                    critic_learning_rate=0.01,
                    pi_learning_rate=0.01,
                    num_critic_before_pi=4)

            agents = [build_agent(pid) for pid in range(num_players)]
            sess.run(tf.global_variables_initializer())

            time_step = env.reset()
            while not time_step.last():
                mover = time_step.observations["current_player"]
                # Every agent steps on every time step; only the action of
                # the current player is forwarded to the environment.
                step_outputs = [agent.step(time_step) for agent in agents]
                time_step = env.step([step_outputs[mover].action])

            # One more step per agent on the last time step.
            for agent in agents:
                agent.step(time_step)
Example #6
0
  def test_run_landlord(self):
    """Plays one "landlord" episode with three DQN agents.

    Returns early, without running anything, when "landlord" is not among the
    registered games.
    """
    game = "landlord"
    # landlord is an optional game: nothing to run when it is not registered.
    if game not in pyspiel.registered_names():
      return

    num_players = 3
    env_configs = {}  # No game parameters are passed.
    env = rl_environment.Environment(game, **env_configs)
    state_size = env.observation_spec()["info_state"][0]
    num_actions = env.action_spec()["num_actions"]

    with self.session() as sess:

      def build_agent(player_id):
        """Creates the DQN agent for one player."""
        return dqn.DQN(
            sess,
            player_id,
            state_representation_size=state_size,
            num_actions=num_actions,
            hidden_layers_sizes=[16],
            replay_buffer_capacity=10,
            batch_size=5)

      agents = [build_agent(pid) for pid in range(num_players)]
      sess.run(tf.global_variables_initializer())

      time_step = env.reset()
      while not time_step.last():
        current_player = time_step.observations["current_player"]
        if env.is_turn_based:
          # Turn-based: only the current player steps and acts.
          mover_output = agents[current_player].step(time_step)
          action_list = [mover_output.action]
        else:
          # Otherwise every agent steps and contributes one action.
          all_outputs = [agent.step(time_step) for agent in agents]
          action_list = [output.action for output in all_outputs]
        print_iteration(time_step, current_player, action_list)
        time_step = env.step(action_list)

      # One more step per agent on the last time step.
      for agent in agents:
        agent.step(time_step)
Example #7
0
    def test_run_hanabi(self):
        """Plays one small Hanabi episode with DQN agents.

        Returns early, without running anything, when "hanabi" is not among
        the registered games.
        """
        game = "hanabi"
        # Hanabi is an optional game: nothing to run when it is not registered.
        if game not in pyspiel.registered_names():
            return

        num_players = 3
        hanabi_settings = {
            "players": num_players,
            "max_life_tokens": 1,
            "colors": 2,
            "ranks": 3,
            "hand_size": 2,
            "max_information_tokens": 3,
            "discount": 0.
        }
        env = rl_environment.Environment(game, **hanabi_settings)
        state_size = env.observation_spec()["info_state"][0]
        num_actions = env.action_spec()["num_actions"]

        with self.session() as sess:

            def build_agent(player_id):
                """Creates the DQN agent for one player."""
                return dqn.DQN(
                    sess,
                    player_id,
                    state_representation_size=state_size,
                    num_actions=num_actions,
                    hidden_layers_sizes=[16],
                    replay_buffer_capacity=10,
                    batch_size=5)

            agents = [build_agent(pid) for pid in range(num_players)]
            sess.run(tf.global_variables_initializer())

            time_step = env.reset()
            while not time_step.last():
                mover = time_step.observations["current_player"]
                # Every agent steps on every time step; only the action of
                # the current player is forwarded to the environment.
                step_outputs = [agent.step(time_step) for agent in agents]
                time_step = env.step([step_outputs[mover].action])

            # One more step per agent on the last time step.
            for agent in agents:
                agent.step(time_step)
Example #8
0
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

from absl import logging
from absl.testing import absltest

from open_spiel.python.algorithms import generate_playthrough
import pyspiel

# Relative path of the playthroughs directory (presumably resolved against the
# repository root - confirm with the code that uses it).
_DATA_DIR = "open_spiel/integration_tests/playthroughs/"

# Game names that `_is_optional_game` treats as optional.
_OPTIONAL_GAMES = frozenset(["hanabi", "universal_poker"])
# Names returned by `pyspiel.registered_names()` at import time, as a set.
_AVAILABLE_GAMES = set(pyspiel.registered_names())


def _is_optional_game(basename):
    """Tells whether a file basename belongs to one of the optional games.

    Args:
      basename: The basename of the file. It is assumed it starts with the
        game name.

    Returns:
      A `(True, game_name)` tuple when `basename` starts with one of the names
      in `_OPTIONAL_GAMES`, and `(False, None)` otherwise.
    """
    matched_name = next(
        (name for name in _OPTIONAL_GAMES if basename.startswith(name)), None)
    return matched_name is not None, matched_name

Example #9
0
import unittest

from absl.testing import absltest
from absl.testing import parameterized

import numpy as np

from open_spiel.python.algorithms import get_all_states
import pyspiel

# Games removed from `_GAMES_TO_TEST` below. Going by the name, these cannot be
# loaded without explicit parameters - TODO confirm.
_MANDATORY_PARAMETERS_GAMES = [
    "misere", "turn_based_simultaneous_game", "normal_form_extensive_game"
]

# Every registered game except the ones listed above. The list is built from a
# set difference, so its order is not defined.
_GAMES_TO_TEST = list(
    set(pyspiel.registered_names()) - set(_MANDATORY_PARAMETERS_GAMES))

# The list of game instances to test on the full tree as tuples
# (name to display, string to pass to load_game).
_GAMES_FULL_TREE_TRAVERSAL_TESTS = [
    ("catch", "catch(rows=6,columns=3)"),
    ("cliff_walking", "cliff_walking(horizon=7)"),
    ("deep_sea", "deep_sea(size=3)"),
    ("kuhn_poker", "kuhn_poker"),
    ("leduc_poker", "leduc_poker"),
    ("iigoofspiel4", "turn_based_simultaneous_game(game=goofspiel("
     "imp_info=True,num_cards=4,points_order=descending))"),
    ("kuhn_poker3p", "kuhn_poker(players=3)"),
    ("first_sealed_auction", "first_sealed_auction(max_value=2)"),
    ("tiny_hanabi", "tiny_hanabi"),
    ("nf_auction", "turn_based_simultaneous_game(game="
Example #10
0
    # Times out (to investigate)
    "backgammon",  # Likely too large for depth limit 5 (huge branching factor).
    "breakthrough",
    "bridge_uncontested_bidding",
    "havannah",
    "hex",
    "chess",
    "go",
    "pentago",
    # Mandatory parameters
    "misere",
    "turn_based_simultaneous_game",
    "y",
]

# Every registered game that is not in `_EXCLUDED_GAMES`. The list is built
# from a set difference, so its order is not defined.
_GAMES_TO_TEST = list(set(pyspiel.registered_names()) - set(_EXCLUDED_GAMES))

# The list of game instances to test on the full tree as tuples
# (name to display, string to pass to load_game).
_GAMES_FULL_TREE_TRAVERSAL_TESTS = [
    ("catch", "catch"),
    ("kuhn_poker", "kuhn_poker"),
    ("leduc_poker", "leduc_poker"),
    # Disabled as this slows down the test significantly. (12s to 150s).
    # Enable it to check the game when you modify it.
    # ("liars_dice", "liars_dice"),
    # The game string below is split over two adjacent string literals.
    ("iigoofspiel4", "turn_based_simultaneous_game(game=goofspiel("
     "imp_info=True,num_cards=4,points_order=descending))"),
    ("kuhn_poker3p", "kuhn_poker(players=3)"),
    ("first_sealed_auction", "first_sealed_auction(max_value=2)"),
]