def test_private_information_contents(self):
  try:
    private_observation = make_observation(
        self.game,
        pyspiel.IIGObservationType(
            public_info=False,
            perfect_recall=False,
            private_info=pyspiel.PrivateInfoType.SINGLE_PLAYER))
  except (RuntimeError, ValueError):
    return
  player_has_private_info = [False] * self.game.num_players()
  for state in self.some_states:
    for i in range(self.game.num_players()):
      if (private_observation.string_from(state, i) !=
          pyspiel.PrivateObservation.NOTHING):
        player_has_private_info[i] = True
  if (self.game_type.information ==
      pyspiel.GameType.Information.IMPERFECT_INFORMATION):
    self.assertTrue(any(player_has_private_info))
  if (self.game_type.information ==
      pyspiel.GameType.Information.PERFECT_INFORMATION):
    self.assertFalse(any(player_has_private_info))
def test_compression_binary(self):
  # All infostates for leduc are binary, so we can compress them effectively.
  game = pyspiel.load_game("leduc_poker")
  obs1 = make_observation(
      game, pyspiel.IIGObservationType(perfect_recall=True))
  obs2 = make_observation(
      game, pyspiel.IIGObservationType(perfect_recall=True))
  self.assertLen(obs1.tensor, 30)  # 30 floats = 120 bytes
  for state in get_all_states.get_all_states(game).values():
    for player in range(game.num_players()):
      obs1.set_from(state, player)
      compressed = obs1.compress()
      self.assertEqual(type(compressed), bytes)
      self.assertLen(compressed, 5)
      obs2.decompress(compressed)
      np.testing.assert_array_equal(obs1.tensor, obs2.tensor)
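
# A sketch of the size arithmetic behind the 5-byte assertion above, not of
# the actual wire format (the encoding used by compress() is an
# implementation detail; the 1-byte header here is an assumption, and only
# the 5-byte total is what the test asserts): 30 binary values bit-pack into
# ceil(30 / 8) = 4 bytes of payload.
import math

ASSUMED_HEADER_BYTES = 1  # Hypothetical header size, for illustration only.
assert ASSUMED_HEADER_BYTES + math.ceil(30 / 8) == 5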
def test_leduc_info_state(self):
  game = pyspiel.load_game("leduc_poker")
  observation = make_observation(
      game, pyspiel.IIGObservationType(perfect_recall=True))
  state = game.new_initial_state()
  state.apply_action(1)  # Deal 1
  state.apply_action(2)  # Deal 2
  state.apply_action(2)  # Bet
  state.apply_action(1)  # Call
  state.apply_action(3)  # Deal 3
  observation.set_from(state, player=0)
  np.testing.assert_array_equal(observation.tensor, [
      1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0
  ])
  self.assertEqual(
      list(observation.dict),
      ["player", "private_card", "community_card", "betting"])
  np.testing.assert_array_equal(observation.dict["player"], [1, 0])
  np.testing.assert_array_equal(observation.dict["private_card"],
                                [0, 1, 0, 0, 0, 0])
  np.testing.assert_array_equal(observation.dict["community_card"],
                                [0, 0, 0, 1, 0, 0])
  np.testing.assert_array_equal(
      observation.dict["betting"],
      [
          [[0, 1], [1, 0], [0, 0], [0, 0]],  # First round
          [[0, 0], [0, 0], [0, 0], [0, 0]],  # Second round
      ])
  self.assertEqual(
      observation.string_from(state, 0),
      "[Round 2][Player: 0][Pot: 6][Money: 97 97[Private: 1]]"
      "[Round1]: 2 1[Public: 3]\nRound 2 sequence: ")
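
# The 30-entry tensor checked above is the flat concatenation of the four
# named pieces in `observation.dict`. A quick arithmetic check of that
# decomposition (sizes are read off the assertions in the test; the glosses
# in the comments are informal):
LEDUC_PIECE_SIZES = {
    "player": 2,           # one-hot over the two players
    "private_card": 6,     # one-hot over the 6-card Leduc deck
    "community_card": 6,   # one-hot over the 6-card Leduc deck
    "betting": 2 * 4 * 2,  # 2 rounds x up to 4 moves x 2 action indicators
}
assert sum(LEDUC_PIECE_SIZES.values()) == 30  # Matches assertLen(tensor, 30).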
def change_observation(self, game: pyspiel.Game, player: int,
                       public_info: bool, perfect_recall: bool,
                       private_info: pyspiel.PrivateInfoType):
  self.player = player
  observation_type = pyspiel.IIGObservationType(public_info, perfect_recall,
                                                private_info)
  try:
    self.observation = make_observation(game, observation_type)
  except RuntimeError as e:
    self.observation = None
    logging.warning("Could not make observation: %s", e)
def test_no_invalid_public_observations(self):
  try:
    public_observation = make_observation(
        self.game,
        pyspiel.IIGObservationType(
            public_info=True,
            perfect_recall=False,
            private_info=pyspiel.PrivateInfoType.NONE))
  except (ValueError, RuntimeError):
    return
  for state in self.some_states:
    self.assertNotEqual(public_observation.string_from(state, 0),
                        pyspiel.PublicObservation.INVALID)
def update(self, state: pyspiel.State):
  try:
    fen = str(state)
    board = chess.Board(fen)
    obs_type = pyspiel.IIGObservationType(
        public_info=True,
        perfect_recall=False,
        private_info=pyspiel.PrivateInfoType.SINGLE_PLAYER)
    observation = make_observation(state.get_game(), obs_type)
    for player in range(2):
      observation.set_from(state, player)
      self.write_image(board, observation, player)
  except RuntimeError as e:
    # May throw if the board size is not 8; skip rendering in that case.
    logging.warning(e)
def test_leduc_all_player_privates(self):
  game = pyspiel.load_game("leduc_poker")
  observation = make_observation(
      game,
      pyspiel.IIGObservationType(
          perfect_recall=True,
          private_info=pyspiel.PrivateInfoType.ALL_PLAYERS))
  state = game.new_initial_state()
  state.apply_action(1)  # Deal 1
  state.apply_action(2)  # Deal 2
  state.apply_action(2)  # Bet
  state.apply_action(1)  # Call
  state.apply_action(3)  # Deal 3
  observation.set_from(state, player=0)
  np.testing.assert_array_equal(observation.dict["private_cards"], [
      [0, 1, 0, 0, 0, 0],
      [0, 0, 1, 0, 0, 0],
  ])
def test_consistent(self):
  """Checks the Python and C++ game implementations are the same."""
  py_game = pyspiel.load_game("python_kuhn_poker")
  cc_game = pyspiel.load_game("kuhn_poker")
  obs_types = [None, pyspiel.IIGObservationType(perfect_recall=True)]
  py_observations = [make_observation(py_game, o) for o in obs_types]
  cc_observations = [make_observation(cc_game, o) for o in obs_types]
  py_states = get_all_states(py_game)
  cc_states = get_all_states(cc_game)
  self.assertCountEqual(list(cc_states), list(py_states))
  for key, cc_state in cc_states.items():
    py_state = py_states[key]
    np.testing.assert_array_equal(py_state.history(), cc_state.history())
    np.testing.assert_array_equal(py_state.returns(), cc_state.returns())
    for py_obs, cc_obs in zip(py_observations, cc_observations):
      for player in (0, 1):
        py_obs.set_from(py_state, player)
        cc_obs.set_from(cc_state, player)
        np.testing.assert_array_equal(py_obs.tensor, cc_obs.tensor)
def make_py_observer(self, iig_obs_type=None, params=None):
  """Returns an object used for observing game state."""
  return IteratedPrisonersDilemmaObserver(
      iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False),
      params)
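
# For context, a minimal sketch of the kind of observer object such a factory
# returns. This is a generic illustration, not the actual
# IteratedPrisonersDilemmaObserver; only the attribute and method names
# (tensor, dict, set_from, string_from) follow the interface the OpenSpiel
# observation machinery expects of Python observers.
import numpy as np


class _SketchObserver:
  """Observes only whose turn it is, as a one-element tensor."""

  def __init__(self, iig_obs_type, params):
    del params  # This sketch takes no parameters.
    self.iig_obs_type = iig_obs_type
    self.tensor = np.zeros(1, np.float32)
    self.dict = {"observation": self.tensor}

  def set_from(self, state, player):
    self.tensor[0] = float(state.current_player() == player)

  def string_from(self, state, player):
    return f"us={player} current={state.current_player()}"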
def playthrough_lines(game_string, alsologtostdout=False, action_sequence=None):
  """Returns a playthrough of the specified game as a list of lines.

  Actions are selected uniformly at random, including chance actions.

  Args:
    game_string: string, e.g. 'markov_soccer' or 'kuhn_poker(players=4)'.
    alsologtostdout: Whether to also print the trace to stdout. This can be
      useful when an error occurs, to still be able to get context information.
    action_sequence: A (possibly partial) list of action choices to make.
  """
  lines = []
  action_sequence = action_sequence or []
  if alsologtostdout:

    def add_line(v):
      print(v)
      lines.append(v)
  else:
    add_line = lines.append

  game = _load_game(game_string)
  add_line("game: {}".format(game_string))
  seed = np.random.randint(2**32 - 1)
  game_type = game.get_type()

  default_observation = None
  try:
    default_observation = make_observation(game)
  except (RuntimeError, ValueError):
    pass

  infostate_observation = None
  # if game_type.information in (pyspiel.IMPERFECT_INFORMATION,
  #                              pyspiel.ONE_SHOT):
  try:
    infostate_observation = make_observation(
        game, pyspiel.IIGObservationType(perfect_recall=True))
  except (RuntimeError, ValueError):
    pass

  add_line("")
  add_line("GameType.chance_mode = {}".format(game_type.chance_mode))
  add_line("GameType.dynamics = {}".format(game_type.dynamics))
  add_line("GameType.information = {}".format(game_type.information))
  add_line("GameType.long_name = {}".format('"{}"'.format(
      game_type.long_name)))
  add_line("GameType.max_num_players = {}".format(game_type.max_num_players))
  add_line("GameType.min_num_players = {}".format(game_type.min_num_players))
  add_line("GameType.parameter_specification = {}".format("[{}]".format(
      ", ".join('"{}"'.format(param)
                for param in sorted(game_type.parameter_specification)))))
  add_line("GameType.provides_information_state_string = {}".format(
      game_type.provides_information_state_string))
  add_line("GameType.provides_information_state_tensor = {}".format(
      game_type.provides_information_state_tensor))
  add_line("GameType.provides_observation_string = {}".format(
      game_type.provides_observation_string))
  add_line("GameType.provides_observation_tensor = {}".format(
      game_type.provides_observation_tensor))
  add_line("GameType.provides_factored_observation_string = {}".format(
      game_type.provides_factored_observation_string))
  add_line("GameType.reward_model = {}".format(game_type.reward_model))
  add_line("GameType.short_name = {}".format('"{}"'.format(
      game_type.short_name)))
  add_line("GameType.utility = {}".format(game_type.utility))

  add_line("")
  add_line("NumDistinctActions() = {}".format(game.num_distinct_actions()))
  add_line("PolicyTensorShape() = {}".format(game.policy_tensor_shape()))
  add_line("MaxChanceOutcomes() = {}".format(game.max_chance_outcomes()))
  add_line("GetParameters() = {{{}}}".format(",".join(
      "{}={}".format(key, _escape(str(value)))
      for key, value in sorted(game.get_parameters().items()))))
  add_line("NumPlayers() = {}".format(game.num_players()))
  add_line("MinUtility() = {:.5}".format(game.min_utility()))
  add_line("MaxUtility() = {:.5}".format(game.max_utility()))
  try:
    utility_sum = game.utility_sum()
  except RuntimeError:
    utility_sum = None
  add_line("UtilitySum() = {}".format(utility_sum))
  if infostate_observation and infostate_observation.tensor is not None:
    add_line("InformationStateTensorShape() = {}".format(
        format_shapes(infostate_observation.dict)))
    add_line("InformationStateTensorLayout() = {}".format(
        game.information_state_tensor_layout()))
    add_line("InformationStateTensorSize() = {}".format(
        len(infostate_observation.tensor)))
  if default_observation and default_observation.tensor is not None:
    add_line("ObservationTensorShape() = {}".format(
        format_shapes(default_observation.dict)))
    add_line("ObservationTensorLayout() = {}".format(
        game.observation_tensor_layout()))
    add_line("ObservationTensorSize() = {}".format(
        len(default_observation.tensor)))
  add_line("MaxGameLength() = {}".format(game.max_game_length()))
  add_line('ToString() = "{}"'.format(str(game)))

  players = list(range(game.num_players()))
  state = game.new_initial_state()
  state_idx = 0
  rng = np.random.RandomState(seed)

  while True:
    add_line("")
    add_line("# State {}".format(state_idx))
    for line in str(state).splitlines():
      add_line("# {}".format(line).rstrip())
    add_line("IsTerminal() = {}".format(state.is_terminal()))
    add_line("History() = {}".format([int(a) for a in state.history()]))
    add_line('HistoryString() = "{}"'.format(state.history_str()))
    add_line("IsChanceNode() = {}".format(state.is_chance_node()))
    add_line("IsSimultaneousNode() = {}".format(state.is_simultaneous_node()))
    add_line("CurrentPlayer() = {}".format(state.current_player()))
    if infostate_observation:
      for player in players:
        s = infostate_observation.string_from(state, player)
        if s is not None:
          add_line(f'InformationStateString({player}) = "{_escape(s)}"')
    if infostate_observation and infostate_observation.tensor is not None:
      for player in players:
        infostate_observation.set_from(state, player)
        for name, tensor in infostate_observation.dict.items():
          label = f"InformationStateTensor({player})"
          label += f".{name}" if name != "info_state" else ""
          lines += _format_tensor(tensor, label)
    if default_observation:
      for player in players:
        s = default_observation.string_from(state, player)
        if s is not None:
          add_line(f'ObservationString({player}) = "{_escape(s)}"')
    if game.get_type().provides_factored_observation_string:
      add_line('PublicObservationString() = "{}"'.format(
          _escape(state.public_observation_string())))
      for player in players:
        add_line('PrivateObservationString({}) = "{}"'.format(
            player, _escape(state.private_observation_string(player))))
    if default_observation and default_observation.tensor is not None:
      for player in players:
        default_observation.set_from(state, player)
        for name, tensor in default_observation.dict.items():
          label = f"ObservationTensor({player})"
          label += f".{name}" if name != "observation" else ""
          lines += _format_tensor(tensor, label)
    if game_type.chance_mode == pyspiel.GameType.ChanceMode.SAMPLED_STOCHASTIC:
      add_line('SerializeState() = "{}"'.format(_escape(state.serialize())))
    if not state.is_chance_node():
      add_line("Rewards() = {}".format(state.rewards()))
      add_line("Returns() = {}".format(state.returns()))
    if state.is_terminal():
      break
    if state.is_chance_node():
      # In Python 2 and Python 3, the default number of decimal places
      # displayed is different. Thus, we hardcode a ".12", which is the
      # Python 2 behaviour.
      add_line("ChanceOutcomes() = [{}]".format(", ".join(
          "{{{}, {:.12f}}}".format(outcome, prob)
          for outcome, prob in state.chance_outcomes())))
    if state.is_simultaneous_node():
      for player in players:
        add_line("LegalActions({}) = [{}]".format(
            player, ", ".join(str(x) for x in state.legal_actions(player))))
      for player in players:
        add_line("StringLegalActions({}) = [{}]".format(
            player, ", ".join('"{}"'.format(state.action_to_string(player, x))
                              for x in state.legal_actions(player))))
      if state_idx < len(action_sequence):
        actions = action_sequence[state_idx]
      else:
        actions = [rng.choice(state.legal_actions(pl)) for pl in players]
      add_line("")
      add_line("# Apply joint action [{}]".format(", ".join(
          '"{}"'.format(state.action_to_string(player, action))
          for player, action in enumerate(actions))))
      add_line("actions: [{}]".format(", ".join(
          str(action) for action in actions)))
      state.apply_actions(actions)
    else:
      add_line("LegalActions() = [{}]".format(", ".join(
          str(x) for x in state.legal_actions())))
      add_line("StringLegalActions() = [{}]".format(", ".join(
          '"{}"'.format(state.action_to_string(state.current_player(), x))
          for x in state.legal_actions())))
      if state_idx < len(action_sequence):
        action = action_sequence[state_idx]
      else:
        action = rng.choice(state.legal_actions())
      add_line("")
      add_line('# Apply action "{}"'.format(
          state.action_to_string(state.current_player(), action)))
      add_line("action: {}".format(action))
      state.apply_action(action)
    state_idx += 1
  return lines
def playthrough_lines(game_string,
                      alsologtostdout=False,
                      action_sequence=None,
                      observation_params_string=None,
                      seed: Optional[int] = None):
  """Returns a playthrough of the specified game as a list of lines.

  Actions are selected uniformly at random, including chance actions.

  Args:
    game_string: string, e.g. 'markov_soccer' or 'kuhn_poker(players=4)'.
    alsologtostdout: Whether to also print the trace to stdout. This can be
      useful when an error occurs, to still be able to get context information.
    action_sequence: A (possibly partial) list of action choices to make.
    observation_params_string: Optional observation parameters for constructing
      an observer.
    seed: An optional seed to initialize the random number generator.
  """
  should_display_state_fn = ShouldDisplayStateTracker()
  lines = []
  action_sequence = action_sequence or []
  should_display = True

  def add_line(v, force=False):
    if force or should_display:
      if alsologtostdout:
        print(v)
      lines.append(v)

  game = pyspiel.load_game(game_string)
  add_line("game: {}".format(game_string))
  if observation_params_string:
    add_line("observation_params: {}".format(observation_params_string))
  if seed is None:
    seed = np.random.randint(2**32 - 1)
  game_type = game.get_type()

  default_observation = None
  try:
    observation_params = (
        pyspiel.game_parameters_from_string(observation_params_string)
        if observation_params_string else None)
    default_observation = make_observation(
        game,
        imperfect_information_observation_type=None,
        params=observation_params)
  except (RuntimeError, ValueError) as e:
    print("Warning: unable to build an observation:", e)

  infostate_observation = None
  # TODO(author11) reinstate this restriction
  # if game_type.information in (pyspiel.IMPERFECT_INFORMATION,
  #                              pyspiel.ONE_SHOT):
  try:
    infostate_observation = make_observation(
        game, pyspiel.IIGObservationType(perfect_recall=True))
  except (RuntimeError, ValueError):
    pass

  public_observation = None
  private_observation = None

  # Instantiate factored observations only for imperfect-information games,
  # as they would yield unnecessarily redundant information for
  # perfect-information games: the default observation is the same as the
  # public observation, and private observations are always empty.
  if game_type.information == pyspiel.GameType.Information.IMPERFECT_INFORMATION:
    try:
      public_observation = make_observation(
          game,
          pyspiel.IIGObservationType(
              public_info=True,
              perfect_recall=False,
              private_info=pyspiel.PrivateInfoType.NONE))
    except (RuntimeError, ValueError):
      pass
    try:
      private_observation = make_observation(
          game,
          pyspiel.IIGObservationType(
              public_info=False,
              perfect_recall=False,
              private_info=pyspiel.PrivateInfoType.SINGLE_PLAYER))
    except (RuntimeError, ValueError):
      pass

  add_line("")
  add_line("GameType.chance_mode = {}".format(game_type.chance_mode))
  add_line("GameType.dynamics = {}".format(game_type.dynamics))
  add_line("GameType.information = {}".format(game_type.information))
  add_line("GameType.long_name = {}".format('"{}"'.format(
      game_type.long_name)))
  add_line("GameType.max_num_players = {}".format(game_type.max_num_players))
  add_line("GameType.min_num_players = {}".format(game_type.min_num_players))
  add_line("GameType.parameter_specification = {}".format("[{}]".format(
      ", ".join('"{}"'.format(param)
                for param in sorted(game_type.parameter_specification)))))
  add_line("GameType.provides_information_state_string = {}".format(
      game_type.provides_information_state_string))
  add_line("GameType.provides_information_state_tensor = {}".format(
      game_type.provides_information_state_tensor))
  add_line("GameType.provides_observation_string = {}".format(
      game_type.provides_observation_string))
  add_line("GameType.provides_observation_tensor = {}".format(
      game_type.provides_observation_tensor))
  add_line("GameType.provides_factored_observation_string = {}".format(
      game_type.provides_factored_observation_string))
  add_line("GameType.reward_model = {}".format(game_type.reward_model))
  add_line("GameType.short_name = {}".format('"{}"'.format(
      game_type.short_name)))
  add_line("GameType.utility = {}".format(game_type.utility))

  add_line("")
  add_line("NumDistinctActions() = {}".format(game.num_distinct_actions()))
  add_line("PolicyTensorShape() = {}".format(game.policy_tensor_shape()))
  add_line("MaxChanceOutcomes() = {}".format(game.max_chance_outcomes()))
  add_line("GetParameters() = {}".format(
      _format_params(game.get_parameters())))
  add_line("NumPlayers() = {}".format(game.num_players()))
  add_line("MinUtility() = {:.5}".format(game.min_utility()))
  add_line("MaxUtility() = {:.5}".format(game.max_utility()))
  try:
    utility_sum = game.utility_sum()
  except RuntimeError:
    utility_sum = None
  add_line("UtilitySum() = {}".format(utility_sum))
  if infostate_observation and infostate_observation.tensor is not None:
    add_line("InformationStateTensorShape() = {}".format(
        format_shapes(infostate_observation.dict)))
    add_line("InformationStateTensorLayout() = {}".format(
        game.information_state_tensor_layout()))
    add_line("InformationStateTensorSize() = {}".format(
        len(infostate_observation.tensor)))
  if default_observation and default_observation.tensor is not None:
    add_line("ObservationTensorShape() = {}".format(
        format_shapes(default_observation.dict)))
    add_line("ObservationTensorLayout() = {}".format(
        game.observation_tensor_layout()))
    add_line("ObservationTensorSize() = {}".format(
        len(default_observation.tensor)))
  add_line("MaxGameLength() = {}".format(game.max_game_length()))
  add_line('ToString() = "{}"'.format(str(game)))

  players = list(range(game.num_players()))
  # Arbitrarily pick the last possible initial state (for all games but
  # multi-population MFGs, there will be a single initial state).
  state = game.new_initial_states()[-1]
  state_idx = 0
  rng = np.random.RandomState(seed)

  while True:
    should_display = should_display_state_fn(state)
    add_line("", force=True)
    add_line("# State {}".format(state_idx), force=True)
    for line in str(state).splitlines():
      add_line("# {}".format(line).rstrip())
    add_line("IsTerminal() = {}".format(state.is_terminal()))
    add_line("History() = {}".format([int(a) for a in state.history()]))
    add_line('HistoryString() = "{}"'.format(state.history_str()))
    add_line("IsChanceNode() = {}".format(state.is_chance_node()))
    add_line("IsSimultaneousNode() = {}".format(state.is_simultaneous_node()))
    add_line("CurrentPlayer() = {}".format(state.current_player()))
    if infostate_observation:
      for player in players:
        s = infostate_observation.string_from(state, player)
        if s is not None:
          add_line(f'InformationStateString({player}) = "{_escape(s)}"')
    if infostate_observation and infostate_observation.tensor is not None:
      for player in players:
        infostate_observation.set_from(state, player)
        for name, tensor in infostate_observation.dict.items():
          label = f"InformationStateTensor({player})"
          label += f".{name}" if name != "info_state" else ""
          for line in _format_tensor(tensor, label):
            add_line(line)
    if default_observation:
      for player in players:
        s = default_observation.string_from(state, player)
        if s is not None:
          add_line(f'ObservationString({player}) = "{_escape(s)}"')
    if public_observation:
      s = public_observation.string_from(state, 0)
      if s is not None:
        add_line('PublicObservationString() = "{}"'.format(_escape(s)))
      for player in players:
        s = private_observation.string_from(state, player)
        if s is not None:
          add_line(f'PrivateObservationString({player}) = "{_escape(s)}"')
    if default_observation and default_observation.tensor is not None:
      for player in players:
        default_observation.set_from(state, player)
        for name, tensor in default_observation.dict.items():
          label = f"ObservationTensor({player})"
          label += f".{name}" if name != "observation" else ""
          for line in _format_tensor(tensor, label):
            add_line(line)
    if game_type.chance_mode == pyspiel.GameType.ChanceMode.SAMPLED_STOCHASTIC:
      add_line('SerializeState() = "{}"'.format(_escape(state.serialize())))
    if not state.is_chance_node():
      add_line("Rewards() = {}".format(state.rewards()))
      add_line("Returns() = {}".format(state.returns()))
    if state.is_terminal():
      break
    if state.is_chance_node():
      add_line("ChanceOutcomes() = {}".format(state.chance_outcomes()))
    if state.is_mean_field_node():
      add_line("DistributionSupport() = {}".format(
          state.distribution_support()))
      num_states = len(state.distribution_support())
      state.update_distribution(
          [1. / num_states] * num_states if num_states else [])
      if state_idx < len(action_sequence):
        assert action_sequence[state_idx] == "update_distribution", (
            f"Unexpected action at MFG node: {action_sequence[state_idx]}, "
            f"state: {state}, action_sequence: {action_sequence}")
      add_line("")
      add_line("# Set mean field distribution to be uniform", force=True)
      add_line("action: update_distribution", force=True)
    elif state.is_simultaneous_node():
      for player in players:
        add_line("LegalActions({}) = [{}]".format(
            player, ", ".join(str(x) for x in state.legal_actions(player))))
      for player in players:
        add_line("StringLegalActions({}) = [{}]".format(
            player, ", ".join('"{}"'.format(state.action_to_string(player, x))
                              for x in state.legal_actions(player))))
      if state_idx < len(action_sequence):
        actions = action_sequence[state_idx]
      else:
        actions = []
        for pl in players:
          legal_actions = state.legal_actions(pl)
          actions.append(0 if not legal_actions else rng.choice(legal_actions))
      add_line("")
      add_line(
          "# Apply joint action [{}]".format(", ".join(
              '"{}"'.format(state.action_to_string(player, action))
              for player, action in enumerate(actions))),
          force=True)
      add_line(
          "actions: [{}]".format(", ".join(str(action) for action in actions)),
          force=True)
      state.apply_actions(actions)
    else:
      add_line("LegalActions() = [{}]".format(", ".join(
          str(x) for x in state.legal_actions())))
      add_line("StringLegalActions() = [{}]".format(", ".join(
          '"{}"'.format(state.action_to_string(state.current_player(), x))
          for x in state.legal_actions())))
      if state_idx < len(action_sequence):
        action = action_sequence[state_idx]
      else:
        action = rng.choice(state.legal_actions())
      add_line("")
      add_line(
          '# Apply action "{}"'.format(
              state.action_to_string(state.current_player(), action)),
          force=True)
      add_line("action: {}".format(action), force=True)
      state.apply_action(action)
    state_idx += 1
  return lines
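
# Unlike the earlier variant, this version accepts an explicit seed, so the
# random action choices are reproducible. A small sketch of that property
# (not part of the original module; game string arbitrary):
if __name__ == "__main__":
  first = playthrough_lines("kuhn_poker", seed=1234)
  second = playthrough_lines("kuhn_poker", seed=1234)
  assert first == second  # Same seed, same playthrough.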
"""A bot that picks the first action from the list. Used only for tests.""" import base64 import sys from open_spiel.python.observation import make_observation import pyspiel game_name = input() play_as = int(input()) game = pyspiel.load_game(game_name) public_observation = make_observation( game, pyspiel.IIGObservationType( perfect_recall=False, public_info=True, private_info=pyspiel.PrivateInfoType.NONE)) private_observation = make_observation( game, pyspiel.IIGObservationType( perfect_recall=False, public_info=False, private_info=pyspiel.PrivateInfoType.SINGLE_PLAYER)) print("ready") while True: print("start") while True: message = input() if message == "tournament over": print("tournament over")
Three formats of observation are supported:
  a. 1-D numpy array, accessed by `observation.tensor`
  b. Dict of numpy arrays, accessed by `observation.dict`. These are pieces of
     the 1-D array, reshaped. The np.array objects refer to the same memory as
     the 1-D array (no copying!).
  c. String, hopefully human-readable (primarily for debugging purposes)

For usage examples, see `observation_test.py`.
"""

import numpy as np

import pyspiel

# Corresponds to the old information_state_XXX methods.
INFO_STATE_OBS_TYPE = pyspiel.IIGObservationType(perfect_recall=True)


class _Observation:
  """Contains an observation from a game."""

  def __init__(self, game, imperfect_information_observation_type, params):
    if imperfect_information_observation_type is not None:
      obs = game.make_observer(imperfect_information_observation_type, params)
    else:
      obs = game.make_observer(params)
    self._observation = pyspiel._Observation(game, obs)
    self.dict = {}
    if self._observation.has_tensor():
      self.tensor = np.frombuffer(self._observation, np.float32)
      offset = 0