def serialize_deserialize(self, game, state):
    """Round-trips `game` and `state` through both serializers and compares.

    First the pair is serialized and deserialized with the OpenSpiel native
    functions, then the state alone goes through `pickle`. After each round
    trip the string forms of the restored objects are asserted equal to the
    string forms of the originals.

    Args:
      game: game object handed to `pyspiel.serialize_game_and_state`.
      state: state object that is serialized natively and then pickled.
    """
    # Round trip 1: OpenSpiel native serialization of game and state together.
    native_blob = pyspiel.serialize_game_and_state(game, state)
    restored_game, restored_state = pyspiel.deserialize_game_and_state(
        native_blob)
    self.assertEqual(str(game), str(restored_game))
    self.assertEqual(str(state), str(restored_state))

    # Round trip 2: pickle serialization + deserialization (state only).
    state_copy = pickle.loads(pickle.dumps(state))
    self.assertEqual(str(state), str(state_copy))
def test_pickle(self):
    """Verifies native and pickle round trips on a mid-game tic-tac-toe state.

    Loads "python_tic_tac_toe", applies the actions 4, 2, 3, 7 in order, and
    then asserts that the string forms of the game and state survive an
    OpenSpiel native serialize/deserialize cycle, and that the string form of
    the state survives a pickle dumps/loads cycle.
    """
    game = pyspiel.load_game("python_tic_tac_toe")
    state = game.new_initial_state()
    for action in (4, 2, 3, 7):
        state.apply_action(action)

    # OpenSpiel native round trip (game and state together).
    native_blob = pyspiel.serialize_game_and_state(game, state)
    restored_game, restored_state = pyspiel.deserialize_game_and_state(
        native_blob)
    self.assertEqual(str(game), str(restored_game))
    self.assertEqual(str(state), str(restored_state))

    # Pickle round trip (state only).
    state_copy = pickle.loads(pickle.dumps(state))
    self.assertEqual(str(state), str(state_copy))
def get_time_step(self, actions):
    """Builds a `TimeStep` describing the current state.

    The game state itself is only queried here, but `self._should_reset` is
    updated: it becomes true exactly when the state reports it is terminal.

    Args:
      actions: indexable by player id; `actions[player_id]` is recorded as that
        player's entry in the "last_action" observation.

    Returns:
      A `TimeStep` namedtuple containing:
        observations: dict with keys "info_state", "legal_actions",
          "current_player", "serialized_state" and "last_action". The
          "info_state", "legal_actions" and "last_action" entries are lists
          with one item per player. "serialized_state" is an empty list
          unless `self._include_full_state` is set.
        rewards: list with one entry per player, taken from
          `self._state.rewards()`.
        discounts: `self._discounts`, or a list of zeros of the same length
          when the state is terminal.
        step_type: `StepType.LAST` if the state is terminal, otherwise
          `StepType.MID`.
    """
    if self._state.is_terminal():
        step_type = StepType.LAST
    else:
        step_type = StepType.MID
    is_last = step_type == StepType.LAST
    self._should_reset = is_last

    state_rewards = self._state.rewards()
    rewards = []
    info_states = []
    legal_actions = []
    last_actions = []
    for player in range(self.num_players):
        rewards.append(state_rewards[player])
        # Either the observation tensor or the information-state tensor is
        # exposed under "info_state", depending on the environment setting.
        if self._use_observation:
            tensor = self._state.observation_tensor(player)
        else:
            tensor = self._state.information_state_tensor(player)
        info_states.append(tensor)
        legal_actions.append(self._state.legal_actions(player))
        last_actions.append(actions[player])

    observations = {
        "info_state": info_states,
        "legal_actions": legal_actions,
        "current_player": self._state.current_player(),
        "serialized_state": [],
        "last_action": last_actions,
    }

    discounts = self._discounts
    if is_last:
        # When the game is in a terminal state set the discount to 0.
        discounts = [0. for _ in discounts]

    if self._include_full_state:
        observations["serialized_state"] = pyspiel.serialize_game_and_state(
            self._game, self._state)

    return TimeStep(
        observations=observations,
        rewards=rewards,
        discounts=discounts,
        step_type=step_type)
def reset(self):
    """Starts a new sequence and returns its first `TimeStep`.

    A fresh initial state is created (via
    `new_initial_state_for_population(self._mfg_population)` when the game's
    dynamics are MEAN_FIELD and there is more than one player, otherwise via
    `new_initial_state()`), then `self._sample_external_events()` is called
    before the observations are collected.

    Returns:
      A `TimeStep` namedtuple containing:
        observations: dict with keys "info_state", "legal_actions",
          "current_player" and "serialized_state". "info_state" and
          "legal_actions" are lists with one item per player.
          "serialized_state" is an empty list unless
          `self._include_full_state` is set.
        rewards: None.
        discounts: None.
        step_type: `StepType.FIRST`.
    """
    self._should_reset = False

    is_mean_field = (
        self._game.get_type().dynamics == pyspiel.GameType.Dynamics.MEAN_FIELD)
    if is_mean_field and self._num_players > 1:
        self._state = self._game.new_initial_state_for_population(
            self._mfg_population)
    else:
        self._state = self._game.new_initial_state()
    self._sample_external_events()

    info_states = []
    legal_actions = []
    for player in range(self.num_players):
        # Either the observation tensor or the information-state tensor is
        # exposed under "info_state", depending on the environment setting.
        if self._use_observation:
            tensor = self._state.observation_tensor(player)
        else:
            tensor = self._state.information_state_tensor(player)
        info_states.append(tensor)
        legal_actions.append(self._state.legal_actions(player))

    observations = {
        "info_state": info_states,
        "legal_actions": legal_actions,
        "current_player": self._state.current_player(),
        "serialized_state": [],
    }

    if self._include_full_state:
        observations["serialized_state"] = pyspiel.serialize_game_and_state(
            self._game, self._state)

    return TimeStep(
        observations=observations,
        rewards=None,
        discounts=None,
        step_type=StepType.FIRST)
def serialize_deserialize(self, game, state):
    """Round-trips `game` and `state` through OpenSpiel native serialization.

    Asserts that the string forms of the deserialized game and state equal the
    string forms of the originals.

    Args:
      game: game object handed to `pyspiel.serialize_game_and_state`.
      state: state object serialized together with `game`.
    """
    native_blob = pyspiel.serialize_game_and_state(game, state)
    restored_game, restored_state = pyspiel.deserialize_game_and_state(
        native_blob)
    self.assertEqual(str(game), str(restored_game))
    self.assertEqual(str(state), str(restored_state))