def test_child_function_expected_behavior_for_sim_game(self):
    """Check `policy.child` on a simultaneous-move game.

    Applies the joint action `[1, 1]` to the initial state of
    `python_iterated_prisoners_dilemma` and verifies that the string form
    of the resulting state is `"p0:D p1:D"`.
    """
    sim_game = pyspiel.load_game("python_iterated_prisoners_dilemma")
    root = sim_game.new_initial_state()
    # Simultaneous nodes take one action per player, passed as a list.
    joint_action = [1, 1]
    successor = policy.child(root, joint_action)
    self.assertEqual(str(successor), "p0:D p1:D")
def test_child_function_expected_behavior_for_seq_game(self):
    """Check `policy.child` on a sequential game.

    Using the initial state of `tic_tac_toe` and action 3, verifies that:
      * the state returned by `policy.child` has a different history from
        the initial state;
      * it does not compare equal to the state from `initial_state.child`;
      * both successor states nevertheless share the same history.
    """
    root = pyspiel.load_game("tic_tac_toe").new_initial_state()
    move = 3

    via_policy = policy.child(root, move)
    self.assertNotEqual(via_policy.history(), root.history())

    via_state = root.child(move)
    # The two successors are distinct objects with identical histories.
    self.assertNotEqual(via_policy, via_state)
    self.assertEqual(via_policy.history(), via_state.history())
def decision_nodes(self, parent_state):
    """Yields a (state, cf_prob) pair for each descendant decision node.

    A node is reported when its current player equals `self._player_id` or
    when it is a simultaneous node. `parent_state` itself is reported with
    probability 1.0. Every deeper node is reported with the product of the
    probabilities, as given by `self.transitions`, along the path from
    `parent_state` to it. Nothing is yielded for a terminal state.

    Args:
      parent_state: The state whose subtree is explored.

    Yields:
      `(state, probability)` tuples, with the parent before its descendants.
    """
    if parent_state.is_terminal():
        return

    acts_here = parent_state.current_player() == self._player_id
    if acts_here or parent_state.is_simultaneous_node():
        yield (parent_state, 1.0)

    for action, action_prob in self.transitions(parent_state):
        successor = openspiel_policy.child(parent_state, action)
        for node, node_prob in self.decision_nodes(successor):
            # Scale the sub-tree probability by the step taken to reach it.
            yield (node, node_prob * action_prob)
def policy_value(state,
                 policies: Union[List[policy.Policy], policy.Policy],
                 probability_threshold: float = 0):
    """Returns the expected values for the state for players following `policies`.

    Computes the expected value of the `state` for each player, assuming
    player `i` follows the policy given in `policies[i]`.

    Args:
      state: A `pyspiel.State`.
      policies: A `list` of `policy.Policy` objects, one per player for
        sequential games, one policy for simultaneous games.
      probability_threshold: only sum over entries with prob greater than this
        (default: 0). The threshold is applied at every level of the recursion.

    Returns:
      A `numpy.array` containing the expected value for each player.
    """
    if state.is_terminal():
        return np.array(state.returns())

    # Forward the threshold to the recursive call; otherwise only the root's
    # transitions would be filtered and deeper levels would use the default 0.
    # NOTE(review): if every transition at a node is filtered out, `sum` over
    # the empty generator yields the int 0 rather than an array.
    return sum(
        prob * policy_value(
            policy.child(state, action), policies, probability_threshold)
        for action, prob in _transitions(state, policies)
        if prob > probability_threshold)
def test_child_function_failure_behavior_for_sim_game(self):
    """Check that `policy.child` rejects a bare action on a simultaneous game.

    Passing the single integer action 0 with the initial state of
    `python_iterated_prisoners_dilemma` must raise `AssertionError`.
    """
    sim_game = pyspiel.load_game("python_iterated_prisoners_dilemma")
    root = sim_game.new_initial_state()
    lone_action = 0
    with self.assertRaises(AssertionError):
        policy.child(root, lone_action)