Esempio n. 1
0
 def test_child_function_expected_behavior_for_sim_game(self):
     """Check `policy.child` with a joint action on a simultaneous game."""
     ipd_game = pyspiel.load_game("python_iterated_prisoners_dilemma")
     root = ipd_game.new_initial_state()
     # A list of actions (two entries here) is passed instead of a single
     # action; the resulting state is expected to print as "p0:D p1:D".
     joint_action = [1, 1]
     successor = policy.child(root, joint_action)
     rendered = str(successor)
     self.assertEqual(rendered, "p0:D p1:D")
Esempio n. 2
0
 def test_child_function_expected_behavior_for_seq_game(self):
     """Check `policy.child` against `State.child` on a sequential game."""
     ttt_game = pyspiel.load_game("tic_tac_toe")
     root = ttt_game.new_initial_state()
     move = 3

     # Child obtained through the helper under test.
     via_policy = policy.child(root, move)
     # Applying the move must have produced a different history than the root.
     self.assertNotEqual(via_policy.history(), root.history())

     # Reference child obtained directly from the state.
     via_state = root.child(move)
     # The two children are expected to compare unequal as objects while
     # carrying the same action history.
     self.assertNotEqual(via_policy, via_state)
     self.assertEqual(via_policy.history(), via_state.history())
Esempio n. 3
0
 def decision_nodes(self, parent_state):
     """Generate a (state, cf_prob) pair for every descendant decision node.

     `parent_state` itself is yielded with probability 1.0 when it is a
     simultaneous node or when its current player equals `self._player_id`.
     Each descendant's probability is the product of the probabilities
     returned by `self.transitions` along the path from `parent_state`.
     Terminal states yield nothing.

     Args:
       parent_state: the state at which the traversal starts.

     Yields:
       Tuples `(state, cf_prob)`.
     """
     # Guard clause: nothing lies below a terminal state.
     if parent_state.is_terminal():
         return

     acts_here = parent_state.current_player() == self._player_id
     if acts_here or parent_state.is_simultaneous_node():
         yield (parent_state, 1.0)

     for action, p_action in self.transitions(parent_state):
         successor = openspiel_policy.child(parent_state, action)
         for state, p_state in self.decision_nodes(successor):
             # Scale the descendant's probability by the edge leading to it.
             yield (state, p_state * p_action)
def policy_value(state,
                 policies: Union[List[policy.Policy], policy.Policy],
                 probability_threshold: float = 0):
    """Returns the expected values for the state for players following `policies`.

    Computes the expected value of the `state` for each player, assuming player
    `i` follows the policy given in `policies[i]`.

    Args:
      state: A `pyspiel.State`.
      policies: A `list` of `policy.Policy` objects, one per player for
        sequential games, one policy for simultaneous games.
      probability_threshold: only sum over entries with prob greater than this
        (default: 0). The threshold is applied at every node visited by the
        recursion, not only at `state`.

    Returns:
      A `numpy.array` containing the expected value for each player. If every
      transition out of `state` is pruned by the threshold, the built-in `sum`
      over an empty sequence yields the scalar `0` instead.
    """
    if state.is_terminal():
        return np.array(state.returns())
    # Forward the threshold to the recursive call; without it, descendants
    # would silently fall back to the default of 0 and pruning would only
    # ever happen at the root.
    return sum(
        prob * policy_value(policy.child(state, action), policies,
                            probability_threshold)
        for action, prob in _transitions(state, policies)
        if prob > probability_threshold)
Esempio n. 5
0
 def test_child_function_failure_behavior_for_sim_game(self):
     """Check that `policy.child` fails on a simultaneous game given a scalar."""
     ipd_game = pyspiel.load_game("python_iterated_prisoners_dilemma")
     root = ipd_game.new_initial_state()
     # A single integer is passed here; the call is expected to raise
     # AssertionError.
     self.assertRaises(AssertionError, policy.child, root, 0)