Example #1
    def test_python_game(self):
        """Checks if the NashConv is consistent through time."""
        game = crowd_modelling.MFGCrowdModellingGame()
        uniform_policy = policy.UniformRandomPolicy(game)
        nash_conv_fp = nash_conv.NashConv(game, uniform_policy)
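        # NashConv measures how much a representative player can gain by
        # switching to a best response against the distribution induced by the
        # evaluated policy; a value of 0 corresponds to a Nash equilibrium.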

        self.assertAlmostEqual(nash_conv_fp.nash_conv(), 3.1700299751054217)
Example #2
    def test_dqn_fp_python_game(self):
        """Checks if fictitious play with DQN-based value function works."""
        game = crowd_modelling.MFGCrowdModellingGame()
        dfp = fictitious_play.FictitiousPlay(game)

        uniform_policy = policy.UniformRandomPolicy(game)
        dist = distribution.DistributionPolicy(game, uniform_policy)
        envs = [
            rl_environment.Environment(game,
                                       mfg_distribution=dist,
                                       mfg_population=p)
            for p in range(game.num_players())
        ]
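        # One RL environment per mean-field population, each sharing the
        # distribution induced by the uniform policy. The DQN agent below
        # (player id 0) is used inside each fictitious play iteration to learn
        # an approximate best response.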
        dqn_agent = dqn.DQN(
            0,
            state_representation_size=envs[0].observation_spec()["info_state"]
            [0],
            num_actions=envs[0].action_spec()["num_actions"],
            hidden_layers_sizes=[256, 128, 64],
            replay_buffer_capacity=100,
            batch_size=5,
            epsilon_start=0.02,
            epsilon_end=0.01)

        for _ in range(10):
            dfp.iteration(rl_br_agent=dqn_agent)

        dfp_policy = dfp.get_policy()
        nash_conv_dfp = nash_conv.NashConv(game, dfp_policy)

        self.assertAlmostEqual(nash_conv_dfp.nash_conv(), 1.0558451955622807)
Example #3
  def test_random_game(self):
    """Tests basic API functions."""
    np.random.seed(7)
    horizon = 20
    size = 50
    game = crowd_modelling.MFGCrowdModellingGame(params={
        "horizon": horizon,
        "size": size
    })
    state = game.new_initial_state()
    t = 0
    while not state.is_terminal():
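      # An episode alternates between chance nodes, mean-field nodes and the
      # default player's decision nodes until the horizon is reached.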
      if state.current_player() == pyspiel.PlayerId.CHANCE:
        actions, probs = zip(*state.chance_outcomes())
        action = np.random.choice(actions, p=probs)
        self.check_cloning(state)
        self.assertEqual(len(state.legal_actions()),
                         len(state.chance_outcomes()))
        state.apply_action(action)
      elif state.current_player() == pyspiel.PlayerId.MEAN_FIELD:
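        # Mean-field nodes expose no legal actions; the game instead expects a
        # state distribution over distribution_support(), here a uniform one.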
        self.assertEqual(state.legal_actions(), [])
        self.check_cloning(state)
        num_states = len(state.distribution_support())
        state.update_distribution([1 / num_states] * num_states)
      else:
        self.assertEqual(state.current_player(), 0)
        self.check_cloning(state)
        state.observation_string()
        state.information_state_string()
        legal_actions = state.legal_actions()
        action = np.random.choice(legal_actions)
        state.apply_action(action)
        t += 1

    self.assertEqual(t, horizon)
Example #4
  def test_distribution(self):
    """Checks that distribution-related functions work."""
    game = crowd_modelling.MFGCrowdModellingGame()
    state = game.new_initial_state()
    self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE)
    state.apply_action(game.size // 2)
    self.assertEqual(state.current_player(), 0)
    # This expected reward assumes that the game is initialized with
    # uniform state distribution.
    self.assertAlmostEqual(state.rewards()[0], 1. + np.log(game.size))

    state.apply_action(crowd_modelling.MFGCrowdModellingState._NEUTRAL_ACTION)
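    # The neutral action keeps the player in place; the state then becomes a
    # mean-field node where the updated distribution must be provided.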

    self.assertEqual(state.current_player(), pyspiel.PlayerId.MEAN_FIELD)
    self.assertEqual(
        state.distribution_support(), [str((x, 0)) for x in range(10)])
    new_distrib = [0.01] * 9 + [1. - 0.01 * 9]
    state.update_distribution(new_distrib)
    self.assertAlmostEqual(state._distribution, new_distrib)

    # Chance node.
    state.apply_action(crowd_modelling.MFGCrowdModellingState._NEUTRAL_ACTION)

    # Check that the distribution is taken into account for the reward
    # computation.
    self.assertAlmostEqual(state.rewards()[0], 1. - np.log(0.01))
Example #5
  def test_python_game(self):
    """Checks that the best response value computation works."""
    game = crowd_modelling.MFGCrowdModellingGame()
    uniform_policy = policy.UniformRandomPolicy(game)
    dist = distribution.DistributionPolicy(game, uniform_policy)
    br_value = best_response_value.BestResponse(game, dist)
    br_val = br_value(game.new_initial_state())
    self.assertAlmostEqual(br_val, 33.09846599803991)
Example #6
  def test_python_game(self):
    """Checks that the policy value computation works."""
    game = crowd_modelling.MFGCrowdModellingGame()
    uniform_policy = policy.UniformRandomPolicy(game)
    dist = distribution.DistributionPolicy(game, uniform_policy)
    py_value = policy_value.PolicyValue(game, dist, uniform_policy)
    py_val = py_value(game.new_initial_state())
    self.assertAlmostEqual(py_val, 29.92843602293449)
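
Note that the NashConv reported in Example #1 (3.1700299751054217) is the gap between the best response value of Example #5 (33.09846599803991) and the policy value above (29.92843602293449).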
Example #7
    def test_fp_python_game(self):
        """Checks if fictitious play works."""
        game = crowd_modelling.MFGCrowdModellingGame()
        fp = fictitious_play.FictitiousPlay(game)
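        # Each iteration computes a best response against the current
        # distribution and mixes it into the averaged fictitious play policy.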
        for _ in range(10):
            fp.iteration()
        fp_policy = fp.get_policy()
        nash_conv_fp = nash_conv.NashConv(game, fp_policy)

        self.assertAlmostEqual(nash_conv_fp.nash_conv(), 0.9908032626911343)
Example #8
  def test_random_game(self):
    """Tests basic API functions."""
    horizon = 20
    size = 50
    game = crowd_modelling.MFGCrowdModellingGame(params={
        "horizon": horizon,
        "size": size
    })
    pyspiel.random_sim_test(
        game, num_sims=10, serialize=False, verbose=True)
Example #9
    def test_md_python_game(self):
        """Checks if mirror descent works."""
        game = crowd_modelling.MFGCrowdModellingGame()
        md = mirror_descent.MirrorDescent(game)
        for _ in range(10):
            md.iteration()
        md_policy = md.get_policy()
        nash_conv_md = nash_conv.NashConv(game, md_policy)

        self.assertAlmostEqual(nash_conv_md.nash_conv(), 2.2730324915546056)
Example #10
  def test_create(self):
    """Checks we can create the game and clone states."""
    game = crowd_modelling.MFGCrowdModellingGame()
    self.assertEqual(game.size, crowd_modelling._SIZE)
    self.assertEqual(game.horizon, crowd_modelling._HORIZON)
    self.assertEqual(game.get_type().dynamics,
                     pyspiel.GameType.Dynamics.MEAN_FIELD)
    print("Num distinct actions:", game.num_distinct_actions())
    state = game.new_initial_state()
    clone = state.clone()
    print("Initial state:", state)
    print("Cloned initial state:", clone)
Example #11
  def test_reward(self):
    """Checks rewards and returns over the first steps of the game."""
    game = crowd_modelling.MFGCrowdModellingGame()
    state = game.new_initial_state()
    self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE)
    state.apply_action(game.size // 2)
    self.assertEqual(state.current_player(), 0)
    # This expected reward assumes that the game is initialized with
    # uniform state distribution.
    self.assertAlmostEqual(state.rewards()[0], 1. + np.log(game.size))
    self.assertAlmostEqual(state.returns()[0], 1. + np.log(game.size))
    state.apply_action(1)
    self.assertEqual(state.current_player(), pyspiel.PlayerId.MEAN_FIELD)
    self.assertAlmostEqual(state.returns()[0], 1. + np.log(game.size))
Example #12
    def test_greedy_python(self):
        """Check if the greedy policy works as expected.

    The test checks that a greedy policy with respect to an optimal value is
    an optimal policy.
    """
        game = crowd_modelling.MFGCrowdModellingGame()
        uniform_policy = policy.UniformRandomPolicy(game)
        dist = distribution.DistributionPolicy(game, uniform_policy)
        br_value = best_response_value.BestResponse(game, dist)
        br_val = br_value(game.new_initial_state())

        greedy_pi = greedy_policy.GreedyPolicy(game, None, br_value)
        greedy_pi = greedy_pi.to_tabular()
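        # Since br_value is an optimal value, the policy that is greedy with
        # respect to it is a best response, so its value should equal br_val.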
        pybr_value = policy_value.PolicyValue(game, dist, greedy_pi)
        pybr_val = pybr_value(game.new_initial_state())
        self.assertAlmostEqual(br_val, pybr_val)
Example #13
    def test_average(self):
        """Test the average of policies.

    Here we test that the average of values is the value of the average policy.
    """
        game = crowd_modelling.MFGCrowdModellingGame()
        uniform_policy = policy.UniformRandomPolicy(game)
        mfg_dist = distribution.DistributionPolicy(game, uniform_policy)
        br_value = best_response_value.BestResponse(game, mfg_dist)
        py_value = policy_value.PolicyValue(game, mfg_dist, uniform_policy)
        greedy_pi = greedy_policy.GreedyPolicy(game, None, br_value)
        greedy_pi = greedy_pi.to_tabular()
        merged_pi = fictitious_play.MergedPolicy(
            game, list(range(game.num_players())), [uniform_policy, greedy_pi],
            [mfg_dist,
             distribution.DistributionPolicy(game, greedy_pi)], [0.5, 0.5])
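        # The merged policy mixes the uniform and greedy policies with equal
        # weights, each paired with the distribution it induces.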
        merged_pi_value = policy_value.PolicyValue(game, mfg_dist, merged_pi)

        self.assertAlmostEqual(merged_pi_value(game.new_initial_state()),
                               (br_value(game.new_initial_state()) +
                                py_value(game.new_initial_state())) / 2)
Example #14
  def test_basic(self):
    """Checks that the induced initial state distribution is uniform."""
    game = crowd_modelling.MFGCrowdModellingGame()
    uniform_policy = policy.UniformRandomPolicy(game)
    dist = distribution.DistributionPolicy(game, uniform_policy)
    # child(0) applies the first chance action to a copy of the initial state.
    state = game.new_initial_state().child(0)
    self.assertAlmostEqual(dist.value(state), 1 / game.size)