def test_python_game(self): """Checks if the NashConv is consistent through time.""" game = crowd_modelling.MFGCrowdModellingGame() uniform_policy = policy.UniformRandomPolicy(game) nash_conv_fp = nash_conv.NashConv(game, uniform_policy) self.assertAlmostEqual(nash_conv_fp.nash_conv(), [3.1700299751054217])
def test_dqn_fp_python_game(self):
  """Checks if fictitious play with DQN-based value function works."""
  game = crowd_modelling.MFGCrowdModellingGame()
  dfp = fictitious_play.FictitiousPlay(game)
  uniform_policy = policy.UniformRandomPolicy(game)
  dist = distribution.DistributionPolicy(game, uniform_policy)
  envs = [
      rl_environment.Environment(
          game, mfg_distribution=dist, mfg_population=p)
      for p in range(game.num_players())
  ]
  dqn_agent = dqn.DQN(
      0,
      state_representation_size=envs[0].observation_spec()["info_state"][0],
      num_actions=envs[0].action_spec()["num_actions"],
      hidden_layers_sizes=[256, 128, 64],
      replay_buffer_capacity=100,
      batch_size=5,
      epsilon_start=0.02,
      epsilon_end=0.01)
  for _ in range(10):
    dfp.iteration(rl_br_agent=dqn_agent)
  dfp_policy = dfp.get_policy()
  nash_conv_dfp = nash_conv.NashConv(game, dfp_policy)
  self.assertAlmostEqual(nash_conv_dfp.nash_conv(), 1.0558451955622807)

def test_random_game(self): """Tests basic API functions.""" np.random.seed(7) horizon = 20 size = 50 game = crowd_modelling.MFGCrowdModellingGame(params={ "horizon": horizon, "size": size }) state = game.new_initial_state() t = 0 while not state.is_terminal(): if state.current_player() == pyspiel.PlayerId.CHANCE: actions, probs = zip(*state.chance_outcomes()) action = np.random.choice(actions, p=probs) self.check_cloning(state) self.assertEqual(len(state.legal_actions()), len(state.chance_outcomes())) state.apply_action(action) elif state.current_player() == pyspiel.PlayerId.MEAN_FIELD: self.assertEqual(state.legal_actions(), []) self.check_cloning(state) num_states = len(state.distribution_support()) state.update_distribution([1 / num_states] * num_states) else: self.assertEqual(state.current_player(), 0) self.check_cloning(state) state.observation_string() state.information_state_string() legal_actions = state.legal_actions() action = np.random.choice(legal_actions) state.apply_action(action) t += 1 self.assertEqual(t, horizon)
def test_distribution(self):
  """Checks that distribution-related functions work."""
  game = crowd_modelling.MFGCrowdModellingGame()
  state = game.new_initial_state()
  self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE)
  state.apply_action(game.size // 2)
  self.assertEqual(state.current_player(), 0)
  # This expected reward assumes that the game is initialized with
  # uniform state distribution.
  self.assertAlmostEqual(state.rewards()[0], 1. + np.log(game.size))
  state.apply_action(crowd_modelling.MFGCrowdModellingState._NEUTRAL_ACTION)
  self.assertEqual(state.current_player(), pyspiel.PlayerId.MEAN_FIELD)
  self.assertEqual(
      state.distribution_support(), [str((x, 0)) for x in range(10)])
  new_distrib = [0.01] * 9 + [1. - 0.01 * 9]
  state.update_distribution(new_distrib)
  self.assertAlmostEqual(state._distribution, new_distrib)
  # Chance node.
  state.apply_action(crowd_modelling.MFGCrowdModellingState._NEUTRAL_ACTION)
  # Check that the distribution is taken into account for the reward
  # computation.
  self.assertAlmostEqual(state.rewards()[0], 1. - np.log(0.01))

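# Worked check of the two reward assertions above, assuming a reward of the
# form r(x, mu) = g(x) - log(mu(x)), which is consistent with the asserted
# values (the exact reward definition lives in the game implementation):
# - under the uniform distribution, mu(x) = 1 / size, so
#   -log(mu(x)) = log(size) and the reward is 1. + log(size);
# - after update_distribution puts mass 0.01 on the agent's position, the
#   reward becomes 1. - log(0.01).
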
def test_python_game(self): """Checks if the value of a policy computation works.""" game = crowd_modelling.MFGCrowdModellingGame() uniform_policy = policy.UniformRandomPolicy(game) dist = distribution.DistributionPolicy(game, uniform_policy) br_value = best_response_value.BestResponse(game, dist) br_val = br_value(game.new_initial_state()) self.assertAlmostEqual(br_val, 33.09846599803991)
def test_python_game(self): """Checks if the value of a policy computation works.""" game = crowd_modelling.MFGCrowdModellingGame() uniform_policy = policy.UniformRandomPolicy(game) dist = distribution.DistributionPolicy(game, uniform_policy) py_value = policy_value.PolicyValue(game, dist, uniform_policy) py_val = py_value(game.new_initial_state()) self.assertAlmostEqual(py_val, 29.92843602293449)
def test_fp_python_game(self): """Checks if fictitious play works.""" game = crowd_modelling.MFGCrowdModellingGame() fp = fictitious_play.FictitiousPlay(game) for _ in range(10): fp.iteration() fp_policy = fp.get_policy() nash_conv_fp = nash_conv.NashConv(game, fp_policy) self.assertAlmostEqual(nash_conv_fp.nash_conv(), 0.9908032626911343)
def test_random_game(self): """Tests basic API functions.""" horizon = 20 size = 50 game = crowd_modelling.MFGCrowdModellingGame(params={ "horizon": horizon, "size": size }) pyspiel.random_sim_test( game, num_sims=10, serialize=False, verbose=True)
def test_fp_python_game(self): """Checks if mirror descent works.""" game = crowd_modelling.MFGCrowdModellingGame() md = mirror_descent.MirrorDescent(game) for _ in range(10): md.iteration() md_policy = md.get_policy() nash_conv_md = nash_conv.NashConv(game, md_policy) self.assertAlmostEqual(nash_conv_md.nash_conv(), 2.2730324915546056)
def test_create(self):
  """Checks we can create the game and clone states."""
  game = crowd_modelling.MFGCrowdModellingGame()
  self.assertEqual(game.size, crowd_modelling._SIZE)
  self.assertEqual(game.horizon, crowd_modelling._HORIZON)
  self.assertEqual(game.get_type().dynamics,
                   pyspiel.GameType.Dynamics.MEAN_FIELD)
  print("Num distinct actions:", game.num_distinct_actions())
  state = game.new_initial_state()
  clone = state.clone()
  print("Initial state:", state)
  print("Cloned initial state:", clone)

def test_reward(self):
  """Checks that rewards and returns are consistent at the first steps."""
  game = crowd_modelling.MFGCrowdModellingGame()
  state = game.new_initial_state()
  self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE)
  state.apply_action(game.size // 2)
  self.assertEqual(state.current_player(), 0)
  # This expected reward assumes that the game is initialized with
  # uniform state distribution.
  self.assertAlmostEqual(state.rewards()[0], 1. + np.log(game.size))
  self.assertAlmostEqual(state.returns()[0], 1. + np.log(game.size))
  state.apply_action(1)
  self.assertEqual(state.current_player(), pyspiel.PlayerId.MEAN_FIELD)
  self.assertAlmostEqual(state.returns()[0], 1. + np.log(game.size))

def test_greedy_python(self):
  """Checks that the greedy policy works as expected.

  The test checks that a greedy policy with respect to an optimal value
  is an optimal policy.
  """
  game = crowd_modelling.MFGCrowdModellingGame()
  uniform_policy = policy.UniformRandomPolicy(game)
  dist = distribution.DistributionPolicy(game, uniform_policy)
  br_value = best_response_value.BestResponse(game, dist)
  br_val = br_value(game.new_initial_state())
  greedy_pi = greedy_policy.GreedyPolicy(game, None, br_value)
  greedy_pi = greedy_pi.to_tabular()
  pybr_value = policy_value.PolicyValue(game, dist, greedy_pi)
  pybr_val = pybr_value(game.new_initial_state())
  self.assertAlmostEqual(br_val, pybr_val)

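# The equality above holds because the best-response value satisfies the
# Bellman optimality condition under the fixed distribution `dist`: acting
# greedily with respect to it recovers a best-response policy, so evaluating
# that greedy policy yields the same value.
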
def test_average(self):
  """Tests the average of policies.

  Here we test that the average of values is the value of the average
  policy.
  """
  game = crowd_modelling.MFGCrowdModellingGame()
  uniform_policy = policy.UniformRandomPolicy(game)
  mfg_dist = distribution.DistributionPolicy(game, uniform_policy)
  br_value = best_response_value.BestResponse(game, mfg_dist)
  py_value = policy_value.PolicyValue(game, mfg_dist, uniform_policy)
  greedy_pi = greedy_policy.GreedyPolicy(game, None, br_value)
  greedy_pi = greedy_pi.to_tabular()
  merged_pi = fictitious_play.MergedPolicy(
      game, list(range(game.num_players())),
      [uniform_policy, greedy_pi],
      [mfg_dist, distribution.DistributionPolicy(game, greedy_pi)],
      [0.5, 0.5])
  merged_pi_value = policy_value.PolicyValue(game, mfg_dist, merged_pi)
  self.assertAlmostEqual(
      merged_pi_value(game.new_initial_state()),
      (br_value(game.new_initial_state()) +
       py_value(game.new_initial_state())) / 2)

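# The same construction extends beyond two policies: `MergedPolicy` takes
# parallel lists of policies, their induced distributions, and mixture
# weights. A hedged sketch with hypothetical weights (they should sum to
# one):
#
#   merged = fictitious_play.MergedPolicy(
#       game, list(range(game.num_players())),
#       [uniform_policy, greedy_pi],
#       [mfg_dist, distribution.DistributionPolicy(game, greedy_pi)],
#       [0.25, 0.75])
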
def test_basic(self):
  """Checks the distribution induced by a uniform policy at a first state."""
  game = crowd_modelling.MFGCrowdModellingGame()
  uniform_policy = policy.UniformRandomPolicy(game)
  dist = distribution.DistributionPolicy(game, uniform_policy)
  state = game.new_initial_state().child(0)
  self.assertAlmostEqual(dist.value(state), 1 / game.size)

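# `child(0)` applies the first chance outcome to the initial state; since
# the game starts with a uniform state distribution over `size` positions
# (see the comment in `test_distribution`), the induced distribution assigns
# mass 1 / size to that state.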