Example #1
        def check_transition(action, expected_path, recompute=False):
            # Compute actual values
            state = env.state
            pred_state, _ = self.base_mdp.get_state_transition(state, action)
            new_state, sparse_reward, _, _ = env.step(action)
            self.assertEqual(pred_state, new_state,
                             '\n' + str(pred_state) + '\n' + str(new_state))

            # Recompute expected values if desired
            if recompute:
                actual = {
                    "state": pred_state.to_dict(),
                    "reward": sparse_reward
                }
                save_as_json(actual, expected_path)

            # Compute expected values
            expected = load_from_json(expected_path)
            expected_state = OvercookedState.from_dict(expected['state'])
            expected_reward = expected['reward']

            # Make sure everything lines up (note __eq__ is transitive)
            self.assertTrue(
                pred_state.time_independent_equal(expected_state),
                '\n' + str(pred_state) + '\n' + str(expected_state))
            self.assertEqual(sparse_reward, expected_reward)
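A minimal sketch of how a helper like this might be driven, assuming a recorded list of joint actions and a per-step fixture directory (`joint_actions`, `fixture_dir`, and the filename pattern are illustrative, not from the snippet):

        import os

        # Hypothetical driver: replay recorded joint actions and check each
        # transition against its stored fixture. Passing recompute=True once
        # regenerates the fixtures after an intentional dynamics change.
        for t, joint_action in enumerate(joint_actions):
            expected_path = os.path.join(fixture_dir,
                                         'transition_{}.json'.format(t))
            check_transition(joint_action, expected_path)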
Example #2
 def save_traj_as_json(trajectories, filename):
     assert set(OvercookedEnv.DEFAULT_TRAJ_KEYS) == set(
         trajectories.keys()), "{} vs\n{}".format(
             OvercookedEnv.DEFAULT_TRAJ_KEYS, trajectories.keys())
     AgentEvaluator.check_trajectories(trajectories)
     trajectories = AgentEvaluator.make_trajectories_json_serializable(
         trajectories)
     save_as_json(trajectories, filename)
Example #3
 def save_traj_as_json(trajectory, filename):
     """Saves the given trajectory dict as JSON, after validating and serializing it"""
     assert set(DEFAULT_TRAJ_KEYS) == set(
         trajectory.keys()), "{} vs\n{}".format(DEFAULT_TRAJ_KEYS,
                                                trajectory.keys())
     AgentEvaluator.check_trajectories(trajectory)
     trajectory = AgentEvaluator.make_trajectories_json_serializable(
         trajectory)
     save_as_json(trajectory, filename)
Example #4
    def save_traj_as_json(trajectory, filename):
        """Saves the `idx`th trajectory as a list of state action pairs"""
        assert set(DEFAULT_TRAJ_KEYS) == set(
            trajectory.keys()), "{} vs\n{}".format(DEFAULT_TRAJ_KEYS,
                                                   trajectory.keys())

        dict_traj = copy.deepcopy(trajectory)
        dict_traj["ep_observations"] = [[
            ob.to_dict() for ob in one_ep_obs
        ] for one_ep_obs in trajectory["ep_observations"]]

        save_as_json(dict_traj, filename)
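A minimal inverse sketch, assuming the dict layout produced above (the function name is hypothetical; the script at the end of this section uses AgentEvaluator.load_traj_from_json for the same purpose):

    def load_traj_from_json_sketch(filename):
        # Hypothetical inverse of the save above: read the JSON back and
        # rebuild OvercookedState objects from their dict form.
        traj = load_from_json(filename)
        traj["ep_observations"] = [[
            OvercookedState.from_dict(ob) for ob in one_ep_obs
        ] for one_ep_obs in traj["ep_observations"]]
        return traj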
Example #5
    def test_mdp_serialization(self):
        # Where to store serialized states -- will be overwritten each timestep
        dummy_path = os.path.join(TESTING_DATA_DIR, 'test_mdp_serialization',
                                  'dummy.json')

        # Get starting seed and random agent pair
        seed = 47
        random_pair = AgentPair(RandomAgent(all_actions=True),
                                RandomAgent(all_actions=True))

        # Run rollouts with different seeds until sparse reward is achieved
        sparse_reward = 0
        while sparse_reward <= 0:
            np.random.seed(seed)
            state = self.base_mdp.get_standard_start_state()
            for _ in range(1500):
                # Ensure serialization and deserialization are inverses
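                # save_as_json returns the path it wrote, so its result can
                # feed load_from_json directly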
                reconstructed_state = OvercookedState.from_dict(
                    load_from_json(save_as_json(state.to_dict(), dummy_path)))
                self.assertEqual(
                    state, reconstructed_state,
                    "\nState: \t\t\t{}\nReconstructed State: \t{}".format(
                        state, reconstructed_state))

                # Advance state
                joint_action, _ = zip(*random_pair.joint_action(state))
                state, infos = self.base_mdp.get_state_transition(
                    state, joint_action)
                sparse_reward += sum(infos['sparse_reward_by_agent'])
            seed += 1
# NOTE: This code was used to create the common test trajectories. The purpose of these
# tests is to sanity-check that dynamics and encodings stay consistent across the
# Overcooked Python and JavaScript implementations.
# If you change the Overcooked environment in ways that affect trajectories, you
# should rerun this file and make sure `npm run test` still passes in
# overcooked_ai_js.

# Saving trajectory for dynamics consistency test
np.random.seed(0)
ae = AgentEvaluator(mdp_params={"layout_name": "cramped_room"}, env_params={"horizon": 1500})
test_trajs = ae.evaluate_random_pair(all_actions=True, num_games=1)
assert test_trajs["ep_returns"][0] > 0, "Choose a different seed; the test trajectory should obtain some sparse reward"

test_trajs_path = COMMON_TESTS_DIR + "trajectory_tests/trajs.json"
AgentEvaluator.save_traj_as_json(test_trajs, test_trajs_path)

# Saving encodings for encoding tests
load_traj = AgentEvaluator.load_traj_from_json(test_trajs_path)
mdp_params = load_traj["mdp_params"][0]
env_params = load_traj["env_params"][0]
mdp = AgentEvaluator(mdp_params, env_params).mdp_fn()
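# Encodings are saved separately for each of the two agents' perspectives
# (index i selects the per-agent encoding returned by the mdp)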
for i in range(2):
    lossless_path = COMMON_TESTS_DIR + "encoding_tests/lossless_py{}.json".format(i)
    encoded_states = [mdp.lossless_state_encoding(s)[i].tolist() for s in np.concatenate(load_traj["ep_states"])]
    save_as_json(encoded_states, lossless_path)

    featurization_path = COMMON_TESTS_DIR + "encoding_tests/featurized_py{}.json".format(i)
    encoded_states = [mdp.featurize_state(s, ae.mlp)[i].tolist() for s in np.concatenate(load_traj["ep_states"])]
    save_as_json(encoded_states, featurization_path)