def check_transition(action, expected_path, recompute=False):
    """Step the environment with `action` and verify the transition.

    Checks that the MDP-predicted next state matches the state actually
    produced by `env.step`, then compares both state and sparse reward
    against the JSON fixture stored at `expected_path`. When `recompute`
    is True, the fixture is regenerated from the current transition
    before the comparison.
    """
    # Predicted transition from the MDP model vs. the actual env step.
    current = env.state
    predicted, _ = self.base_mdp.get_state_transition(current, action)
    stepped, sparse_reward, _, _ = env.step(action)
    self.assertEqual(
        predicted, stepped,
        '\n' + str(predicted) + '\n' + str(stepped))

    # Optionally refresh the stored fixture before comparing against it.
    if recompute:
        save_as_json(
            {"state": predicted.to_dict(), "reward": sparse_reward},
            expected_path)

    # Load the expected transition from disk.
    expected = load_from_json(expected_path)
    expected_state = OvercookedState.from_dict(expected['state'])

    # Make sure everything lines up (note __eq__ is transitive)
    self.assertTrue(
        predicted.time_independent_equal(expected_state),
        '\n' + str(predicted) + '\n' + str(expected_state))
    self.assertEqual(sparse_reward, expected['reward'])
def save_traj_as_json(trajectories, filename):
    """Validate `trajectories`, convert to a JSON-serializable form, and write it to `filename`."""
    # The trajectory dict must carry exactly the keys the env expects.
    expected_keys = set(OvercookedEnv.DEFAULT_TRAJ_KEYS)
    assert expected_keys == set(trajectories.keys()), "{} vs\n{}".format(
        OvercookedEnv.DEFAULT_TRAJ_KEYS, trajectories.keys())
    AgentEvaluator.check_trajectories(trajectories)
    serializable = AgentEvaluator.make_trajectories_json_serializable(
        trajectories)
    save_as_json(serializable, filename)
def save_traj_as_json(trajectory, filename):
    """Validate a trajectory dict, make it JSON-serializable, and save it to `filename`."""
    # Fail loudly if the trajectory is missing (or has extra) keys.
    assert set(DEFAULT_TRAJ_KEYS) == set(trajectory.keys()), \
        "{} vs\n{}".format(DEFAULT_TRAJ_KEYS, trajectory.keys())
    AgentEvaluator.check_trajectories(trajectory)
    serializable = AgentEvaluator.make_trajectories_json_serializable(
        trajectory)
    save_as_json(serializable, filename)
def save_traj_as_json(trajectory, filename):
    """Serialize a single trajectory dict to JSON at `filename`.

    Validates that `trajectory` has exactly the expected keys, then
    deep-copies it and converts every observation (an OvercookedState)
    to a plain dict so the whole structure is JSON-serializable.
    """
    assert set(DEFAULT_TRAJ_KEYS) == set(trajectory.keys()), \
        "{} vs\n{}".format(DEFAULT_TRAJ_KEYS, trajectory.keys())
    # Deep copy so the caller's trajectory is left untouched.
    dict_traj = copy.deepcopy(trajectory)
    dict_traj["ep_observations"] = [
        [ob.to_dict() for ob in one_ep_obs]
        for one_ep_obs in trajectory["ep_observations"]
    ]
    # BUG FIX: arguments were passed as (filename, dict_traj); every other
    # call site in this file uses save_as_json(data, filename).
    save_as_json(dict_traj, filename)
def test_mdp_serialization(self):
    """Round-trip every visited state through JSON serialization.

    Runs random-agent rollouts (re-seeding until an episode earns some
    sparse reward) and asserts that OvercookedState.to_dict /
    OvercookedState.from_dict are exact inverses for every state
    encountered along the way.
    """
    # Where to store serialized states -- will be overwritten each timestep
    dummy_path = os.path.join(TESTING_DATA_DIR, 'test_mdp_serialization', 'dummy.json')

    # Get starting seed and random agent pair
    seed = 47
    random_pair = AgentPair(RandomAgent(all_actions=True), RandomAgent(all_actions=True))

    # Run rollouts with different seeds until sparse reward is achieved
    sparse_reward = 0
    while sparse_reward <= 0:
        np.random.seed(seed)
        state = self.base_mdp.get_standard_start_state()
        for _ in range(1500):
            # Ensure serialization and deserializations are inverses
            # NOTE(review): this feeds save_as_json's return value straight
            # into load_from_json -- presumably save_as_json returns the
            # path it wrote; confirm against its definition.
            reconstructed_state = OvercookedState.from_dict(
                load_from_json(save_as_json(state.to_dict(), dummy_path)))
            self.assertEqual(
                state, reconstructed_state,
                "\nState: \t\t\t{}\nReconstructed State: \t{}".format(
                    state, reconstructed_state))

            # Advance state
            joint_action, _ = zip(*random_pair.joint_action(state))
            state, infos = self.base_mdp.get_state_transition(
                state, joint_action)
            # Accumulate reward across both agents; loop exits once any
            # rollout for this seed produced positive sparse reward.
            sparse_reward += sum(infos['sparse_reward_by_agent'])
        seed += 1
# NOTE: This code was used to create the common test trajectories. The purpose of these
# tests is to sanity check the consistency of dynamics and encodings across the
# overcooked python and javascript implementations.
# If changing the overcooked environment in ways that affect trajectories, one
# should also run this file again, and make sure `npm run test` will pass in
# overcooked_ai_js

# Saving trajectory for dynamics consistency test
np.random.seed(0)
ae = AgentEvaluator(mdp_params={"layout_name": "cramped_room"}, env_params={"horizon": 1500})
test_trajs = ae.evaluate_random_pair(all_actions=True, num_games=1)
assert test_trajs["ep_returns"][0] > 0, "Choose a different seed, we should have a test trajectory that gets some reward"
test_trajs_path = COMMON_TESTS_DIR + "trajectory_tests/trajs.json"
AgentEvaluator.save_traj_as_json(test_trajs, test_trajs_path)

# Saving encodings for encoding tests: reload the saved trajectory, rebuild
# its MDP, and dump both encodings of every state for each of the two agents.
load_traj = AgentEvaluator.load_traj_from_json(test_trajs_path)
mdp = AgentEvaluator(load_traj["mdp_params"][0], load_traj["env_params"][0]).mdp_fn()
all_states = np.concatenate(load_traj["ep_states"])
for agent_idx in range(2):
    lossless = [mdp.lossless_state_encoding(s)[agent_idx].tolist() for s in all_states]
    save_as_json(lossless, COMMON_TESTS_DIR + "encoding_tests/lossless_py{}.json".format(agent_idx))
    featurized = [mdp.featurize_state(s, ae.mlp)[agent_idx].tolist() for s in all_states]
    save_as_json(featurized, COMMON_TESTS_DIR + "encoding_tests/featurized_py{}.json".format(agent_idx))