def run_simulated_mission(model, display=None, use_delays=False):
    """Run one episode against a simulated ``WorldModel``, driven by ``model``.

    On every tick the current reward is read from the world, ``model.act``
    is asked for an action, ``display`` (if given) is refreshed, and the
    action is applied with ``world_model.simulate``. The loop stops when the
    tick budget of ``5 * MAX_EPISODE_TIME`` is exhausted or the world reports
    that the mission is no longer running.

    Args:
        model: Agent exposing ``act(reward, observation)`` and
            ``mission_ended()``.
        display: Optional object exposing ``update(world_model)``; ignored
            when ``None``.
        use_delays: When true, print each action and sleep for
            ``ACTION_DELAY`` after every simulated step.

    Returns:
        A ``(total_reward, length)`` tuple, where ``length`` is
        ``MAX_EPISODE_TIME - ticks_left / 5``.
    """
    print("Simulated mission running.")
    world_model = WorldModel(BLUEPRINT, CONFIG_FILE, simulated=True)
    ticks_left = 5 * MAX_EPISODE_TIME
    total_reward = 0

    while ticks_left > 0 and world_model.is_mission_running():
        ticks_left -= 1
        current_r = world_model.reward()
        action = model.act(current_r, world_model.get_observation())
        if display is not None:
            display.update(world_model)
        total_reward += current_r
        world_model.simulate(action)
        if use_delays:
            print(action)
            time.sleep(ACTION_DELAY)

    # The in-loop refresh happens before simulate(), so without this extra
    # update the state produced by the last action would never be drawn.
    if display is not None:
        display.update(world_model)

    # Collect last reward, and give to model, then end the mission
    current_r = world_model.reward()
    model.act(current_r, world_model.get_observation())
    total_reward += current_r
    model.mission_ended()
    print("Simulated mission ended")
    return total_reward, (MAX_EPISODE_TIME - (ticks_left / 5))
def run_simulated_mission(model, mission, cfg, demo=False):
    """Play a single simulated episode of ``mission`` with ``model``.

    A ``WorldModel`` is built from ``mission.blueprint`` and ``cfg`` with the
    agent placed at ``mission.start_position``. Each tick reads the reward,
    obtains an action from the model, refreshes ``mission.display`` when one
    is set, and simulates the action. The loop ends once
    ``5 * mission.max_episode_time`` ticks have been spent or the world
    reports that the mission has stopped running.

    Args:
        model: Agent exposing ``act(reward, observation)``,
            ``demo_act(observation)`` and ``mission_ended()``.
        mission: Object supplying ``blueprint``, ``start_position``,
            ``max_episode_time``, ``action_delay`` and ``display``.
        cfg: Configuration passed straight through to ``WorldModel``.
        demo: When true, actions come from ``model.demo_act`` and the model
            is not handed the rewards (including the final one).

    Returns:
        ``MissionStats`` with the summed ``reward`` and a ``length`` of
        ``mission.max_episode_time - ticks_left / 5``.
    """
    print("Simulated mission running.")
    sim = WorldModel(
        mission.blueprint,
        cfg,
        simulated=True,
        agent_pos=mission.start_position,
    )
    remaining = 5 * mission.max_episode_time
    reward_sum = 0
    pause_between_steps = mission.action_delay > 0

    def refresh_display():
        # mission.display is re-read on every call, exactly as the inline
        # checks did.
        if mission.display is not None:
            mission.display.update(sim)

    while remaining > 0 and sim.is_mission_running():
        remaining -= 1
        step_reward = sim.reward()
        action = (
            model.demo_act(sim.get_observation())
            if demo
            else model.act(step_reward, sim.get_observation())
        )
        refresh_display()
        reward_sum += step_reward
        sim.simulate(action)
        if pause_between_steps:
            print(action)
            time.sleep(mission.action_delay)

    # Show the final state, hand the closing reward to the model (outside
    # demo mode), and then signal the end of the mission.
    refresh_display()
    step_reward = sim.reward()
    if not demo:
        model.act(step_reward, sim.get_observation())
    reward_sum += step_reward
    model.mission_ended()
    print("Simulated mission ended")

    episode_length = mission.max_episode_time - (remaining / 5)
    return MissionStats(reward=reward_sum, length=episode_length)