class PointNaviThorRGBDPPOExperimentConfig(
    PointNaviThorBaseConfig,
    PointNavThorMixInPPOConfig,
    PointNavMixInSimpleConvGRUConfig,
):
    """A Point Navigation experiment configuration in iThor with RGBD input."""

    SENSORS = [
        RGBSensorThor(
            height=PointNaviThorBaseConfig.SCREEN_SIZE,
            width=PointNaviThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        DepthSensorThor(
            height=PointNaviThorBaseConfig.SCREEN_SIZE,
            width=PointNaviThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    @classmethod
    def tag(cls):
        return "Pointnav-iTHOR-RGBD-SimpleConv-DDPPO"
class ObjectNavRoboThorRGBPPOExperimentConfig(
    ObjectNaviThorBaseConfig,
    ObjectNavMixInPPOConfig,
    ObjectNavMixInResNetGRUConfig,
):
    """An Object Navigation experiment configuration in iThor with RGBD input."""

    SENSORS = [
        RGBSensorThor(
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        DepthSensorThor(
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNaviThorBaseConfig.TARGET_TYPES,
        ),
    ]

    @classmethod
    def tag(cls):
        return "Objectnav-iTHOR-RGBD-ResNetGRU-DDPPO"
class PointNaviThorDepthPPOExperimentConfig(
    PointNaviThorBaseConfig,
    PointNavThorMixInPPOAndGBCConfig,
    PointNavMixInSimpleConvGRUConfig,
):
    """A Point Navigation experiment configuration in iThor with Depth input."""

    SENSORS = PointNavThorMixInPPOAndGBCConfig.SENSORS + (  # type:ignore
        DepthSensorThor(
            height=PointNaviThorBaseConfig.SCREEN_SIZE,
            width=PointNaviThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GPSCompassSensorRoboThor(),
    )

    @classmethod
    def tag(cls):
        return "Pointnav-iTHOR-Depth-SimpleConv-DDPPOAndGBC"
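# The three experiment configs above differ only in their sensor suites and mix-ins.
# As a quick sanity check (a minimal standalone sketch, assuming AllenAct's `SensorSuite`
# is importable; the experiment module path below is hypothetical), one can inspect the
# observation space a config exposes before launching training:

from allenact.base_abstractions.sensor import SensorSuite

from pointnav_ithor_rgbd_simpleconvgru_ddppo import PointNaviThorRGBDPPOExperimentConfig

# Build the observation space directly from the config's sensors, mirroring what
# `create_model` does when no preprocessor graph is supplied.
obs_space = SensorSuite(PointNaviThorRGBDPPOExperimentConfig.SENSORS).observation_spaces
for uuid, space in obs_space.spaces.items():
    print(uuid, space)  # e.g. "rgb_lowres", "depth_lowres", and the GPS/compass reading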
class WalkthroughRGBMappingPPOExperimentConfig(WalkthroughBaseExperimentConfig):
    ORDERED_OBJECT_TYPES = list(sorted(PICKUPABLE_OBJECTS + OPENABLE_OBJECTS))

    MAP_RANGE_SENSOR = ReachableBoundsTHORSensor(margin=1.0)
    MAP_INFO = dict(
        map_range_sensor=MAP_RANGE_SENSOR,
        vision_range_in_cm=40 * 5,
        map_size_in_cm=1050
        if isinstance(MAP_RANGE_SENSOR, ReachableBoundsTHORSensor)
        else 2200,
        resolution_in_cm=5,
    )

    SENSORS = WalkthroughBaseExperimentConfig.SENSORS + [
        RelativePositionChangeTHORSensor(),
        MAP_RANGE_SENSOR,
        DepthSensorThor(
            height=WalkthroughBaseExperimentConfig.SCREEN_SIZE,
            width=WalkthroughBaseExperimentConfig.SCREEN_SIZE,
            use_normalization=False,
            uuid="depth",
        ),
        BinnedPointCloudMapTHORSensor(fov=FOV, **MAP_INFO),
        SemanticMapTHORSensor(
            fov=FOV,
            **MAP_INFO,
            ordered_object_types=ORDERED_OBJECT_TYPES,
        ),
    ]

    @classmethod
    def tag(cls) -> str:
        return "WalkthroughRGBMappingPPO"

    @classmethod
    def num_train_processes(cls) -> int:
        return max(1, torch.cuda.device_count() * 5)

    @classmethod
    def create_model(cls, **kwargs) -> WalkthroughActorCriticResNetWithPassiveMap:
        map_sensor = cast(
            BinnedPointCloudMapTHORSensor,
            next(
                s for s in cls.SENSORS if isinstance(s, BinnedPointCloudMapTHORSensor)
            ),
        )
        map_kwargs = dict(
            frame_height=224,
            frame_width=224,
            vision_range_in_cm=map_sensor.vision_range_in_cm,
            resolution_in_cm=map_sensor.resolution_in_cm,
            map_size_in_cm=map_sensor.map_size_in_cm,
        )

        observation_space = (
            SensorSuite(cls.SENSORS).observation_spaces
            if kwargs.get("sensor_preprocessor_graph") is None
            else kwargs["sensor_preprocessor_graph"].observation_spaces
        )

        return WalkthroughActorCriticResNetWithPassiveMap(
            action_space=gym.spaces.Discrete(len(cls.actions())),
            observation_space=observation_space,
            rgb_uuid=cls.EGOCENTRIC_RGB_UUID,
            unshuffled_rgb_uuid=cls.UNSHUFFLED_RGB_UUID,
            semantic_map_channels=len(cls.ORDERED_OBJECT_TYPES),
            height_map_channels=3,
            map_kwargs=map_kwargs,
        )

    @classmethod
    def _training_pipeline_info(cls, **kwargs) -> Dict[str, Any]:
        """Define how the model trains."""
        training_steps = cls.TRAINING_STEPS
        return dict(
            named_losses=dict(
                ppo_loss=PPO(clip_decay=LinearDecay(training_steps), **PPOConfig),
                binned_map_loss=BinnedPointCloudMapLoss(
                    binned_pc_uuid="binned_pc_map",
                    map_logits_uuid="ego_height_binned_map_logits",
                ),
                semantic_map_loss=SemanticMapFocalLoss(
                    semantic_map_uuid="semantic_map",
                    map_logits_uuid="ego_semantic_map_logits",
                ),
            ),
            pipeline_stages=[
                PipelineStage(
                    loss_names=["ppo_loss", "binned_map_loss", "semantic_map_loss"],
                    loss_weights=[1.0, 1.0, 100.0],
                    max_stage_steps=training_steps,
                )
            ],
            num_steps=32,
            num_mini_batch=1,
            update_repeats=3,
            use_lr_decay=True,
            lr=3e-4,
        )
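# The map geometry implied by MAP_INFO above is worth spelling out: with
# resolution_in_cm=5, the 1050 cm map corresponds to a 210 x 210 cell grid (matching the
# 210x210x72 semantic map noted in the mapping test below), and the 200 cm vision range
# corresponds to 40 cells of egocentric depth per update. A standalone arithmetic sketch,
# not part of the config:

resolution_in_cm = 5
map_size_in_cm = 1050        # used when the range sensor is a ReachableBoundsTHORSensor
vision_range_in_cm = 40 * 5  # 200 cm

map_cells = map_size_in_cm // resolution_in_cm          # 210 -> a 210 x 210 grid
vision_cells = vision_range_in_cm // resolution_in_cm   # 40 cells in front of the agent
print(map_cells, vision_cells)  # 210 40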
class ObjectNavRoboThorRGBPPOGestureExperimentConfig(ExperimentConfig, ABC):
    """An Object Navigation experiment configuration in iThor with RGBD and gesture input."""

    TARGET_TYPES = tuple(
        sorted(
            [
                'AlarmClock', 'Apple', 'ArmChair', 'Bed', 'Box', 'Bread', 'ButterKnife',
                'Chair', 'CoffeeTable', 'Cup', 'DeskLamp', 'DiningTable', 'FloorLamp', 'Fork',
                'HandTowel', 'HandTowelHolder', 'Knife', 'Laptop', 'Newspaper', 'PaperTowelRoll',
                'Plate', 'Plunger', 'Poster', 'Potato', 'RemoteControl', 'ShowerCurtain',
                'ShowerDoor', 'SideTable', 'Sofa', 'Spoon', 'Television', 'TissueBox',
                'ToiletPaper', 'ToiletPaperHanger', 'Tomato', 'Towel', 'TowelHolder', 'Window',
            ]
        )
    )

    ROOM_TYPES = tuple(sorted(["Kitchen", "LivingRoom", "Bathroom", "Bedroom"]))

    STEP_SIZE = 0.25
    ROTATION_DEGREES = 15.0
    VISIBILITY_DISTANCE = 1.5
    STOCHASTIC = False
    HORIZONTAL_FIELD_OF_VIEW = 90

    CAMERA_WIDTH = 224
    CAMERA_HEIGHT = 224
    SCREEN_SIZE = 224
    MAX_STEPS = 100

    NUM_PROCESSES = 10
    TRAIN_GPU_IDS = list(range(torch.cuda.device_count()))
    SAMPLER_GPU_IDS = TRAIN_GPU_IDS
    VALID_GPU_IDS = [torch.cuda.device_count() - 1]
    TEST_GPU_IDS = [torch.cuda.device_count() - 1]

    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav-gesture/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav-gesture/val")
    TEST_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav-gesture/test")

    ADVANCE_SCENE_ROLLOUT_PERIOD = None

    THOR_COMMIT_ID = "bad5bc2b250615cb766ffb45d455c211329af17e"

    # try:
    #     with open("instruction_tokens.txt", "r") as f:
    #         INSTRUCTION_TOKENS = tuple(sorted(map(lambda a: a.strip('\n'), f.readlines())))
    # except:
    #     raise Exception("Cannot read instruction tokens from the loaded text file.")

    SENSORS = [
        RGBSensorThor(
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        DepthSensorThor(
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GoalObjectTypeThorGestureSensor(object_types=TARGET_TYPES),
        # RelativePositionTHORSensor(uuid="rel_position"),
        GestureDatasetSensor(uuid="gestures"),
        HumanPoseSensor(uuid="human_poses"),
    ]

    def __init__(self, **kwargs):
        self.REWARD_CONFIG = {
            "step_penalty": -0.001,
            "goal_success_reward": 1.0,
            "failed_stop_reward": -0.01,
            "collision_reward": -0.005,
            "shaping_weight": 0.0,
        }  # TODO Gesture: add collision penalty

        self.recording_percentage = float(kwargs["recording_percentage"])
        self.prediction_percentage = float(kwargs["prediction_percentage"])
        self.add_intervention = bool(kwargs["add_intervention"])
        self.smoothed = bool(kwargs["smoothed"])
        self.room_type = str(kwargs["room_type"])
        self.use_gesture = bool(kwargs["use_gesture"])
        self.random = bool(kwargs["random"]) if "random" in kwargs else False
        # if not self.use_gesture:
        #     self.SENSORS = self.SENSORS[:4]

    @classmethod
    def tag(cls):
        return "Objectnav-iTHOR-RGBD-Gesture-ResNetGRU-DDPPO"

    @classmethod
    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        preprocessors = []

        rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensor)), None)
        if rgb_sensor is not None:
            preprocessors.append(
                ResNetPreprocessor(
                    input_height=cls.SCREEN_SIZE,
                    input_width=cls.SCREEN_SIZE,
                    output_width=7,
                    output_height=7,
                    output_dims=512,
                    pool=False,
                    torchvision_resnet_model=models.resnet18,
                    input_uuids=[rgb_sensor.uuid],
                    output_uuid="rgb_resnet",
                )
            )

        depth_sensor = next(
            (s for s in cls.SENSORS if isinstance(s, DepthSensor)), None
        )
        if depth_sensor is not None:
            preprocessors.append(
                ResNetPreprocessor(
                    input_height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
                    input_width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
                    output_width=7,
                    output_height=7,
                    output_dims=512,
                    pool=False,
                    torchvision_resnet_model=models.resnet18,
                    input_uuids=[depth_sensor.uuid],
                    output_uuid="depth_resnet",
                )
            )

        return preprocessors

    # @classmethod
    def create_model(self, **kwargs) -> nn.Module:
        has_rgb = any(isinstance(s, RGBSensor) for s in self.SENSORS)
        has_depth = any(isinstance(s, DepthSensor) for s in self.SENSORS)
        goal_sensor_uuid = next(
            (s.uuid for s in self.SENSORS if isinstance(s, GoalObjectTypeThorGestureSensor)),
            None,
        )
        # rel_position_uuid = next(
        #     (s.uuid for s in self.SENSORS if isinstance(s, RelativePositionTHORSensor)),
        #     None,
        # )
        gesture_sensor_uuid = next(
            (s.uuid for s in self.SENSORS if isinstance(s, GestureDatasetSensor)),
            None,
        )
        human_pose_uuid = next(
            (s.uuid for s in self.SENSORS if isinstance(s, HumanPoseSensor)),
            None,
        )

        for s in self.SENSORS:
            if isinstance(s, GestureDatasetSensor):
                s.add_intervention = self.add_intervention
                s.use_gesture = self.use_gesture
        for s in self.SENSORS:
            if isinstance(s, HumanPoseSensor):
                s.use_gesture = self.use_gesture

        # if self.use_gesture:
        return ResnetTensorObjectGestureNavActorCritic(
            action_space=gym.spaces.Discrete(len(ObjectGestureNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid=goal_sensor_uuid,
            # rel_position_uuid=rel_position_uuid,
            gesture_sensor_uuid=gesture_sensor_uuid,
            human_pose_uuid=human_pose_uuid,
            rgb_resnet_preprocessor_uuid="rgb_resnet" if has_rgb else None,
            depth_resnet_preprocessor_uuid="depth_resnet" if has_depth else None,
            hidden_size=512,
            goal_dims=512,
            gesture_compressor_hidden_out_dim=512,
            human_pose_hidden_out_dim=512,
        )
        # else:
        #     return ResnetTensorObjectNavActorCritic(
        #         action_space=gym.spaces.Discrete(len(ObjectGestureNavTask.class_action_names())),
        #         observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
        #         goal_sensor_uuid=goal_sensor_uuid,
        #         rgb_resnet_preprocessor_uuid="rgb_resnet" if has_rgb else None,
        #         depth_resnet_preprocessor_uuid="depth_resnet" if has_depth else None,
        #         hidden_size=512,
        #         goal_dims=32,
        #     )

    @classmethod
    def env_args(cls):
        assert cls.THOR_COMMIT_ID is not None
        return dict(
            width=cls.CAMERA_WIDTH,
            height=cls.CAMERA_HEIGHT,
            quality="Very Low",
            commit_id=cls.THOR_COMMIT_ID,
            continuousMode=True,
            applyActionNoise=cls.STOCHASTIC,
            agentType="stochastic",
            rotateStepDegrees=cls.ROTATION_DEGREES,
            visibilityDistance=cls.VISIBILITY_DISTANCE,
            gridSize=cls.STEP_SIZE,
            snapToGrid=False,
            agentMode="default",
            fieldOfView=horizontal_to_vertical_fov(
                horizontal_fov_in_degrees=cls.HORIZONTAL_FIELD_OF_VIEW,
                width=cls.CAMERA_WIDTH,
                height=cls.CAMERA_HEIGHT,
            ),
            include_private_scenes=False,
            renderDepthImage=any(isinstance(s, DepthSensorThor) for s in cls.SENSORS),
            # local_executable_path="/home/kevin57/Unity\ Projects/ithor_env/ithor_env.x86_64",
            # local_executable_path="/home/qi/ithor_env/ithor_env.x86_64",
        )

    def machine_params(self, mode="train", **kwargs):
        sampler_devices: Sequence[int] = []
        if mode == "train":
            workers_per_device = 1
            gpu_ids = (
                []
                if not torch.cuda.is_available()
                else self.TRAIN_GPU_IDS * workers_per_device
            )
            nprocesses = (
                1
                if not torch.cuda.is_available()
                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
            )
            sampler_devices = self.SAMPLER_GPU_IDS
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.VALID_GPU_IDS
        elif mode == "test":
            nprocesses = 5 if torch.cuda.is_available() else 1
            gpu_ids = [] if not torch.cuda.is_available() else self.TEST_GPU_IDS
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        sensors = [*self.SENSORS]
        if mode != "train":
            sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)]

        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(sensors).observation_spaces,
                preprocessors=self.preprocessors(),
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=sampler_devices
            if mode == "train"
            else gpu_ids,  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    # @classmethod
    def make_sampler_fn(self, **kwargs) -> TaskSampler:
        return ObjectGestureNavDatasetTaskSampler(
            self.recording_percentage,
            self.prediction_percentage,
            self.smoothed,
            **kwargs,
        )

    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]],
        deterministic_cudnn: bool,
        include_expert_sensor: bool = True,
        allow_oversample: bool = False,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)]
        scenes = list(sorted(scenes, key=lambda x: int(x.strip("FloorPlan"))))
        scene_len = len(scenes)

        # Select scenes according to room type.
        if self.room_type == "all":
            scenes = scenes[:]
        elif self.room_type == "kitchen":
            scenes = scenes[: scene_len // 4]
        elif self.room_type == "livingroom":
            scenes = scenes[scene_len // 4 : scene_len // 2]
        elif self.room_type == "bedroom":
            scenes = scenes[scene_len // 2 : scene_len * 3 // 4]
        elif self.room_type == "bathroom":
            scenes = scenes[scene_len * 3 // 4 :]
        else:
            raise ValueError("Please give a valid room type")

        if len(scenes) == 0:
            raise RuntimeError(
                (
                    "Could not find any scene dataset information in directory {}."
                    " Are you sure you've downloaded the datasets?"
                    " If not, see https://allenact.org/installation/download-datasets/"
                    " for information on how this can be done."
                ).format(scenes_dir)
            )

        oversample_warning = (
            f"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes})."
            " You can avoid this by setting a number of workers divisible by the number of scenes"
        )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if not allow_oversample:
                raise RuntimeError(
                    f"Cannot have `total_processes > len(scenes)`"
                    f" ({total_processes} > {len(scenes)}) when `allow_oversample` is `False`."
                )
            if total_processes % len(scenes) != 0:
                get_logger().warning(oversample_warning)
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            get_logger().warning(oversample_warning)

        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.TARGET_TYPES,
            "max_steps": self.MAX_STEPS,
            "sensors": [
                s
                for s in self.SENSORS
                if (include_expert_sensor or not isinstance(s, ExpertActionSensor))
            ],
            "action_space": gym.spaces.Discrete(
                len(ObjectGestureNavTask.class_action_names())
            ),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
            "env_args": {
                **self.env_args(),
                "x_display": (
                    f"0.{devices[process_ind % len(devices)]}"
                    if devices is not None
                    and len(devices) > 0
                    and devices[process_ind % len(devices)] >= 0
                    else None
                ),
                # "x_display": "1",
            },
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            scenes_dir=os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind=process_ind,
            total_processes=total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
            allow_oversample=True,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        res["allow_flipping"] = True
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            scenes_dir=os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind=process_ind,
            total_processes=total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
            include_expert_sensor=False,
            allow_oversample=False,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        if self.TEST_DATASET_DIR is None:
            get_logger().warning(
                "No test dataset dir detected, running test on validation set instead."
            )
            return self.valid_task_sampler_args(
                process_ind=process_ind,
                total_processes=total_processes,
                devices=devices,
                seeds=seeds,
                deterministic_cudnn=deterministic_cudnn,
            )
        else:
            res = self._get_sampler_args_for_scene_split(
                scenes_dir=os.path.join(self.TEST_DATASET_DIR, "episodes"),
                process_ind=process_ind,
                total_processes=total_processes,
                devices=devices,
                seeds=seeds,
                deterministic_cudnn=deterministic_cudnn,
                include_expert_sensor=False,
                allow_oversample=False,
            )
            # TODO Gesture: setting this to True lets us log all metrics (SR and SPL). Why?
            res["env_args"]["all_metadata_available"] = True
res["rewards_config"] = {**res["rewards_config"], "shaping_weight": 0} res["scene_directory"] = self.TEST_DATASET_DIR res["loop_dataset"] = False return res def training_pipeline(self, **kwargs): ppo_steps = int(10000000) lr = 3e-4 num_mini_batch = 1 update_repeats = 4 num_steps = 128 save_interval = 1000000 log_interval = self.MAX_STEPS*10 gamma = 0.99 use_gae = True gae_lambda = 0.95 max_grad_norm = 0.5 return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=log_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={"ppo_loss": PPO(**PPOConfig)}, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps) ], lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} ), )
def test_binned_and_semantic_mapping(self, tmpdir):
    try:
        if not self.setup_path_for_use_with_rearrangement_project():
            return

        from baseline_configs.rearrange_base import RearrangeBaseExperimentConfig
        from baseline_configs.walkthrough.walkthrough_rgb_base import (
            WalkthroughBaseExperimentConfig,
        )
        from rearrange.constants import (
            FOV,
            PICKUPABLE_OBJECTS,
            OPENABLE_OBJECTS,
        )
        from datagen.datagen_utils import get_scenes

        ORDERED_OBJECT_TYPES = list(sorted(PICKUPABLE_OBJECTS + OPENABLE_OBJECTS))

        map_range_sensor = ReachableBoundsTHORSensor(margin=1.0)
        map_info = dict(
            map_range_sensor=map_range_sensor,
            vision_range_in_cm=40 * 5,
            map_size_in_cm=1050,
            resolution_in_cm=5,
        )
        map_sensors = [
            RelativePositionChangeTHORSensor(),
            map_range_sensor,
            DepthSensorThor(
                height=224,
                width=224,
                use_normalization=False,
                uuid="depth",
            ),
            BinnedPointCloudMapTHORSensor(fov=FOV, ego_only=False, **map_info),
            SemanticMapTHORSensor(
                fov=FOV,
                ego_only=False,
                ordered_object_types=ORDERED_OBJECT_TYPES,
                **map_info,
            ),
        ]
        all_sensors = [*WalkthroughBaseExperimentConfig.SENSORS, *map_sensors]

        open_x_displays = []
        try:
            open_x_displays = get_open_x_displays()
        except (AssertionError, IOError):
            pass

        walkthrough_task_sampler = WalkthroughBaseExperimentConfig.make_sampler_fn(
            stage="train",
            sensors=all_sensors,
            scene_to_allowed_rearrange_inds={s: [0] for s in get_scenes("train")},
            force_cache_reset=True,
            allowed_scenes=None,
            seed=1,
            x_display=open_x_displays[0] if len(open_x_displays) != 0 else None,
            thor_controller_kwargs={
                **RearrangeBaseExperimentConfig.THOR_CONTROLLER_KWARGS,
                # "server_class": ai2thor.wsgi_server.WsgiServer,  # Only for debugging
            },
        )

        targets_path = os.path.join(tmpdir, "rearrange_mapping_examples.pkl.gz")
        urllib.request.urlretrieve(
            "https://ai2-prior-allenact-public-test.s3-us-west-2.amazonaws.com/ai2thor_mapping/rearrange_mapping_examples.pkl.gz",
            targets_path,
        )
        goal_obs_dict = compress_pickle.load(targets_path)

        def compare_recursive(obs, goal_obs, key_list: List):
            if isinstance(obs, Dict):
                for k in goal_obs:
                    compare_recursive(
                        obs=obs[k], goal_obs=goal_obs[k], key_list=key_list + [k]
                    )
            elif isinstance(obs, (List, Tuple)):
                for i in range(len(goal_obs)):
                    compare_recursive(
                        obs=obs[i], goal_obs=goal_obs[i], key_list=key_list + [i]
                    )
            else:
                # Should be a numpy array at this point.
                assert isinstance(obs, np.ndarray) and isinstance(
                    goal_obs, np.ndarray
                ), f"After {key_list}, not numpy arrays, obs={obs}, goal_obs={goal_obs}"

                obs = 1.0 * obs
                goal_obs = 1.0 * goal_obs

                where_nan = np.isnan(goal_obs)
                obs[where_nan] = 0.0
                goal_obs[where_nan] = 0.0
                assert (
                    np.abs(1.0 * obs - 1.0 * goal_obs).mean() < 1e-4
                ), f"Difference of {np.abs(1.0 * obs - 1.0 * goal_obs).mean()} at {key_list}."

        observations_dict = defaultdict(lambda: [])
        for i in range(5):  # A handful of episodes keeps the test fast.
            set_seed(i)
            task = walkthrough_task_sampler.next_task()

            obs_list = observations_dict[i]
            obs_list.append(task.get_observations())
            k = 0
            compare_recursive(
                obs=obs_list[0], goal_obs=goal_obs_dict[i][0], key_list=[i, k]
            )
            while not task.is_done():
                obs = task.step(
                    action=task.action_names().index(
                        random.choice(
                            3
                            * [
                                "move_ahead",
                                "rotate_right",
                                "rotate_left",
                                "look_up",
                                "look_down",
                            ]
                            + ["done"]
                        )
                    )
                ).observation
                k += 1
                obs_list.append(obs)
                compare_recursive(
                    obs=obs,
                    goal_obs=goal_obs_dict[i][task.num_steps_taken()],
                    key_list=[i, k],
                )

                # Free space metric map in RGB using pointclouds coming from depth images.
                # This is built iteratively after every step.
                # R - encodes points at a height < 0.02m (i.e. the floor)
                # G - encodes points at a height between 0.02m and 2m,
                #     i.e. objects the agent would run into
                # B - encodes points higher than 2m, i.e. the ceiling

                # Uncomment if you wish to visualize the observations:
                # import matplotlib.pyplot as plt
                # plt.imshow(
                #     np.flip(255 * (obs["binned_pc_map"]["map"] > 0), 0)
                # )  # np.flip because we expect "up" to be -row
                # plt.title("Free space map")
                # plt.show()
                # plt.close()

                # See also `obs["binned_pc_map"]["egocentric_update"]` to see the
                # metric map from the point of view of the agent before it is
                # rotated into world-space coordinates and merged with past observations.

                # Semantic map in RGB which is iteratively revealed using depth maps to
                # figure out what parts of the scene the agent has seen so far.
                # This map has shape 210x210x72 with the 72 channels corresponding to the
                # 72 object types in `ORDERED_OBJECT_TYPES`.
                semantic_map = obs["semantic_map"]["map"]

                # We can't display all 72 channels in an RGB image so instead we randomly
                # assign each object a color and then just allow them to overlap each other.
                colored_semantic_map = SemanticMapBuilder.randomly_color_semantic_map(
                    semantic_map
                )

                # Here's the full semantic map with nothing masked out because the agent
                # hasn't seen it yet.
                colored_semantic_map_no_fog = SemanticMapBuilder.randomly_color_semantic_map(
                    map_sensors[-1].semantic_map_builder.ground_truth_semantic_map
                )

                # Uncomment if you wish to visualize the observations:
                # import matplotlib.pyplot as plt
                # plt.imshow(
                #     np.flip(  # np.flip because we expect "up" to be -row
                #         np.concatenate(
                #             (
                #                 colored_semantic_map,
                #                 255 + 0 * colored_semantic_map[:, :10, :],
                #                 colored_semantic_map_no_fog,
                #             ),
                #             axis=1,
                #         ),
                #         0,
                #     )
                # )
                # plt.title("Semantic map with and without exploration fog")
                # plt.show()
                # plt.close()

                # See also
                # * `obs["semantic_map"]["egocentric_update"]`
                # * `obs["semantic_map"]["explored_mask"]`
                # * `obs["semantic_map"]["egocentric_mask"]`

        # To save observations for comparison against future runs, uncomment the below.
        # os.makedirs("tmp_out", exist_ok=True)
        # compress_pickle.dump(
        #     {**observations_dict}, "tmp_out/rearrange_mapping_examples.pkl.gz"
        # )
    finally:
        try:
            walkthrough_task_sampler.close()
        except NameError:
            pass
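# The R/G/B height bins described in the comments above (floor below 0.02 m, obstacles
# between 0.02 m and 2 m, ceiling above 2 m) can be illustrated with a tiny standalone
# snippet. The thresholds come from the comments; the array names and the inclusive/exclusive
# boundary choices here are assumptions made only for this example.

import numpy as np

# Hypothetical per-cell point heights (in meters) observed by the depth camera.
point_heights_m = np.array([0.0, 0.01, 0.5, 1.8, 2.5])

# One-hot "binned point cloud" channels following the R/G/B convention described above.
floor_channel = point_heights_m < 0.02                                   # R: floor
obstacle_channel = (point_heights_m >= 0.02) & (point_heights_m < 2.0)   # G: things the agent could run into
ceiling_channel = point_heights_m >= 2.0                                 # B: ceiling

print(np.stack([floor_channel, obstacle_channel, ceiling_channel], axis=-1).astype(np.uint8))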