def training_pipeline(self, **kwargs):
    # PPO hyperparameters.
    ppo_steps = int(300000000)
    lr = 3e-4
    num_mini_batch = 1
    update_repeats = 4
    num_steps = 128
    save_interval = 5000000
    log_interval = 10000
    gamma = 0.99
    use_gae = True
    gae_lambda = 0.95
    max_grad_norm = 0.5

    # Split the action space into the END action and everything else so the
    # imitation loss can supervise the two groups separately.
    action_strs = ObjectNavTask.class_action_names()
    non_end_action_inds_set = {
        i for i, a in enumerate(action_strs) if a != robothor_constants.END
    }
    end_action_ind_set = {action_strs.index(robothor_constants.END)}

    return TrainingPipeline(
        save_interval=save_interval,
        metric_accumulate_interval=log_interval,
        optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
        num_mini_batch=num_mini_batch,
        update_repeats=update_repeats,
        max_grad_norm=max_grad_norm,
        num_steps=num_steps,
        named_losses={
            "ppo_loss": PPO(**PPOConfig),
            "grouped_action_imitation": GroupedActionImitation(
                nactions=len(action_strs),
                action_groups=[non_end_action_inds_set, end_action_ind_set],
            ),
        },
        gamma=gamma,
        use_gae=use_gae,
        gae_lambda=gae_lambda,
        advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=[
            PipelineStage(
                loss_names=["ppo_loss", "grouped_action_imitation"],
                max_stage_steps=ppo_steps,
            )
        ],
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
        ),
    )
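
# A minimal, self-contained sketch of the LR schedule wired up above, assuming
# allenact's LinearDecay yields a multiplier that falls linearly from 1.0 to
# 0.0 over `steps` optimizer steps. The `linear_decay` helper below is a
# hypothetical stand-in, not the library implementation:
import torch
from torch import optim
from torch.optim.lr_scheduler import LambdaLR


def linear_decay(steps: int):
    return lambda step: max(0.0, 1.0 - step / steps)


params = [torch.nn.Parameter(torch.zeros(1))]
opt = optim.Adam(params, lr=3e-4)
sched = LambdaLR(opt, lr_lambda=linear_decay(int(3e8)))
# Halfway through training the effective lr would be roughly 1.5e-4.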
def next_task(
    self, force_advance_scene: bool = False
) -> Optional[ObjectNavTask]:
    if self.max_tasks is not None and self.max_tasks <= 0:
        return None

    # Advance to the next scene (wrapping around) once all of the current
    # scene's episodes have been used, reshuffling the new episode list.
    if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]):
        self.scene_index = (self.scene_index + 1) % len(self.scenes)
        random.shuffle(self.episodes[self.scenes[self.scene_index]])
        self.episode_index = 0

    scene = self.scenes[self.scene_index]
    episode = self.episodes[scene][self.episode_index]
    if self.env is None:
        self.env = self._create_environment()
        self.env.reset(
            scene_name=scene,
            filtered_objects=list({e["object_id"] for e in self.episodes[scene]}),
        )
    elif scene.replace("_physics", "") != self.env.scene_name.replace(
        "_physics", ""
    ):
        self.env.reset(
            scene_name=scene,
            filtered_objects=list({e["object_id"] for e in self.episodes[scene]}),
        )

    task_info = {
        "scene": scene,
        "object_type": episode["object_type"],
        "initial_position": episode["initial_position"],
        "initial_orientation": episode["initial_orientation"],
        "distance_to_target": episode["shortest_path_length"],
        "path_to_target": episode["shortest_path"],
        "id": episode["id"],
        "mirrored": self.allow_flipping and random.random() > 0.5,
    }

    self.episode_index += 1
    if self.max_tasks is not None:
        self.max_tasks -= 1
    if not self.env.teleport(
        episode["initial_position"], episode["initial_orientation"]
    ):
        # The requested pose is unreachable; skip to the next episode.
        return self.next_task()
    self._last_sampled_task = ObjectNavTask(
        env=self.env,
        sensors=self.sensors,
        task_info=task_info,
        max_steps=self.max_steps,
        action_space=self._action_space,
        reward_configs=self.rewards_config,
    )
    return self._last_sampled_task
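
# Toy illustration (with hypothetical scene/episode names) of the cycling
# logic above: once every episode in the current scene has been consumed, the
# sampler advances to the next scene (wrapping around) and reshuffles that
# scene's episodes before continuing.
import random

scenes = ["FloorPlan_Train1_1", "FloorPlan_Train1_2"]
episodes = {s: [f"{s}_ep{i}" for i in range(3)] for s in scenes}
scene_index, episode_index = 0, 0
for _ in range(8):
    if episode_index >= len(episodes[scenes[scene_index]]):
        scene_index = (scene_index + 1) % len(scenes)
        random.shuffle(episodes[scenes[scene_index]])
        episode_index = 0
    print(scenes[scene_index], episodes[scenes[scene_index]][episode_index])
    episode_index += 1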
@classmethod
def create_model(cls, **kwargs) -> nn.Module:
    return ResnetTensorObjectNavActorCritic(
        action_space=gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
        observation_space=kwargs["observation_set"].observation_spaces,
        goal_sensor_uuid="goal_object_type_ind",
        rgb_resnet_preprocessor_uuid="rgb_resnet",
        hidden_size=512,
        goal_dims=32,
    )
def _get_sampler_args_for_scene_split(
    self,
    scenes_dir: str,
    process_ind: int,
    total_processes: int,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    path = os.path.join(scenes_dir, "*.json.gz")
    scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)]
    if len(scenes) == 0:
        raise RuntimeError(
            (
                "Could not find any scene dataset information in directory {}."
                " Are you sure you've downloaded the datasets? If not, see"
                " https://allenact.org/installation/download-datasets/ for"
                " information on how this can be done."
            ).format(scenes_dir)
        )

    if total_processes > len(scenes):  # oversample some scenes -> bias
        if total_processes % len(scenes) != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisible by"
                " the number of scenes."
            )
        scenes = scenes * int(ceil(total_processes / len(scenes)))
        scenes = scenes[: total_processes * (len(scenes) // total_processes)]
    elif len(scenes) % total_processes != 0:
        print(
            "Warning: oversampling some of the scenes to feed all processes."
            " You can avoid this by setting the number of workers to a divisor"
            " of the number of scenes."
        )

    inds = self._partition_inds(len(scenes), total_processes)
    return {
        "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
        "object_types": self.TARGET_TYPES,
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
        "seed": seeds[process_ind] if seeds is not None else None,
        "deterministic_cudnn": deterministic_cudnn,
        "rewards_config": self.REWARD_CONFIG,
    }
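
# A quick check (hypothetical numbers) of the oversampling arithmetic above:
# with 3 scenes and 7 processes, the scene list is tiled ceil(7 / 3) = 3 times
# (9 entries) and then truncated to 7, so one scene ends up sampled more often
# than the others -- the bias the warning refers to.
from math import ceil

scenes = ["a", "b", "c"]
total_processes = 7
scenes = scenes * int(ceil(total_processes / len(scenes)))
scenes = scenes[: total_processes * (len(scenes) // total_processes)]
print(scenes)  # ['a', 'b', 'c', 'a', 'b', 'c', 'a'] -> 'a' appears three times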
def _get_sampler_args_for_scene_split(
    self,
    scenes_dir: str,
    process_ind: int,
    total_processes: int,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    path = os.path.join(scenes_dir, "*.json.gz")
    scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)]
    if total_processes > len(scenes):  # oversample some scenes -> bias
        if total_processes % len(scenes) != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisible by"
                " the number of scenes."
            )
        scenes = scenes * int(ceil(total_processes / len(scenes)))
        scenes = scenes[: total_processes * (len(scenes) // total_processes)]
    elif len(scenes) % total_processes != 0:
        print(
            "Warning: oversampling some of the scenes to feed all processes."
            " You can avoid this by setting the number of workers to a divisor"
            " of the number of scenes."
        )

    inds = self._partition_inds(len(scenes), total_processes)
    return {
        "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
        "object_types": self.TARGET_TYPES,
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
        "seed": seeds[process_ind] if seeds is not None else None,
        "deterministic_cudnn": deterministic_cudnn,
        "rewards_config": self.REWARD_CONFIG,
    }
def _get_sampler_args_for_scene_split(
    self,
    scenes: List[str],
    process_ind: int,
    total_processes: int,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    if total_processes > len(scenes):
        if total_processes % len(scenes) != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisible by"
                " the number of scenes."
            )
        scenes = scenes * int(ceil(total_processes / len(scenes)))
        scenes = scenes[: total_processes * (len(scenes) // total_processes)]
    elif len(scenes) % total_processes != 0:
        print(
            "Warning: oversampling some of the scenes to feed all processes."
            " You can avoid this by setting the number of workers to a divisor"
            " of the number of scenes."
        )

    inds = self._partition_inds(len(scenes), total_processes)
    return {
        "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
        "object_types": self.TARGET_TYPES,
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
        "seed": seeds[process_ind] if seeds is not None else None,
        "deterministic_cudnn": deterministic_cudnn,
        "rewards_config": {
            "step_penalty": -0.01,
            "goal_success_reward": 10.0,
            "failed_stop_reward": 0.0,
            "shaping_weight": 1.0,  # applied to the decrease in distance to target
        },
    }
@classmethod
def create_model(cls, **kwargs) -> nn.Module:
    has_rgb = any(isinstance(s, RGBSensor) for s in cls.SENSORS)
    has_depth = any(isinstance(s, DepthSensor) for s in cls.SENSORS)
    goal_sensor_uuid = next(
        (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)),
        None,
    )
    return ResnetTensorObjectNavActorCritic(
        action_space=gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
        observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
        goal_sensor_uuid=goal_sensor_uuid,
        rgb_resnet_preprocessor_uuid="rgb_resnet" if has_rgb else None,
        depth_resnet_preprocessor_uuid="depth_resnet" if has_depth else None,
        hidden_size=512,
        goal_dims=32,
    )
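
# Toy version (hypothetical stub classes) of the sensor-driven wiring above:
# the model only receives a preprocessor uuid for the modalities actually
# present in SENSORS.
class RGBSensor: ...


class DepthSensor: ...


SENSORS = [RGBSensor()]
has_rgb = any(isinstance(s, RGBSensor) for s in SENSORS)
has_depth = any(isinstance(s, DepthSensor) for s in SENSORS)
print("rgb_resnet" if has_rgb else None)  # rgb_resnet
print("depth_resnet" if has_depth else None)  # None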
def __init__(self):
    super().__init__()

    self.REWARD_CONFIG["shaping_weight"] = 0

    self.SENSORS = [
        RGBSensorThor(
            height=self.SCREEN_SIZE,
            width=self.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(object_types=self.TARGET_TYPES),
        ExpertActionSensor(nactions=len(ObjectNavTask.class_action_names())),
    ]

    self.PREPROCESSORS = [
        Builder(
            ResnetPreProcessorHabitat,
            {
                "input_height": self.SCREEN_SIZE,
                "input_width": self.SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
                "parallel": False,
            },
        ),
    ]

    self.OBSERVATIONS = [
        "rgb_resnet",
        "goal_object_type_ind",
        "expert_action",
    ]
class ObjectNaviThorRGBDAggerExperimentConfig(
    ObjectNavRoboThorBaseConfig,
    ObjectNavMixInDAggerConfig,
    ObjectNavMixInResNetGRUConfig,
):
    """An Object Navigation experiment configuration in RoboThor with RGB
    input."""

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
        ExpertActionSensor(nactions=len(ObjectNavTask.class_action_names())),
    ]

    @classmethod
    def tag(cls):
        return "Objectnav-RoboTHOR-RGB-ResNetGRU-DAgger"
def _get_sampler_args_for_scene_split(
    self,
    scenes_dir: str,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]],
    seeds: Optional[List[int]],
    deterministic_cudnn: bool,
    include_expert_sensor: bool = True,
) -> Dict[str, Any]:
    path = os.path.join(scenes_dir, "*.json.gz")
    scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)]
    if len(scenes) == 0:
        raise RuntimeError(
            (
                "Could not find any scene dataset information in directory {}."
                " Are you sure you've downloaded the datasets? If not, see"
                " https://allenact.org/installation/download-datasets/ for"
                " information on how this can be done."
            ).format(scenes_dir)
        )

    oversample_warning = (
        f"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes})."
        " You can avoid this by setting a number of workers divisible by the number of scenes"
    )
    if total_processes > len(scenes):  # oversample some scenes -> bias
        if total_processes % len(scenes) != 0:
            get_logger().warning(oversample_warning)
        scenes = scenes * int(ceil(total_processes / len(scenes)))
        scenes = scenes[: total_processes * (len(scenes) // total_processes)]
    elif len(scenes) % total_processes != 0:
        get_logger().warning(oversample_warning)

    inds = self._partition_inds(len(scenes), total_processes)
    return {
        "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
        "object_types": self.TARGET_TYPES,
        "max_steps": self.MAX_STEPS,
        "sensors": [
            s
            for s in self.SENSORS
            if (include_expert_sensor or not isinstance(s, ExpertActionSensor))
        ],
        "action_space": gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
        "seed": seeds[process_ind] if seeds is not None else None,
        "deterministic_cudnn": deterministic_cudnn,
        "rewards_config": self.REWARD_CONFIG,
        "env_args": {
            **self.ENV_ARGS,
            "x_display": (
                f"0.{devices[process_ind % len(devices)]}"
                if devices is not None
                and len(devices) > 0
                and devices[process_ind % len(devices)] >= 0
                else None
            ),
        },
    }
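
# Toy illustration (hypothetical device ids) of the x_display round-robin
# above: each sampler process is pinned to one of the available GPUs by
# rendering to X display "0.<gpu index>".
devices = [0, 1]
for process_ind in range(4):
    x_display = (
        f"0.{devices[process_ind % len(devices)]}"
        if devices and devices[process_ind % len(devices)] >= 0
        else None
    )
    print(process_ind, x_display)  # 0 0.0 / 1 0.1 / 2 0.0 / 3 0.1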
def next_task(
    self, force_advance_scene: bool = False
) -> Optional[ObjectNavTask]:
    if self.max_tasks is not None and self.max_tasks <= 0:
        return None

    if not self.scenes_is_dataset:
        scene = self.sample_scene(force_advance_scene)

        if self.env is None:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)
        elif scene.replace("_physics", "") != self.env.scene_name.replace(
            "_physics", ""
        ):
            self.env.reset(scene)

        pose = self.env.randomize_agent_location()

        object_types_in_scene = {
            o["objectType"] for o in self.env.last_event.metadata["objects"]
        }

        # Pick a target type uniformly at random among those present in the scene.
        task_info = {"scene": scene}
        for ot in random.sample(self.object_types, len(self.object_types)):
            if ot in object_types_in_scene:
                task_info["object_type"] = ot
                break

        if "object_type" not in task_info:
            get_logger().warning(
                "Scene {} does not contain any"
                " objects of any of the types {}.".format(scene, self.object_types)
            )

        task_info["initial_position"] = {k: pose[k] for k in ["x", "y", "z"]}
        task_info["initial_orientation"] = cast(
            Dict[str, float], pose["rotation"]
        )["y"]
    else:
        assert self.max_tasks is not None
        next_task_id = self.dataset_first + self.max_tasks - 1
        assert (
            self.dataset_first <= next_task_id <= self.dataset_last
        ), "wrong task_id {} for min {} max {}".format(
            next_task_id, self.dataset_first, self.dataset_last
        )

        task_info = copy.deepcopy(self.dataset_episodes[next_task_id])
        scene = task_info["scene"]
        if self.env is None:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)
        elif scene.replace("_physics", "") != self.env.scene_name.replace(
            "_physics", ""
        ):
            self.env.reset(scene_name=scene)
        self.env.step(
            {
                "action": "TeleportFull",
                **{k: float(v) for k, v in task_info["initial_position"].items()},
                "rotation": {
                    "x": 0.0,
                    "y": float(task_info["initial_orientation"]),
                    "z": 0.0,
                },
                "horizon": 0.0,
            }
        )
        assert self.env.last_action_success, "Failed to reset agent for {}".format(
            task_info
        )

        self.max_tasks -= 1

    # task_info["actions"] = []  # TODO populated by Task(Generic[EnvType]).step(...) but unused
    task_info["mirrored"] = self.allow_flipping and random.random() > 0.5

    self._last_sampled_task = ObjectNavTask(
        env=self.env,
        sensors=self.sensors,
        task_info=task_info,
        max_steps=self.max_steps,
        action_space=self._action_space,
        reward_configs=self.rewards_config,
    )
    return self._last_sampled_task