def training_pipeline(cls, **kwargs): ppo_steps = int(75000000) lr = 3e-4 num_mini_batch = 1 update_repeats = 4 num_steps = 128 save_interval = 5000000 log_interval = 10000 gamma = 0.99 use_gae = True gae_lambda = 0.95 max_grad_norm = 0.5 action_strs = PointNavTask.class_action_names() non_end_action_inds_set = { i for i, a in enumerate(action_strs) if a != robothor_constants.END } end_action_ind_set = {action_strs.index(robothor_constants.END)} return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=log_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={ "ppo_loss": PPO(**PPOConfig), "grouped_action_imitation": GroupedActionImitation( nactions=len(PointNavTask.class_action_names()), action_groups=[ non_end_action_inds_set, end_action_ind_set ], ), }, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ PipelineStage( loss_names=["ppo_loss", "grouped_action_imitation"], max_stage_steps=ppo_steps, ) ], lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}), )
def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: config = self.CONFIG.clone() config.defrost() config.DATASET.DATA_PATH = self.VALID_SCENES config.MODE = "validate" config.freeze() return { "env_config": config, "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())), "distance_to_goal": self.DISTANCE_TO_GOAL, # type:ignore }
def create_model(cls, **kwargs) -> nn.Module: rgb_uuid = next( (s.uuid for s in cls.SENSORS if isinstance(s, RGBSensor)), None) depth_uuid = next( (s.uuid for s in cls.SENSORS if isinstance(s, DepthSensor)), None) goal_sensor_uuid = next( (s.uuid for s in cls.SENSORS if isinstance(s, (GPSCompassSensorRoboThor, TargetCoordinatesSensorHabitat))), None, ) return PointNavActorCriticSimpleConvRNN( action_space=gym.spaces.Discrete( len(PointNavTask.class_action_names())), observation_space=kwargs["sensor_preprocessor_graph"]. observation_spaces, rgb_uuid=rgb_uuid, depth_uuid=depth_uuid, goal_sensor_uuid=goal_sensor_uuid, hidden_size=512, embed_coordinates=False, coordinate_dims=2, num_rnn_layers=1, rnn_type="GRU", )
def create_model(cls, **kwargs) -> nn.Module: return ResnetTensorPointNavActorCritic( action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())), observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, goal_sensor_uuid="target_coordinates_ind", rgb_resnet_preprocessor_uuid="rgb_resnet", hidden_size=512, goal_dims=32, )
def create_model(cls, **kwargs) -> nn.Module: return PointNavActorCriticSimpleConvRNN( action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())), observation_space=kwargs["observation_set"].observation_spaces, goal_sensor_uuid="target_coordinates_ind", hidden_size=512, embed_coordinates=False, coordinate_dims=2, num_rnn_layers=1, rnn_type="GRU", )
def _get_sampler_args_for_scene_split( self, scenes_dir: str, process_ind: int, total_processes: int, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: path = os.path.join(scenes_dir, "*.json.gz") scenes = [ scene.split("/")[-1].split(".")[0] for scene in glob.glob(path) ] if len(scenes) == 0: raise RuntimeError(( "Could find no scene dataset information in directory {}." " Are you sure you've downloaded them? " " If not, see https://allenact.org/installation/download-datasets/ information" " on how this can be done.").format(scenes_dir)) if total_processes > len(scenes): # oversample some scenes -> bias if total_processes % len(scenes) != 0: print( "Warning: oversampling some of the scenes to feed all processes." " You can avoid this by setting a number of workers divisible by the number of scenes" ) scenes = scenes * int(ceil(total_processes / len(scenes))) scenes = scenes[:total_processes * (len(scenes) // total_processes)] else: if len(scenes) % total_processes != 0: print( "Warning: oversampling some of the scenes to feed all processes." " You can avoid this by setting a number of workers divisor of the number of scenes" ) inds = self._partition_inds(len(scenes), total_processes) return { "scenes": scenes[inds[process_ind]:inds[process_ind + 1]], "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())), "seed": seeds[process_ind] if seeds is not None else None, "deterministic_cudnn": deterministic_cudnn, "rewards_config": self.REWARD_CONFIG, }
def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: config = self.TRAIN_CONFIGS[process_ind] return { "env_config": config, "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())), "distance_to_goal": self.DISTANCE_TO_GOAL, # type:ignore }
def _get_sampler_args_for_scene_split( self, scenes: List[str], process_ind: int, total_processes: int, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: if total_processes > len(scenes): if total_processes % len(scenes) != 0: print( "Warning: oversampling some of the scenes to feed all processes." " You can avoid this by setting a number of workers divisible by the number of scenes" ) scenes = scenes * int(ceil(total_processes / len(scenes))) scenes = scenes[:total_processes * (len(scenes) // total_processes)] else: if len(scenes) % total_processes != 0: print( "Warning: oversampling some of the scenes to feed all processes." " You can avoid this by setting a number of workers divisor of the number of scenes" ) inds = self._partition_inds(len(scenes), total_processes) return { "scenes": scenes[inds[process_ind]:inds[process_ind + 1]], "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())), "seed": seeds[process_ind] if seeds is not None else None, "deterministic_cudnn": deterministic_cudnn, "rewards_config": { "step_penalty": -0.01, "goal_success_reward": 10.0, "failed_stop_reward": 0.0, "shaping_weight": 1.0, # applied to the decrease in distance to target }, }
def _get_sampler_args_for_scene_split( self, scenes_dir: str, process_ind: int, total_processes: int, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: path = ( scenes_dir + "*.json.gz" if scenes_dir[-1] == "/" else scenes_dir + "/*.json.gz" ) scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)] if total_processes > len(scenes): # oversample some scenes -> bias if total_processes % len(scenes) != 0: print( "Warning: oversampling some of the scenes to feed all processes." " You can avoid this by setting a number of workers divisible by the number of scenes" ) scenes = scenes * int(ceil(total_processes / len(scenes))) scenes = scenes[: total_processes * (len(scenes) // total_processes)] else: if len(scenes) % total_processes != 0: print( "Warning: oversampling some of the scenes to feed all processes." " You can avoid this by setting a number of workers divisor of the number of scenes" ) inds = self._partition_inds(len(scenes), total_processes) return { "scenes": scenes[inds[process_ind] : inds[process_ind + 1]], "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())), "seed": seeds[process_ind] if seeds is not None else None, "deterministic_cudnn": deterministic_cudnn, "rewards_config": self.REWARD_CONFIG, }