def training_pipeline(cls, **kwargs):
    ppo_steps = int(75000000)
    lr = 3e-4
    num_mini_batch = 1
    update_repeats = 4
    num_steps = 128
    save_interval = 5000000
    log_interval = 10000
    gamma = 0.99
    use_gae = True
    gae_lambda = 0.95
    max_grad_norm = 0.5

    action_strs = PointNavTask.class_action_names()
    non_end_action_inds_set = {
        i for i, a in enumerate(action_strs) if a != robothor_constants.END
    }
    end_action_ind_set = {action_strs.index(robothor_constants.END)}

    return TrainingPipeline(
        save_interval=save_interval,
        metric_accumulate_interval=log_interval,
        optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
        num_mini_batch=num_mini_batch,
        update_repeats=update_repeats,
        max_grad_norm=max_grad_norm,
        num_steps=num_steps,
        named_losses={
            "ppo_loss": PPO(**PPOConfig),
            "grouped_action_imitation": GroupedActionImitation(
                nactions=len(PointNavTask.class_action_names()),
                action_groups=[non_end_action_inds_set, end_action_ind_set],
            ),
        },
        gamma=gamma,
        use_gae=use_gae,
        gae_lambda=gae_lambda,
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=[
            PipelineStage(
                loss_names=["ppo_loss", "grouped_action_imitation"],
                max_stage_steps=ppo_steps,
            )
        ],
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
        ),
    )
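# Sanity-check sketch (not part of the config above): the two action groups
# built in training_pipeline should partition the action space, with END
# isolated so the grouped imitation loss can supervise the "stop" decision
# separately from movement. The action names below are hypothetical stand-ins
# for PointNavTask.class_action_names().
END = "End"
action_strs = ["MoveAhead", "RotateLeft", "RotateRight", END]
non_end_action_inds_set = {i for i, a in enumerate(action_strs) if a != END}
end_action_ind_set = {action_strs.index(END)}
assert non_end_action_inds_set | end_action_ind_set == set(range(len(action_strs)))
assert non_end_action_inds_set.isdisjoint(end_action_ind_set)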
def valid_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    config = self.CONFIG.clone()
    config.defrost()
    config.DATASET.DATA_PATH = self.VALID_SCENES
    config.MODE = "validate"
    config.freeze()
    return {
        "env_config": config,
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
    }
def create_model(cls, **kwargs) -> nn.Module:
    rgb_uuid = next(
        (s.uuid for s in cls.SENSORS if isinstance(s, RGBSensor)), None
    )
    depth_uuid = next(
        (s.uuid for s in cls.SENSORS if isinstance(s, DepthSensor)), None
    )
    goal_sensor_uuid = next(
        (
            s.uuid
            for s in cls.SENSORS
            if isinstance(
                s, (GPSCompassSensorRoboThor, TargetCoordinatesSensorHabitat)
            )
        ),
        None,
    )
    return PointNavActorCriticSimpleConvRNN(
        action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
        rgb_uuid=rgb_uuid,
        depth_uuid=depth_uuid,
        goal_sensor_uuid=goal_sensor_uuid,
        hidden_size=512,
        embed_coordinates=False,
        coordinate_dims=2,
        num_rnn_layers=1,
        rnn_type="GRU",
    )
def create_model(cls, **kwargs) -> nn.Module:
    return ResnetTensorPointNavActorCritic(
        action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
        goal_sensor_uuid="target_coordinates_ind",
        rgb_resnet_preprocessor_uuid="rgb_resnet",
        hidden_size=512,
        goal_dims=32,
    )
def create_model(cls, **kwargs) -> nn.Module:
    return PointNavActorCriticSimpleConvRNN(
        action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        observation_space=kwargs["observation_set"].observation_spaces,
        goal_sensor_uuid="target_coordinates_ind",
        hidden_size=512,
        embed_coordinates=False,
        coordinate_dims=2,
        num_rnn_layers=1,
        rnn_type="GRU",
    )
def _get_sampler_args_for_scene_split(
    self,
    scenes_dir: str,
    process_ind: int,
    total_processes: int,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    path = os.path.join(scenes_dir, "*.json.gz")
    scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)]
    if len(scenes) == 0:
        raise RuntimeError(
            (
                "Could not find any scene dataset information in directory {}."
                " Are you sure you've downloaded it?"
                " If not, see https://allenact.org/installation/download-datasets/"
                " for information on how this can be done."
            ).format(scenes_dir)
        )
    if total_processes > len(scenes):  # oversample some scenes -> bias
        if total_processes % len(scenes) != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisible by the number of scenes"
            )
        scenes = scenes * int(ceil(total_processes / len(scenes)))
        scenes = scenes[: total_processes * (len(scenes) // total_processes)]
    else:
        if len(scenes) % total_processes != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers that evenly divides the number of scenes"
            )
    inds = self._partition_inds(len(scenes), total_processes)
    return {
        "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        "seed": seeds[process_ind] if seeds is not None else None,
        "deterministic_cudnn": deterministic_cudnn,
        "rewards_config": self.REWARD_CONFIG,
    }
def train_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    config = self.TRAIN_CONFIGS[process_ind]
    return {
        "env_config": config,
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
    }
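# Hedged usage sketch: the training engine builds one task sampler per worker
# by calling train_task_sampler_args once per process index. `StubConfig` and
# `n_procs` below are hypothetical stand-ins, not part of the codebase.
class StubConfig:
    def train_task_sampler_args(self, process_ind, total_processes, **kwargs):
        return {"process_ind": process_ind, "total_processes": total_processes}


n_procs = 4  # hypothetical number of training workers
per_worker_args = [
    StubConfig().train_task_sampler_args(process_ind=i, total_processes=n_procs)
    for i in range(n_procs)
]
assert [a["process_ind"] for a in per_worker_args] == [0, 1, 2, 3]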
def _get_sampler_args_for_scene_split(
    self,
    scenes: List[str],
    process_ind: int,
    total_processes: int,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    if total_processes > len(scenes):
        if total_processes % len(scenes) != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisible by the number of scenes"
            )
        scenes = scenes * int(ceil(total_processes / len(scenes)))
        scenes = scenes[: total_processes * (len(scenes) // total_processes)]
    else:
        if len(scenes) % total_processes != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers that evenly divides the number of scenes"
            )
    inds = self._partition_inds(len(scenes), total_processes)
    return {
        "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        "seed": seeds[process_ind] if seeds is not None else None,
        "deterministic_cudnn": deterministic_cudnn,
        "rewards_config": {
            "step_penalty": -0.01,
            "goal_success_reward": 10.0,
            "failed_stop_reward": 0.0,
            "shaping_weight": 1.0,  # applied to the decrease in distance to target
        },
    }
def _get_sampler_args_for_scene_split(
    self,
    scenes_dir: str,
    process_ind: int,
    total_processes: int,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    path = (
        scenes_dir + "*.json.gz"
        if scenes_dir[-1] == "/"
        else scenes_dir + "/*.json.gz"
    )
    scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)]
    if total_processes > len(scenes):  # oversample some scenes -> bias
        if total_processes % len(scenes) != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisible by the number of scenes"
            )
        scenes = scenes * int(ceil(total_processes / len(scenes)))
        scenes = scenes[: total_processes * (len(scenes) // total_processes)]
    else:
        if len(scenes) % total_processes != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers that evenly divides the number of scenes"
            )
    inds = self._partition_inds(len(scenes), total_processes)
    return {
        "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        "seed": seeds[process_ind] if seeds is not None else None,
        "deterministic_cudnn": deterministic_cudnn,
        "rewards_config": self.REWARD_CONFIG,
    }
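# The scene-split helpers above all delegate to self._partition_inds to compute
# per-worker slice boundaries. A minimal, self-contained sketch of one plausible
# implementation (an assumption; the real helper may differ): produce
# num_parts + 1 evenly spaced boundary indices so that worker i receives
# scenes[inds[i]:inds[i + 1]].
import numpy as np


def _partition_inds_sketch(n: int, num_parts: int) -> np.ndarray:
    return np.round(np.linspace(0, n, num_parts + 1)).astype(np.int64)


# Example: 10 scenes over 4 workers -> boundaries [0, 2, 5, 8, 10],
# i.e. the workers get 2, 3, 3, and 2 scenes respectively.
assert _partition_inds_sketch(10, 4).tolist() == [0, 2, 5, 8, 10]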
def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]:
    if self.max_tasks is not None and self.max_tasks <= 0:
        return None

    if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]):
        self.scene_index = (self.scene_index + 1) % len(self.scenes)
        # shuffle the new list of episodes to train on
        if self.shuffle_dataset:
            random.shuffle(self.episodes[self.scenes[self.scene_index]])
        self.episode_index = 0

    scene = self.scenes[self.scene_index]
    episode = self.episodes[scene][self.episode_index]
    if self.env is not None:
        if scene.replace("_physics", "") != self.env.scene_name.replace(
            "_physics", ""
        ):
            self.env.reset(scene_name=scene, filtered_objects=[])
    else:
        self.env = self._create_environment()
        self.env.reset(scene_name=scene, filtered_objects=[])

    def to_pos(s):
        if isinstance(s, (Dict, Tuple)):
            return s
        if isinstance(s, float):
            return {"x": 0, "y": s, "z": 0}
        return str_to_pos_for_cache(s)

    for k in ["initial_position", "initial_orientation", "target_position"]:
        episode[k] = to_pos(episode[k])

    task_info = {
        "scene": scene,
        "initial_position": episode["initial_position"],
        "initial_orientation": episode["initial_orientation"],
        "target": episode["target_position"],
        "shortest_path": episode["shortest_path"],
        "distance_to_target": episode["shortest_path_length"],
        "id": episode["id"],
    }

    if self.allow_flipping and random.random() > 0.5:
        task_info["mirrored"] = True
    else:
        task_info["mirrored"] = False

    self.episode_index += 1
    if self.max_tasks is not None:
        self.max_tasks -= 1
    if not self.env.teleport(
        pose=episode["initial_position"], rotation=episode["initial_orientation"]
    ):
        return self.next_task()

    self._last_sampled_task = PointNavTask(
        env=self.env,
        sensors=self.sensors,
        task_info=task_info,
        max_steps=self.max_steps,
        action_space=self._action_space,
        reward_configs=self.rewards_config,
    )
    return self._last_sampled_task
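# Standalone sketch of the coordinate normalization done by `to_pos` in
# next_task above: cached episodes may store a position as a mapping/tuple, as
# a bare y-coordinate float, or as an encoded string. The string branch
# delegates to str_to_pos_for_cache and is stubbed out here, since its exact
# encoding is not shown in this file.
def _to_pos_sketch(s):
    if isinstance(s, (dict, tuple)):
        return s
    if isinstance(s, float):
        return {"x": 0, "y": s, "z": 0}
    raise NotImplementedError("string positions are decoded by str_to_pos_for_cache")


assert _to_pos_sketch(0.9) == {"x": 0, "y": 0.9, "z": 0}
assert _to_pos_sketch({"x": 1.0, "y": 0.9, "z": 2.0}) == {"x": 1.0, "y": 0.9, "z": 2.0}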
def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]:
    if self.max_tasks is not None and self.max_tasks <= 0:
        return None

    scene = self.sample_scene(force_advance_scene)

    if self.env is not None:
        if scene.replace("_physics", "") != self.env.scene_name.replace(
            "_physics", ""
        ):
            self.env.reset(scene_name=scene)
    else:
        self.env = self._create_environment()
        self.env.reset(scene_name=scene)

    # task_info = copy.deepcopy(self.sample_episode(scene))
    # task_info['target'] = task_info['target_position']
    # task_info['actions'] = []

    locs = self.env.known_good_locations_list()
    # get_logger().debug("locs[0] {} locs[-1] {}".format(locs[0], locs[-1]))

    ys = [loc["y"] for loc in locs]
    miny = min(ys)
    maxy = max(ys)
    assert maxy - miny < 1e-6, "miny {} maxy {} for scene {}".format(
        miny, maxy, scene
    )

    too_close_to_target = True
    target: Optional[Dict[str, float]] = None
    for _ in range(10):
        self.env.randomize_agent_location()
        target = copy.copy(random.choice(locs))
        too_close_to_target = self.env.distance_to_point(target) <= 0
        if not too_close_to_target:
            break

    pose = self.env.agent_state()

    task_info = {
        "scene": scene,
        "initial_position": {k: pose[k] for k in ["x", "y", "z"]},
        "initial_orientation": pose["rotation"]["y"],
        "target": target,
        "actions": [],
    }

    if too_close_to_target:
        get_logger().warning("No path for sampled episode {}".format(task_info))
    # else:
    #     get_logger().debug("Path found for sampled episode {}".format(task_info))

    # pose = {**task_info['initial_position'], 'rotation': {'x': 0.0, 'y': task_info['initial_orientation'], 'z': 0.0}, 'horizon': 0.0}
    # self.env.step({"action": "TeleportFull", **pose})
    # assert self.env.last_action_success, "Failed to initialize agent to {} in {} for episode {}".format(pose, scene, task_info)

    self._last_sampled_task = PointNavTask(
        env=self.env,
        sensors=self.sensors,
        task_info=task_info,
        max_steps=self.max_steps,
        action_space=self._action_space,
        reward_configs=self.rewards_config,
    )
    return self._last_sampled_task
def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]:
    if self.max_tasks is not None and self.max_tasks <= 0:
        return None

    if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]):
        self.scene_index = (self.scene_index + 1) % len(self.scenes)
        # shuffle the new list of episodes to train on
        if self.shuffle_dataset:
            random.shuffle(self.episodes[self.scenes[self.scene_index]])
        self.episode_index = 0

    scene = self.scenes[self.scene_index]
    episode = self.episodes[scene][self.episode_index]
    distance_cache = self.distance_caches[scene] if self.distance_caches else None

    if self.env is not None:
        if scene.replace("_physics", "") != self.env.scene_name.replace(
            "_physics", ""
        ):
            self.env.reset(scene_name=scene)
    else:
        self.env = self._create_environment()
        self.env.reset(scene_name=scene)

    task_info = {
        "scene": scene,
        "initial_position": episode["initial_position"],
        "initial_orientation": episode["initial_orientation"],
        "target": find_nearest_point_in_cache(
            distance_cache, _str_to_pos(episode["target_position"])
        ),
        "shortest_path": episode["shortest_path"],
        "distance_to_target": episode["shortest_path_length"],
    }

    if self.allow_flipping and random.random() > 0.5:
        task_info["mirrored"] = True
    else:
        task_info["mirrored"] = False

    if self.reset_tasks is not None:
        get_logger().debug("valid task_info {}".format(task_info))

    self.episode_index += 1
    if self.max_tasks is not None:
        self.max_tasks -= 1
    if not self.env.teleport(
        _str_to_pos(episode["initial_position"]),
        {"x": 0.0, "y": episode["initial_orientation"], "z": 0.0},
    ):
        return self.next_task()

    self._last_sampled_task = PointNavTask(
        env=self.env,
        sensors=self.sensors,
        task_info=task_info,
        max_steps=self.max_steps,
        action_space=self._action_space,
        reward_configs=self.rewards_config,
        distance_cache=distance_cache,
        episode_info=episode,
    )
    return self._last_sampled_task