Example #1
    def training_pipeline(cls, **kwargs):
        ppo_steps = 75000000
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 4
        num_steps = 128
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        action_strs = PointNavTask.class_action_names()
        non_end_action_inds_set = {
            i for i, a in enumerate(action_strs) if a != robothor_constants.END
        }
        end_action_ind_set = {action_strs.index(robothor_constants.END)}

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(**PPOConfig),
                "grouped_action_imitation": GroupedActionImitation(
                    nactions=len(PointNavTask.class_action_names()),
                    action_groups=[non_end_action_inds_set, end_action_ind_set],
                ),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["ppo_loss", "grouped_action_imitation"],
                    max_stage_steps=ppo_steps,
                )
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )
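
The pipeline above pairs PPO with a learning rate that decays linearly to zero over the 75M training steps. As a minimal sketch, assuming LinearDecay(steps=...) hands LambdaLR a multiplier that falls linearly from 1.0 to 0.0 (the linear_decay helper below is a hypothetical stand-in, not AllenAct's implementation):

import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import LambdaLR

def linear_decay(steps: int):
    # Hypothetical stand-in for allenact's LinearDecay: map the current
    # update index to a factor that falls linearly from 1.0 to 0.0.
    def multiplier(step: int) -> float:
        return max(0.0, 1.0 - step / float(steps))
    return multiplier

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = Adam(params, lr=3e-4)
scheduler = LambdaLR(optimizer, lr_lambda=linear_decay(steps=75000000))
optimizer.step()
scheduler.step()  # effective lr = 3e-4 * multiplier(1)
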
Example #2
    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        config = self.CONFIG.clone()
        config.defrost()
        config.DATASET.DATA_PATH = self.VALID_SCENES
        config.MODE = "validate"
        config.freeze()
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
        }
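
The clone()/defrost()/freeze() calls in the example above follow the yacs CfgNode API that Habitat's config is built on: a frozen config is immutable, so overrides go onto a thawed copy. A self-contained sketch of the same pattern (keys and paths below are made up for illustration):

from yacs.config import CfgNode as CN

cfg = CN()
cfg.DATASET = CN()
cfg.DATASET.DATA_PATH = "datasets/pointnav/train.json.gz"  # made-up path
cfg.MODE = "train"
cfg.freeze()

valid_cfg = cfg.clone()  # deep copy; stays frozen like the original
valid_cfg.defrost()      # make the copy mutable
valid_cfg.DATASET.DATA_PATH = "datasets/pointnav/val.json.gz"
valid_cfg.MODE = "validate"
valid_cfg.freeze()       # re-freeze to guard against accidental edits
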
Example #3

    def create_model(cls, **kwargs) -> nn.Module:
        rgb_uuid = next(
            (s.uuid for s in cls.SENSORS if isinstance(s, RGBSensor)), None)
        depth_uuid = next(
            (s.uuid for s in cls.SENSORS if isinstance(s, DepthSensor)), None)
        goal_sensor_uuid = next(
            (s.uuid for s in cls.SENSORS
             if isinstance(s, (GPSCompassSensorRoboThor,
                               TargetCoordinatesSensorHabitat))),
            None,
        )

        return PointNavActorCriticSimpleConvRNN(
            action_space=gym.spaces.Discrete(
                len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            rgb_uuid=rgb_uuid,
            depth_uuid=depth_uuid,
            goal_sensor_uuid=goal_sensor_uuid,
            hidden_size=512,
            embed_coordinates=False,
            coordinate_dims=2,
            num_rnn_layers=1,
            rnn_type="GRU",
        )
Example #4

    def create_model(cls, **kwargs) -> nn.Module:
        return ResnetTensorPointNavActorCritic(
            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )
Example #5

    def create_model(cls, **kwargs) -> nn.Module:
        return PointNavActorCriticSimpleConvRNN(
            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            observation_space=kwargs["observation_set"].observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            hidden_size=512,
            embed_coordinates=False,
            coordinate_dims=2,
            num_rnn_layers=1,
            rnn_type="GRU",
        )
Example #6

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)]
        if len(scenes) == 0:
            raise RuntimeError(
                (
                    "Could not find any scene dataset information in directory {}."
                    " Are you sure you've downloaded the dataset?"
                    " If not, see https://allenact.org/installation/download-datasets/"
                    " for information on how this can be done."
                ).format(scenes_dir)
            )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by using a number of workers divisible"
                    " by the number of scenes."
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            print(
                "Warning: the scenes cannot be split evenly across all processes."
                " You can avoid this by using a number of workers that divides"
                " the number of scenes evenly."
            )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }
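
_partition_inds is defined elsewhere in the config hierarchy. A plausible sketch of what it returns, assuming (this is not necessarily AllenAct's exact code) total_processes + 1 evenly spaced cut points over the scene list:

import numpy as np

def _partition_inds(n: int, num_parts: int) -> np.ndarray:
    # Rounded, evenly spaced cut points: process i receives the contiguous
    # slice scenes[inds[i]:inds[i + 1]].
    return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(np.int32)

inds = _partition_inds(10, 4)  # array([ 0,  2,  5,  8, 10])
chunks = [list(range(inds[i], inds[i + 1])) for i in range(4)]  # sizes 2, 3, 3, 2
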
Example #7

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        config = self.TRAIN_CONFIGS[process_ind]
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
        }
Example #8
    def _get_sampler_args_for_scene_split(
        self,
        scenes: List[str],
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by using a number of workers divisible"
                    " by the number of scenes."
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            print(
                "Warning: the scenes cannot be split evenly across all processes."
                " You can avoid this by using a number of workers that divides"
                " the number of scenes evenly."
            )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": {
                "step_penalty": -0.01,
                "goal_success_reward": 10.0,
                "failed_stop_reward": 0.0,
                "shaping_weight": 1.0,  # applied to the decrease in distance to target
            },
        }
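
To make the oversampling branch above concrete: with 4 scenes and 6 processes, the scene list is tiled and then truncated to a multiple of the process count, so every process gets the same number of scenes at the cost of sampling some scenes more often (scene names below are made up):

from math import ceil

scenes = ["s1", "s2", "s3", "s4"]
total_processes = 6

scenes = scenes * int(ceil(total_processes / len(scenes)))  # tile: 8 entries
scenes = scenes[: total_processes * (len(scenes) // total_processes)]  # truncate: 6 entries
print(scenes)  # ['s1', 's2', 's3', 's4', 's1', 's2'] -> s1 and s2 are oversampled
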
Example #9

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)]
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by using a number of workers divisible"
                    " by the number of scenes."
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            print(
                "Warning: the scenes cannot be split evenly across all processes."
                " You can avoid this by using a number of workers that divides"
                " the number of scenes evenly."
            )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }