    def training_pipeline(self, **kwargs):
        ppo_steps = 300_000_000
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 4
        num_steps = 128
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        action_strs = ObjectNavTask.class_action_names()
        non_end_action_inds_set = {
            i for i, a in enumerate(action_strs) if a != robothor_constants.END
        }
        end_action_ind_set = {action_strs.index(robothor_constants.END)}

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(**PPOConfig),
                "grouped_action_imitation": GroupedActionImitation(
                    nactions=len(action_strs),
                    action_groups=[non_end_action_inds_set, end_action_ind_set],
                ),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["ppo_loss", "grouped_action_imitation"],
                    max_stage_steps=ppo_steps,
                )
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
        )
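For reference, here is a minimal sketch of the schedule that LambdaLR with LinearDecay(steps=ppo_steps) is assumed to produce: a multiplicative factor on the base learning rate that falls linearly from 1.0 toward 0.0 over the PPO budget. The lambda below is an illustrative stand-in, not AllenAct's implementation.

import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import LambdaLR

total_steps = 300_000_000  # matches ppo_steps above
optimizer = Adam([torch.nn.Parameter(torch.zeros(1))], lr=3e-4)
# Factor decays from 1.0 at step 0 to 0.0 once the step count reaches total_steps.
scheduler = LambdaLR(
    optimizer, lr_lambda=lambda step: 1.0 - min(step, total_steps) / total_steps
)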
Example #2
    def next_task(
            self,
            force_advance_scene: bool = False) -> Optional[ObjectNavTask]:
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.episode_index >= len(
                self.episodes[self.scenes[self.scene_index]]):
            self.scene_index = (self.scene_index + 1) % len(self.scenes)
            # shuffle the new list of episodes to train on
            random.shuffle(self.episodes[self.scenes[self.scene_index]])
            self.episode_index = 0
        scene = self.scenes[self.scene_index]
        episode = self.episodes[scene][self.episode_index]
        if self.env is not None:
            # Only reset when actually changing scenes (the "_physics" suffix is
            # irrelevant to scene identity).
            if scene.replace("_physics", "") != self.env.scene_name.replace("_physics", ""):
                self.env.reset(
                    scene_name=scene,
                    filtered_objects=list({e["object_id"] for e in self.episodes[scene]}),
                )
        else:
            self.env = self._create_environment()
            self.env.reset(
                scene_name=scene,
                filtered_objects=list({e["object_id"] for e in self.episodes[scene]}),
            )
        task_info = {"scene": scene, "object_type": episode["object_type"]}
        if len(task_info) == 0:
            get_logger().warning("Scene {} does not contain any"
                                 " objects of any of the types {}.".format(
                                     scene, self.object_types))
        task_info["initial_position"] = episode["initial_position"]
        task_info["initial_orientation"] = episode["initial_orientation"]
        task_info["distance_to_target"] = episode["shortest_path_length"]
        task_info["path_to_target"] = episode["shortest_path"]
        task_info["object_type"] = episode["object_type"]
        task_info["id"] = episode["id"]
        task_info["mirrored"] = self.allow_flipping and random.random() > 0.5

        self.episode_index += 1
        if self.max_tasks is not None:
            self.max_tasks -= 1
        if not self.env.teleport(episode["initial_position"],
                                 episode["initial_orientation"]):
            # Spawn pose is invalid in this scene; skip to the next episode.
            return self.next_task()
        self._last_sampled_task = ObjectNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task
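For orientation, this is the episode record layout the sampler above appears to assume, inferred purely from the keys it reads; the values below are illustrative, not taken from a real dataset file.

episode = {
    "id": "FloorPlan_Train1_1_Television_0",             # unique episode id
    "object_id": "Television|+01.00|+00.50|-02.00",      # used to filter scene objects
    "object_type": "Television",                         # navigation target category
    "initial_position": {"x": 1.0, "y": 0.9, "z": -2.0},
    "initial_orientation": 90,
    "shortest_path": [{"x": 1.0, "y": 0.9, "z": -2.0}],  # planner path to the target
    "shortest_path_length": 2.45,                        # meters
}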
Example #3
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        return ResnetTensorObjectNavActorCritic(
            action_space=gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
            observation_space=kwargs["observation_set"].observation_spaces,
            goal_sensor_uuid="goal_object_type_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )
Example #4
    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [
            scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)
        ]
        if len(scenes) == 0:
            raise RuntimeError(
                "Could not find any scene dataset information in directory {}."
                " Are you sure you've downloaded the datasets? If not, see"
                " https://allenact.org/installation/download-datasets/ for"
                " information on how this can be done.".format(scenes_dir)
            )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by using a number of workers divisible by"
                    " the number of scenes."
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            print(
                "Warning: unevenly splitting the scenes across processes."
                " You can avoid this by using a number of workers that divides"
                " the number of scenes."
            )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.TARGET_TYPES,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(ObjectNavTask.class_action_names())
            ),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }
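The helper self._partition_inds is not shown in these snippets. The sketch below is a guess at its contract, not AllenAct's actual implementation: total_processes + 1 cut points that split the scene list into contiguous, near-even chunks. The same assumption applies to the other _get_sampler_args_for_scene_split variants that follow.

import numpy as np

def partition_inds(n: int, num_parts: int) -> np.ndarray:
    # Evenly spaced cut points over [0, n], rounded to integers.
    return np.round(np.linspace(0, n, num_parts + 1)).astype(np.int32)

# e.g. partition_inds(10, 4) -> [0, 2, 5, 8, 10]; process 1 gets scenes[2:5]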
Example #5
    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [
            scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)
        ]
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by using a number of workers divisible by"
                    " the number of scenes."
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            print(
                "Warning: unevenly splitting the scenes across processes."
                " You can avoid this by using a number of workers that divides"
                " the number of scenes."
            )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.TARGET_TYPES,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(ObjectNavTask.class_action_names())
            ),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }
Example #6
    def _get_sampler_args_for_scene_split(
        self,
        scenes: List[str],
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        if total_processes > len(scenes):
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by using a number of workers divisible by"
                    " the number of scenes."
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            print(
                "Warning: unevenly splitting the scenes across processes."
                " You can avoid this by using a number of workers that divides"
                " the number of scenes."
            )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.TARGET_TYPES,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(ObjectNavTask.class_action_names())
            ),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": {
                "step_penalty": -0.01,
                "goal_success_reward": 10.0,
                "failed_stop_reward": 0.0,
                "shaping_weight": 1.0,  # applied to the decrease in distance to target
            },
        }
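A sketch of how these reward terms are assumed to combine on each step; this is illustrative only, the actual computation lives inside ObjectNavTask's reward logic.

def step_reward(cfg, dist_decrease, took_end_action, reached_goal):
    # Constant time penalty plus shaped reward for closing distance to the target.
    r = cfg["step_penalty"] + cfg["shaping_weight"] * dist_decrease
    if took_end_action:
        # Terminal bonus or penalty depending on whether END was called at the goal.
        r += cfg["goal_success_reward"] if reached_goal else cfg["failed_stop_reward"]
    return r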
Example #7
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        has_rgb = any(isinstance(s, RGBSensor) for s in cls.SENSORS)
        has_depth = any(isinstance(s, DepthSensor) for s in cls.SENSORS)
        goal_sensor_uuid = next(
            (s.uuid
             for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)),
            None,
        )

        return ResnetTensorObjectNavActorCritic(
            action_space=gym.spaces.Discrete(
                len(ObjectNavTask.class_action_names())
            ),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid=goal_sensor_uuid,
            rgb_resnet_preprocessor_uuid="rgb_resnet" if has_rgb else None,
            depth_resnet_preprocessor_uuid="depth_resnet" if has_depth else None,
            hidden_size=512,
            goal_dims=32,
        )
Example #8
    def __init__(self):
        super().__init__()
        self.REWARD_CONFIG["shaping_weight"] = 0

        self.SENSORS = [
            RGBSensorThor(
                height=self.SCREEN_SIZE,
                width=self.SCREEN_SIZE,
                use_resnet_normalization=True,
                uuid="rgb_lowres",
            ),
            GoalObjectTypeThorSensor(object_types=self.TARGET_TYPES),
            ExpertActionSensor(nactions=len(ObjectNavTask.class_action_names())),
        ]

        self.PREPROCESSORS = [
            Builder(
                ResnetPreProcessorHabitat,
                {
                    "input_height": self.SCREEN_SIZE,
                    "input_width": self.SCREEN_SIZE,
                    "output_width": 7,
                    "output_height": 7,
                    "output_dims": 512,
                    "pool": False,
                    "torchvision_resnet_model": models.resnet18,
                    "input_uuids": ["rgb_lowres"],
                    "output_uuid": "rgb_resnet",
                    "parallel": False,
                },
            ),
        ]

        self.OBSERVATIONS = [
            "rgb_resnet",
            "goal_object_type_ind",
            "expert_action",
        ]
Example #9
class ObjectNaviThorRGBDAggerExperimentConfig(
        ObjectNavRoboThorBaseConfig,
        ObjectNavMixInDAggerConfig,
        ObjectNavMixInResNetGRUConfig,
):
    """An Object Navigation experiment configuration in RoboThor with RGB
    input."""

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
        ExpertActionSensor(nactions=len(ObjectNavTask.class_action_names())),
    ]

    @classmethod
    def tag(cls):
        return "Objectnav-RoboTHOR-RGB-ResNetGRU-DAgger"
Example #10
    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]],
        deterministic_cudnn: bool,
        include_expert_sensor: bool = True,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [
            scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)
        ]
        if len(scenes) == 0:
            raise RuntimeError(
                "Could not find any scene dataset information in directory {}."
                " Are you sure you've downloaded the datasets? If not, see"
                " https://allenact.org/installation/download-datasets/ for"
                " information on how this can be done.".format(scenes_dir)
            )

        oversample_warning = (
            f"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes})."
            " You can avoid this by setting a number of workers divisible by the number of scenes"
        )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                get_logger().warning(oversample_warning)
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[:total_processes *
                            (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            get_logger().warning(oversample_warning)

        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.TARGET_TYPES,
            "max_steps": self.MAX_STEPS,
            "sensors": [
                s
                for s in self.SENSORS
                if include_expert_sensor or not isinstance(s, ExpertActionSensor)
            ],
            "action_space": gym.spaces.Discrete(
                len(ObjectNavTask.class_action_names())
            ),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
            "env_args": {
                **self.ENV_ARGS,
                "x_display": (
                    f"0.{devices[process_ind % len(devices)]}"
                    if devices is not None
                    and len(devices) > 0
                    and devices[process_ind % len(devices)] >= 0
                    else None
                ),
            },
        }
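The x_display string follows the X11 display.screen convention that THOR uses when rendering headlessly on Linux. Below is a small helper equivalent to the inline expression above (the name is hypothetical):

from typing import List, Optional

def x_display_for(devices: Optional[List[int]], process_ind: int) -> Optional[str]:
    if devices is None or len(devices) == 0:
        return None
    gpu = devices[process_ind % len(devices)]
    # Screen index on X display 0; negative ids mean "no GPU assigned".
    return f"0.{gpu}" if gpu >= 0 else None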
Example #11
    def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNavTask]:
        if self.max_tasks is not None and self.max_tasks <= 0:
            # get_logger().debug("max_tasks {}".format(self.max_tasks))
            return None

        if not self.scenes_is_dataset:
            scene = self.sample_scene(force_advance_scene)

            if self.env is not None:
                if scene.replace("_physics", "") != self.env.scene_name.replace(
                    "_physics", ""
                ):
                    self.env.reset(scene)
            else:
                self.env = self._create_environment()
                self.env.reset(scene_name=scene)

            pose = self.env.randomize_agent_location()

            object_types_in_scene = {
                o["objectType"] for o in self.env.last_event.metadata["objects"]
            }

            task_info = {"scene": scene}
            for ot in random.sample(self.object_types, len(self.object_types)):
                if ot in object_types_in_scene:
                    task_info["object_type"] = ot
                    break

            if len(task_info) == 0:
                get_logger().warning(
                    "Scene {} does not contain any"
                    " objects of any of the types {}.".format(scene, self.object_types)
                )

            task_info["initial_position"] = {k: pose[k] for k in ["x", "y", "z"]}
            task_info["initial_orientation"] = cast(Dict[str, float], pose["rotation"])[
                "y"
            ]
        else:
            assert self.max_tasks is not None
            next_task_id = self.dataset_first + self.max_tasks - 1
            # get_logger().debug("task {}".format(next_task_id))
            assert (
                self.dataset_first <= next_task_id <= self.dataset_last
            ), "wrong task_id {} for min {} max {}".format(
                next_task_id, self.dataset_first, self.dataset_last
            )
            task_info = copy.deepcopy(self.dataset_episodes[next_task_id])

            scene = task_info["scene"]
            if self.env is not None:
                if scene.replace("_physics", "") != self.env.scene_name.replace(
                    "_physics", ""
                ):
                    self.env.reset(scene_name=scene)
            else:
                self.env = self._create_environment()
                self.env.reset(scene_name=scene)

            self.env.step(
                {
                    "action": "TeleportFull",
                    **{k: float(v) for k, v in task_info["initial_position"].items()},
                    "rotation": {
                        "x": 0.0,
                        "y": float(task_info["initial_orientation"]),
                        "z": 0.0,
                    },
                    "horizon": 0.0,
                }
            )
            assert self.env.last_action_success, "Failed to reset agent for {}".format(
                task_info
            )

            self.max_tasks -= 1

        # task_info["actions"] = []  # TODO populated by Task(Generic[EnvType]).step(...) but unused

        task_info["mirrored"] = self.allow_flipping and random.random() > 0.5

        self._last_sampled_task = ObjectNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task
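A minimal sketch of how a sampler like this is typically driven. The loop and the agent_policy call are hypothetical; get_observations, is_done, and step returning a result carrying an observation field follow the usual AllenAct Task interface, assumed here.

task = task_sampler.next_task()
while task is not None:
    observations = task.get_observations()
    while not task.is_done():
        action = agent_policy(observations)  # hypothetical policy call
        step_result = task.step(action)
        observations = step_result.observation
    task = task_sampler.next_task()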