    def training_pipeline(self, **kwargs):
        # PPO hyperparameters: 300M environment steps with GAE and gradient clipping.
        ppo_steps = int(300000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 4
        num_steps = 128
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        # Group the action indices into "everything but END" and "END" for the
        # grouped-action imitation loss below.
        action_strs = ObjectNavTask.class_action_names()
        non_end_action_inds_set = {
            i for i, a in enumerate(action_strs) if a != robothor_constants.END
        }
        end_action_ind_set = {action_strs.index(robothor_constants.END)}

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(**PPOConfig),
                "grouped_action_imitation": GroupedActionImitation(
                    nactions=len(ObjectNavTask.class_action_names()),
                    action_groups=[non_end_action_inds_set, end_action_ind_set],
                ),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["ppo_loss", "grouped_action_imitation"],
                    max_stage_steps=ppo_steps,
                )
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )
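
The LinearDecay schedule above anneals the learning rate linearly toward zero over ppo_steps. A minimal standalone sketch of the same idea using only PyTorch's LambdaLR (the model, optimizer, and step count below are placeholders, not part of the config above):

import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

total_steps = 300_000_000          # ppo_steps in the config above
model = nn.Linear(4, 2)            # placeholder model
optimizer = optim.Adam(model.parameters(), lr=3e-4)

# Multiply the base lr by a factor that decays linearly from 1 to 0 over total_steps,
# mirroring what LinearDecay(steps=ppo_steps) is used for above.
scheduler = LambdaLR(optimizer, lr_lambda=lambda step: max(0.0, 1.0 - step / total_steps))

optimizer.step()
scheduler.step()                   # one scheduler step per training update in a real loop
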
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        rgb_uuid = next(
            (s.uuid for s in cls.SENSORS if isinstance(s, RGBSensor)), None)
        depth_uuid = next(
            (s.uuid for s in cls.SENSORS if isinstance(s, DepthSensor)), None)
        goal_sensor_uuid = next(
            (s.uuid
             for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)),
            None,
        )

        return ObjectNavActorCritic(
            action_space=gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            rgb_uuid=rgb_uuid,
            depth_uuid=depth_uuid,
            goal_sensor_uuid=goal_sensor_uuid,
            hidden_size=(
                192 if cls.MULTIPLE_BELIEFS and len(cls.AUXILIARY_UUIDS) > 1 else 512
            ),
            backbone=cls.BACKBONE,
            resnet_baseplanes=32,
            object_type_embedding_dim=32,
            num_rnn_layers=1,
            rnn_type="GRU",
            add_prev_actions=cls.ADD_PREV_ACTIONS,
            action_embed_size=6,
            auxiliary_uuids=cls.AUXILIARY_UUIDS,
            multiple_beliefs=cls.MULTIPLE_BELIEFS,
            beliefs_fusion=cls.BELIEF_FUSION,
        )
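
The next(...) pattern above returns the uuid of the first sensor of a given type in cls.SENSORS, or None if no such sensor is configured. A self-contained illustration with stand-in sensor classes (not the real AllenAct sensors):

class RGBSensor:
    def __init__(self, uuid):
        self.uuid = uuid

class DepthSensor:
    def __init__(self, uuid):
        self.uuid = uuid

SENSORS = [RGBSensor(uuid="rgb_lowres")]

rgb_uuid = next((s.uuid for s in SENSORS if isinstance(s, RGBSensor)), None)
depth_uuid = next((s.uuid for s in SENSORS if isinstance(s, DepthSensor)), None)
print(rgb_uuid, depth_uuid)  # -> rgb_lowres None
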
Example 3
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        has_rgb = any(isinstance(s, RGBSensor) for s in cls.SENSORS)
        has_depth = any(isinstance(s, DepthSensor) for s in cls.SENSORS)
        goal_sensor_uuid = next(
            (s.uuid
             for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)),
            None,
        )

        return ResnetTensorObjectNavActorCritic(
            action_space=gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid=goal_sensor_uuid,
            rgb_resnet_preprocessor_uuid="rgb_resnet" if has_rgb else None,
            depth_resnet_preprocessor_uuid="depth_resnet" if has_depth else None,
            hidden_size=512,
            goal_dims=32,
        )
class ObjectNaviThorRGBDAggerExperimentConfig(
        ObjectNavRoboThorBaseConfig,
        ObjectNavMixInDAggerConfig,
        ObjectNavMixInResNetGRUConfig,
):
    """An Object Navigation experiment configuration in RoboThor with RGB
    input."""

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
        ExpertActionSensor(nactions=len(ObjectNavTask.class_action_names())),
    ]

    @classmethod
    def tag(cls):
        return "Objectnav-RoboTHOR-RGB-ResNetGRU-DAgger"
Example 5
    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]],
        deterministic_cudnn: bool,
        include_expert_sensor: bool = True,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [
            scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)
        ]
        if len(scenes) == 0:
            raise RuntimeError((
                "Could not find any scene dataset information in directory {}."
                " Are you sure you've downloaded the datasets?"
                " If not, see https://allenact.org/installation/download-datasets/"
                " for information on how this can be done.").format(scenes_dir))

        oversample_warning = (
            f"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes})."
            " You can avoid this by setting a number of workers divisible by the number of scenes"
        )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                get_logger().warning(oversample_warning)
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[:total_processes *
                            (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            get_logger().warning(oversample_warning)

        inds = self._partition_inds(len(scenes), total_processes)

        x_display: Optional[str] = None
        if platform.system() == "Linux":
            x_displays = get_open_x_displays(throw_error_if_empty=True)

            if len(devices) > len(x_displays):
                get_logger().warning(
                    f"More GPU devices found than X-displays (devices: `{x_displays}`, x_displays: `{x_displays}`)."
                    f" This is not necessarily a bad thing but may mean that you're not using GPU memory as"
                    f" efficiently as possible. Consider following the instructions here:"
                    f" https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin"
                    f" describing how to start an X-display on every GPU.")
            x_display = x_displays[process_ind % len(x_displays)]

        return {
            "scenes": scenes[inds[process_ind]:inds[process_ind + 1]],
            "object_types": self.TARGET_TYPES,
            "max_steps": self.MAX_STEPS,
            "sensors": [
                s for s in self.SENSORS
                if include_expert_sensor or not isinstance(s, ExpertActionSensor)
            ],
            "action_space": gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
            "env_args": {
                **self.ENV_ARGS,
                "x_display": x_display,
            },
        }
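
The oversampling and partitioning logic above repeats the scene list so it can be divided across all worker processes, then hands each process a contiguous slice. A standalone sketch of that behaviour (split_scenes and its evenly spaced split points are illustrative stand-ins for self._partition_inds, not the AllenAct helper itself):

from math import ceil

def split_scenes(scenes, total_processes):
    if total_processes > len(scenes):
        # Oversample: repeat scenes so every process gets at least one, then trim.
        scenes = scenes * int(ceil(total_processes / len(scenes)))
        scenes = scenes[: total_processes * (len(scenes) // total_processes)]
    # Evenly spaced split points, analogous to self._partition_inds(...).
    inds = [round(i * len(scenes) / total_processes) for i in range(total_processes + 1)]
    return [scenes[inds[i]: inds[i + 1]] for i in range(total_processes)]

print(split_scenes(["FloorPlan_Train1_1", "FloorPlan_Train1_2", "FloorPlan_Train1_3"], 4))
# -> [['FloorPlan_Train1_1'], ['FloorPlan_Train1_2'], ['FloorPlan_Train1_3'], ['FloorPlan_Train1_1']]
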
Example 6
    def next_task(
            self,
            force_advance_scene: bool = False) -> Optional[ObjectNavTask]:
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.episode_index >= len(
                self.episodes[self.scenes[self.scene_index]]):
            self.scene_index = (self.scene_index + 1) % len(self.scenes)
            # shuffle the new list of episodes to train on
            random.shuffle(self.episodes[self.scenes[self.scene_index]])
            self.episode_index = 0
        scene = self.scenes[self.scene_index]
        episode = self.episodes[scene][self.episode_index]
        if self.env is None:
            self.env = self._create_environment()

        if scene.replace("_physics", "") != self.env.scene_name.replace(
                "_physics", ""):
            self.env.reset(scene_name=scene)
        else:
            self.env.reset_object_filter()

        self.env.set_object_filter(object_ids=[
            o["objectId"] for o in self.env.last_event.metadata["objects"]
            if o["objectType"] == episode["object_type"]
        ])

        task_info = {"scene": scene, "object_type": episode["object_type"]}
        if len(task_info) == 0:
            get_logger().warning("Scene {} does not contain any"
                                 " objects of any of the types {}.".format(
                                     scene, self.object_types))
        task_info["initial_position"] = episode["initial_position"]
        task_info["initial_orientation"] = episode["initial_orientation"]
        task_info["initial_horizon"] = episode.get("initial_horizon", 0)
        task_info["distance_to_target"] = episode.get("shortest_path_length")
        task_info["path_to_target"] = episode.get("shortest_path")
        task_info["object_type"] = episode["object_type"]
        task_info["id"] = episode["id"]
        if self.allow_flipping and random.random() > 0.5:
            task_info["mirrored"] = True
        else:
            task_info["mirrored"] = False

        self.episode_index += 1
        if self.max_tasks is not None:
            self.max_tasks -= 1
        if not self.env.teleport(
                pose=episode["initial_position"],
                rotation=episode["initial_orientation"],
                horizon=episode.get("initial_horizon", 0),
        ):
            return self.next_task()
        self._last_sampled_task = ObjectNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task
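
The dataset-based sampler above walks through the episodes of the current scene and, once they are exhausted, advances to the next scene and reshuffles that scene's episode list. A minimal standalone sketch of that cycling (scene and episode names are made up):

import random

scenes = ["FloorPlan_Train1_1", "FloorPlan_Train2_1"]
episodes = {s: [f"{s}_ep{i}" for i in range(3)] for s in scenes}

scene_index, episode_index = 0, 0
for _ in range(8):
    if episode_index >= len(episodes[scenes[scene_index]]):
        # Move to the next scene and shuffle its episodes, as in next_task above.
        scene_index = (scene_index + 1) % len(scenes)
        random.shuffle(episodes[scenes[scene_index]])
        episode_index = 0
    print(scenes[scene_index], episodes[scenes[scene_index]][episode_index])
    episode_index += 1
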
Example 7
    def next_task(
            self,
            force_advance_scene: bool = False) -> Optional[ObjectNavTask]:
        if self.max_tasks is not None and self.max_tasks <= 0:
            # get_logger().debug("max_tasks {}".format(self.max_tasks))
            return None

        if not self.scenes_is_dataset:
            scene = self.sample_scene(force_advance_scene)

            if self.env is not None:
                if scene.replace("_physics",
                                 "") != self.env.scene_name.replace(
                                     "_physics", ""):
                    self.env.reset(scene)
            else:
                self.env = self._create_environment()
                self.env.reset(scene_name=scene)

            pose = self.env.randomize_agent_location()

            object_types_in_scene = {
                o["objectType"] for o in self.env.last_event.metadata["objects"]
            }

            task_info = {"scene": scene}
            for ot in random.sample(self.object_types, len(self.object_types)):
                if ot in object_types_in_scene:
                    task_info["object_type"] = ot
                    break

            if "object_type" not in task_info:
                get_logger().warning("Scene {} does not contain any"
                                     " objects of any of the types {}.".format(
                                         scene, self.object_types))

            task_info["initial_position"] = {
                k: pose[k]
                for k in ["x", "y", "z"]
            }
            task_info["initial_orientation"] = cast(Dict[str, float],
                                                    pose["rotation"])["y"]
        else:
            assert self.max_tasks is not None
            next_task_id = self.dataset_first + self.max_tasks - 1
            # get_logger().debug("task {}".format(next_task_id))
            assert (self.dataset_first <= next_task_id <= self.dataset_last
                    ), "wrong task_id {} for min {} max {}".format(
                        next_task_id, self.dataset_first, self.dataset_last)
            task_info = copy.deepcopy(self.dataset_episodes[next_task_id])

            scene = task_info["scene"]
            if self.env is not None:
                if scene.replace("_physics",
                                 "") != self.env.scene_name.replace(
                                     "_physics", ""):
                    self.env.reset(scene_name=scene)
            else:
                self.env = self._create_environment()
                self.env.reset(scene_name=scene)

            self.env.step({
                "action": "TeleportFull",
                **{
                    k: float(v)
                    for k, v in task_info["initial_position"].items()
                },
                "rotation": {
                    "x": 0.0,
                    "y": float(task_info["initial_orientation"]),
                    "z": 0.0,
                },
                "horizon": 0.0,
                "standing": True,
            })
            assert self.env.last_action_success, "Failed to reset agent for {}".format(
                task_info)

            self.max_tasks -= 1

        # task_info["actions"] = []  # TODO populated by Task(Generic[EnvType]).step(...) but unused

        if self.allow_flipping and random.random() > 0.5:
            task_info["mirrored"] = True
        else:
            task_info["mirrored"] = False

        self._last_sampled_task = ObjectNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task
Example 8
    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]],
        deterministic_cudnn: bool,
        include_expert_sensor: bool = True,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [
            scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)
        ]
        if len(scenes) == 0:
            raise RuntimeError((
                "Could not find any scene dataset information in directory {}."
                " Are you sure you've downloaded the datasets?"
                " If not, see https://allenact.org/installation/download-datasets/"
                " for information on how this can be done.").format(scenes_dir))

        oversample_warning = (
            f"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes})."
            " You can avoid this by setting a number of workers divisible by the number of scenes"
        )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                get_logger().warning(oversample_warning)
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[:total_processes *
                            (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            get_logger().warning(oversample_warning)

        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind]:inds[process_ind + 1]],
            "object_types": self.TARGET_TYPES,
            "max_steps": self.MAX_STEPS,
            "sensors": [
                s for s in self.SENSORS
                if include_expert_sensor or not isinstance(s, ExpertActionSensor)
            ],
            "action_space": gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
            "env_args": {
                **self.ENV_ARGS,
                "x_display": (
                    f"0.{devices[process_ind % len(devices)]}"
                    if devices is not None
                    and len(devices) > 0
                    and devices[process_ind % len(devices)] >= 0
                    else None
                ),
            },
        }
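
For reference, the "x_display" value built above assigns worker processes to GPUs round-robin over the device ids; the convention assumed here is that X display 0 has one screen per GPU, so "0.N" points a worker at screen N. A small illustrative snippet (the device list and process index are made-up values):

devices = [0, 1]   # assumed CUDA device ids handed to this sampler
process_ind = 3

x_display = (
    f"0.{devices[process_ind % len(devices)]}"
    if devices is not None
    and len(devices) > 0
    and devices[process_ind % len(devices)] >= 0
    else None
)
print(x_display)  # -> 0.1
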