Example #1
    def training_pipeline(cls, **kwargs):
        ppo_steps = int(75000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 4
        num_steps = 128
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        action_strs = PointNavTask.class_action_names()
        non_end_action_inds_set = {
            i for i, a in enumerate(action_strs) if a != robothor_constants.END
        }
        end_action_ind_set = {action_strs.index(robothor_constants.END)}

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(**PPOConfig),
                "grouped_action_imitation": GroupedActionImitation(
                    nactions=len(PointNavTask.class_action_names()),
                    action_groups=[non_end_action_inds_set, end_action_ind_set],
                ),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["ppo_loss", "grouped_action_imitation"],
                    max_stage_steps=ppo_steps,
                )
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
        )
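
The `lr_scheduler_builder` above wraps torch's `LambdaLR` around a `LinearDecay` callable so that the learning rate multiplier is annealed (here, presumably linearly toward zero) over `ppo_steps`. A minimal sketch of such a decay multiplier is shown below; `LinearDecaySketch` is a hypothetical name and AllenAct's own `LinearDecay` may differ in detail.

    # Hedged sketch: a LinearDecay-style multiplier for torch.optim.lr_scheduler.LambdaLR.
    # Illustrative assumption only, not AllenAct's exact implementation.
    import torch
    from torch.optim.lr_scheduler import LambdaLR

    class LinearDecaySketch:
        def __init__(self, steps: int, startp: float = 1.0, endp: float = 0.0):
            self.steps = steps
            self.startp = startp
            self.endp = endp

        def __call__(self, step: int) -> float:
            # Linearly interpolate the LR multiplier from startp to endp over `steps`.
            progress = min(max(step / self.steps, 0.0), 1.0)
            return self.startp + (self.endp - self.startp) * progress

    # Usage: LambdaLR multiplies the optimizer's base lr by the returned value each step.
    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = torch.optim.Adam(params, lr=3e-4)
    scheduler = LambdaLR(optimizer, lr_lambda=LinearDecaySketch(steps=75000000))
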
Example #2
 def valid_task_sampler_args(
     self,
     process_ind: int,
     total_processes: int,
     devices: Optional[List[int]] = None,
     seeds: Optional[List[int]] = None,
     deterministic_cudnn: bool = False,
 ) -> Dict[str, Any]:
     config = self.CONFIG.clone()
     config.defrost()
     config.DATASET.DATA_PATH = self.VALID_SCENES
     config.MODE = "validate"
     config.freeze()
     return {
         "env_config": config,
         "max_steps": self.MAX_STEPS,
         "sensors": self.SENSORS,
         "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
         "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
     }
Example #3
    def create_model(cls, **kwargs) -> nn.Module:
        rgb_uuid = next(
            (s.uuid for s in cls.SENSORS if isinstance(s, RGBSensor)), None)
        depth_uuid = next(
            (s.uuid for s in cls.SENSORS if isinstance(s, DepthSensor)), None)
        goal_sensor_uuid = next(
            (s.uuid for s in cls.SENSORS
             if isinstance(s, (GPSCompassSensorRoboThor,
                               TargetCoordinatesSensorHabitat))),
            None,
        )

        return PointNavActorCriticSimpleConvRNN(
            action_space=gym.spaces.Discrete(
                len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            rgb_uuid=rgb_uuid,
            depth_uuid=depth_uuid,
            goal_sensor_uuid=goal_sensor_uuid,
            hidden_size=512,
            embed_coordinates=False,
            coordinate_dims=2,
            num_rnn_layers=1,
            rnn_type="GRU",
        )
Example #4
 def create_model(cls, **kwargs) -> nn.Module:
     return ResnetTensorPointNavActorCritic(
         action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
         observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
         goal_sensor_uuid="target_coordinates_ind",
         rgb_resnet_preprocessor_uuid="rgb_resnet",
         hidden_size=512,
         goal_dims=32,
     )
Example #5
 def create_model(cls, **kwargs) -> nn.Module:
     return PointNavActorCriticSimpleConvRNN(
         action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
         observation_space=kwargs["observation_set"].observation_spaces,
         goal_sensor_uuid="target_coordinates_ind",
         hidden_size=512,
         embed_coordinates=False,
         coordinate_dims=2,
         num_rnn_layers=1,
         rnn_type="GRU",
     )
Example #6
    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [
            scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)
        ]
        if len(scenes) == 0:
            raise RuntimeError(
                "Could not find any scene dataset information in directory {}."
                " Are you sure you've downloaded the datasets?"
                " If not, see https://allenact.org/installation/download-datasets/"
                " for information on how this can be done.".format(scenes_dir)
            )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting the number of workers to a multiple of the number of scenes."
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: some processes will receive more scenes than others."
                    " You can avoid this by setting the number of workers to a divisor of the number of scenes."
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }
Example #7
 def train_task_sampler_args(
     self,
     process_ind: int,
     total_processes: int,
     devices: Optional[List[int]] = None,
     seeds: Optional[List[int]] = None,
     deterministic_cudnn: bool = False,
 ) -> Dict[str, Any]:
     config = self.TRAIN_CONFIGS[process_ind]
     return {
         "env_config": config,
         "max_steps": self.MAX_STEPS,
         "sensors": self.SENSORS,
         "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
         "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
     }
Example #8
    def _get_sampler_args_for_scene_split(
        self,
        scenes: List[str],
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        if total_processes > len(scenes):
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting the number of workers to a multiple of the number of scenes."
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: some processes will receive more scenes than others."
                    " You can avoid this by setting the number of workers to a divisor of the number of scenes."
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": {
                "step_penalty": -0.01,
                "goal_success_reward": 10.0,
                "failed_stop_reward": 0.0,
                "shaping_weight": 1.0,  # applied to the decrease in distance to target
            },
        }
Example #9
    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        path = (
            scenes_dir + "*.json.gz"
            if scenes_dir[-1] == "/"
            else scenes_dir + "/*.json.gz"
        )
        scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)]
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisor of the number of scenes"
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }
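
Both scene-split helpers above delegate the actual split to `self._partition_inds(len(scenes), total_processes)`, which produces the boundaries used in `scenes[inds[process_ind]:inds[process_ind + 1]]` but is not shown here. A minimal sketch of what such a helper could look like (hypothetical name `partition_inds_sketch`; the only assumption is that it returns `num_parts + 1` increasing boundaries over `[0, num_items]`):

    import numpy as np

    def partition_inds_sketch(num_items: int, num_parts: int) -> np.ndarray:
        # Evenly spaced boundaries, rounded to integers, e.g. 10 scenes over 4
        # processes -> [0, 2, 5, 8, 10]; process i then gets scenes[inds[i]:inds[i + 1]].
        return np.round(
            np.linspace(0, num_items, num_parts + 1, endpoint=True)
        ).astype(np.int32)
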
Example #10
    def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]:
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]):
            self.scene_index = (self.scene_index + 1) % len(self.scenes)
            # shuffle the new list of episodes to train on
            if self.shuffle_dataset:
                random.shuffle(self.episodes[self.scenes[self.scene_index]])
            self.episode_index = 0

        scene = self.scenes[self.scene_index]
        episode = self.episodes[scene][self.episode_index]
        if self.env is not None:
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                "_physics", ""
            ):
                self.env.reset(scene_name=scene, filtered_objects=[])
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene, filtered_objects=[])

        def to_pos(s):
            if isinstance(s, (Dict, Tuple)):
                return s
            if isinstance(s, float):
                return {"x": 0, "y": s, "z": 0}
            return str_to_pos_for_cache(s)

        for k in ["initial_position", "initial_orientation", "target_position"]:
            episode[k] = to_pos(episode[k])

        task_info = {
            "scene": scene,
            "initial_position": episode["initial_position"],
            "initial_orientation": episode["initial_orientation"],
            "target": episode["target_position"],
            "shortest_path": episode["shortest_path"],
            "distance_to_target": episode["shortest_path_length"],
            "id": episode["id"],
        }

        if self.allow_flipping and random.random() > 0.5:
            task_info["mirrored"] = True
        else:
            task_info["mirrored"] = False

        self.episode_index += 1
        if self.max_tasks is not None:
            self.max_tasks -= 1

        if not self.env.teleport(
            pose=episode["initial_position"], rotation=episode["initial_orientation"]
        ):
            return self.next_task()

        self._last_sampled_task = PointNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )

        return self._last_sampled_task
Example #11
    def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]:
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        scene = self.sample_scene(force_advance_scene)

        if self.env is not None:
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                "_physics", ""
            ):
                self.env.reset(scene_name=scene)
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)

        # task_info = copy.deepcopy(self.sample_episode(scene))
        # task_info['target'] = task_info['target_position']
        # task_info['actions'] = []

        locs = self.env.known_good_locations_list()
        # get_logger().debug("locs[0] {} locs[-1] {}".format(locs[0], locs[-1]))

        ys = [loc["y"] for loc in locs]
        miny = min(ys)
        maxy = max(ys)
        assert maxy - miny < 1e-6, "miny {} maxy {} for scene {}".format(
            miny, maxy, scene
        )

        too_close_to_target = True
        target: Optional[Dict[str, float]] = None
        for _ in range(10):
            self.env.randomize_agent_location()
            target = copy.copy(random.choice(locs))
            too_close_to_target = self.env.distance_to_point(target) <= 0
            if not too_close_to_target:
                break

        pose = self.env.agent_state()

        task_info = {
            "scene": scene,
            "initial_position": {k: pose[k] for k in ["x", "y", "z"]},
            "initial_orientation": pose["rotation"]["y"],
            "target": target,
            "actions": [],
        }

        if too_close_to_target:
            get_logger().warning("No path for sampled episode {}".format(task_info))
        # else:
        #     get_logger().debug("Path found for sampled episode {}".format(task_info))

        # pose = {**task_info['initial_position'], 'rotation': {'x': 0.0, 'y': task_info['initial_orientation'], 'z': 0.0}, 'horizon': 0.0}
        # self.env.step({"action": "TeleportFull", **pose})
        # assert self.env.last_action_success, "Failed to initialize agent to {} in {} for epsiode {}".format(pose, scene, task_info)

        self._last_sampled_task = PointNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task
    def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]:
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]):
            self.scene_index = (self.scene_index + 1) % len(self.scenes)
            # shuffle the new list of episodes to train on
            if self.shuffle_dataset:
                random.shuffle(self.episodes[self.scenes[self.scene_index]])
            self.episode_index = 0

        scene = self.scenes[self.scene_index]
        episode = self.episodes[scene][self.episode_index]
        distance_cache = (
            self.distance_caches[scene] if self.distance_caches else None
        )
        if self.env is not None:
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                    "_physics", ""):
                self.env.reset(scene)
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)

        task_info = {
            "scene": scene,
            "initial_position": episode["initial_position"],
            "initial_orientation": episode["initial_orientation"],
            "target": find_nearest_point_in_cache(
                distance_cache, _str_to_pos(episode["target_position"])
            ),
            "shortest_path": episode["shortest_path"],
            "distance_to_target": episode["shortest_path_length"],
        }

        if self.allow_flipping and random.random() > 0.5:
            task_info["mirrored"] = True
        else:
            task_info["mirrored"] = False

        if self.reset_tasks is not None:
            get_logger().debug("valid task_info {}".format(task_info))

        self.episode_index += 1
        if self.max_tasks is not None:
            self.max_tasks -= 1

        if not self.env.teleport(
            _str_to_pos(episode["initial_position"]),
            {"x": 0.0, "y": episode["initial_orientation"], "z": 0.0},
        ):
            return self.next_task()

        self._last_sampled_task = PointNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
            distance_cache=distance_cache,
            episode_info=episode,
        )

        return self._last_sampled_task
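
Task samplers like the ones above are typically driven by repeatedly calling `next_task()` until it returns `None`, i.e. until `max_tasks` is exhausted. A minimal consumer sketch (the name `drain_sampler` is hypothetical, `sampler` is assumed to be an already-constructed sampler exposing the `next_task` shown above, and the `task_info` attribute access mirrors the dictionary passed to `PointNavTask`):

    def drain_sampler(sampler, limit: int = 10):
        # Hedged sketch: pull tasks until the sampler signals exhaustion with None.
        scenes_seen = []
        for _ in range(limit):
            task = sampler.next_task()
            if task is None:  # max_tasks reached zero
                break
            scenes_seen.append(task.task_info["scene"])
        return scenes_seen
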