Example #1
 def __init__(self,
              config: Config,
              dataset: Optional[Dataset] = None) -> None:
     assert config.is_frozen(), ("Freeze the config before creating the "
                                 "environment, use config.freeze()")
     self._config = config
     self._dataset = dataset
     self._current_episode_index = None
     if self._dataset is None and config.DATASET.TYPE:
         self._dataset = make_dataset(id_dataset=config.DATASET.TYPE,
                                      config=config.DATASET)
     self._episodes = self._dataset.episodes if self._dataset else []
     self._sim = make_sim(id_sim=self._config.SIMULATOR.TYPE,
                          config=self._config.SIMULATOR)
     self._task = make_task(
         self._config.TASK.TYPE,
         task_config=self._config.TASK,
         sim=self._sim,
         dataset=self._dataset,
     )
     self.observation_space = SpaceDict({
         **self._sim.sensor_suite.observation_spaces.spaces,
         **self._task.sensor_suite.observation_spaces.spaces,
     })
     self.action_space = self._sim.action_space
     self._max_episode_seconds = (
         self._config.ENVIRONMENT.MAX_EPISODE_SECONDS)
     self._max_episode_steps = self._config.ENVIRONMENT.MAX_EPISODE_STEPS
     self._elapsed_steps = 0
     self._episode_start_time: Optional[float] = None
     self._episode_over = False
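The constructor above builds `observation_space` by merging the simulator's and the task's sensor spaces into a single `SpaceDict`. Below is a minimal standalone sketch of that merge pattern, under the assumption that `SpaceDict` is the `gym.spaces.Dict` alias habitat imports; the two plain dicts stand in for the real `sensor_suite.observation_spaces.spaces`.

import numpy as np
from gym import spaces

SpaceDict = spaces.Dict  # assumption: habitat's SpaceDict is gym.spaces.Dict

# Stand-ins for sim/task sensor_suite.observation_spaces.spaces
sim_spaces = {"rgb": spaces.Box(low=0, high=255, shape=(256, 256, 3), dtype=np.uint8)}
task_spaces = {"pointgoal": spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32)}

# Dict unpacking merges the two suites; on a key collision the task entry wins.
observation_space = SpaceDict({**sim_spaces, **task_spaces})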
Example #2
    def __init__(self,
                 config: Config,
                 dataset: Optional[Dataset] = None) -> None:
        """Constructor

        :param config: config for the environment. Should contain id for
            simulator and ``task_name`` which are passed into ``make_sim`` and
            ``make_task``.
        :param dataset: reference to dataset for task instance level
            information. Can be defined as :py:`None` in which case
            ``_episodes`` should be populated from outside.
        """

        assert config.is_frozen(), ("Freeze the config before creating the "
                                    "environment, use config.freeze().")
        self._config = config
        self._dataset = dataset
        self._current_episode_index = None
        if self._dataset is None and config.DATASET.TYPE:
            self._dataset = make_dataset(id_dataset=config.DATASET.TYPE,
                                         config=config.DATASET)
        self._episodes = self._dataset.episodes if self._dataset else []
        self._current_episode = None
        iter_option_dict = {
            k.lower(): v
            for k, v in config.ENVIRONMENT.ITERATOR_OPTIONS.items()
        }
        self._episode_iterator = self._dataset.get_episode_iterator(
            **iter_option_dict)

        # load the first scene if dataset is present
        if self._dataset:
            assert (len(self._dataset.episodes) >
                    0), "dataset should have non-empty episodes list"
            self._config.defrost()
            self._config.SIMULATOR.SCENE = self._dataset.episodes[0].scene_id
            self._config.freeze()

        self._sim = make_sim(id_sim=self._config.SIMULATOR.TYPE,
                             config=self._config.SIMULATOR)
        self._task = make_task(
            self._config.TASK.TYPE,
            config=self._config.TASK,
            sim=self._sim,
            dataset=self._dataset,
        )
        self.observation_space = SpaceDict({
            **self._sim.sensor_suite.observation_spaces.spaces,
            **self._task.sensor_suite.observation_spaces.spaces,
        })
        self.action_space = self._task.action_space
        self._max_episode_seconds = (
            self._config.ENVIRONMENT.MAX_EPISODE_SECONDS)
        self._max_episode_steps = self._config.ENVIRONMENT.MAX_EPISODE_STEPS
        self._elapsed_steps = 0
        self._episode_start_time: Optional[float] = None
        self._episode_over = False
Example #3
 def __init__(self, sensors: List[Sensor]) -> None:
     self.sensors = OrderedDict()
     spaces: OrderedDict[str, Space] = OrderedDict()
     for sensor in sensors:
         assert (
             sensor.uuid not in self.sensors
         ), "'{}' is duplicated sensor uuid".format(sensor.uuid)
         self.sensors[sensor.uuid] = sensor
         spaces[sensor.uuid] = sensor.observation_space
     self.observation_spaces = SpaceDict(spaces=spaces)
Example #4
    def _setup_actor_critic_agent(self, observations, ppo_cfg: Config) -> None:
        # Global policy observation space
        self.obs_space = self.envs.observation_spaces[0]
        # add the map observation space
        self.obs_space = SpaceDict(
            {
                "map_sum": spaces.Box(
                    low=0,
                    high=1,
                    shape=observations[0]["map_sum"].shape,
                    dtype=np.uint8,
                ),
                "curr_pose": spaces.Box(
                    low=0,
                    high=1,
                    shape=observations[0]["curr_pose"].shape,
                    dtype=np.uint8,
                ),
                **self.obs_space.spaces,
            }
        )
        # print("*************************** self.obs_space:", self.obs_space) #self.obs_space: Dict(compass:Box(1,), depth:Box(256, 256, 1), gps:Box(2,), map_sum:Box(480, 480, 23), objectgoal:Box(1,), rgb:Box(256, 256, 3), semantic:Box(256, 256))


        # Global policy action space
        self.g_action_space = spaces.Box(low=0.0, high=1.0,
                                         shape=(2,), dtype=np.float32)

        self.actor_critic = ObjectNavGraphSLAMPolicy(
            observation_space=self.obs_space,
            g_action_space=self.g_action_space,
            l_action_space=self.envs.action_spaces[0],
            pretrain_path=None,
            output_size=self.config.RL.SLAMDDPPO.map_output_size,
        )
        self.actor_critic.to(self.device)

        print("*************************** action_space", self.envs.action_spaces[0].n)
        # print("*************************** action_space n", self.envs.action_spaces)

        self.agent = DDPPOSLAM(
            actor_critic=self.actor_critic,
            clip_param=ppo_cfg.clip_param,
            ppo_epoch=ppo_cfg.ppo_epoch,
            num_mini_batch=ppo_cfg.num_mini_batch,
            value_loss_coef=ppo_cfg.value_loss_coef,
            entropy_coef=ppo_cfg.entropy_coef,
            lr=ppo_cfg.lr,
            eps=ppo_cfg.eps,
            max_grad_norm=ppo_cfg.max_grad_norm,
            use_normalized_advantage=ppo_cfg.use_normalized_advantage,
        )
Example #5
    def __init__(self,
                 forward_step,
                 angle_step,
                 is_blind=False,
                 sensors=["RGB_SENSOR"]):
        config = habitat.get_config()

        log_mesg("env: forward_step: {}, angle_step: {}".format(
            forward_step, angle_step))

        config.defrost()
        config.PYROBOT.SENSORS = sensors
        config.PYROBOT.RGB_SENSOR.WIDTH = 256
        config.PYROBOT.RGB_SENSOR.HEIGHT = 256
        config.PYROBOT.DEPTH_SENSOR.WIDTH = 256
        config.PYROBOT.DEPTH_SENSOR.HEIGHT = 256
        config.PYROBOT.DEPTH_SENSOR.MAX_DEPTH = 10
        config.PYROBOT.DEPTH_SENSOR.MIN_DEPTH = 0.3
        config.freeze()

        self._reality = make_sim(id_sim="PyRobot-v0", config=config.PYROBOT)
        self._angle = (angle_step / 180) * np.pi
        self._pointgoal_key = "pointgoal_with_gps_compass"
        self.is_blind = is_blind

        if not is_blind:
            sensors_dict = {
                **self._reality.sensor_suite.observation_spaces.spaces
            }
        else:
            sensors_dict = {}

        sensors_dict[self._pointgoal_key] = spaces.Box(
            low=np.finfo(np.float32).min,
            high=np.finfo(np.float32).max,
            shape=(2, ),
            dtype=np.float32,
        )
        self.observation_space = SpaceDict(sensors_dict)

        self.action_space = spaces.Discrete(4)

        self._actions = {
            "forward": [forward_step, 0, 0],
            "left": [0, 0, self._angle],
            "right": [0, 0, -self._angle],
            "stop": [0, 0, 0],
        }
        self._process = None
        self._last_time = -1
Example #6
def load_model():
    depth_256_space = SpaceDict({
        'depth':
        spaces.Box(low=0., high=1., shape=(256, 256, 1)),
        'pointgoal_with_gps_compass':
        spaces.Box(
            low=np.finfo(np.float32).min,
            high=np.finfo(np.float32).max,
            shape=(2, ),
            dtype=np.float32,
        )
    })

    if GAUSSIAN:
        action_space = spaces.Box(np.array([float('-inf'),
                                            float('-inf')]),
                                  np.array([float('inf'),
                                            float('inf')]))
        action_distribution = 'gaussian'
        dim_actions = 2
    elif DISCRETE_4:
        action_space = spaces.Discrete(4)
        action_distribution = 'categorical'
        dim_actions = 4
    elif DISCRETE_6:
        action_space = spaces.Discrete(6)
        action_distribution = 'categorical'
        dim_actions = 6

    model = PointNavResNetPolicy(observation_space=depth_256_space,
                                 action_space=action_space,
                                 hidden_size=512,
                                 rnn_type='LSTM',
                                 num_recurrent_layers=2,
                                 backbone='resnet50',
                                 normalize_visual_inputs=False,
                                 action_distribution=action_distribution,
                                 dim_actions=dim_actions)
    model.to(torch.device("cpu"))

    data_dict = OrderedDict()
    with open(WEIGHTS_PATH, 'r') as f:
        data_dict = json.load(f)
    model.load_state_dict({
        k[len("actor_critic."):]: torch.tensor(v)
        for k, v in data_dict.items() if k.startswith("actor_critic.")
    })

    return model
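Because `depth_256_space` is an ordinary Gym `Dict` space, it can also be used to sanity-check observations before they reach the policy. A small usage sketch follows, rebuilding the same space as above; the policy call itself is omitted since its exact signature is not shown here.

import numpy as np
from gym import spaces

# Rebuild the same Dict space as in load_model() above (sketch only).
depth_256_space = spaces.Dict({
    'depth': spaces.Box(low=0., high=1., shape=(256, 256, 1)),
    'pointgoal_with_gps_compass': spaces.Box(
        low=np.finfo(np.float32).min,
        high=np.finfo(np.float32).max,
        shape=(2,),
        dtype=np.float32,
    ),
})

dummy_obs = depth_256_space.sample()          # dict of arrays, keyed by sensor name
assert depth_256_space.contains(dummy_obs)    # shapes and bounds match the space
print(dummy_obs["depth"].shape)               # (256, 256, 1)
print(dummy_obs["pointgoal_with_gps_compass"].shape)  # (2,)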
Example #7
    def __init__(self, sensors: Iterable[Sensor]) -> None:
        """Constructor

        :param sensors: list containing sensors for the environment, uuid of
            each sensor must be unique.
        """
        self.sensors = OrderedDict()
        spaces: OrderedDict[str, Space] = OrderedDict()
        for sensor in sensors:
            assert (
                sensor.uuid not in self.sensors
            ), "'{}' is duplicated sensor uuid".format(sensor.uuid)
            self.sensors[sensor.uuid] = sensor
            spaces[sensor.uuid] = sensor.observation_space
        self.observation_spaces = SpaceDict(spaces=spaces)
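In the suite above, each sensor contributes its `observation_space` under its `uuid`, and duplicate uuids are rejected. The same idea is sketched below with a hypothetical stand-in sensor (not a real habitat `Sensor`; anything exposing `uuid` and `observation_space` would fit), again assuming `SpaceDict` is `gym.spaces.Dict`.

from collections import OrderedDict
import numpy as np
from gym import spaces

class FakeRGBSensor:  # hypothetical stand-in for a habitat Sensor
    uuid = "rgb"
    observation_space = spaces.Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8)

suite_spaces = OrderedDict()
for sensor in [FakeRGBSensor()]:
    # Reject duplicate uuids, mirroring the assert in the constructor above.
    assert sensor.uuid not in suite_spaces, "'{}' is duplicated sensor uuid".format(sensor.uuid)
    suite_spaces[sensor.uuid] = sensor.observation_space

observation_spaces = spaces.Dict(spaces=suite_spaces)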
Example #8
    def __init__(self,
                 config: Config,
                 dataset: Optional[Dataset] = None) -> None:
        assert config.is_frozen(), ("Freeze the config before creating the "
                                    "environment, use config.freeze().")
        self._config = config
        self._dataset = dataset
        self._current_episode_index = None
        if self._dataset is None and config.DATASET.TYPE:
            self._dataset = make_dataset(id_dataset=config.DATASET.TYPE,
                                         config=config.DATASET)
        self._episodes = self._dataset.episodes if self._dataset else []
        self._current_episode = None
        iter_option_dict = {
            k.lower(): v
            for k, v in config.ENVIRONMENT.ITERATOR_OPTIONS.items()
        }
        self._episode_iterator = self._dataset.get_episode_iterator(
            **iter_option_dict)

        # load the first scene if dataset is present
        if self._dataset:
            assert (len(self._dataset.episodes) >
                    0), "dataset should have non-empty episodes list"
            self._config.defrost()
            self._config.SIMULATOR.SCENE = self._dataset.episodes[0].scene_id
            self._config.freeze()

        self._sim = make_sim(id_sim=self._config.SIMULATOR.TYPE,
                             config=self._config.SIMULATOR)
        self._task = make_task(
            self._config.TASK.TYPE,
            task_config=self._config.TASK,
            sim=self._sim,
            dataset=self._dataset,
        )
        self.observation_space = SpaceDict({
            **self._sim.sensor_suite.observation_spaces.spaces,
            **self._task.sensor_suite.observation_spaces.spaces,
        })
        self.action_space = self._sim.action_space
        self._max_episode_seconds = (
            self._config.ENVIRONMENT.MAX_EPISODE_SECONDS)
        self._max_episode_steps = self._config.ENVIRONMENT.MAX_EPISODE_STEPS
        self._elapsed_steps = 0
        self._episode_start_time: Optional[float] = None
        self._episode_over = False
Example #9
def load_model(weights_path, dim_actions):  # DON'T CHANGE
    depth_256_space = SpaceDict({
        'depth':
        spaces.Box(low=0., high=1., shape=(240, 320, 1)),
        'pointgoal_with_gps_compass':
        spaces.Box(
            low=np.finfo(np.float32).min,
            high=np.finfo(np.float32).max,
            shape=(2, ),
            dtype=np.float32,
        )
    })

    action_space = spaces.Box(np.array([float('-inf'),
                                        float('-inf')]),
                              np.array([float('inf'),
                                        float('inf')]))
    action_distribution = 'gaussian'

    # action_space = spaces.Discrete(4)
    # action_distribution = 'categorical'

    model = PointNavResNetPolicy(observation_space=depth_256_space,
                                 action_space=action_space,
                                 hidden_size=512,
                                 rnn_type='LSTM',
                                 num_recurrent_layers=2,
                                 backbone='resnet18',
                                 normalize_visual_inputs=False,
                                 action_distribution=action_distribution,
                                 dim_actions=dim_actions)
    model.to(torch.device(DEVICE))

    state_dict = OrderedDict()
    with open(weights_path, 'r') as f:
        state_dict = json.load(f)
    # state_dict = torch.load(weights_path, map_location=DEVICE)
    model.load_state_dict({
        k[len("actor_critic."):]: torch.tensor(v)
        for k, v in state_dict.items() if k.startswith("actor_critic.")
    })

    return model
Example #10
    def train(self) -> None:
        r"""Main method for DD-PPO.

        Returns:
            None
        """
        self.local_rank, tcp_store = init_distrib_slurm(
            self.config.RL.DDPPO.distrib_backend)
        add_signal_handlers()

        # Stores the number of workers that have finished their rollout
        num_rollouts_done_store = distrib.PrefixStore("rollout_tracker",
                                                      tcp_store)
        num_rollouts_done_store.set("num_done", "0")

        self.world_rank = distrib.get_rank()
        self.world_size = distrib.get_world_size()

        self.config.defrost()
        self.config.TORCH_GPU_ID = self.local_rank
        self.config.SIMULATOR_GPU_ID = self.local_rank
        # Multiply by the number of simulators to make sure they also get unique seeds
        self.config.TASK_CONFIG.SEED += (self.world_rank *
                                         self.config.NUM_PROCESSES)
        self.config.freeze()

        random.seed(self.config.TASK_CONFIG.SEED)
        np.random.seed(self.config.TASK_CONFIG.SEED)
        torch.manual_seed(self.config.TASK_CONFIG.SEED)

        if torch.cuda.is_available():
            self.device = torch.device("cuda", self.local_rank)
            torch.cuda.set_device(self.device)
        else:
            self.device = torch.device("cpu")

        self.envs = construct_envs(self.config,
                                   get_env_class(self.config.ENV_NAME))

        ppo_cfg = self.config.RL.PPO
        if (not os.path.isdir(self.config.CHECKPOINT_FOLDER)
                and self.world_rank == 0):
            os.makedirs(self.config.CHECKPOINT_FOLDER)

        self._setup_actor_critic_agent(ppo_cfg)
        self.agent.init_distributed(find_unused_params=True)

        if self.world_rank == 0:
            logger.info("agent number of trainable parameters: {}".format(
                sum(param.numel() for param in self.agent.parameters()
                    if param.requires_grad)))

        observations = self.envs.reset()
        batch = batch_obs(observations, device=self.device)

        obs_space = self.envs.observation_spaces[0]
        if self._static_encoder:
            self._encoder = self.actor_critic.net.visual_encoder
            obs_space = SpaceDict({
                "visual_features":
                spaces.Box(
                    low=np.finfo(np.float32).min,
                    high=np.finfo(np.float32).max,
                    shape=self._encoder.output_shape,
                    dtype=np.float32,
                ),
                **obs_space.spaces,
            })
            with torch.no_grad():
                batch["visual_features"] = self._encoder(batch)

        rollouts = RolloutStorage(
            ppo_cfg.num_steps,
            self.envs.num_envs,
            obs_space,
            self.envs.action_spaces[0],
            ppo_cfg.hidden_size,
            num_recurrent_layers=self.actor_critic.net.num_recurrent_layers,
        )
        rollouts.to(self.device)

        for sensor in rollouts.observations:
            rollouts.observations[sensor][0].copy_(batch[sensor])

        # batch and observations may contain shared PyTorch CUDA
        # tensors.  We must explicitly clear them here otherwise
        # they will be kept in memory for the entire duration of training!
        batch = None
        observations = None

        current_episode_reward = torch.zeros(self.envs.num_envs,
                                             1,
                                             device=self.device)
        running_episode_stats = dict(
            count=torch.zeros(self.envs.num_envs, 1, device=self.device),
            reward=torch.zeros(self.envs.num_envs, 1, device=self.device),
        )
        window_episode_stats = defaultdict(
            lambda: deque(maxlen=ppo_cfg.reward_window_size))

        t_start = time.time()
        env_time = 0
        pth_time = 0
        count_steps = 0
        count_checkpoints = 0
        start_update = 0
        prev_time = 0

        lr_scheduler = LambdaLR(
            optimizer=self.agent.optimizer,
            lr_lambda=lambda x: linear_decay(x, self.config.NUM_UPDATES),
        )

        interrupted_state = load_interrupted_state()
        if interrupted_state is not None:
            self.agent.load_state_dict(interrupted_state["state_dict"])
            self.agent.optimizer.load_state_dict(
                interrupted_state["optim_state"])
            lr_scheduler.load_state_dict(interrupted_state["lr_sched_state"])

            requeue_stats = interrupted_state["requeue_stats"]
            env_time = requeue_stats["env_time"]
            pth_time = requeue_stats["pth_time"]
            count_steps = requeue_stats["count_steps"]
            count_checkpoints = requeue_stats["count_checkpoints"]
            start_update = requeue_stats["start_update"]
            prev_time = requeue_stats["prev_time"]

        with (TensorboardWriter(self.config.TENSORBOARD_DIR,
                                flush_secs=self.flush_secs)
              if self.world_rank == 0 else contextlib.suppress()) as writer:
            for update in range(start_update, self.config.NUM_UPDATES):
                if ppo_cfg.use_linear_lr_decay:
                    lr_scheduler.step()

                if ppo_cfg.use_linear_clip_decay:
                    self.agent.clip_param = ppo_cfg.clip_param * linear_decay(
                        update, self.config.NUM_UPDATES)

                if EXIT.is_set():
                    self.envs.close()

                    if REQUEUE.is_set() and self.world_rank == 0:
                        requeue_stats = dict(
                            env_time=env_time,
                            pth_time=pth_time,
                            count_steps=count_steps,
                            count_checkpoints=count_checkpoints,
                            start_update=update,
                            prev_time=(time.time() - t_start) + prev_time,
                        )
                        save_interrupted_state(
                            dict(
                                state_dict=self.agent.state_dict(),
                                optim_state=self.agent.optimizer.state_dict(),
                                lr_sched_state=lr_scheduler.state_dict(),
                                config=self.config,
                                requeue_stats=requeue_stats,
                            ))

                    requeue_job()
                    return

                count_steps_delta = 0
                self.agent.eval()
                for step in range(ppo_cfg.num_steps):

                    (
                        delta_pth_time,
                        delta_env_time,
                        delta_steps,
                    ) = self._collect_rollout_step(rollouts,
                                                   current_episode_reward,
                                                   running_episode_stats)
                    pth_time += delta_pth_time
                    env_time += delta_env_time
                    count_steps_delta += delta_steps

                    # This is where the preemption of workers happens.  If a
                    # worker detects it will be a straggler, it preempts itself!
                    if (step >=
                            ppo_cfg.num_steps * self.SHORT_ROLLOUT_THRESHOLD
                        ) and int(num_rollouts_done_store.get("num_done")) > (
                            self.config.RL.DDPPO.sync_frac * self.world_size):
                        break

                num_rollouts_done_store.add("num_done", 1)

                self.agent.train()
                if self._static_encoder:
                    self._encoder.eval()

                (
                    delta_pth_time,
                    value_loss,
                    action_loss,
                    dist_entropy,
                ) = self._update_agent(ppo_cfg, rollouts)
                pth_time += delta_pth_time

                stats_ordering = list(sorted(running_episode_stats.keys()))
                stats = torch.stack(
                    [running_episode_stats[k] for k in stats_ordering], 0)
                distrib.all_reduce(stats)

                for i, k in enumerate(stats_ordering):
                    window_episode_stats[k].append(stats[i].clone())

                stats = torch.tensor(
                    [value_loss, action_loss, count_steps_delta],
                    device=self.device,
                )
                distrib.all_reduce(stats)
                count_steps += stats[2].item()

                if self.world_rank == 0:
                    num_rollouts_done_store.set("num_done", "0")

                    losses = [
                        stats[0].item() / self.world_size,
                        stats[1].item() / self.world_size,
                    ]
                    deltas = {
                        k: ((v[-1] - v[0]).sum().item()
                            if len(v) > 1 else v[0].sum().item())
                        for k, v in window_episode_stats.items()
                    }
                    deltas["count"] = max(deltas["count"], 1.0)

                    writer.add_scalar(
                        "reward",
                        deltas["reward"] / deltas["count"],
                        count_steps,
                    )

                    # Check to see if there are any metrics
                    # that haven't been logged yet
                    metrics = {
                        k: v / deltas["count"]
                        for k, v in deltas.items()
                        if k not in {"reward", "count"}
                    }
                    if len(metrics) > 0:
                        writer.add_scalars("metrics", metrics, count_steps)

                    writer.add_scalars(
                        "losses",
                        {k: l
                         for l, k in zip(losses, ["value", "policy"])},
                        count_steps,
                    )

                    # log stats
                    if update > 0 and update % self.config.LOG_INTERVAL == 0:
                        logger.info("update: {}\tfps: {:.3f}\t".format(
                            update,
                            count_steps /
                            ((time.time() - t_start) + prev_time),
                        ))

                        logger.info(
                            "update: {}\tenv-time: {:.3f}s\tpth-time: {:.3f}s\t"
                            "frames: {}".format(update, env_time, pth_time,
                                                count_steps))
                        logger.info("Average window size: {}  {}".format(
                            len(window_episode_stats["count"]),
                            "  ".join(
                                "{}: {:.3f}".format(k, v / deltas["count"])
                                for k, v in deltas.items() if k != "count"),
                        ))

                    # checkpoint model
                    if update % self.config.CHECKPOINT_INTERVAL == 0:
                        self.save_checkpoint(
                            f"ckpt.{count_checkpoints}.pth",
                            dict(step=count_steps),
                        )
                        count_checkpoints += 1

            self.envs.close()
Example #11
    def __init__(self, config: Config) -> None:
        """Constructor

        :param config: config for the environment. Should contain id for
            simulator and ``task_name`` which are passed into ``make_sim`` and
            ``make_task``.
        """

        assert config.is_frozen(), ("Freeze the config before creating the "
                                    "environment, use config.freeze().")
        self._config = config
        self._current_episode_index = None
        self._current_episode = None
        iter_option_dict = {
            k.lower(): v
            for k, v in config.ENVIRONMENT.ITERATOR_OPTIONS.items()
        }

        self._scenes = config.DATASET.CONTENT_SCENES
        self._swap_building_every = config.ENVIRONMENT.ITERATOR_OPTIONS.MAX_SCENE_REPEAT_EPISODES
        print('[HabitatEnv] Total {} scenes : '.format(len(self._scenes)),
              self._scenes)
        print('[HabitatEnv] swap building every', self._swap_building_every)
        self._current_scene_episode_idx = 0
        self._current_scene_idx = 0

        self._config.defrost()
        if 'mp3d' in config.DATASET.DATA_PATH:
            self._config.SIMULATOR.SCENE = os.path.join(
                config.DATASET.SCENES_DIR,
                'mp3d/{}/{}.glb'.format(self._scenes[0], self._scenes[0]))
        else:
            self._config.SIMULATOR.SCENE = os.path.join(
                config.DATASET.SCENES_DIR,
                'gibson_habitat/{}.glb'.format(self._scenes[0]))
            if not os.path.exists(self._config.SIMULATOR.SCENE):
                self._config.SIMULATOR.SCENE = os.path.join(
                    self._config.DATASET.SCENES_DIR,
                    'gibson_more/{}.glb'.format(self._scenes[0]))
        self._config.freeze()

        self._sim = make_sim(id_sim=self._config.SIMULATOR.TYPE,
                             config=self._config.SIMULATOR)
        self._task = make_task(self._config.TASK.TYPE,
                               config=self._config.TASK,
                               sim=self._sim)
        self.observation_space = SpaceDict({
            **self._sim.sensor_suite.observation_spaces.spaces,
            **self._task.sensor_suite.observation_spaces.spaces,
        })
        self.action_space = self._task.action_space
        self._max_episode_seconds = (
            self._config.ENVIRONMENT.MAX_EPISODE_SECONDS)
        self._max_episode_steps = self._config.ENVIRONMENT.MAX_EPISODE_STEPS
        self._elapsed_steps = 0
        self._episode_start_time: Optional[float] = None
        self._episode_over = False

        # TODO: list up demonstration data
        self._episode_dataset = {}
Example #12
    def __init__(self, config: Config, dataset: Optional[Dataset] = None):
        self.noise = NOISY
        print('[VisTargetNavEnv] NOISY ACTUATION : ', self.noise)
        if hasattr(config, 'AGENT_TASK'):
            self.agent_task = config.AGENT_TASK
        else:
            self.agent_task = 'search'
        if self.agent_task == 'homing':
            self.num_goals = 2
        else:
            self.num_goals = config.NUM_GOALS

        task_config = config.TASK_CONFIG
        task_config.defrost()
        #task_config.TASK.TOP_DOWN_MAP.MAP_RESOLUTION = 1250
        task_config.TASK.TOP_DOWN_MAP.DRAW_SOURCE = True
        task_config.TASK.TOP_DOWN_MAP.DRAW_SHORTEST_PATH = True
        task_config.TASK.TOP_DOWN_MAP.FOG_OF_WAR.VISIBILITY_DIST = 2.0
        task_config.TASK.TOP_DOWN_MAP.FOG_OF_WAR.FOV = 360
        task_config.TASK.TOP_DOWN_MAP.FOG_OF_WAR.DRAW = True
        task_config.TASK.TOP_DOWN_MAP.DRAW_VIEW_POINTS = False
        task_config.TASK.TOP_DOWN_MAP.DRAW_GOAL_POSITIONS = True
        task_config.TASK.TOP_DOWN_MAP.DRAW_GOAL_AABBS = False
        if ('GMT' in config.POLICY or 'NTS' in config.POLICY) and RENDER:
            task_config.TASK.TOP_DOWN_GRAPH_MAP = config.TASK_CONFIG.TASK.TOP_DOWN_MAP.clone(
            )
            if 'GMT' in config.POLICY:
                task_config.TASK.TOP_DOWN_GRAPH_MAP.TYPE = "TopDownGraphMap"
            elif 'NTS' in config.POLICY:
                task_config.TASK.TOP_DOWN_GRAPH_MAP.TYPE = 'NTSGraphMap'
                task_config.TASK.TOP_DOWN_GRAPH_MAP.MAP_RESOLUTION = 4000
            task_config.TASK.TOP_DOWN_GRAPH_MAP.NUM_TOPDOWN_MAP_SAMPLE_POINTS = 20000
            task_config.TASK.MEASUREMENTS += ['TOP_DOWN_GRAPH_MAP']
            if 'TOP_DOWN_MAP' in config.TASK_CONFIG.TASK.MEASUREMENTS:
                task_config.TASK.MEASUREMENTS = [
                    k for k in task_config.TASK.MEASUREMENTS
                    if 'TOP_DOWN_MAP' != k
                ]
        task_config.SIMULATOR.ACTION_SPACE_CONFIG = "CustomActionSpaceConfiguration"
        task_config.TASK.POSSIBLE_ACTIONS = task_config.TASK.POSSIBLE_ACTIONS + [
            'NOISY_FORWARD', 'NOISY_RIGHT', 'NOISY_LEFT'
        ]
        task_config.TASK.ACTIONS.NOISY_FORWARD = habitat.config.Config()
        task_config.TASK.ACTIONS.NOISY_FORWARD.TYPE = "NOISYFORWARD"
        task_config.TASK.ACTIONS.NOISY_RIGHT = habitat.config.Config()
        task_config.TASK.ACTIONS.NOISY_RIGHT.TYPE = "NOISYRIGHT"
        task_config.TASK.ACTIONS.NOISY_LEFT = habitat.config.Config()
        task_config.TASK.ACTIONS.NOISY_LEFT.TYPE = "NOISYLEFT"
        task_config.TASK.MEASUREMENTS = [
            'GOAL_INDEX'
        ] + task_config.TASK.MEASUREMENTS + ['SOFT_SPL']
        task_config.TASK.DISTANCE_TO_GOAL.TYPE = 'Custom_DistanceToGoal'
        if self.agent_task != 'search':
            task_config.TASK.SPL.TYPE = 'Custom_SPL'
        task_config.TASK.SOFT_SPL.TYPE = 'Custom_SoftSPL'
        task_config.TASK.GOAL_INDEX = task_config.TASK.SPL.clone()
        task_config.TASK.GOAL_INDEX.TYPE = 'GoalIndex'
        task_config.freeze()
        self.config = config
        self._core_env_config = config.TASK_CONFIG
        self._reward_measure_name = config.REWARD_METHOD
        self._success_measure_name = config.SUCCESS_MEASURE
        self.success_distance = config.SUCCESS_DISTANCE
        self._previous_measure = None
        self._previous_action = -1
        self.time_t = 0
        self.stuck = 0
        self.follower = None
        if 'NOISY_FORWARD' not in HabitatSimActions:
            HabitatSimActions.extend_action_space("NOISY_FORWARD")
            HabitatSimActions.extend_action_space("NOISY_RIGHT")
            HabitatSimActions.extend_action_space("NOISY_LEFT")
        if 'STOP' in task_config.TASK.POSSIBLE_ACTIONS:
            self.action_dict = {
                0: HabitatSimActions.STOP,
                1: "NOISY_FORWARD",
                2: "NOISY_LEFT",
                3: "NOISY_RIGHT"
            }
        else:
            self.action_dict = {
                0: "NOISY_FORWARD",
                1: "NOISY_LEFT",
                2: "NOISY_RIGHT"
            }
        super().__init__(self._core_env_config, dataset)
        act_dict = {
            "MOVE_FORWARD": EmptySpace(),
            'TURN_LEFT': EmptySpace(),
            'TURN_RIGHT': EmptySpace()
        }
        if 'STOP' in task_config.TASK.POSSIBLE_ACTIONS:
            act_dict.update({'STOP': EmptySpace()})
        self.action_space = ActionSpace(act_dict)
        obs_dict = {
            'panoramic_rgb':
            self.habitat_env._task.sensor_suite.observation_spaces.
            spaces['panoramic_rgb'],
            'panoramic_depth':
            self.habitat_env._task.sensor_suite.observation_spaces.
            spaces['panoramic_depth'],
            'target_goal':
            self.habitat_env._task.sensor_suite.observation_spaces.
            spaces['target_goal'],
            'step':
            Box(low=np.array(0), high=np.array(500), dtype=np.float32),
            'prev_act':
            Box(low=np.array(-1),
                high=np.array(self.action_space.n),
                dtype=np.int32),
            'gt_action':
            Box(low=np.array(-1),
                high=np.array(self.action_space.n),
                dtype=np.int32),
            'position':
            Box(low=-np.Inf, high=np.Inf, shape=(3, ), dtype=np.float32),
            'target_pose':
            Box(low=-np.Inf, high=np.Inf, shape=(3, ), dtype=np.float32),
            'distance':
            Box(low=-np.Inf, high=np.Inf, shape=(1, ), dtype=np.float32),
        }
        if 'GMT' in config.POLICY and RENDER:
            self.mapper = self.habitat_env.task.measurements.measures[
                'top_down_map']
            #obs_dict.update({'unexplored':Box(low=0, high=1, shape=(self.mapper.delta,), dtype=np.int32),
            #    'neighbors': Box(low=0, high=1, shape=(self.mapper.delta,), dtype=np.int32),})
        else:
            self.mapper = None
        if 'aux' in self.config.POLICY:
            self.return_have_been = True
            self.return_target_dist_score = True
            obs_dict.update({
                'have_been':
                Box(low=0, high=1, shape=(1, ), dtype=np.int32),
                'target_dist_score':
                Box(low=0, high=1, shape=(1, ), dtype=np.float32),
            })
        else:
            self.return_have_been = False
            self.return_target_dist_score = False

        self.observation_space = SpaceDict(obs_dict)

        if config.DIFFICULTY == 'easy':
            self.habitat_env.difficulty = 'easy'
            self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 1.5, 3.0
        elif config.DIFFICULTY == 'medium':
            self.habitat_env.difficulty = 'medium'
            self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 3.0, 5.0
        elif config.DIFFICULTY == 'hard':
            self.habitat_env.difficulty = 'hard'
            self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 5.0, 10.0
        elif config.DIFFICULTY == 'random':
            self.habitat_env.difficulty = 'random'
            self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 3.0, 10.0
        else:
            raise NotImplementedError

        self.habitat_env._num_goals = self.num_goals
        self.habitat_env._agent_task = self.agent_task
        print('current task : %s' % (self.agent_task))
        print('current difficulty %s, MIN_DIST %f, MAX_DIST %f - # goals %d' %
              (config.DIFFICULTY, self.habitat_env.MIN_DIST,
               self.habitat_env.MAX_DIST, self.habitat_env._num_goals))

        self.min_measure = self.habitat_env.MAX_DIST
        self.reward_method = config.REWARD_METHOD
        if self.reward_method == 'progress':
            self.get_reward = self.get_progress_reward
        elif self.reward_method == 'milestone':
            self.get_reward = self.get_milestone_reward
        elif self.reward_method == 'coverage':
            self.get_reward = self.get_coverage_reward

        self.run_mode = 'RL'
        self.number_of_episodes = 1000
        self.need_gt_action = False
        self.has_log_info = None