def __init__(self, config: Config, dataset: Optional[Dataset] = None) -> None:
    assert config.is_frozen(), (
        "Freeze the config before creating the "
        "environment, use config.freeze()"
    )
    self._config = config
    self._dataset = dataset
    self._current_episode_index = None
    if self._dataset is None and config.DATASET.TYPE:
        self._dataset = make_dataset(
            id_dataset=config.DATASET.TYPE, config=config.DATASET
        )
    self._episodes = self._dataset.episodes if self._dataset else []

    self._sim = make_sim(
        id_sim=self._config.SIMULATOR.TYPE, config=self._config.SIMULATOR
    )
    self._task = make_task(
        self._config.TASK.TYPE,
        task_config=self._config.TASK,
        sim=self._sim,
        dataset=dataset,
    )
    self.observation_space = SpaceDict(
        {
            **self._sim.sensor_suite.observation_spaces.spaces,
            **self._task.sensor_suite.observation_spaces.spaces,
        }
    )
    self.action_space = self._sim.action_space

    self._max_episode_seconds = (
        self._config.ENVIRONMENT.MAX_EPISODE_SECONDS
    )
    self._max_episode_steps = self._config.ENVIRONMENT.MAX_EPISODE_STEPS
    self._elapsed_steps = 0
    self._episode_start_time: Optional[float] = None
    self._episode_over = False
def __init__(self, config: Config, dataset: Optional[Dataset] = None) -> None:
    """Constructor

    :param config: config for the environment. Should contain id for
        simulator and ``task_name`` which are passed into ``make_sim`` and
        ``make_task``.
    :param dataset: reference to dataset for task instance level
        information. Can be defined as :py:`None` in which case
        ``_episodes`` should be populated from outside.
    """
    assert config.is_frozen(), (
        "Freeze the config before creating the "
        "environment, use config.freeze()."
    )
    self._config = config
    self._dataset = dataset
    self._current_episode_index = None
    if self._dataset is None and config.DATASET.TYPE:
        self._dataset = make_dataset(
            id_dataset=config.DATASET.TYPE, config=config.DATASET
        )
    self._episodes = self._dataset.episodes if self._dataset else []
    self._current_episode = None
    iter_option_dict = {
        k.lower(): v
        for k, v in config.ENVIRONMENT.ITERATOR_OPTIONS.items()
    }
    self._episode_iterator = self._dataset.get_episode_iterator(
        **iter_option_dict
    )

    # load the first scene if dataset is present
    if self._dataset:
        assert (
            len(self._dataset.episodes) > 0
        ), "dataset should have non-empty episodes list"
        self._config.defrost()
        self._config.SIMULATOR.SCENE = self._dataset.episodes[0].scene_id
        self._config.freeze()

    self._sim = make_sim(
        id_sim=self._config.SIMULATOR.TYPE, config=self._config.SIMULATOR
    )
    self._task = make_task(
        self._config.TASK.TYPE,
        config=self._config.TASK,
        sim=self._sim,
        dataset=self._dataset,
    )
    self.observation_space = SpaceDict(
        {
            **self._sim.sensor_suite.observation_spaces.spaces,
            **self._task.sensor_suite.observation_spaces.spaces,
        }
    )
    self.action_space = self._task.action_space

    self._max_episode_seconds = (
        self._config.ENVIRONMENT.MAX_EPISODE_SECONDS
    )
    self._max_episode_steps = self._config.ENVIRONMENT.MAX_EPISODE_STEPS
    self._elapsed_steps = 0
    self._episode_start_time: Optional[float] = None
    self._episode_over = False
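# --- Illustrative usage (not part of the original module) -------------------
# A minimal sketch of driving the environment built by the constructor above,
# in the style of habitat's own examples. The task-config path is an
# assumption; point it at your install and downloaded data.
import habitat


def _demo_env_episode() -> None:
    config = habitat.get_config("configs/tasks/pointnav.yaml")  # returned frozen
    env = habitat.Env(config=config)
    observations = env.reset()
    while not env.episode_over:
        # sample() draws a random action from the task's action space
        observations = env.step(env.action_space.sample())
    env.close()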
def __init__(self, sensors: List[Sensor]) -> None:
    self.sensors = OrderedDict()
    spaces: OrderedDict[str, Space] = OrderedDict()
    for sensor in sensors:
        assert (
            sensor.uuid not in self.sensors
        ), "'{}' is duplicated sensor uuid".format(sensor.uuid)
        self.sensors[sensor.uuid] = sensor
        spaces[sensor.uuid] = sensor.observation_space
    self.observation_spaces = SpaceDict(spaces=spaces)
def _setup_actor_critic_agent(self, observations, ppo_cfg: Config) -> None:
    # Global policy observation space
    self.obs_space = self.envs.observation_spaces[0]
    # add the map observation space
    self.obs_space = SpaceDict(
        {
            "map_sum": spaces.Box(
                low=0,
                high=1,
                shape=observations[0]["map_sum"].shape,
                dtype=np.uint8,
            ),
            "curr_pose": spaces.Box(
                low=0,
                high=1,
                shape=observations[0]["curr_pose"].shape,
                dtype=np.uint8,
            ),
            **self.obs_space.spaces,
        }
    )
    # print("*************************** self.obs_space:", self.obs_space)
    # self.obs_space: Dict(compass:Box(1,), depth:Box(256, 256, 1),
    # gps:Box(2,), map_sum:Box(480, 480, 23), objectgoal:Box(1,),
    # rgb:Box(256, 256, 3), semantic:Box(256, 256))

    # Global policy action space
    self.g_action_space = spaces.Box(
        low=0.0, high=1.0, shape=(2,), dtype=np.float32
    )

    self.actor_critic = ObjectNavGraphSLAMPolicy(
        observation_space=self.obs_space,
        g_action_space=self.g_action_space,
        l_action_space=self.envs.action_spaces[0],
        pretrain_path=None,
        output_size=self.config.RL.SLAMDDPPO.map_output_size,
    )
    self.actor_critic.to(self.device)

    print("*************************** action_space", self.envs.action_spaces[0].n)
    # print("*************************** action_space n", self.envs.action_spaces)

    self.agent = DDPPOSLAM(
        actor_critic=self.actor_critic,
        clip_param=ppo_cfg.clip_param,
        ppo_epoch=ppo_cfg.ppo_epoch,
        num_mini_batch=ppo_cfg.num_mini_batch,
        value_loss_coef=ppo_cfg.value_loss_coef,
        entropy_coef=ppo_cfg.entropy_coef,
        lr=ppo_cfg.lr,
        eps=ppo_cfg.eps,
        max_grad_norm=ppo_cfg.max_grad_norm,
        use_normalized_advantage=ppo_cfg.use_normalized_advantage,
    )
def __init__(self, forward_step, angle_step, is_blind=False, sensors=["RGB_SENSOR"]):
    config = habitat.get_config()

    log_mesg(
        "env: forward_step: {}, angle_step: {}".format(forward_step, angle_step)
    )

    config.defrost()
    config.PYROBOT.SENSORS = sensors
    config.PYROBOT.RGB_SENSOR.WIDTH = 256
    config.PYROBOT.RGB_SENSOR.HEIGHT = 256
    config.PYROBOT.DEPTH_SENSOR.WIDTH = 256
    config.PYROBOT.DEPTH_SENSOR.HEIGHT = 256
    config.PYROBOT.DEPTH_SENSOR.MAX_DEPTH = 10
    config.PYROBOT.DEPTH_SENSOR.MIN_DEPTH = 0.3
    config.freeze()

    self._reality = make_sim(id_sim="PyRobot-v0", config=config.PYROBOT)
    self._angle = (angle_step / 180) * np.pi
    self._pointgoal_key = "pointgoal_with_gps_compass"
    self.is_blind = is_blind

    if not is_blind:
        sensors_dict = {
            **self._reality.sensor_suite.observation_spaces.spaces
        }
    else:
        sensors_dict = {}

    sensors_dict[self._pointgoal_key] = spaces.Box(
        low=np.finfo(np.float32).min,
        high=np.finfo(np.float32).max,
        shape=(2,),
        dtype=np.float32,
    )
    self.observation_space = SpaceDict(sensors_dict)

    self.action_space = spaces.Discrete(4)

    self._actions = {
        "forward": [forward_step, 0, 0],
        "left": [0, 0, self._angle],
        "right": [0, 0, -self._angle],
        "stop": [0, 0, 0],
    }
    self._process = None
    self._last_time = -1
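# --- Illustrative usage (not part of the original class) --------------------
# Hedged sketch of forwarding one discrete action to the PyRobot backend
# created above. The "go_to_relative" parameter names follow habitat's PyRobot
# wrapper as commonly used; treat them as assumptions to verify.
def _demo_execute(env, action_name: str) -> None:
    # env._actions maps e.g. "forward" -> [forward_step, 0, 0] as (x, y, theta)
    env._reality.step(
        "go_to_relative",
        {
            "xyt_position": env._actions[action_name],
            "use_map": False,
            "close_loop": True,
            "smooth": False,
        },
    )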
def load_model():
    depth_256_space = SpaceDict({
        'depth': spaces.Box(low=0., high=1., shape=(256, 256, 1)),
        'pointgoal_with_gps_compass': spaces.Box(
            low=np.finfo(np.float32).min,
            high=np.finfo(np.float32).max,
            shape=(2,),
            dtype=np.float32,
        ),
    })

    if GAUSSIAN:
        action_space = spaces.Box(
            np.array([float('-inf'), float('-inf')]),
            np.array([float('inf'), float('inf')]),
        )
        action_distribution = 'gaussian'
        dim_actions = 2
    elif DISCRETE_4:
        action_space = spaces.Discrete(4)
        action_distribution = 'categorical'
        dim_actions = 4
    elif DISCRETE_6:
        action_space = spaces.Discrete(6)
        action_distribution = 'categorical'
        dim_actions = 6

    model = PointNavResNetPolicy(
        observation_space=depth_256_space,
        action_space=action_space,
        hidden_size=512,
        rnn_type='LSTM',
        num_recurrent_layers=2,
        backbone='resnet50',
        normalize_visual_inputs=False,
        action_distribution=action_distribution,
        dim_actions=dim_actions,
    )
    model.to(torch.device("cpu"))

    data_dict = OrderedDict()
    with open(WEIGHTS_PATH, 'r') as f:
        data_dict = json.load(f)
    model.load_state_dict({
        k[len("actor_critic."):]: torch.tensor(v)
        for k, v in data_dict.items()
        if k.startswith("actor_critic.")
    })

    return model
def __init__(self, sensors: Iterable[Sensor]) -> None:
    """Constructor

    :param sensors: list containing sensors for the environment, uuid of
        each sensor must be unique.
    """
    self.sensors = OrderedDict()
    spaces: OrderedDict[str, Space] = OrderedDict()
    for sensor in sensors:
        assert (
            sensor.uuid not in self.sensors
        ), "'{}' is duplicated sensor uuid".format(sensor.uuid)
        self.sensors[sensor.uuid] = sensor
        spaces[sensor.uuid] = sensor.observation_space
    self.observation_spaces = SpaceDict(spaces=spaces)
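# --- Illustrative usage (not part of the original class) --------------------
# A minimal sketch, assuming this is habitat's SensorSuite: stand-in objects
# expose the only two attributes the constructor reads (uuid and
# observation_space); real habitat sensors subclass Sensor instead.
import numpy as np
from gym import spaces
from types import SimpleNamespace


def _demo_sensor_suite() -> None:
    rgb = SimpleNamespace(
        uuid="rgb",
        observation_space=spaces.Box(0, 255, (256, 256, 3), np.uint8),
    )
    depth = SimpleNamespace(
        uuid="depth",
        observation_space=spaces.Box(0.0, 1.0, (256, 256, 1), np.float32),
    )
    suite = SensorSuite([rgb, depth])
    # one entry per unique uuid; a duplicated uuid would trip the assert above
    assert set(suite.observation_spaces.spaces) == {"rgb", "depth"}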
def __init__(self, config: Config, dataset: Optional[Dataset] = None) -> None:
    assert config.is_frozen(), (
        "Freeze the config before creating the "
        "environment, use config.freeze()."
    )
    self._config = config
    self._dataset = dataset
    self._current_episode_index = None
    if self._dataset is None and config.DATASET.TYPE:
        self._dataset = make_dataset(
            id_dataset=config.DATASET.TYPE, config=config.DATASET
        )
    self._episodes = self._dataset.episodes if self._dataset else []
    self._current_episode = None
    iter_option_dict = {
        k.lower(): v
        for k, v in config.ENVIRONMENT.ITERATOR_OPTIONS.items()
    }
    self._episode_iterator = self._dataset.get_episode_iterator(
        **iter_option_dict
    )

    # load the first scene if dataset is present
    if self._dataset:
        assert (
            len(self._dataset.episodes) > 0
        ), "dataset should have non-empty episodes list"
        self._config.defrost()
        self._config.SIMULATOR.SCENE = self._dataset.episodes[0].scene_id
        self._config.freeze()

    self._sim = make_sim(
        id_sim=self._config.SIMULATOR.TYPE, config=self._config.SIMULATOR
    )
    self._task = make_task(
        self._config.TASK.TYPE,
        task_config=self._config.TASK,
        sim=self._sim,
        dataset=self._dataset,
    )
    self.observation_space = SpaceDict(
        {
            **self._sim.sensor_suite.observation_spaces.spaces,
            **self._task.sensor_suite.observation_spaces.spaces,
        }
    )
    self.action_space = self._sim.action_space

    self._max_episode_seconds = (
        self._config.ENVIRONMENT.MAX_EPISODE_SECONDS
    )
    self._max_episode_steps = self._config.ENVIRONMENT.MAX_EPISODE_STEPS
    self._elapsed_steps = 0
    self._episode_start_time: Optional[float] = None
    self._episode_over = False
def load_model(weights_path, dim_actions):
    # DON'T CHANGE
    depth_256_space = SpaceDict({
        'depth': spaces.Box(low=0., high=1., shape=(240, 320, 1)),
        'pointgoal_with_gps_compass': spaces.Box(
            low=np.finfo(np.float32).min,
            high=np.finfo(np.float32).max,
            shape=(2,),
            dtype=np.float32,
        ),
    })

    action_space = spaces.Box(
        np.array([float('-inf'), float('-inf')]),
        np.array([float('inf'), float('inf')]),
    )
    action_distribution = 'gaussian'
    # action_space = spaces.Discrete(4)
    # action_distribution = 'categorical'

    model = PointNavResNetPolicy(
        observation_space=depth_256_space,
        action_space=action_space,
        hidden_size=512,
        rnn_type='LSTM',
        num_recurrent_layers=2,
        backbone='resnet18',
        normalize_visual_inputs=False,
        action_distribution=action_distribution,
        dim_actions=dim_actions,
    )
    model.to(torch.device(DEVICE))

    state_dict = OrderedDict()
    with open(weights_path, 'r') as f:
        state_dict = json.load(f)
    # state_dict = torch.load(weights_path, map_location=DEVICE)
    model.load_state_dict({
        k[len("actor_critic."):]: torch.tensor(v)
        for k, v in state_dict.items()
        if k.startswith("actor_critic.")
    })

    return model
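# --- Illustrative usage (not part of the original module) -------------------
# Hedged single-step inference sketch. The act() signature follows
# habitat-baselines' Policy.act(observations, rnn_hidden_states, prev_actions,
# masks, deterministic); the prev_actions shape for this fork's gaussian head
# is an assumption, and "weights.json" is a placeholder path.
import torch


def _demo_inference() -> None:
    model = load_model("weights.json", dim_actions=2)
    observations = {
        "depth": torch.rand(1, 240, 320, 1),
        "pointgoal_with_gps_compass": torch.zeros(1, 2),
    }
    hidden = torch.zeros(model.net.num_recurrent_layers, 1, 512)
    prev_actions = torch.zeros(1, 2)  # assumed (batch, dim_actions) for gaussian
    masks = torch.zeros(1, 1)  # 0 signals episode start, resetting RNN state
    with torch.no_grad():
        _, action, _, hidden = model.act(
            observations, hidden, prev_actions, masks, deterministic=True
        )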
def train(self) -> None:
    r"""Main method for DD-PPO.

    Returns:
        None
    """
    self.local_rank, tcp_store = init_distrib_slurm(
        self.config.RL.DDPPO.distrib_backend
    )
    add_signal_handlers()

    # Stores the number of workers that have finished their rollout
    num_rollouts_done_store = distrib.PrefixStore("rollout_tracker", tcp_store)
    num_rollouts_done_store.set("num_done", "0")

    self.world_rank = distrib.get_rank()
    self.world_size = distrib.get_world_size()

    self.config.defrost()
    self.config.TORCH_GPU_ID = self.local_rank
    self.config.SIMULATOR_GPU_ID = self.local_rank
    # Multiply by the number of simulators to make sure they also get unique seeds
    self.config.TASK_CONFIG.SEED += self.world_rank * self.config.NUM_PROCESSES
    self.config.freeze()

    random.seed(self.config.TASK_CONFIG.SEED)
    np.random.seed(self.config.TASK_CONFIG.SEED)
    torch.manual_seed(self.config.TASK_CONFIG.SEED)

    if torch.cuda.is_available():
        self.device = torch.device("cuda", self.local_rank)
        torch.cuda.set_device(self.device)
    else:
        self.device = torch.device("cpu")

    self.envs = construct_envs(self.config, get_env_class(self.config.ENV_NAME))

    ppo_cfg = self.config.RL.PPO
    if (
        not os.path.isdir(self.config.CHECKPOINT_FOLDER)
        and self.world_rank == 0
    ):
        os.makedirs(self.config.CHECKPOINT_FOLDER)

    self._setup_actor_critic_agent(ppo_cfg)
    self.agent.init_distributed(find_unused_params=True)

    if self.world_rank == 0:
        logger.info(
            "agent number of trainable parameters: {}".format(
                sum(
                    param.numel()
                    for param in self.agent.parameters()
                    if param.requires_grad
                )
            )
        )

    observations = self.envs.reset()
    batch = batch_obs(observations, device=self.device)

    obs_space = self.envs.observation_spaces[0]
    if self._static_encoder:
        self._encoder = self.actor_critic.net.visual_encoder
        obs_space = SpaceDict(
            {
                "visual_features": spaces.Box(
                    low=np.finfo(np.float32).min,
                    high=np.finfo(np.float32).max,
                    shape=self._encoder.output_shape,
                    dtype=np.float32,
                ),
                **obs_space.spaces,
            }
        )
        with torch.no_grad():
            batch["visual_features"] = self._encoder(batch)

    rollouts = RolloutStorage(
        ppo_cfg.num_steps,
        self.envs.num_envs,
        obs_space,
        self.envs.action_spaces[0],
        ppo_cfg.hidden_size,
        num_recurrent_layers=self.actor_critic.net.num_recurrent_layers,
    )
    rollouts.to(self.device)

    for sensor in rollouts.observations:
        rollouts.observations[sensor][0].copy_(batch[sensor])

    # batch and observations may contain shared PyTorch CUDA
    # tensors. We must explicitly clear them here otherwise
    # they will be kept in memory for the entire duration of training!
    batch = None
    observations = None

    current_episode_reward = torch.zeros(self.envs.num_envs, 1, device=self.device)
    running_episode_stats = dict(
        count=torch.zeros(self.envs.num_envs, 1, device=self.device),
        reward=torch.zeros(self.envs.num_envs, 1, device=self.device),
    )
    window_episode_stats = defaultdict(
        lambda: deque(maxlen=ppo_cfg.reward_window_size)
    )

    t_start = time.time()
    env_time = 0
    pth_time = 0
    count_steps = 0
    count_checkpoints = 0
    start_update = 0
    prev_time = 0

    lr_scheduler = LambdaLR(
        optimizer=self.agent.optimizer,
        lr_lambda=lambda x: linear_decay(x, self.config.NUM_UPDATES),
    )

    interrupted_state = load_interrupted_state()
    if interrupted_state is not None:
        self.agent.load_state_dict(interrupted_state["state_dict"])
        self.agent.optimizer.load_state_dict(interrupted_state["optim_state"])
        lr_scheduler.load_state_dict(interrupted_state["lr_sched_state"])

        requeue_stats = interrupted_state["requeue_stats"]
        env_time = requeue_stats["env_time"]
        pth_time = requeue_stats["pth_time"]
        count_steps = requeue_stats["count_steps"]
        count_checkpoints = requeue_stats["count_checkpoints"]
        start_update = requeue_stats["start_update"]
        prev_time = requeue_stats["prev_time"]

    with (
        TensorboardWriter(self.config.TENSORBOARD_DIR, flush_secs=self.flush_secs)
        if self.world_rank == 0
        else contextlib.suppress()
    ) as writer:
        for update in range(start_update, self.config.NUM_UPDATES):
            if ppo_cfg.use_linear_lr_decay:
                lr_scheduler.step()

            if ppo_cfg.use_linear_clip_decay:
                self.agent.clip_param = ppo_cfg.clip_param * linear_decay(
                    update, self.config.NUM_UPDATES
                )

            if EXIT.is_set():
                self.envs.close()

                if REQUEUE.is_set() and self.world_rank == 0:
                    requeue_stats = dict(
                        env_time=env_time,
                        pth_time=pth_time,
                        count_steps=count_steps,
                        count_checkpoints=count_checkpoints,
                        start_update=update,
                        prev_time=(time.time() - t_start) + prev_time,
                    )
                    save_interrupted_state(
                        dict(
                            state_dict=self.agent.state_dict(),
                            optim_state=self.agent.optimizer.state_dict(),
                            lr_sched_state=lr_scheduler.state_dict(),
                            config=self.config,
                            requeue_stats=requeue_stats,
                        )
                    )

                requeue_job()
                return

            count_steps_delta = 0
            self.agent.eval()
            for step in range(ppo_cfg.num_steps):
                (
                    delta_pth_time,
                    delta_env_time,
                    delta_steps,
                ) = self._collect_rollout_step(
                    rollouts, current_episode_reward, running_episode_stats
                )
                pth_time += delta_pth_time
                env_time += delta_env_time
                count_steps_delta += delta_steps

                # This is where the preemption of workers happens. If a
                # worker detects it will be a straggler, it preempts itself!
                if (
                    step >= ppo_cfg.num_steps * self.SHORT_ROLLOUT_THRESHOLD
                ) and int(num_rollouts_done_store.get("num_done")) > (
                    self.config.RL.DDPPO.sync_frac * self.world_size
                ):
                    break

            num_rollouts_done_store.add("num_done", 1)

            self.agent.train()
            if self._static_encoder:
                self._encoder.eval()

            (
                delta_pth_time,
                value_loss,
                action_loss,
                dist_entropy,
            ) = self._update_agent(ppo_cfg, rollouts)
            pth_time += delta_pth_time

            stats_ordering = list(sorted(running_episode_stats.keys()))
            stats = torch.stack(
                [running_episode_stats[k] for k in stats_ordering], 0
            )
            distrib.all_reduce(stats)

            for i, k in enumerate(stats_ordering):
                window_episode_stats[k].append(stats[i].clone())

            stats = torch.tensor(
                [value_loss, action_loss, count_steps_delta],
                device=self.device,
            )
            distrib.all_reduce(stats)
            count_steps += stats[2].item()

            if self.world_rank == 0:
                num_rollouts_done_store.set("num_done", "0")

                losses = [
                    stats[0].item() / self.world_size,
                    stats[1].item() / self.world_size,
                ]
                deltas = {
                    k: (
                        (v[-1] - v[0]).sum().item()
                        if len(v) > 1
                        else v[0].sum().item()
                    )
                    for k, v in window_episode_stats.items()
                }
                deltas["count"] = max(deltas["count"], 1.0)

                writer.add_scalar(
                    "reward",
                    deltas["reward"] / deltas["count"],
                    count_steps,
                )

                # Check to see if there are any metrics
                # that haven't been logged yet
                metrics = {
                    k: v / deltas["count"]
                    for k, v in deltas.items()
                    if k not in {"reward", "count"}
                }
                if len(metrics) > 0:
                    writer.add_scalars("metrics", metrics, count_steps)

                writer.add_scalars(
                    "losses",
                    {k: l for l, k in zip(losses, ["value", "policy"])},
                    count_steps,
                )

                # log stats
                if update > 0 and update % self.config.LOG_INTERVAL == 0:
                    logger.info(
                        "update: {}\tfps: {:.3f}\t".format(
                            update,
                            count_steps / ((time.time() - t_start) + prev_time),
                        )
                    )
                    logger.info(
                        "update: {}\tenv-time: {:.3f}s\tpth-time: {:.3f}s\t"
                        "frames: {}".format(update, env_time, pth_time, count_steps)
                    )
                    logger.info(
                        "Average window size: {} {}".format(
                            len(window_episode_stats["count"]),
                            " ".join(
                                "{}: {:.3f}".format(k, v / deltas["count"])
                                for k, v in deltas.items()
                                if k != "count"
                            ),
                        )
                    )

                # checkpoint model
                if update % self.config.CHECKPOINT_INTERVAL == 0:
                    self.save_checkpoint(
                        f"ckpt.{count_checkpoints}.pth",
                        dict(step=count_steps),
                    )
                    count_checkpoints += 1

        self.envs.close()
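# For reference, the linear_decay schedule wired into LambdaLR above is, in
# habitat-baselines, a straight ramp from 1 to 0 over training (sketch):
def linear_decay(epoch: int, total_num_updates: int) -> float:
    # multiplier applied to the base lr at the given update index
    return 1 - (epoch / float(total_num_updates))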
def __init__(self, config: Config) -> None:
    """Constructor

    :param config: config for the environment. Should contain id for
        simulator and ``task_name`` which are passed into ``make_sim`` and
        ``make_task``.
    """
    assert config.is_frozen(), (
        "Freeze the config before creating the "
        "environment, use config.freeze()."
    )
    self._config = config
    self._current_episode_index = None
    self._current_episode = None
    iter_option_dict = {
        k.lower(): v
        for k, v in config.ENVIRONMENT.ITERATOR_OPTIONS.items()
    }

    self._scenes = config.DATASET.CONTENT_SCENES
    self._swap_building_every = (
        config.ENVIRONMENT.ITERATOR_OPTIONS.MAX_SCENE_REPEAT_EPISODES
    )
    print('[HabitatEnv] Total {} scenes : '.format(len(self._scenes)), self._scenes)
    print('[HabitatEnv] swap building every', self._swap_building_every)
    self._current_scene_episode_idx = 0
    self._current_scene_idx = 0

    self._config.defrost()
    if 'mp3d' in config.DATASET.DATA_PATH:
        self._config.SIMULATOR.SCENE = os.path.join(
            config.DATASET.SCENES_DIR,
            'mp3d/{}/{}.glb'.format(self._scenes[0], self._scenes[0]),
        )
    else:
        self._config.SIMULATOR.SCENE = os.path.join(
            config.DATASET.SCENES_DIR,
            'gibson_habitat/{}.glb'.format(self._scenes[0]),
        )
        if not os.path.exists(self._config.SIMULATOR.SCENE):
            self._config.SIMULATOR.SCENE = os.path.join(
                self._config.DATASET.SCENES_DIR,
                'gibson_more/{}.glb'.format(self._scenes[0]),
            )
    self._config.freeze()

    self._sim = make_sim(
        id_sim=self._config.SIMULATOR.TYPE, config=self._config.SIMULATOR
    )
    self._task = make_task(
        self._config.TASK.TYPE, config=self._config.TASK, sim=self._sim
    )
    self.observation_space = SpaceDict(
        {
            **self._sim.sensor_suite.observation_spaces.spaces,
            **self._task.sensor_suite.observation_spaces.spaces,
        }
    )
    self.action_space = self._task.action_space

    self._max_episode_seconds = (
        self._config.ENVIRONMENT.MAX_EPISODE_SECONDS
    )
    self._max_episode_steps = self._config.ENVIRONMENT.MAX_EPISODE_STEPS
    self._elapsed_steps = 0
    self._episode_start_time: Optional[float] = None
    self._episode_over = False

    # TODO: list up demonstration data
    self._episode_dataset = {}
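# Worked example of the scene-path selection above: with
# DATASET.SCENES_DIR = "data/scene_datasets" and first scene "17DRP5sb8fy",
# an mp3d data path resolves to
# "data/scene_datasets/mp3d/17DRP5sb8fy/17DRP5sb8fy.glb"; otherwise the
# gibson_habitat .glb is tried first, falling back to gibson_more.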
def __init__(self, config: Config, dataset: Optional[Dataset] = None):
    self.noise = NOISY
    print('[VisTargetNavEnv] NOISY ACTUATION : ', self.noise)
    if hasattr(config, 'AGENT_TASK'):
        self.agent_task = config.AGENT_TASK
    else:
        self.agent_task = 'search'
    if self.agent_task == 'homing':
        self.num_goals = 2
    else:
        self.num_goals = config.NUM_GOALS

    task_config = config.TASK_CONFIG
    task_config.defrost()

    # task_config.TASK.TOP_DOWN_MAP.MAP_RESOLUTION = 1250
    task_config.TASK.TOP_DOWN_MAP.DRAW_SOURCE = True
    task_config.TASK.TOP_DOWN_MAP.DRAW_SHORTEST_PATH = True
    task_config.TASK.TOP_DOWN_MAP.FOG_OF_WAR.VISIBILITY_DIST = 2.0
    task_config.TASK.TOP_DOWN_MAP.FOG_OF_WAR.FOV = 360
    task_config.TASK.TOP_DOWN_MAP.FOG_OF_WAR.DRAW = True
    task_config.TASK.TOP_DOWN_MAP.DRAW_VIEW_POINTS = False
    task_config.TASK.TOP_DOWN_MAP.DRAW_GOAL_POSITIONS = True
    task_config.TASK.TOP_DOWN_MAP.DRAW_GOAL_AABBS = False

    if ('GMT' in config.POLICY or 'NTS' in config.POLICY) and RENDER:
        task_config.TASK.TOP_DOWN_GRAPH_MAP = (
            config.TASK_CONFIG.TASK.TOP_DOWN_MAP.clone()
        )
        if 'GMT' in config.POLICY:
            task_config.TASK.TOP_DOWN_GRAPH_MAP.TYPE = "TopDownGraphMap"
        elif 'NTS' in config.POLICY:
            task_config.TASK.TOP_DOWN_GRAPH_MAP.TYPE = 'NTSGraphMap'
        task_config.TASK.TOP_DOWN_GRAPH_MAP.MAP_RESOLUTION = 4000
        task_config.TASK.TOP_DOWN_GRAPH_MAP.NUM_TOPDOWN_MAP_SAMPLE_POINTS = 20000
        task_config.TASK.MEASUREMENTS += ['TOP_DOWN_GRAPH_MAP']
        if 'TOP_DOWN_MAP' in config.TASK_CONFIG.TASK.MEASUREMENTS:
            task_config.TASK.MEASUREMENTS = [
                k for k in task_config.TASK.MEASUREMENTS
                if 'TOP_DOWN_MAP' != k
            ]

    task_config.SIMULATOR.ACTION_SPACE_CONFIG = "CustomActionSpaceConfiguration"
    task_config.TASK.POSSIBLE_ACTIONS = task_config.TASK.POSSIBLE_ACTIONS + [
        'NOISY_FORWARD', 'NOISY_RIGHT', 'NOISY_LEFT'
    ]
    task_config.TASK.ACTIONS.NOISY_FORWARD = habitat.config.Config()
    task_config.TASK.ACTIONS.NOISY_FORWARD.TYPE = "NOISYFORWARD"
    task_config.TASK.ACTIONS.NOISY_RIGHT = habitat.config.Config()
    task_config.TASK.ACTIONS.NOISY_RIGHT.TYPE = "NOISYRIGHT"
    task_config.TASK.ACTIONS.NOISY_LEFT = habitat.config.Config()
    task_config.TASK.ACTIONS.NOISY_LEFT.TYPE = "NOISYLEFT"

    task_config.TASK.MEASUREMENTS = (
        ['GOAL_INDEX'] + task_config.TASK.MEASUREMENTS + ['SOFT_SPL']
    )
    task_config.TASK.DISTANCE_TO_GOAL.TYPE = 'Custom_DistanceToGoal'
    if self.agent_task != 'search':
        task_config.TASK.SPL.TYPE = 'Custom_SPL'
    task_config.TASK.SOFT_SPL.TYPE = 'Custom_SoftSPL'
    task_config.TASK.GOAL_INDEX = task_config.TASK.SPL.clone()
    task_config.TASK.GOAL_INDEX.TYPE = 'GoalIndex'
    task_config.freeze()

    self.config = config
    self._core_env_config = config.TASK_CONFIG
    self._reward_measure_name = config.REWARD_METHOD
    self._success_measure_name = config.SUCCESS_MEASURE
    self.success_distance = config.SUCCESS_DISTANCE
    self._previous_measure = None
    self._previous_action = -1
    self.time_t = 0
    self.stuck = 0
    self.follower = None

    if 'NOISY_FORWARD' not in HabitatSimActions:
        HabitatSimActions.extend_action_space("NOISY_FORWARD")
        HabitatSimActions.extend_action_space("NOISY_RIGHT")
        HabitatSimActions.extend_action_space("NOISY_LEFT")

    if 'STOP' in task_config.TASK.POSSIBLE_ACTIONS:
        self.action_dict = {
            0: HabitatSimActions.STOP,
            1: "NOISY_FORWARD",
            2: "NOISY_LEFT",
            3: "NOISY_RIGHT",
        }
    else:
        self.action_dict = {
            0: "NOISY_FORWARD",
            1: "NOISY_LEFT",
            2: "NOISY_RIGHT",
        }

    super().__init__(self._core_env_config, dataset)

    act_dict = {
        "MOVE_FORWARD": EmptySpace(),
        'TURN_LEFT': EmptySpace(),
        'TURN_RIGHT': EmptySpace(),
    }
    if 'STOP' in task_config.TASK.POSSIBLE_ACTIONS:
        act_dict.update({'STOP': EmptySpace()})
    self.action_space = ActionSpace(act_dict)

    obs_dict = {
        'panoramic_rgb':
            self.habitat_env._task.sensor_suite.observation_spaces.spaces['panoramic_rgb'],
        'panoramic_depth':
            self.habitat_env._task.sensor_suite.observation_spaces.spaces['panoramic_depth'],
        'target_goal':
            self.habitat_env._task.sensor_suite.observation_spaces.spaces['target_goal'],
        'step': Box(low=np.array(0), high=np.array(500), dtype=np.float32),
        'prev_act': Box(low=np.array(-1), high=np.array(self.action_space.n), dtype=np.int32),
        'gt_action': Box(low=np.array(-1), high=np.array(self.action_space.n), dtype=np.int32),
        'position': Box(low=-np.Inf, high=np.Inf, shape=(3,), dtype=np.float32),
        'target_pose': Box(low=-np.Inf, high=np.Inf, shape=(3,), dtype=np.float32),
        'distance': Box(low=-np.Inf, high=np.Inf, shape=(1,), dtype=np.float32),
    }

    if 'GMT' in config.POLICY and RENDER:
        self.mapper = self.habitat_env.task.measurements.measures['top_down_map']
        # obs_dict.update({
        #     'unexplored': Box(low=0, high=1, shape=(self.mapper.delta,), dtype=np.int32),
        #     'neighbors': Box(low=0, high=1, shape=(self.mapper.delta,), dtype=np.int32),
        # })
    else:
        self.mapper = None

    if 'aux' in self.config.POLICY:
        self.return_have_been = True
        self.return_target_dist_score = True
        obs_dict.update({
            'have_been': Box(low=0, high=1, shape=(1,), dtype=np.int32),
            'target_dist_score': Box(low=0, high=1, shape=(1,), dtype=np.float32),
        })
    else:
        self.return_have_been = False
        self.return_target_dist_score = False

    self.observation_space = SpaceDict(obs_dict)

    if config.DIFFICULTY == 'easy':
        self.habitat_env.difficulty = 'easy'
        self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 1.5, 3.0
    elif config.DIFFICULTY == 'medium':
        self.habitat_env.difficulty = 'medium'
        self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 3.0, 5.0
    elif config.DIFFICULTY == 'hard':
        self.habitat_env.difficulty = 'hard'
        self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 5.0, 10.0
    elif config.DIFFICULTY == 'random':
        self.habitat_env.difficulty = 'random'
        self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 3.0, 10.0
    else:
        raise NotImplementedError

    self.habitat_env._num_goals = self.num_goals
    self.habitat_env._agent_task = self.agent_task
    print('current task : %s' % self.agent_task)
    print('current difficulty %s, MIN_DIST %f, MAX_DIST %f - # goals %d' % (
        config.DIFFICULTY,
        self.habitat_env.MIN_DIST,
        self.habitat_env.MAX_DIST,
        self.habitat_env._num_goals,
    ))
    self.min_measure = self.habitat_env.MAX_DIST

    self.reward_method = config.REWARD_METHOD
    if self.reward_method == 'progress':
        self.get_reward = self.get_progress_reward
    elif self.reward_method == 'milestone':
        self.get_reward = self.get_milestone_reward
    elif self.reward_method == 'coverage':
        self.get_reward = self.get_coverage_reward

    self.run_mode = 'RL'
    self.number_of_episodes = 1000
    self.need_gt_action = False
    self.has_log_info = None
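# --- Illustrative sketch (not necessarily this fork's implementation) -------
# get_progress_reward, selected above, commonly shapes reward as the decrease
# in the tracked measure between steps; the names below mirror attributes set
# in __init__, but the exact formula here is an assumption.
def _progress_reward_sketch(env) -> float:
    current = env.habitat_env.get_metrics()[env._reward_measure_name]
    reward = env._previous_measure - current  # > 0 when the agent gets closer
    env._previous_measure = current
    return reward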