def __init__(self, cfg: Config, num_envs, observation_spaces, device, with_training=True, tb_dir=None):
    super().__init__()
    # Get config params
    self.is_training_mode = cfg.train
    self.experience_buffer_size = cfg.experience_buffer_size // num_envs
    self.with_training = with_training
    self.similarity_aggregation = cfg.similarity_aggregation
    self.similarity_percentile = cfg.similarity_percentile
    self.curiosity_bonus_scale_a = cfg.curiosity_bonus_scale_a
    self.reward_shift_b = cfg.reward_shift_b
    self.novelty_threshold = cfg.novelty_threshold
    self.num_train_epochs = cfg.num_train_epochs
    self.batch_size = cfg.batch_size
    self.max_action_distance_k = cfg.max_action_distance_k
    self.negative_sample_multiplier = cfg.negative_sample_multiplier
    self.log_freq = cfg.log_freq
    self.device = device

    # Initialize training experience memory
    if with_training:
        self.rollout = ObsExperienceRollout(cfg.experience_buffer_size,
                                            num_envs, observation_spaces,
                                            cfg.num_recurrent_steps)

    # Initialize reachability feature extractor & network
    self.rex = ReachabilityFeatures(observation_spaces, pretrained=True)
    self.r_net = ReachabilityNet(cfg.feature_extractor_size)

    # Initialize memory
    self._memory = torch.FloatTensor(num_envs, cfg.memory_size,
                                     cfg.feature_extractor_size).to(device)
    self._memory.zero_()
    self._memory_mask = torch.LongTensor(num_envs).to(device)
    self._memory_mask.zero_()

    # Initialize training params
    if with_training:
        optimizer = getattr(torch.optim, cfg.optimizer)
        self.optimizer = optimizer(
            list(self.rex.parameters()) + list(self.r_net.parameters()),
            **dict(cfg.optimizer_args))
        self.criterion = nn.CrossEntropyLoss()
        self._step = 0
        self.is_trained = torch.BoolTensor(
            [False])  # So as to be saved in checkpoint
        assert tb_dir is not None, "No tensorboard directory set"
        self.tb_writer = TensorboardWriter(tb_dir, flush_secs=30)
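# A minimal, self-contained sketch of how hyperparameters like the ones read above
# (curiosity_bonus_scale_a, reward_shift_b, similarity_percentile) are typically
# combined in episodic-curiosity style modules. The aggregation and formula below
# are an assumption for illustration, not this module's actual forward pass.
import numpy as np


def curiosity_bonus(similarities, aggregation="percentile", percentile=90,
                    scale_a=0.5, shift_b=0.5):
    """Turn per-memory-slot reachability similarities into a reward bonus."""
    similarities = np.asarray(similarities, dtype=np.float32)
    if aggregation == "max":
        similarity = similarities.max()
    else:  # "percentile"
        similarity = np.percentile(similarities, percentile)
    # High similarity to memory -> low bonus; unfamiliar observation -> high bonus.
    return scale_a * (shift_b - similarity)


if __name__ == "__main__":
    print(curiosity_bonus([0.1, 0.2, 0.05]))   # novel observation, positive bonus
    print(curiosity_bonus([0.9, 0.95, 0.99]))  # familiar observation, negative bonus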
def generate_video(
    video_option: List[str],
    video_dir: Optional[str],
    images: List[np.ndarray],
    episode_id: Union[int, str],
    checkpoint_idx: int,
    metrics: Dict[str, float],
    tb_writer: TensorboardWriter,
    fps: int = 10,
) -> Optional[str]:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        metrics: metric name-value pairs used for video naming.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.

    Returns:
        The generated video name, or None if no images were given.
    """
    if len(images) < 1:
        return None

    metric_strs = []
    for k, v in metrics.items():
        if isinstance(v, str):
            metric_strs.append(f"{k}={v}")
        else:
            metric_strs.append(f"{k}={v:.2f}")

    video_name = f"episode={episode_id}-ckpt={checkpoint_idx}-" + "-".join(
        metric_strs)
    if "disk" in video_option:
        assert video_dir is not None
        images_to_video(images, video_dir, video_name, fps=fps)
    if "tensorboard" in video_option:
        tb_writer.add_video_from_np_images(f"episode{episode_id}",
                                           checkpoint_idx, images, fps=fps)
    return video_name
def eval(self) -> None:
    r"""Main method of trainer evaluation. Calls _eval_checkpoint() that is
    specified in the Trainer class that inherits from BaseRLTrainer or BaseILTrainer.

    Returns:
        None
    """
    self.device = (
        torch.device("cuda", self.config.TORCH_GPU_ID)
        if torch.cuda.is_available()
        else torch.device("cpu")
    )

    if "tensorboard" in self.config.VIDEO_OPTION:
        assert (
            len(self.config.TENSORBOARD_DIR) > 0
        ), "Must specify a tensorboard directory for video display"
        os.makedirs(self.config.TENSORBOARD_DIR, exist_ok=True)
    if "disk" in self.config.VIDEO_OPTION:
        assert (
            len(self.config.VIDEO_DIR) > 0
        ), "Must specify a directory for storing videos on disk"

    with TensorboardWriter(
        self.config.TENSORBOARD_DIR, flush_secs=self.flush_secs
    ) as writer:
        if os.path.isfile(self.config.EVAL_CKPT_PATH_DIR):
            # evaluate a single checkpoint
            proposed_index = get_checkpoint_id(
                self.config.EVAL_CKPT_PATH_DIR
            )
            if proposed_index is not None:
                ckpt_idx = proposed_index
            else:
                ckpt_idx = 0
            self._eval_checkpoint(
                self.config.EVAL_CKPT_PATH_DIR,
                writer,
                checkpoint_index=ckpt_idx,
            )
        else:
            # evaluate multiple checkpoints in order
            prev_ckpt_ind = -1
            while True:
                current_ckpt = None
                while current_ckpt is None:
                    current_ckpt = poll_checkpoint_folder(
                        self.config.EVAL_CKPT_PATH_DIR, prev_ckpt_ind
                    )
                    time.sleep(2)  # sleep for 2 secs before polling again
                logger.info(f"=======current_ckpt: {current_ckpt}=======")
                prev_ckpt_ind += 1
                self._eval_checkpoint(
                    checkpoint_path=current_ckpt,
                    writer=writer,
                    checkpoint_index=prev_ckpt_ind,
                )
def eval(self,
         eval_ckpt=None,
         log_diagnostics=[],
         output_dir=".",
         label="eval") -> None:
    r"""Main method of trainer evaluation. Calls _eval_checkpoint() that is
    specified in the Trainer class that inherits from BaseRLTrainer.

    Returns:
        None
    """
    self.device = (torch.device("cuda", self.config.TORCH_GPU_ID)
                   if torch.cuda.is_available() else torch.device("cpu"))

    if "tensorboard" in self.config.VIDEO_OPTION:
        assert (len(self.config.TENSORBOARD_DIR) > 0
                ), "Must specify a tensorboard directory for video display"
    if "disk" in self.config.VIDEO_OPTION:
        assert (len(self.config.VIDEO_DIR) > 0
                ), "Must specify a directory for storing videos on disk"

    with TensorboardWriter(self.config.TENSORBOARD_DIR,
                           flush_secs=self.flush_secs) as writer:
        if eval_ckpt is not None:
            # evaluate a single checkpoint from the given path
            ckpt_index = os.path.split(eval_ckpt)[1].split(".")[-2]
            self._eval_checkpoint(eval_ckpt,
                                  writer,
                                  checkpoint_index=ckpt_index,
                                  log_diagnostics=log_diagnostics,
                                  output_dir=output_dir,
                                  label=label)
        else:
            if os.path.isfile(self.config.EVAL_CKPT_PATH_DIR):
                # evaluate a single checkpoint; parse the index from the filename
                ckpt_index = self.config.EVAL_CKPT_PATH_DIR.split('.')[-2]
                self._eval_checkpoint(self.config.EVAL_CKPT_PATH_DIR,
                                      writer,
                                      checkpoint_index=ckpt_index)
            else:
                # evaluate multiple checkpoints in order
                prev_ckpt_ind = -1
                while True:
                    current_ckpt = None
                    while current_ckpt is None:
                        current_ckpt = poll_checkpoint_folder(
                            self.config.EVAL_CKPT_PATH_DIR, prev_ckpt_ind)
                        time.sleep(2)  # sleep for 2 secs before polling again
                    logger.info(f"=======current_ckpt: {current_ckpt}=======")
                    prev_ckpt_ind += 1
                    self._eval_checkpoint(
                        checkpoint_path=current_ckpt,
                        writer=writer,
                        checkpoint_index=prev_ckpt_ind,
                    )
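# The two eval() variants above recover the checkpoint index from filenames shaped
# like "ckpt.17.pth" via split(".")[-2]. A tiny self-contained illustration of that
# convention (the helper name is mine, not part of the trainer API):
import os


def parse_ckpt_index(path: str) -> int:
    """Return N for paths shaped like .../ckpt.N.pth."""
    filename = os.path.split(path)[1]      # "ckpt.17.pth"
    return int(filename.split(".")[-2])    # "17" -> 17


assert parse_ckpt_index("/data/checkpoints/ckpt.17.pth") == 17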
def generate_video(
    video_option: List[str],
    video_dir: Optional[str],
    images: List[np.ndarray],
    episode_id: int,
    checkpoint_idx: int,
    tag: str,
    metrics: Dict[str, float],
    tb_writer: TensorboardWriter,
    fps: int = 10,
) -> None:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        tag: additional tag for naming the video.
        metrics: metric name-value pairs used for video naming.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.

    Returns:
        None
    """
    if len(images) < 1:
        return

    metric_strs = []
    for k, v in metrics.items():
        metric_strs.append(f"{k}={v:.2f}")

    video_name = f"{tag}_episode={episode_id}-ckpt={checkpoint_idx}-" + "-".join(
        metric_strs
    )
    if "disk" in video_option:
        assert video_dir is not None
        images_to_video(images, video_dir, video_name, fps=fps)
    if "tensorboard" in video_option:
        tb_writer.add_video_from_np_images(
            f"episode{episode_id}", checkpoint_idx, images, fps=fps
        )
def generate_video(
    video_option: List[str],
    video_dir: Optional[str],
    images: List[np.ndarray],
    episode_id: int,
    checkpoint_idx: int,
    metric_name: str,
    metric_value: float,
    tb_writer: TensorboardWriter,
    fps: int = 10,
) -> None:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        metric_name: name of the performance metric, e.g. "spl".
        metric_value: value of metric.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.

    Returns:
        None
    """
    if len(images) < 1:
        return

    video_name = (
        f"episode{episode_id}_ckpt{checkpoint_idx}_"
        f"{metric_name}{metric_value:.2f}"
    )
    if "disk" in video_option:
        assert video_dir is not None
        images_to_video(images, video_dir, video_name)
    if "tensorboard" in video_option:
        tb_writer.add_video_from_np_images(f"episode{episode_id}",
                                           checkpoint_idx, images, fps=fps)
def get_logger(config, args, flush_secs):
    import sys
    sys.path.insert(0, './')
    from method.orp_log_adapter import CustomLogger

    if config.write_tb:
        real_tb_dir = os.path.join(config.TENSORBOARD_DIR, args.prefix)

        config.defrost()
        # Inject the prefix into all of the filepaths
        config.VIDEO_DIR = os.path.join(config.VIDEO_DIR, args.prefix)
        config.CHECKPOINT_FOLDER = os.path.join(config.CHECKPOINT_FOLDER,
                                                args.prefix)
        if not os.path.exists(config.VIDEO_DIR):
            os.makedirs(config.VIDEO_DIR)
        if not os.path.exists(config.CHECKPOINT_FOLDER):
            os.makedirs(config.CHECKPOINT_FOLDER)
        config.freeze()

        if not os.path.exists(real_tb_dir):
            os.makedirs(real_tb_dir)
        ret = TensorboardWriter(real_tb_dir, flush_secs=flush_secs)
    else:
        ret = CustomLogger(not config.no_wb, args, config)

    out_cfg_path = os.path.join(config.CHECKPOINT_FOLDER, 'cfg.txt')
    print('out path is ', out_cfg_path)
    with open(out_cfg_path, 'w') as f:
        f.write(str(config))
        f.write('\n')
        f.write(str(args))
        try:
            out = subprocess.check_output(
                ['git', '-C', '../habitat-sim/.git', 'rev-parse', 'HEAD'])
            out = out.decode().strip()
            print(f"Using HabSim version {out}")
            f.write("hab sim version " + out + "\n")
        except Exception:
            print("Could not find HabSim version")
    return ret
def train(self) -> None: r"""Main method for training PPO. Returns: None """ self.envs = construct_envs(self.config, get_env_class(self.config.ENV_NAME)) ppo_cfg = self.config.RL.PPO self.device = (torch.device("cuda", self.config.TORCH_GPU_ID) if torch.cuda.is_available() else torch.device("cpu")) if not os.path.isdir(self.config.CHECKPOINT_FOLDER): os.makedirs(self.config.CHECKPOINT_FOLDER) self._setup_actor_critic_agent(ppo_cfg) logger.info("agent number of parameters: {}".format( sum(param.numel() for param in self.agent.parameters()))) rollouts = RolloutStorage( ppo_cfg.num_steps, self.envs.num_envs, self.envs.observation_spaces[0], self.envs.action_spaces[0], ppo_cfg.hidden_size, ) rollouts.to(self.device) observations = self.envs.reset() batch = batch_obs(observations, device=self.device) for sensor in rollouts.observations: rollouts.observations[sensor][0].copy_(batch[sensor]) # batch and observations may contain shared PyTorch CUDA # tensors. We must explicitly clear them here otherwise # they will be kept in memory for the entire duration of training! batch = None observations = None current_episode_reward = torch.zeros(self.envs.num_envs, 1) running_episode_stats = dict( count=torch.zeros(self.envs.num_envs, 1), reward=torch.zeros(self.envs.num_envs, 1), ) window_episode_stats = defaultdict( lambda: deque(maxlen=ppo_cfg.reward_window_size)) t_start = time.time() env_time = 0 pth_time = 0 count_steps = 0 count_checkpoints = 0 lr_scheduler = LambdaLR( optimizer=self.agent.optimizer, lr_lambda=lambda x: linear_decay(x, self.config.NUM_UPDATES), ) with TensorboardWriter(self.config.TENSORBOARD_DIR, flush_secs=self.flush_secs) as writer: for update in range(self.config.NUM_UPDATES): if ppo_cfg.use_linear_lr_decay: lr_scheduler.step() if ppo_cfg.use_linear_clip_decay: self.agent.clip_param = ppo_cfg.clip_param * linear_decay( update, self.config.NUM_UPDATES) for step in range(ppo_cfg.num_steps): ( delta_pth_time, delta_env_time, delta_steps, ) = self._collect_rollout_step(rollouts, current_episode_reward, running_episode_stats) pth_time += delta_pth_time env_time += delta_env_time count_steps += delta_steps ( delta_pth_time, value_loss, action_loss, dist_entropy, ) = self._update_agent(ppo_cfg, rollouts) pth_time += delta_pth_time for k, v in running_episode_stats.items(): window_episode_stats[k].append(v.clone()) deltas = { k: ((v[-1] - v[0]).sum().item() if len(v) > 1 else v[0].sum().item()) for k, v in window_episode_stats.items() } deltas["count"] = max(deltas["count"], 1.0) writer.add_scalar("reward", deltas["reward"] / deltas["count"], count_steps) # Check to see if there are any metrics # that haven't been logged yet metrics = { k: v / deltas["count"] for k, v in deltas.items() if k not in {"reward", "count"} } if len(metrics) > 0: writer.add_scalars("metrics", metrics, count_steps) losses = [value_loss, action_loss] writer.add_scalars( "losses", {k: l for l, k in zip(losses, ["value", "policy"])}, count_steps, ) # log stats if update > 0 and update % self.config.LOG_INTERVAL == 0: logger.info("update: {}\tfps: {:.3f}\t".format( update, count_steps / (time.time() - t_start))) logger.info( "update: {}\tenv-time: {:.3f}s\tpth-time: {:.3f}s\t" "frames: {}".format(update, env_time, pth_time, count_steps)) logger.info("Average window size: {} {}".format( len(window_episode_stats["count"]), " ".join("{}: {:.3f}".format(k, v / deltas["count"]) for k, v in deltas.items() if k != "count"), )) # checkpoint model if update % self.config.CHECKPOINT_INTERVAL == 0: 
self.save_checkpoint(f"ckpt.{count_checkpoints}.pth", dict(step=count_steps)) count_checkpoints += 1 self.envs.close()
def _eval_checkpoint( self, checkpoint_path: str, writer: TensorboardWriter, checkpoint_index: int = 0, ) -> None: r"""Evaluates a single checkpoint. Args: checkpoint_path: path of checkpoint writer: tensorboard writer object for logging to tensorboard checkpoint_index: index of cur checkpoint for logging Returns: None """ # Map location CPU is almost always better than mapping to a CUDA device. ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu") if self.config.EVAL.USE_CKPT_CONFIG: config = self._setup_eval_config(ckpt_dict["config"]) else: config = self.config.clone() ppo_cfg = config.RL.PPO config.defrost() config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT config.freeze() if len(self.config.VIDEO_OPTION) > 0: config.defrost() config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP") config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS") config.freeze() logger.info(f"env config: {config}") self.envs = construct_envs(config, get_env_class(config.ENV_NAME)) self._setup_actor_critic_agent(ppo_cfg) self.actor_critic.eval() if self._static_encoder: self._encoder = self.agent.actor_critic.net.visual_encoder self.agent.load_state_dict(ckpt_dict["state_dict"]) self.actor_critic = self.agent.actor_critic observations = self.envs.reset() batch = batch_obs(observations, device=self.device) if self._static_encoder: batch["visual_features"] = self._encoder(batch) batch["prev_visual_features"] = torch.zeros_like( batch["visual_features"]) current_episode_reward = torch.zeros(self.envs.num_envs, 1, device=self.device) test_recurrent_hidden_states = torch.zeros( self.actor_critic.net.num_recurrent_layers, self.config.NUM_PROCESSES, ppo_cfg.hidden_size, device=self.device, ) prev_actions = torch.zeros(self.config.NUM_PROCESSES, 1, device=self.device, dtype=torch.long) not_done_masks = torch.zeros(self.config.NUM_PROCESSES, 1, device=self.device) stats_episodes = dict() # dict of dicts that stores stats per episode rgb_frames = [[] for _ in range(self.config.NUM_PROCESSES) ] # type: List[List[np.ndarray]] if len(self.config.VIDEO_OPTION) > 0: os.makedirs(self.config.VIDEO_DIR, exist_ok=True) number_of_eval_episodes = self.config.TEST_EPISODE_COUNT if number_of_eval_episodes == -1: number_of_eval_episodes = sum(self.envs.number_of_episodes) else: total_num_eps = sum(self.envs.number_of_episodes) if total_num_eps < number_of_eval_episodes: logger.warn( f"Config specified {number_of_eval_episodes} eval episodes" ", dataset only has {total_num_eps}.") logger.warn(f"Evaluating with {total_num_eps} instead.") number_of_eval_episodes = total_num_eps pbar = tqdm.tqdm(total=number_of_eval_episodes) self.actor_critic.eval() while (len(stats_episodes) < number_of_eval_episodes and self.envs.num_envs > 0): current_episodes = self.envs.current_episodes() with torch.no_grad(): step_batch = batch ( _, actions, _, test_recurrent_hidden_states, ) = self.actor_critic.act( batch, test_recurrent_hidden_states, prev_actions, not_done_masks, deterministic=False, ) prev_actions.copy_(actions) outputs = self.envs.step([a[0].item() for a in actions]) observations, rewards, dones, infos = [ list(x) for x in zip(*outputs) ] batch = batch_obs(observations, device=self.device) if self._static_encoder: batch["prev_visual_features"] = step_batch["visual_features"] batch["visual_features"] = self._encoder(batch) not_done_masks = torch.tensor( [[0.0] if done else [1.0] for done in dones], dtype=torch.float, device=self.device, ) rewards = torch.tensor(rewards, dtype=torch.float, device=self.device).unsqueeze(1) 
current_episode_reward += rewards next_episodes = self.envs.current_episodes() envs_to_pause = [] n_envs = self.envs.num_envs for i in range(n_envs): if ( next_episodes[i].scene_id, next_episodes[i].episode_id, ) in stats_episodes: envs_to_pause.append(i) # episode ended if not_done_masks[i].item() == 0: pbar.update() episode_stats = dict() episode_stats["reward"] = current_episode_reward[i].item() episode_stats.update( self._extract_scalars_from_info(infos[i])) current_episode_reward[i] = 0 # use scene_id + episode_id as unique id for storing stats stats_episodes[( current_episodes[i].scene_id, current_episodes[i].episode_id, )] = episode_stats if len(self.config.VIDEO_OPTION) > 0: generate_video( video_option=self.config.VIDEO_OPTION, video_dir=self.config.VIDEO_DIR, images=rgb_frames[i], episode_id=current_episodes[i].episode_id, checkpoint_idx=checkpoint_index, metrics=self._extract_scalars_from_info(infos[i]), tb_writer=writer, ) rgb_frames[i] = [] # episode continues elif len(self.config.VIDEO_OPTION) > 0: frame = observations_to_image(observations[i], infos[i]) rgb_frames[i].append(frame) ( self.envs, test_recurrent_hidden_states, not_done_masks, current_episode_reward, prev_actions, batch, rgb_frames, ) = self._pause_envs( envs_to_pause, self.envs, test_recurrent_hidden_states, not_done_masks, current_episode_reward, prev_actions, batch, rgb_frames, ) num_episodes = len(stats_episodes) aggregated_stats = dict() for stat_key in next(iter(stats_episodes.values())).keys(): aggregated_stats[stat_key] = ( sum([v[stat_key] for v in stats_episodes.values()]) / num_episodes) for k, v in aggregated_stats.items(): logger.info(f"Average episode {k}: {v:.4f}") step_id = checkpoint_index if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]: step_id = ckpt_dict["extra_state"]["step"] writer.add_scalars( "eval_reward", {"average reward": aggregated_stats["reward"]}, step_id, ) metrics = {k: v for k, v in aggregated_stats.items() if k != "reward"} if len(metrics) > 0: writer.add_scalars("eval_metrics", metrics, step_id) self.envs.close()
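# The _eval_checkpoint above pauses envs whose episode was already evaluated and
# then slices every batched tensor down to the still-active env indices via
# _pause_envs. A minimal, self-contained version of that index bookkeeping
# (tensor names and shapes here are illustrative only):
import torch

num_envs = 4
envs_to_pause = [1, 3]

state_index = [i for i in range(num_envs) if i not in envs_to_pause]

hidden_states = torch.arange(num_envs * 2).view(num_envs, 2).float()
not_done_masks = torch.ones(num_envs, 1)

hidden_states = hidden_states[state_index]     # keep rows 0 and 2
not_done_masks = not_done_masks[state_index]
print(hidden_states.shape)  # torch.Size([2, 2])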
def train(self) -> None: r"""Main method for training PPO. Returns: None """ self.envs = construct_envs(self.config, get_env_class(self.config.ENV_NAME)) ppo_cfg = self.config.RL.PPO self.device = torch.device("cuda", self.config.TORCH_GPU_ID) if not os.path.isdir(self.config.CHECKPOINT_FOLDER): os.makedirs(self.config.CHECKPOINT_FOLDER) self._setup_actor_critic_agent(ppo_cfg) logger.info("agent number of parameters: {}".format( sum(param.numel() for param in self.agent.parameters()))) observations = self.envs.reset() batch = batch_obs(observations) rollouts = RolloutStorage( ppo_cfg.num_steps, self.envs.num_envs, self.envs.observation_spaces[0], self.envs.action_spaces[0], ppo_cfg.hidden_size, ) for sensor in rollouts.observations: rollouts.observations[sensor][0].copy_(batch[sensor]) rollouts.to(self.device) episode_rewards = torch.zeros(self.envs.num_envs, 1) episode_counts = torch.zeros(self.envs.num_envs, 1) current_episode_reward = torch.zeros(self.envs.num_envs, 1) window_episode_reward = deque(maxlen=ppo_cfg.reward_window_size) window_episode_counts = deque(maxlen=ppo_cfg.reward_window_size) t_start = time.time() env_time = 0 pth_time = 0 count_steps = 0 count_checkpoints = 0 lr_scheduler = LambdaLR( optimizer=self.agent.optimizer, lr_lambda=lambda x: linear_decay(x, self.config.NUM_UPDATES), ) with TensorboardWriter(self.config.TENSORBOARD_DIR, flush_secs=self.flush_secs) as writer: for update in range(self.config.NUM_UPDATES): if ppo_cfg.use_linear_lr_decay: lr_scheduler.step() if ppo_cfg.use_linear_clip_decay: self.agent.clip_param = ppo_cfg.clip_param * linear_decay( update, self.config.NUM_UPDATES) for step in range(ppo_cfg.num_steps): delta_pth_time, delta_env_time, delta_steps = self._collect_rollout_step( rollouts, current_episode_reward, episode_rewards, episode_counts, ) pth_time += delta_pth_time env_time += delta_env_time count_steps += delta_steps delta_pth_time, value_loss, action_loss, dist_entropy = self._update_agent( ppo_cfg, rollouts) pth_time += delta_pth_time window_episode_reward.append(episode_rewards.clone()) window_episode_counts.append(episode_counts.clone()) losses = [value_loss, action_loss] stats = zip( ["count", "reward"], [window_episode_counts, window_episode_reward], ) deltas = { k: ((v[-1] - v[0]).sum().item() if len(v) > 1 else v[0].sum().item()) for k, v in stats } deltas["count"] = max(deltas["count"], 1.0) writer.add_scalar("reward", deltas["reward"] / deltas["count"], count_steps) writer.add_scalars( "losses", {k: l for l, k in zip(losses, ["value", "policy"])}, count_steps, ) # log stats if update > 0 and update % self.config.LOG_INTERVAL == 0: logger.info("update: {}\tfps: {:.3f}\t".format( update, count_steps / (time.time() - t_start))) logger.info( "update: {}\tenv-time: {:.3f}s\tpth-time: {:.3f}s\t" "frames: {}".format(update, env_time, pth_time, count_steps)) window_rewards = (window_episode_reward[-1] - window_episode_reward[0]).sum() window_counts = (window_episode_counts[-1] - window_episode_counts[0]).sum() if window_counts > 0: logger.info( "Average window size {} reward: {:3f}".format( len(window_episode_reward), (window_rewards / window_counts).item(), )) else: logger.info("No episodes finish in current window") # checkpoint model if update % self.config.CHECKPOINT_INTERVAL == 0: self.save_checkpoint(f"ckpt.{count_checkpoints}.pth") count_checkpoints += 1 self.envs.close()
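# Both train() methods decay the learning rate with LambdaLR and a linear_decay
# schedule. A minimal sketch of that schedule, under the assumption that
# linear_decay(update, total) = 1 - update / total (matching how it is used here):
import torch
from torch.optim.lr_scheduler import LambdaLR


def linear_decay(epoch: int, total_num_updates: int) -> float:
    return 1.0 - (epoch / float(total_num_updates))


params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.Adam(params, lr=2.5e-4)
scheduler = LambdaLR(optimizer, lr_lambda=lambda x: linear_decay(x, 100))

for _ in range(10):
    optimizer.step()
    scheduler.step()
print(optimizer.param_groups[0]["lr"])  # 2.5e-4 * (1 - 10/100) = 2.25e-4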
def _eval_checkpoint( self, checkpoint_path: str, writer: TensorboardWriter, checkpoint_index: int = 0, ) -> None: r"""Evaluates a single checkpoint. Args: checkpoint_path: path of checkpoint writer: tensorboard writer object for logging to tensorboard checkpoint_index: index of cur checkpoint for logging Returns: None """ ckpt_dict = self.load_checkpoint(checkpoint_path, map_location=self.device) config = self._setup_eval_config(ckpt_dict["config"]) ppo_cfg = config.RL.PPO if len(self.config.VIDEO_OPTION) > 0: config.defrost() config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP") config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS") config.freeze() logger.info(f"env config: {config}") self.envs = construct_envs(self.config, get_env_class(self.config.ENV_NAME)) self._setup_actor_critic_agent(ppo_cfg) self.agent.load_state_dict(ckpt_dict["state_dict"]) self.actor_critic = self.agent.actor_critic # get name of performance metric, e.g. "spl" metric_name = self.config.TASK_CONFIG.TASK.MEASUREMENTS[0] metric_cfg = getattr(self.config.TASK_CONFIG.TASK, metric_name) measure_type = baseline_registry.get_measure(metric_cfg.TYPE) assert measure_type is not None, "invalid measurement type {}".format( metric_cfg.TYPE) self.metric_uuid = measure_type(None, None)._get_uuid() observations = self.envs.reset() batch = batch_obs(observations) for sensor in batch: batch[sensor] = batch[sensor].to(self.device) current_episode_reward = torch.zeros(self.envs.num_envs, 1, device=self.device) test_recurrent_hidden_states = torch.zeros( self.actor_critic.net.num_recurrent_layers, self.config.NUM_PROCESSES, ppo_cfg.hidden_size, device=self.device, ) prev_actions = torch.zeros(self.config.NUM_PROCESSES, 1, device=self.device, dtype=torch.long) not_done_masks = torch.zeros(self.config.NUM_PROCESSES, 1, device=self.device) stats_episodes = dict() # dict of dicts that stores stats per episode rgb_frames = [ [] ] * self.config.NUM_PROCESSES # type: List[List[np.ndarray]] if len(self.config.VIDEO_OPTION) > 0: os.makedirs(self.config.VIDEO_DIR, exist_ok=True) while (len(stats_episodes) < self.config.TEST_EPISODE_COUNT and self.envs.num_envs > 0): current_episodes = self.envs.current_episodes() with torch.no_grad(): _, actions, _, test_recurrent_hidden_states = self.actor_critic.act( batch, test_recurrent_hidden_states, prev_actions, not_done_masks, deterministic=False, ) prev_actions.copy_(actions) outputs = self.envs.step([a[0].item() for a in actions]) observations, rewards, dones, infos = [ list(x) for x in zip(*outputs) ] batch = batch_obs(observations) for sensor in batch: batch[sensor] = batch[sensor].to(self.device) not_done_masks = torch.tensor( [[0.0] if done else [1.0] for done in dones], dtype=torch.float, device=self.device, ) rewards = torch.tensor(rewards, dtype=torch.float, device=self.device).unsqueeze(1) current_episode_reward += rewards next_episodes = self.envs.current_episodes() envs_to_pause = [] n_envs = self.envs.num_envs for i in range(n_envs): if ( next_episodes[i].scene_id, next_episodes[i].episode_id, ) in stats_episodes: envs_to_pause.append(i) # episode ended if not_done_masks[i].item() == 0: episode_stats = dict() episode_stats[self.metric_uuid] = infos[i][ self.metric_uuid] episode_stats["success"] = int( infos[i][self.metric_uuid] > 0) episode_stats["reward"] = current_episode_reward[i].item() current_episode_reward[i] = 0 # use scene_id + episode_id as unique id for storing stats stats_episodes[( current_episodes[i].scene_id, current_episodes[i].episode_id, )] = episode_stats if 
len(self.config.VIDEO_OPTION) > 0: generate_video( video_option=self.config.VIDEO_OPTION, video_dir=self.config.VIDEO_DIR, images=rgb_frames[i], episode_id=current_episodes[i].episode_id, checkpoint_idx=checkpoint_index, metric_name=self.metric_uuid, metric_value=infos[i][self.metric_uuid], tb_writer=writer, ) rgb_frames[i] = [] # episode continues elif len(self.config.VIDEO_OPTION) > 0: frame = observations_to_image(observations[i], infos[i]) rgb_frames[i].append(frame) # pausing self.envs with no new episode if len(envs_to_pause) > 0: state_index = list(range(self.envs.num_envs)) for idx in reversed(envs_to_pause): state_index.pop(idx) self.envs.pause_at(idx) # indexing along the batch dimensions test_recurrent_hidden_states = test_recurrent_hidden_states[ state_index] not_done_masks = not_done_masks[state_index] current_episode_reward = current_episode_reward[state_index] prev_actions = prev_actions[state_index] for k, v in batch.items(): batch[k] = v[state_index] if len(self.config.VIDEO_OPTION) > 0: rgb_frames = [rgb_frames[i] for i in state_index] aggregated_stats = dict() for stat_key in next(iter(stats_episodes.values())).keys(): aggregated_stats[stat_key] = sum( [v[stat_key] for v in stats_episodes.values()]) num_episodes = len(stats_episodes) episode_reward_mean = aggregated_stats["reward"] / num_episodes episode_metric_mean = aggregated_stats[self.metric_uuid] / num_episodes episode_success_mean = aggregated_stats["success"] / num_episodes logger.info(f"Average episode reward: {episode_reward_mean:.6f}") logger.info(f"Average episode success: {episode_success_mean:.6f}") logger.info( f"Average episode {self.metric_uuid}: {episode_metric_mean:.6f}") writer.add_scalars( "eval_reward", {"average reward": episode_reward_mean}, checkpoint_index, ) writer.add_scalars( f"eval_{self.metric_uuid}", {f"average {self.metric_uuid}": episode_metric_mean}, checkpoint_index, ) writer.add_scalars( "eval_success", {"average success": episode_success_mean}, checkpoint_index, ) self.envs.close()
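# Note the difference between the two rgb_frames initializations used in this file:
# `[[] for _ in range(n)]` creates n independent lists, while `[[]] * n` (as in the
# _eval_checkpoint above) repeats one shared list, so a frame appended for one env
# would appear under every env. A short demonstration:
shared = [[]] * 3
shared[0].append("frame")
print(shared)  # [['frame'], ['frame'], ['frame']] -- all three entries alias one list

independent = [[] for _ in range(3)]
independent[0].append("frame")
print(independent)  # [['frame'], [], []]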
def _eval_checkpoint(self, checkpoint_path: str, writer: TensorboardWriter, checkpoint_index: int = 0, log_diagnostics=[], output_dir='.', label='.', num_eval_runs=1) -> None: r"""Evaluates a single checkpoint. Args: checkpoint_path: path of checkpoint writer: tensorboard writer object for logging to tensorboard checkpoint_index: index of cur checkpoint for logging Returns: None """ if checkpoint_index == -1: ckpt_file = checkpoint_path.split('/')[-1] split_info = ckpt_file.split('.') checkpoint_index = split_info[1] # Map location CPU is almost always better than mapping to a CUDA device. ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu") if self.config.EVAL.USE_CKPT_CONFIG: config = self._setup_eval_config(ckpt_dict["config"]) else: config = self.config.clone() ppo_cfg = config.RL.PPO task_cfg = config.TASK_CONFIG.TASK config.defrost() config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT config.freeze() if len(self.config.VIDEO_OPTION) > 0: config.defrost() config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP") config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS") config.freeze() logger.info(f"env config: {config}") self.envs = construct_envs(config, get_env_class(config.ENV_NAME)) # pass in aux config if we're doing attention aux_cfg = self.config.RL.AUX_TASKS self._setup_actor_critic_agent(ppo_cfg, task_cfg, aux_cfg) # Check if we accidentally recorded `visual_resnet` in our checkpoint and drop it (it's redundant with `visual_encoder`) ckpt_dict['state_dict'] = { k: v for k, v in ckpt_dict['state_dict'].items() if 'visual_resnet' not in k } self.agent.load_state_dict(ckpt_dict["state_dict"]) logger.info("agent number of trainable parameters: {}".format( sum(param.numel() for param in self.agent.parameters() if param.requires_grad))) self.actor_critic = self.agent.actor_critic observations = self.envs.reset() batch = batch_obs(observations, device=self.device) current_episode_reward = torch.zeros(self.envs.num_envs, 1, device=self.device) test_recurrent_hidden_states = torch.zeros( self.actor_critic.net.num_recurrent_layers, self.config.NUM_PROCESSES, ppo_cfg.hidden_size, device=self.device, ) _, num_recurrent_memories, _ = self._setup_auxiliary_tasks( aux_cfg, ppo_cfg, task_cfg, is_eval=True) if self.config.RL.PPO.policy in MULTIPLE_BELIEF_CLASSES: aux_tasks = self.config.RL.AUX_TASKS.tasks num_recurrent_memories = len(self.config.RL.AUX_TASKS.tasks) test_recurrent_hidden_states = test_recurrent_hidden_states.unsqueeze( 2).repeat(1, 1, num_recurrent_memories, 1) prev_actions = torch.zeros(self.config.NUM_PROCESSES, 1, device=self.device, dtype=torch.long) not_done_masks = torch.zeros(self.config.NUM_PROCESSES, 1, device=self.device) stats_episodes = dict() # dict of dicts that stores stats per episode rgb_frames = [[] for _ in range(self.config.NUM_PROCESSES) ] # type: List[List[np.ndarray]] if len(self.config.VIDEO_OPTION) > 0: os.makedirs(self.config.VIDEO_DIR, exist_ok=True) number_of_eval_episodes = self.config.TEST_EPISODE_COUNT if number_of_eval_episodes == -1: number_of_eval_episodes = sum(self.envs.number_of_episodes) else: total_num_eps = sum(self.envs.number_of_episodes) if total_num_eps < number_of_eval_episodes: logger.warn( f"Config specified {number_of_eval_episodes} eval episodes" ", dataset only has {total_num_eps}.") logger.warn(f"Evaluating with {total_num_eps} instead.") number_of_eval_episodes = total_num_eps videos_cap = 2 # number of videos to generate per checkpoint if len(log_diagnostics) > 0: videos_cap = 10 # video_indices = 
random.sample(range(self.config.TEST_EPISODE_COUNT), # min(videos_cap, self.config.TEST_EPISODE_COUNT)) video_indices = range(10) print(f"Videos: {video_indices}") total_stats = [] dones_per_ep = dict() # Logging more extensive evaluation stats for analysis if len(log_diagnostics) > 0: d_stats = {} for d in log_diagnostics: d_stats[d] = [ [] for _ in range(self.config.NUM_PROCESSES) ] # stored as nested list envs x timesteps x k (# tasks) pbar = tqdm.tqdm(total=number_of_eval_episodes * num_eval_runs) self.agent.eval() while (len(stats_episodes) < number_of_eval_episodes * num_eval_runs and self.envs.num_envs > 0): current_episodes = self.envs.current_episodes() with torch.no_grad(): weights_output = None if self.config.RL.PPO.policy in MULTIPLE_BELIEF_CLASSES: weights_output = torch.empty(self.envs.num_envs, len(aux_tasks)) ( _, actions, _, test_recurrent_hidden_states, ) = self.actor_critic.act(batch, test_recurrent_hidden_states, prev_actions, not_done_masks, deterministic=False, weights_output=weights_output) prev_actions.copy_(actions) for i in range(self.envs.num_envs): if Diagnostics.actions in log_diagnostics: d_stats[Diagnostics.actions][i].append( prev_actions[i].item()) if Diagnostics.weights in log_diagnostics: aux_weights = None if weights_output is None else weights_output[ i] if aux_weights is not None: d_stats[Diagnostics.weights][i].append( aux_weights.half().tolist()) outputs = self.envs.step([a[0].item() for a in actions]) observations, rewards, dones, infos = [ list(x) for x in zip(*outputs) ] batch = batch_obs(observations, device=self.device) not_done_masks = torch.tensor( [[0.0] if done else [1.0] for done in dones], dtype=torch.float, device=self.device, ) rewards = torch.tensor(rewards, dtype=torch.float, device=self.device).unsqueeze(1) current_episode_reward += rewards next_episodes = self.envs.current_episodes() envs_to_pause = [] n_envs = self.envs.num_envs for i in range(n_envs): next_k = ( next_episodes[i].scene_id, next_episodes[i].episode_id, ) if dones_per_ep.get(next_k, 0) == num_eval_runs: envs_to_pause.append(i) # wait for the rest if not_done_masks[i].item() == 0: episode_stats = dict() episode_stats["reward"] = current_episode_reward[i].item() episode_stats.update( self._extract_scalars_from_info(infos[i])) current_episode_reward[i] = 0 # use scene_id + episode_id as unique id for storing stats k = ( current_episodes[i].scene_id, current_episodes[i].episode_id, ) dones_per_ep[k] = dones_per_ep.get(k, 0) + 1 if dones_per_ep.get(k, 0) == 1 and len( self.config.VIDEO_OPTION) > 0 and len( stats_episodes) in video_indices: logger.info(f"Generating video {len(stats_episodes)}") category = getattr(current_episodes[i], "object_category", "") if category != "": category += "_" try: generate_video( video_option=self.config.VIDEO_OPTION, video_dir=self.config.VIDEO_DIR, images=rgb_frames[i], episode_id=current_episodes[i].episode_id, checkpoint_idx=checkpoint_index, metrics=self._extract_scalars_from_info( infos[i]), tag=f"{category}{label}", tb_writer=writer, ) except Exception as e: logger.warning(str(e)) rgb_frames[i] = [] stats_episodes[( current_episodes[i].scene_id, current_episodes[i].episode_id, dones_per_ep[k], )] = episode_stats if len(log_diagnostics) > 0: diagnostic_info = dict() for metric in log_diagnostics: diagnostic_info[metric] = d_stats[metric][i] d_stats[metric][i] = [] if Diagnostics.top_down_map in log_diagnostics: top_down_map = torch.tensor([]) if len(self.config.VIDEO_OPTION) > 0: top_down_map = infos[i]["top_down_map"]["map"] 
top_down_map = maps.colorize_topdown_map( top_down_map, fog_of_war_mask=None) diagnostic_info.update( dict(top_down_map=top_down_map)) total_stats.append( dict( stats=episode_stats, did_stop=bool(prev_actions[i] == 0), episode_info=attr.asdict(current_episodes[i]), info=diagnostic_info, )) pbar.update() # episode continues else: if len(self.config.VIDEO_OPTION) > 0: aux_weights = None if weights_output is None else weights_output[ i] frame = observations_to_image( observations[i], infos[i], current_episode_reward[i].item(), aux_weights, aux_tasks) rgb_frames[i].append(frame) if Diagnostics.gps in log_diagnostics: d_stats[Diagnostics.gps][i].append( observations[i]["gps"].tolist()) if Diagnostics.heading in log_diagnostics: d_stats[Diagnostics.heading][i].append( observations[i]["heading"].tolist()) ( self.envs, test_recurrent_hidden_states, not_done_masks, current_episode_reward, prev_actions, batch, rgb_frames, ) = self._pause_envs( envs_to_pause, self.envs, test_recurrent_hidden_states, not_done_masks, current_episode_reward, prev_actions, batch, rgb_frames, ) num_episodes = len(stats_episodes) aggregated_stats = dict() for stat_key in next(iter(stats_episodes.values())).keys(): aggregated_stats[stat_key] = ( sum([v[stat_key] for v in stats_episodes.values()]) / num_episodes) for k, v in aggregated_stats.items(): logger.info(f"Average episode {k}: {v:.4f}") step_id = checkpoint_index if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]: step_id = ckpt_dict["extra_state"]["step"] writer.add_scalars( "eval_reward", {"average reward": aggregated_stats["reward"]}, step_id, ) metrics = {k: v for k, v in aggregated_stats.items() if k != "reward"} if len(metrics) > 0: writer.add_scalars("eval_metrics", metrics, step_id) logger.info("eval_metrics") logger.info(metrics) if len(log_diagnostics) > 0: os.makedirs(output_dir, exist_ok=True) eval_fn = f"{label}.json" with open(os.path.join(output_dir, eval_fn), 'w', encoding='utf-8') as f: json.dump(total_stats, f, ensure_ascii=False, indent=4) self.envs.close()
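# The evaluation loops above store one stats dict per (scene_id, episode_id) key
# and then average each stat over episodes. A self-contained sketch of that
# aggregation with made-up numbers:
stats_episodes = {
    ("sceneA", "0"): {"reward": 1.0, "spl": 0.50, "success": 1.0},
    ("sceneA", "1"): {"reward": 0.2, "spl": 0.00, "success": 0.0},
    ("sceneB", "0"): {"reward": 2.4, "spl": 0.75, "success": 1.0},
}

num_episodes = len(stats_episodes)
aggregated_stats = {
    key: sum(ep[key] for ep in stats_episodes.values()) / num_episodes
    for key in next(iter(stats_episodes.values()))
}
for k, v in aggregated_stats.items():
    print(f"Average episode {k}: {v:.4f}")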
def _eval_checkpoint( self, checkpoint_path: str, writer: TensorboardWriter, checkpoint_index: int = 0, ) -> None: r"""Evaluates a single checkpoint. Args: checkpoint_path: path of checkpoint writer: tensorboard writer object for logging to tensorboard checkpoint_index: index of cur checkpoint for logging Returns: None """ NUM_PROCESSES = 3 # Map location CPU is almost always better than mapping to a CUDA device. ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu") if self.config.EVAL.USE_CKPT_CONFIG: config = self._setup_eval_config(ckpt_dict["config"]) else: config = self.config.clone() ppo_cfg = config.RL.PPO config.defrost() config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT config.NUM_PROCESSES = NUM_PROCESSES config.NUM_VAL_PROCESSES = 0 config.TASK_CONFIG.ENVIRONMENT.ITERATOR_OPTIONS.MAX_SCENE_REPEAT_EPISODES = 10 config.TEST_EPISODE_COUNT = 500 config.RL.PPO.pretrained = False config.RL.PPO.pretrained_encoder = False if torch.cuda.device_count() <= 1: config.TORCH_GPU_ID = 0 config.SIMULATOR_GPU_ID = 0 else: config.TORCH_GPU_ID = 0 config.SIMULATOR_GPU_ID = 1 config.VIDEO_DIR += '_eval' config.freeze() if len(self.config.VIDEO_OPTION) > 0: config.defrost() config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP") config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS") config.freeze() self.config = config logger.info(f"env config: {config}") self.envs = construct_envs(config, eval(self.config.ENV_NAME), run_type='eval') self._setup_actor_critic_agent(ppo_cfg) print(config.POLICY) # if 'SMT' in config.POLICY: # sd = torch.load('visual_embedding18.pth') # self.actor_critic.net.visual_encoder.load_state_dict(sd['visual_encoder']) # self.actor_critic.net.prev_action_embedding.load_state_dict(sd['prev_action_embedding']) # self.actor_critic.net.visual_encoder.cuda() # self.actor_critic.net.prev_action_embedding.cuda() # self.envs.setup_embedding_network(self.actor_critic.net.visual_encoder, self.actor_critic.net.prev_action_embedding) # print('-----------------------------setup pretrained visual embedding network') try: self.agent.load_state_dict(ckpt_dict["state_dict"]) except: raise initial_state_dict = self.actor_critic.state_dict() initial_state_dict.update({ k[len("actor_critic."):]: v for k, v in ckpt_dict['state_dict'].items() if k[len("actor_critic."):] in initial_state_dict and v.shape == initial_state_dict[k[len("actor_critic."):]].shape }) print({ k[len("actor_critic."):]: v for k, v in ckpt_dict['state_dict'].items() if k[len("actor_critic."):] in initial_state_dict and v.shape == initial_state_dict[k[len("actor_critic."):]].shape }.keys()) self.actor_critic.load_state_dict(initial_state_dict) self.actor_critic = self.agent.actor_critic batch = self.envs.reset() #batch = batch_obs(observations, device=self.device) current_episode_reward = torch.zeros(self.envs.num_envs, 1, device=self.device) test_recurrent_hidden_states = torch.zeros( self.actor_critic.net.num_recurrent_layers, NUM_PROCESSES, ppo_cfg.hidden_size, device=self.device, ) prev_actions = torch.zeros(NUM_PROCESSES, 1, device=self.device, dtype=torch.long) not_done_masks = torch.zeros(NUM_PROCESSES, 1, device=self.device) stats_episodes = dict() # dict of dicts that stores stats per episode rgb_frames = [[] for _ in range(NUM_PROCESSES) ] # type: List[List[np.ndarray]] if len(self.config.VIDEO_OPTION) > 0: os.makedirs(self.config.VIDEO_DIR, exist_ok=True) pbar = tqdm.tqdm(total=self.config.TEST_EPISODE_COUNT) self.actor_critic.eval() while (len(stats_episodes) < self.config.TEST_EPISODE_COUNT 
and self.envs.num_envs > 0): #print(len(stats_episodes), self.config.TEST_EPISODE_COUNT, self.envs.num_envs) current_episodes = self.envs.current_episodes() with torch.no_grad(): ( _, actions, _, test_recurrent_hidden_states, ) = self.actor_critic.act( batch, test_recurrent_hidden_states, prev_actions, not_done_masks, deterministic=False, ) actions = actions.unsqueeze(1) prev_actions.copy_(actions) batch, rewards, dones, infos = self.envs.step( [a[0].item() for a in actions]) not_done_masks = torch.tensor( [[0.0] if done else [1.0] for done in dones], dtype=torch.float, device=self.device, ) rewards = torch.tensor(rewards, dtype=torch.float, device=self.device).unsqueeze(1) current_episode_reward += rewards next_episodes = self.envs.current_episodes() envs_to_pause = [] n_envs = self.envs.num_envs for i in range(n_envs): if ( next_episodes[i].scene_id, next_episodes[i].episode_id, ) in stats_episodes: envs_to_pause.append(i) # episode ended if not_done_masks[i].item() == 0: pbar.update() episode_stats = dict() episode_stats["reward"] = current_episode_reward[i].item() episode_stats.update( self._extract_scalars_from_info(infos[i])) current_episode_reward[i] = 0 # use scene_id + episode_id as unique id for storing stats stats_episodes[( current_episodes[i].scene_id, current_episodes[i].episode_id, )] = episode_stats # episode continues #elif len(self.config.VIDEO_OPTION) > 0: # frame = self.envs.call_at(i, 'render', {'mode': 'rgb_array'})#observations_to_image(observations[i], infos[i]) # rgb_frames[i].append(frame) num_episodes = len(stats_episodes) aggregated_stats = dict() for stat_key in next(iter(stats_episodes.values())).keys(): aggregated_stats[stat_key] = ( sum([v[stat_key] for v in stats_episodes.values()]) / num_episodes) for k, v in aggregated_stats.items(): logger.info(f"Average episode {k}: {v:.4f}") step_id = checkpoint_index if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]: step_id = ckpt_dict["extra_state"]["step"] writer.add_scalars( "eval_reward", {"average reward": aggregated_stats["reward"]}, step_id, ) metrics = {k: v for k, v in aggregated_stats.items() if k != "reward"} if len(metrics) > 0: writer.add_scalars("eval_metrics", metrics, step_id) self.envs.close()
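# The checkpoint loading above falls back to copying only those actor_critic
# weights whose names and shapes match the freshly constructed model. A minimal,
# self-contained sketch of that shape-filtered state_dict update (toy module and
# keys, not the real policy classes):
import torch
import torch.nn as nn

new_model = nn.Linear(4, 2)
ckpt_state = {
    "actor_critic.weight": torch.zeros(2, 4),   # name and shape match -> copied
    "actor_critic.bias": torch.zeros(3),        # wrong shape -> skipped
    "actor_critic.extra": torch.zeros(1),       # unknown key -> skipped
}

prefix = "actor_critic."
initial = new_model.state_dict()
initial.update({
    k[len(prefix):]: v
    for k, v in ckpt_state.items()
    if k[len(prefix):] in initial and v.shape == initial[k[len(prefix):]].shape
})
new_model.load_state_dict(initial)
print(new_model.weight.abs().sum().item())  # 0.0 -> the matching weight was copied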
def _eval_checkpoint( self, checkpoint_path: str, writer: TensorboardWriter, checkpoint_index: int = 0, ) -> None: r"""Evaluates a single checkpoint. Args: checkpoint_path: path of checkpoint writer: tensorboard writer object for logging to tensorboard checkpoint_index: index of cur checkpoint for logging Returns: None """ config = self.config config.defrost() config.TASK_CONFIG.DATASET.SPLIT = self.config.EVAL.SPLIT config.freeze() eqa_cnn_pretrain_dataset = EQACNNPretrainDataset(config, mode="val") eval_loader = DataLoader( eqa_cnn_pretrain_dataset, batch_size=config.IL.EQACNNPretrain.batch_size, shuffle=False, ) logger.info("[ eval_loader has {} samples ]".format( len(eqa_cnn_pretrain_dataset))) model = MultitaskCNN() state_dict = torch.load(checkpoint_path) model.load_state_dict(state_dict) model.to(self.device).eval() depth_loss = torch.nn.SmoothL1Loss() ae_loss = torch.nn.SmoothL1Loss() seg_loss = torch.nn.CrossEntropyLoss() t = 0 avg_loss = 0.0 avg_l1 = 0.0 avg_l2 = 0.0 avg_l3 = 0.0 with torch.no_grad(): for batch in eval_loader: t += 1 idx, gt_rgb, gt_depth, gt_seg = batch gt_rgb = gt_rgb.to(self.device) gt_depth = gt_depth.to(self.device) gt_seg = gt_seg.to(self.device) pred_seg, pred_depth, pred_rgb = model(gt_rgb) l1 = seg_loss(pred_seg, gt_seg.long()) l2 = ae_loss(pred_rgb, gt_rgb) l3 = depth_loss(pred_depth, gt_depth) loss = l1 + (10 * l2) + (10 * l3) avg_loss += loss.item() avg_l1 += l1.item() avg_l2 += l2.item() avg_l3 += l3.item() if t % config.LOG_INTERVAL == 0: logger.info( "[ Iter: {}; loss: {:.3f} ]".format(t, loss.item()), ) if (config.EVAL_SAVE_RESULTS and t % config.EVAL_SAVE_RESULTS_INTERVAL == 0): result_id = "ckpt_{}_{}".format(checkpoint_index, idx[0].item()) result_path = os.path.join(self.config.RESULTS_DIR, result_id) self._save_results( gt_rgb, pred_rgb, gt_seg, pred_seg, gt_depth, pred_depth, result_path, ) avg_loss /= len(eval_loader) avg_l1 /= len(eval_loader) avg_l2 /= len(eval_loader) avg_l3 /= len(eval_loader) writer.add_scalar("avg val total loss", avg_loss, checkpoint_index) writer.add_scalars( "avg val individual_losses", { "seg_loss": avg_l1, "ae_loss": avg_l2, "depth_loss": avg_l3 }, checkpoint_index, ) logger.info("[ Average loss: {:.3f} ]".format(avg_loss)) logger.info("[ Average seg loss: {:.3f} ]".format(avg_l1)) logger.info("[ Average autoencoder loss: {:.4f} ]".format(avg_l2)) logger.info("[ Average depthloss: {:.4f} ]".format(avg_l3))
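# The EQA CNN pretraining eval above scores three heads with a weighted sum:
# cross-entropy for segmentation plus 10x SmoothL1 for the autoencoder and depth
# reconstructions. A toy illustration of that loss combination (random tensors;
# the shapes and class count are chosen only for the example):
import torch
import torch.nn as nn

seg_loss, ae_loss, depth_loss = nn.CrossEntropyLoss(), nn.SmoothL1Loss(), nn.SmoothL1Loss()

pred_seg = torch.randn(2, 41, 8, 8)            # segmentation logits
gt_seg = torch.randint(0, 41, (2, 8, 8))
pred_rgb, gt_rgb = torch.rand(2, 3, 8, 8), torch.rand(2, 3, 8, 8)
pred_depth, gt_depth = torch.rand(2, 1, 8, 8), torch.rand(2, 1, 8, 8)

l1 = seg_loss(pred_seg, gt_seg.long())
l2 = ae_loss(pred_rgb, gt_rgb)
l3 = depth_loss(pred_depth, gt_depth)
loss = l1 + (10 * l2) + (10 * l3)
print(loss.item())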
def _eval_checkpoint( self, checkpoint_path: str, writer: TensorboardWriter, checkpoint_index: int = 0, ) -> None: r"""Evaluates a single checkpoint. Args: checkpoint_path: path of checkpoint writer: tensorboard writer object for logging to tensorboard checkpoint_index: index of cur checkpoint for logging Returns: None """ config = self.config config.defrost() config.TASK_CONFIG.DATASET.SPLIT = self.config.EVAL.SPLIT config.freeze() vqa_dataset = ( EQADataset( config, input_type="vqa", num_frames=config.IL.VQA.num_frames, ) .shuffle(1000) .to_tuple( "episode_id", "question", "answer", *["{0:0=3d}.jpg".format(x) for x in range(0, 5)], ) .map(img_bytes_2_np_array) ) eval_loader = DataLoader( vqa_dataset, batch_size=config.IL.VQA.batch_size ) logger.info("eval_loader has {} samples".format(len(vqa_dataset))) q_vocab_dict, ans_vocab_dict = vqa_dataset.get_vocab_dicts() model_kwargs = { "q_vocab": q_vocab_dict.word2idx_dict, "ans_vocab": ans_vocab_dict.word2idx_dict, "eqa_cnn_pretrain_ckpt_path": config.EQA_CNN_PRETRAIN_CKPT_PATH, } model = VqaLstmCnnAttentionModel(**model_kwargs) state_dict = torch.load( checkpoint_path, map_location={"cuda:0": "cpu"} ) model.load_state_dict(state_dict) lossFn = torch.nn.CrossEntropyLoss() t = 0 avg_loss = 0.0 avg_accuracy = 0.0 avg_mean_rank = 0.0 avg_mean_reciprocal_rank = 0.0 model.eval() model.cnn.eval() model.to(self.device) metrics = VqaMetric( info={"split": "val"}, metric_names=[ "loss", "accuracy", "mean_rank", "mean_reciprocal_rank", ], log_json=os.path.join(config.OUTPUT_LOG_DIR, "eval.json"), ) with torch.no_grad(): for batch in eval_loader: t += 1 episode_ids, questions, answers, frame_queue = batch questions = questions.to(self.device) answers = answers.to(self.device) frame_queue = frame_queue.to(self.device) scores, _ = model(frame_queue, questions) loss = lossFn(scores, answers) accuracy, ranks = metrics.compute_ranks( scores.data.cpu(), answers ) metrics.update([loss.item(), accuracy, ranks, 1.0 / ranks]) ( metrics_loss, accuracy, mean_rank, mean_reciprocal_rank, ) = metrics.get_stats(mode=0) avg_loss += metrics_loss avg_accuracy += accuracy avg_mean_rank += mean_rank avg_mean_reciprocal_rank += mean_reciprocal_rank if t % config.LOG_INTERVAL == 0: logger.info(metrics.get_stat_string(mode=0)) metrics.dump_log() if ( config.EVAL_SAVE_RESULTS and t % config.EVAL_SAVE_RESULTS_INTERVAL == 0 ): self._save_vqa_results( checkpoint_index, episode_ids, questions, frame_queue, scores, answers, q_vocab_dict, ans_vocab_dict, ) num_batches = math.ceil(len(vqa_dataset) / config.IL.VQA.batch_size) avg_loss /= num_batches avg_accuracy /= num_batches avg_mean_rank /= num_batches avg_mean_reciprocal_rank /= num_batches writer.add_scalar("avg val loss", avg_loss, checkpoint_index) writer.add_scalar("avg val accuracy", avg_accuracy, checkpoint_index) writer.add_scalar("avg val mean rank", avg_mean_rank, checkpoint_index) writer.add_scalar( "avg val mean reciprocal rank", avg_mean_reciprocal_rank, checkpoint_index, ) logger.info("Average loss: {:.2f}".format(avg_loss)) logger.info("Average accuracy: {:.2f}".format(avg_accuracy)) logger.info("Average mean rank: {:.2f}".format(avg_mean_rank)) logger.info( "Average mean reciprocal rank: {:.2f}".format( avg_mean_reciprocal_rank ) )
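# VqaMetric above tracks accuracy, mean rank, and mean reciprocal rank of the
# ground-truth answer under the predicted score distribution. A self-contained
# sketch of how such ranks and MRR can be computed from a scores matrix
# (toy numbers, not the VqaMetric implementation):
import torch

scores = torch.tensor([[0.1, 0.7, 0.2],    # gt answer index 1 -> rank 1
                       [0.5, 0.3, 0.2]])   # gt answer index 2 -> rank 3
answers = torch.tensor([1, 2])

order = scores.argsort(dim=1, descending=True)
ranks = (order == answers.unsqueeze(1)).nonzero()[:, 1].float() + 1.0

accuracy = (ranks == 1).float().mean().item()
mean_rank = ranks.mean().item()
mean_reciprocal_rank = (1.0 / ranks).mean().item()
print(accuracy, mean_rank, mean_reciprocal_rank)  # 0.5, 2.0, ~0.667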
def train(self) -> None: r"""Main method for training DD/PPO. Returns: None """ self._init_train() count_checkpoints = 0 prev_time = 0 lr_scheduler = LambdaLR( optimizer=self.agent.optimizer, lr_lambda=lambda x: 1 - self.percent_done(), ) interrupted_state = load_interrupted_state() if interrupted_state is not None: self.agent.load_state_dict(interrupted_state["state_dict"]) self.agent.optimizer.load_state_dict( interrupted_state["optim_state"] ) lr_scheduler.load_state_dict(interrupted_state["lr_sched_state"]) requeue_stats = interrupted_state["requeue_stats"] self.env_time = requeue_stats["env_time"] self.pth_time = requeue_stats["pth_time"] self.num_steps_done = requeue_stats["num_steps_done"] self.num_updates_done = requeue_stats["num_updates_done"] self._last_checkpoint_percent = requeue_stats[ "_last_checkpoint_percent" ] count_checkpoints = requeue_stats["count_checkpoints"] prev_time = requeue_stats["prev_time"] self._last_checkpoint_percent = requeue_stats[ "_last_checkpoint_percent" ] ppo_cfg = self.config.RL.PPO with ( TensorboardWriter( self.config.TENSORBOARD_DIR, flush_secs=self.flush_secs ) if rank0_only() else contextlib.suppress() ) as writer: while not self.is_done(): profiling_wrapper.on_start_step() profiling_wrapper.range_push("train update") if ppo_cfg.use_linear_clip_decay: self.agent.clip_param = ppo_cfg.clip_param * ( 1 - self.percent_done() ) if EXIT.is_set(): profiling_wrapper.range_pop() # train update self.envs.close() if REQUEUE.is_set() and rank0_only(): requeue_stats = dict( env_time=self.env_time, pth_time=self.pth_time, count_checkpoints=count_checkpoints, num_steps_done=self.num_steps_done, num_updates_done=self.num_updates_done, _last_checkpoint_percent=self._last_checkpoint_percent, prev_time=(time.time() - self.t_start) + prev_time, ) save_interrupted_state( dict( state_dict=self.agent.state_dict(), optim_state=self.agent.optimizer.state_dict(), lr_sched_state=lr_scheduler.state_dict(), config=self.config, requeue_stats=requeue_stats, ) ) requeue_job() return self.agent.eval() count_steps_delta = 0 profiling_wrapper.range_push("rollouts loop") profiling_wrapper.range_push("_collect_rollout_step") for buffer_index in range(self._nbuffers): self._compute_actions_and_step_envs(buffer_index) for step in range(ppo_cfg.num_steps): is_last_step = ( self.should_end_early(step + 1) or (step + 1) == ppo_cfg.num_steps ) for buffer_index in range(self._nbuffers): count_steps_delta += self._collect_environment_result( buffer_index ) if (buffer_index + 1) == self._nbuffers: profiling_wrapper.range_pop() # _collect_rollout_step if not is_last_step: if (buffer_index + 1) == self._nbuffers: profiling_wrapper.range_push( "_collect_rollout_step" ) self._compute_actions_and_step_envs(buffer_index) if is_last_step: break profiling_wrapper.range_pop() # rollouts loop if self._is_distributed: self.num_rollouts_done_store.add("num_done", 1) ( value_loss, action_loss, dist_entropy, ) = self._update_agent() if ppo_cfg.use_linear_lr_decay: lr_scheduler.step() # type: ignore self.num_updates_done += 1 losses = self._coalesce_post_step( dict(value_loss=value_loss, action_loss=action_loss), count_steps_delta, ) self._training_log(writer, losses, prev_time) # checkpoint model if rank0_only() and self.should_checkpoint(): self.save_checkpoint( f"ckpt.{count_checkpoints}.pth", dict( step=self.num_steps_done, wall_time=(time.time() - self.t_start) + prev_time, ), ) count_checkpoints += 1 profiling_wrapper.range_pop() # train update self.envs.close()
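# The DD-PPO train() above checkpoints a full "interrupted state" (model, optimizer,
# LR scheduler, and bookkeeping counters) so a requeued job can resume where it
# stopped. A minimal sketch of saving and restoring such a state with torch
# (paths and keys are illustrative, not the habitat_baselines API):
import os
import tempfile

import torch
import torch.nn as nn

model = nn.Linear(3, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

state = dict(
    state_dict=model.state_dict(),
    optim_state=optimizer.state_dict(),
    requeue_stats=dict(num_steps_done=1234, count_checkpoints=5, prev_time=86.7),
)
path = os.path.join(tempfile.gettempdir(), "interrupted_state.pth")
torch.save(state, path)

restored = torch.load(path, map_location="cpu")
model.load_state_dict(restored["state_dict"])
optimizer.load_state_dict(restored["optim_state"])
print(restored["requeue_stats"]["num_steps_done"])  # 1234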
def main(): print("---------------------") print("Actions") print("STOP", HabitatSimActions.STOP) print("FORWARD", HabitatSimActions.MOVE_FORWARD) print("LEFT", HabitatSimActions.TURN_LEFT) print("RIGHT", HabitatSimActions.TURN_RIGHT) log_dir = "{}/models/{}/".format(args.dump_location, args.exp_name) dump_dir = "{}/dump/{}/".format(args.dump_location, args.exp_name) tb_dir = log_dir + "tensorboard" if not os.path.exists(tb_dir): os.makedirs(tb_dir) if not os.path.exists(log_dir): os.makedirs(log_dir) if not os.path.exists("{}/images/".format(dump_dir)): os.makedirs("{}/images/".format(dump_dir)) logging.basicConfig( filename=log_dir + 'train.log', level=logging.INFO) print("Dumping at {}".format(log_dir)) print("Arguments starting with ", args) logging.info(args) device = args.device = torch.device("cuda:0" if args.cuda else "cpu") # Logging and loss variables num_scenes = args.num_processes num_episodes = int(args.num_episodes) # setting up rewards and losses # policy_loss = 0 best_cost = float('inf') costs = deque(maxlen=1000) exp_costs = deque(maxlen=1000) pose_costs = deque(maxlen=1000) l_masks = torch.zeros(num_scenes).float().to(device) # best_local_loss = np.inf # if args.eval: # traj_lengths = args.max_episode_length // args.num_local_steps # l_action_losses = deque(maxlen=1000) print("Setup rewards") print("starting envrionments ...") # Starting environments torch.set_num_threads(1) envs = make_vec_envs(args) obs, infos = envs.reset() print("environments reset") # show_gpu_usage() # Initialize map variables ### Full map consists of 4 channels containing the following: ### 1. Obstacle Map ### 2. Exploread Area ### 3. Current Agent Location ### 4. Past Agent Locations print("creating maps and poses ") torch.set_grad_enabled(False) # Calculating full and local map sizes map_size = args.map_size_cm // args.map_resolution full_w, full_h = map_size, map_size local_w, local_h = int(full_w / args.global_downscaling), \ int(full_h / args.global_downscaling) # Initializing full and local map full_map = torch.zeros(num_scenes, 4, full_w, full_h).float().to(device) local_map = torch.zeros(num_scenes, 4, local_w, local_h).float().to(device) # Initial full and local pose full_pose = torch.zeros(num_scenes, 3).float().to(device) local_pose = torch.zeros(num_scenes, 3).float().to(device) # Origin of local map origins = np.zeros((num_scenes, 3)) # Local Map Boundaries lmb = np.zeros((num_scenes, 4)).astype(int) ### Planner pose inputs has 7 dimensions ### 1-3 store continuous global agent location ### 4-7 store local map boundaries planner_pose_inputs = np.zeros((num_scenes, 7)) # show_gpu_usage() start_full_pose = np.zeros(3) start_full_pose[:2] = args.map_size_cm / 100.0 / 2.0 def init_map_and_pose(): full_map.fill_(0.) full_pose.fill_(0.) full_pose[:, :2] = args.map_size_cm / 100.0 / 2.0 full_pose_np = full_pose.cpu().numpy() planner_pose_inputs[:, :3] = full_pose_np for e in range(num_scenes): r, c = full_pose_np[e, 1], full_pose_np[e, 0] loc_r, loc_c = [int(r * 100.0 / args.map_resolution), int(c * 100.0 / args.map_resolution)] full_map[e, 2:, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0 lmb[e] = get_local_map_boundaries((loc_r, loc_c), (local_w, local_h), (full_w, full_h)) planner_pose_inputs[e, 3:] = lmb[e] origins[e] = [lmb[e][2] * args.map_resolution / 100.0, lmb[e][0] * args.map_resolution / 100.0, 0.] 
for e in range(num_scenes): local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] local_pose[e] = full_pose[e] - \ torch.from_numpy(origins[e]).to(device).float() init_map_and_pose() print("maps and poses intialized") print("defining architecture") # slam nslam_module = Neural_SLAM_Module(args).to(device) slam_optimizer = get_optimizer(nslam_module.parameters(), args.slam_optimizer) slam_memory = FIFOMemory(args.slam_memory_size) # # Local policy # print("policy observation space", envs.observation_space.spaces['rgb']) # print("policy action space ", envs.action_space) # l_observation_space = gym.spaces.Box(0, 255, # (3, # args.frame_width, # args.frame_width), dtype='uint8') # # todo change this to use envs.observation_space.spaces['rgb'].shape later # l_policy = Local_IL_Policy(l_observation_space.shape, envs.action_space.n, # recurrent=args.use_recurrent_local, # hidden_size=args.local_hidden_size, # deterministic=args.use_deterministic_local).to(device) # local_optimizer = get_optimizer(l_policy.parameters(), args.local_optimizer) # show_gpu_usage() print("loading model weights") # Loading model if args.load_slam != "0": print("Loading slam {}".format(args.load_slam)) state_dict = torch.load(args.load_slam, map_location=lambda storage, loc: storage) nslam_module.load_state_dict(state_dict) if not args.train_slam: nslam_module.eval() # if args.load_local != "0": # print("Loading local {}".format(args.load_local)) # state_dict = torch.load(args.load_local, # map_location=lambda storage, loc: storage) # l_policy.load_state_dict(state_dict) # if not args.train_local: # l_policy.eval() print("predicting first pose and initializing maps") # if not (args.use_gt_pose and args.use_gt_map): # delta_pose is the expected change in pose when action is applied at # the current pose in the absence of noise. # initially no action is applied so this is zero. delta_poses = torch.from_numpy(np.zeros(local_pose.shape)).float().to(device) # initial estimate for local pose and local map from first observation, # initialized (zero) pose and map _, _, local_map[:, 0, :, :], local_map[:, 1, :, :], _, local_pose = \ nslam_module(obs, obs, delta_poses, local_map[:, 0, :, :], local_map[:, 1, :, :], local_pose) # if args.use_gt_pose: # # todo update local_pose here # full_pose = envs.get_gt_pose() # for e in range(num_scenes): # local_pose[e] = full_pose[e] - \ # torch.from_numpy(origins[e]).to(device).float() # if args.use_gt_map: # full_map = envs.get_gt_map() # for e in range(num_scenes): # local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] print("slam module returned pose and maps") # NOT NEEDED : 4/29 local_pose_np = local_pose.cpu().numpy() # update local map for each scene - input for planner for e in range(num_scenes): r, c = local_pose_np[e, 1], local_pose_np[e, 0] loc_r, loc_c = [int(r * 100.0 / args.map_resolution), int(c * 100.0 / args.map_resolution)] local_map[e, 2:, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1. 
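# init_map_and_pose above relies on get_local_map_boundaries to cut a local window
# out of the full map, centred on the agent and clamped to the map edges. That
# helper is not shown in this file; the version below is an assumed, simplified
# reimplementation of the clamping logic, matching how lmb
# (row_min, row_max, col_min, col_max) is used when slicing full_map:
def local_map_boundaries_sketch(agent_loc, local_size, full_size):
    loc_r, loc_c = agent_loc
    local_w, local_h = local_size
    full_w, full_h = full_size

    gx1 = min(max(0, loc_r - local_w // 2), full_w - local_w)
    gy1 = min(max(0, loc_c - local_h // 2), full_h - local_h)
    return [gx1, gx1 + local_w, gy1, gy1 + local_h]


# Agent near the top-left corner: the window is clamped so it stays inside the map.
print(local_map_boundaries_sketch((10, 10), (240, 240), (480, 480)))  # [0, 240, 0, 240]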
# # todo get goal from env here global_goals = envs.get_goal_coords().int() # Compute planner inputs planner_inputs = [{} for e in range(num_scenes)] for e, p_input in enumerate(planner_inputs): p_input['goal'] = global_goals[e].detach().cpu().numpy() p_input['map_pred'] = local_map[e, 0, :, :].detach().cpu().numpy() p_input['exp_pred'] = local_map[e, 1, :, :].detach().cpu().numpy() p_input['pose_pred'] = planner_pose_inputs[e] # Output stores local goals as well as the the ground-truth action planner_out = envs.get_short_term_goal(planner_inputs) # planner output contains: # Distance to short term goal - positive discretized number # angle to short term goal - angle -180 to 180 but in buckets of 5 degrees so multiply by 5 to ge true angle # GT action - action to be taken according to planner (int) # going to step through the episodes, so cache previous information last_obs = obs.detach() local_rec_states = torch.zeros(num_scenes, args.local_hidden_size).to(device) start = time.time() total_num_steps = -1 torch.set_grad_enabled(False) print("starting episodes") with TensorboardWriter( tb_dir, flush_secs=60 ) as writer: for itr_counter, ep_num in enumerate(range(num_episodes)): print("------------------------------------------------------") print("Episode", ep_num) # if itr_counter >= 20: # print("DONE WE FIXED IT") # die() # for step in range(args.max_episode_length): step_bar = tqdm(range(args.max_episode_length)) for step in step_bar: # print("------------------------------------------------------") # print("episode ", ep_num, "step ", step) total_num_steps += 1 l_step = step % args.num_local_steps # Local Policy # ------------------------------------------------------------------ # cache previous information del last_obs last_obs = obs.detach() # if not args.use_optimal_policy and not args.use_shortest_path_gt: # local_masks = l_masks # local_goals = planner_out[:, :-1].to(device).long() # if args.train_local: # torch.set_grad_enabled(True) # # local policy "step" # action, action_prob, local_rec_states = l_policy( # obs, # local_rec_states, # local_masks, # extras=local_goals, # ) # if args.train_local: # action_target = planner_out[:, -1].long().to(device) # # doubt: this is probably wrong? one is action probability and the other is action # policy_loss += nn.CrossEntropyLoss()(action_prob, action_target) # torch.set_grad_enabled(False) # l_action = action.cpu() # else: # if args.use_optimal_policy: # l_action = planner_out[:, -1] # else: # l_action = envs.get_optimal_gt_action() l_action = envs.get_optimal_action(start_full_pose, full_pose).cpu() # if step > 10: # l_action = torch.tensor([HabitatSimActions.STOP]) # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Env step # print("stepping with action ", l_action) # try: obs, rew, done, infos = envs.step(l_action) # ------------------------------------------------------------------ # Reinitialize variables when episode ends # doubt what if episode ends before max_episode_length? # maybe add (or done ) here? 
if l_action == HabitatSimActions.STOP or step == args.max_episode_length - 1: print("l_action", l_action) init_map_and_pose() del last_obs last_obs = obs.detach() print("Reinitialize since at end of episode ") obs, infos = envs.reset() # except: # print("can't do that") # print(l_action) # init_map_and_pose() # del last_obs # last_obs = obs.detach() # print("Reinitialize since at end of episode ") # break # step_bar.set_description("rew, done, info-sensor_pose, pose_err (stepping) {}, {}, {}, {}".format(rew, done, infos[0]['sensor_pose'], infos[0]['pose_err'])) if total_num_steps % args.log_interval == 0 and False: print("rew, done, info-sensor_pose, pose_err after stepping ", rew, done, infos[0]['sensor_pose'], infos[0]['pose_err']) # l_masks = torch.FloatTensor([0 if x else 1 # for x in done]).to(device) # ------------------------------------------------------------------ # # ------------------------------------------------------------------ # # Reinitialize variables when episode ends # # doubt what if episode ends before max_episode_length? # # maybe add (or done ) here? # if step == args.max_episode_length - 1 or l_action == HabitatSimActions.STOP: # Last episode step # init_map_and_pose() # del last_obs # last_obs = obs.detach() # print("Reinitialize since at end of episode ") # break # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Neural SLAM Module delta_poses_np = np.zeros(local_pose_np.shape) if args.train_slam: # Add frames to memory for env_idx in range(num_scenes): env_obs = obs[env_idx].to("cpu") env_poses = torch.from_numpy(np.asarray( delta_poses_np[env_idx] )).float().to("cpu") env_gt_fp_projs = torch.from_numpy(np.asarray( infos[env_idx]['fp_proj'] )).unsqueeze(0).float().to("cpu") env_gt_fp_explored = torch.from_numpy(np.asarray( infos[env_idx]['fp_explored'] )).unsqueeze(0).float().to("cpu") # TODO change pose err here env_gt_pose_err = torch.from_numpy(np.asarray( infos[env_idx]['pose_err'] )).float().to("cpu") slam_memory.push( (last_obs[env_idx].cpu(), env_obs, env_poses), (env_gt_fp_projs, env_gt_fp_explored, env_gt_pose_err)) delta_poses_np[env_idx] = get_delta_pose(local_pose_np[env_idx], l_action[env_idx]) delta_poses = torch.from_numpy(delta_poses_np).float().to(device) # print("delta pose from SLAM ", delta_poses) _, _, local_map[:, 0, :, :], local_map[:, 1, :, :], _, local_pose = \ nslam_module(last_obs, obs, delta_poses, local_map[:, 0, :, :], local_map[:, 1, :, :], local_pose, build_maps=True) # print("updated local pose from SLAM ", local_pose) # if args.use_gt_pose: # # todo update local_pose here # full_pose = envs.get_gt_pose() # for e in range(num_scenes): # local_pose[e] = full_pose[e] - \ # torch.from_numpy(origins[e]).to(device).float() # print("updated local pose from gt ", local_pose) # if args.use_gt_map: # full_map = envs.get_gt_map() # for e in range(num_scenes): # local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] # print("updated local map from gt") local_pose_np = local_pose.cpu().numpy() planner_pose_inputs[:, :3] = local_pose_np + origins local_map[:, 2, :, :].fill_(0.) # Resetting current location channel for e in range(num_scenes): r, c = local_pose_np[e, 1], local_pose_np[e, 0] loc_r, loc_c = [int(r * 100.0 / args.map_resolution), int(c * 100.0 / args.map_resolution)] local_map[e, 2:, loc_r - 2:loc_r + 3, loc_c - 2:loc_c + 3] = 1. 
if l_step == args.num_local_steps - 1: # For every global step, update the full and local maps for e in range(num_scenes): full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \ local_map[e] full_pose[e] = local_pose[e] + \ torch.from_numpy(origins[e]).to(device).float() full_pose_np = full_pose[e].cpu().numpy() r, c = full_pose_np[1], full_pose_np[0] loc_r, loc_c = [int(r * 100.0 / args.map_resolution), int(c * 100.0 / args.map_resolution)] lmb[e] = get_local_map_boundaries((loc_r, loc_c), (local_w, local_h), (full_w, full_h)) planner_pose_inputs[e, 3:] = lmb[e] origins[e] = [lmb[e][2] * args.map_resolution / 100.0, lmb[e][0] * args.map_resolution / 100.0, 0.] local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] local_pose[e] = full_pose[e] - \ torch.from_numpy(origins[e]).to(device).float() local_pose_np = local_pose.cpu().numpy() planner_pose_inputs[:, :3] = local_pose_np + origins local_map[:, 2, :, :].fill_(0.) # Resetting current location channel for e in range(num_scenes): r, c = local_pose_np[e, 1], local_pose_np[e, 0] loc_r, loc_c = [int(r * 100.0 / args.map_resolution), int(c * 100.0 / args.map_resolution)] local_map[e, 2:, loc_r - 2:loc_r + 3, loc_c - 2:loc_c + 3] = 1. planner_inputs = [{} for e in range(num_scenes)] for e, p_input in enumerate(planner_inputs): p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy() p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy() p_input['pose_pred'] = planner_pose_inputs[e] p_input['goal'] = global_goals[e].cpu().numpy() planner_out = envs.get_short_term_goal(planner_inputs) ### TRAINING torch.set_grad_enabled(True) # ------------------------------------------------------------------ # Train Neural SLAM Module if args.train_slam and len(slam_memory) > args.slam_batch_size: for _ in range(args.slam_iterations): inputs, outputs = slam_memory.sample(args.slam_batch_size) b_obs_last, b_obs, b_poses = inputs gt_fp_projs, gt_fp_explored, gt_pose_err = outputs b_obs = b_obs.to(device) b_obs_last = b_obs_last.to(device) b_poses = b_poses.to(device) gt_fp_projs = gt_fp_projs.to(device) gt_fp_explored = gt_fp_explored.to(device) gt_pose_err = gt_pose_err.to(device) b_proj_pred, b_fp_exp_pred, _, _, b_pose_err_pred, _ = \ nslam_module(b_obs_last, b_obs, b_poses, None, None, None, build_maps=False) loss = 0 if args.proj_loss_coeff > 0: proj_loss = F.binary_cross_entropy(b_proj_pred, gt_fp_projs) costs.append(proj_loss.item()) loss += args.proj_loss_coeff * proj_loss if args.exp_loss_coeff > 0: exp_loss = F.binary_cross_entropy(b_fp_exp_pred, gt_fp_explored) exp_costs.append(exp_loss.item()) loss += args.exp_loss_coeff * exp_loss if args.pose_loss_coeff > 0: pose_loss = torch.nn.MSELoss()(b_pose_err_pred, gt_pose_err) pose_costs.append(args.pose_loss_coeff * pose_loss.item()) loss += args.pose_loss_coeff * pose_loss if args.train_slam: slam_optimizer.zero_grad() loss.backward() slam_optimizer.step() del b_obs_last, b_obs, b_poses del gt_fp_projs, gt_fp_explored, gt_pose_err del b_proj_pred, b_fp_exp_pred, b_pose_err_pred # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Train Local Policy # if (l_step + 1) % args.local_policy_update_freq == 0 \ # and args.train_local: # local_optimizer.zero_grad() # policy_loss.backward() # local_optimizer.step() # l_action_losses.append(policy_loss.item()) # policy_loss = 0 # local_rec_states = local_rec_states.detach_() # ------------------------------------------------------------------ # Finish 
torch.set_grad_enabled(False) # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Logging writer.add_scalar("SLAM_Loss_Proj", np.mean(costs), total_num_steps) writer.add_scalar("SLAM_Loss_Exp", np.mean(exp_costs), total_num_steps) writer.add_scalar("SLAM_Loss_Pose", np.mean(pose_costs), total_num_steps) gettime = lambda: str(datetime.now()).split('.')[0] if total_num_steps % args.log_interval == 0: end = time.time() time_elapsed = time.gmtime(end - start) log = " ".join([ "Time: {0:0=2d}d".format(time_elapsed.tm_mday - 1), "{},".format(time.strftime("%Hh %Mm %Ss", time_elapsed)), gettime(), "num timesteps {},".format(total_num_steps * num_scenes), "FPS {},".format(int(total_num_steps * num_scenes \ / (end - start))) ]) log += "\n\tLosses:" # if args.train_local and len(l_action_losses) > 0: # log += " ".join([ # " Local Loss:", # "{:.3f},".format( # np.mean(l_action_losses)) # ]) if args.train_slam and len(costs) > 0: log += " ".join([ " SLAM Loss proj/exp/pose:", "{:.4f}/{:.4f}/{:.4f}".format( np.mean(costs), np.mean(exp_costs), np.mean(pose_costs)) ]) print(log) logging.info(log) # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Save best models if (total_num_steps * num_scenes) % args.save_interval < \ num_scenes: # Save Neural SLAM Model if len(costs) >= 1000 and np.mean(costs) < best_cost \ and not args.eval: print("Saved best model") best_cost = np.mean(costs) torch.save(nslam_module.state_dict(), os.path.join(log_dir, "model_best.slam")) # Save Local Policy Model # if len(l_action_losses) >= 100 and \ # (np.mean(l_action_losses) <= best_local_loss) \ # and not args.eval: # torch.save(l_policy.state_dict(), # os.path.join(log_dir, "model_best.local")) # # best_local_loss = np.mean(l_action_losses) # Save periodic models if (total_num_steps * num_scenes) % args.save_periodic < \ num_scenes: step = total_num_steps * num_scenes if args.train_slam: torch.save(nslam_module.state_dict(), os.path.join(dump_dir, "periodic_{}.slam".format(step))) # if args.train_local: # torch.save(l_policy.state_dict(), # os.path.join(dump_dir, # "periodic_{}.local".format(step))) # ------------------------------------------------------------------ if l_action == HabitatSimActions.STOP: # Last episode step break # Print and save model performance numbers during evaluation if args.eval: logfile = open("{}/explored_area.txt".format(dump_dir), "w+") for e in range(num_scenes): for i in range(explored_area_log[e].shape[0]): logfile.write(str(explored_area_log[e, i]) + "\n") logfile.flush() logfile.close() logfile = open("{}/explored_ratio.txt".format(dump_dir), "w+") for e in range(num_scenes): for i in range(explored_ratio_log[e].shape[0]): logfile.write(str(explored_ratio_log[e, i]) + "\n") logfile.flush() logfile.close() log = "Final Exp Area: \n" for i in range(explored_area_log.shape[2]): log += "{:.5f}, ".format( np.mean(explored_area_log[:, :, i])) log += "\nFinal Exp Ratio: \n" for i in range(explored_ratio_log.shape[2]): log += "{:.5f}, ".format( np.mean(explored_ratio_log[:, :, i])) print(log) logging.info(log)
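# --------------------------------------------------------------------------
# Note on the slam_memory buffer used above: it is only exercised through
# push(), sample() and len(), and its implementation does not appear in this
# file. The sketch below is an assumed, minimal FIFO replay buffer with that
# same interface, shown purely for illustration; it is not the actual
# FIFOMemory class.
# --------------------------------------------------------------------------
import random
from collections import deque

import torch


class FIFOMemorySketch:
    """Fixed-capacity FIFO buffer of (inputs, outputs) tuples of tensors."""

    def __init__(self, capacity):
        # Oldest experience is evicted first once capacity is reached.
        self.buffer = deque(maxlen=capacity)

    def push(self, inputs, outputs):
        # inputs / outputs are tuples of per-sample tensors, e.g.
        # (last_obs, obs, delta_pose) and (gt_proj, gt_explored, gt_pose_err).
        self.buffer.append((inputs, outputs))

    def __len__(self):
        return len(self.buffer)

    def sample(self, batch_size):
        # Uniformly sample a batch and stack each field along dim 0.
        batch = random.sample(list(self.buffer), batch_size)
        inputs = tuple(torch.stack([b[0][i] for b in batch])
                       for i in range(len(batch[0][0])))
        outputs = tuple(torch.stack([b[1][i] for b in batch])
                        for i in range(len(batch[0][1])))
        return inputs, outputs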
def train(self) -> None: r"""Main method for DD-PPO SLAM. Returns: None """ ##################################################################### ## init distrib and configuration ##################################################################### self.local_rank, tcp_store = init_distrib_slurm( self.config.RL.DDPPO.distrib_backend ) # self.local_rank = 1 add_signal_handlers() # Stores the number of workers that have finished their rollout num_rollouts_done_store = distrib.PrefixStore( "rollout_tracker", tcp_store ) num_rollouts_done_store.set("num_done", "0") self.world_rank = distrib.get_rank() # server number self.world_size = distrib.get_world_size() self.config.defrost() self.config.TORCH_GPU_ID = self.local_rank # gpu number in one server self.config.SIMULATOR_GPU_ID = self.local_rank print("********************* TORCH_GPU_ID: ", self.config.TORCH_GPU_ID) print("********************* SIMULATOR_GPU_ID: ", self.config.SIMULATOR_GPU_ID) # Multiply by the number of simulators to make sure they also get unique seeds self.config.TASK_CONFIG.SEED += ( self.world_rank * self.config.NUM_PROCESSES ) self.config.freeze() random.seed(self.config.TASK_CONFIG.SEED) np.random.seed(self.config.TASK_CONFIG.SEED) torch.manual_seed(self.config.TASK_CONFIG.SEED) if torch.cuda.is_available(): self.device = torch.device("cuda", self.local_rank) torch.cuda.set_device(self.device) else: self.device = torch.device("cpu") ##################################################################### ## build distrib NavSLAMRLEnv environment ##################################################################### print("#############################################################") print("## build distrib NavSLAMRLEnv environment") print("#############################################################") self.envs = construct_envs( self.config, get_env_class(self.config.ENV_NAME) ) observations = self.envs.reset() print("*************************** observations len:", len(observations)) # semantic process for i in range(len(observations)): observations[i]["semantic"] = observations[i]["semantic"].astype(np.int32) se = list(set(observations[i]["semantic"].ravel())) print(se) # print("*************************** observations type:", observations) # print("*************************** observations type:", observations[0]["map_sum"].shape) # 480*480*23 # print("*************************** observations curr_pose:", observations[0]["curr_pose"]) # [] batch = batch_obs(observations, device=self.device) print("*************************** batch len:", len(batch)) # print("*************************** batch:", batch) # print("************************************* current_episodes:", (self.envs.current_episodes())) ##################################################################### ## init actor_critic agent ##################################################################### print("#############################################################") print("## init actor_critic agent") print("#############################################################") self.map_w = observations[0]["map_sum"].shape[0] self.map_h = observations[0]["map_sum"].shape[1] # print("map_: ", observations[0]["curr_pose"].shape) ppo_cfg = self.config.RL.PPO if ( not os.path.isdir(self.config.CHECKPOINT_FOLDER) and self.world_rank == 0 ): os.makedirs(self.config.CHECKPOINT_FOLDER) self._setup_actor_critic_agent(observations, ppo_cfg) self.agent.init_distributed(find_unused_params=True) if self.world_rank == 0: logger.info( "agent number of trainable parameters: 
{}".format( sum( param.numel() for param in self.agent.parameters() if param.requires_grad ) ) ) ##################################################################### ## init Global Rollout Storage ##################################################################### print("#############################################################") print("## init Global Rollout Storage") print("#############################################################") self.num_each_global_step = self.config.RL.SLAMDDPPO.num_each_global_step rollouts = GlobalRolloutStorage( ppo_cfg.num_steps, self.envs.num_envs, self.obs_space, self.g_action_space, ) rollouts.to(self.device) print('rollouts type:', type(rollouts)) print('--------------------------') # for k in rollouts.keys(): # print("rollouts: {0}".format(rollouts.observations)) for sensor in rollouts.observations: rollouts.observations[sensor][0].copy_(batch[sensor]) with torch.no_grad(): step_observation = { k: v[rollouts.step] for k, v in rollouts.observations.items() } _, actions, _, = self.actor_critic.act( step_observation, rollouts.prev_g_actions[0], rollouts.masks[0], ) self.global_goals = [[int(action[0].item() * self.map_w), int(action[1].item() * self.map_h)] for action in actions] # batch and observations may contain shared PyTorch CUDA # tensors. We must explicitly clear them here otherwise # they will be kept in memory for the entire duration of training! batch = None observations = None current_episode_reward = torch.zeros( self.envs.num_envs, 1, device=self.device ) running_episode_stats = dict( count=torch.zeros(self.envs.num_envs, 1, device=self.device), reward=torch.zeros(self.envs.num_envs, 1, device=self.device), ) window_episode_stats = defaultdict( lambda: deque(maxlen=ppo_cfg.reward_window_size) ) print("*************************** current_episode_reward:", current_episode_reward) print("*************************** running_episode_stats:", running_episode_stats) # print("*************************** window_episode_stats:", window_episode_stats) t_start = time.time() env_time = 0 pth_time = 0 count_steps = 0 count_checkpoints = 0 start_update = 0 prev_time = 0 lr_scheduler = LambdaLR( optimizer=self.agent.optimizer, lr_lambda=lambda x: linear_decay(x, self.config.NUM_UPDATES), ) # interrupted_state = load_interrupted_state("/home/cirlab1/userdir/ybg/projects/habitat-api/data/interrup.pth") interrupted_state = load_interrupted_state() if interrupted_state is not None: self.agent.load_state_dict(interrupted_state["state_dict"]) self.agent.optimizer.load_state_dict( interrupted_state["optim_state"] ) lr_scheduler.load_state_dict(interrupted_state["lr_sched_state"]) requeue_stats = interrupted_state["requeue_stats"] env_time = requeue_stats["env_time"] pth_time = requeue_stats["pth_time"] count_steps = requeue_stats["count_steps"] count_checkpoints = requeue_stats["count_checkpoints"] start_update = requeue_stats["start_update"] prev_time = requeue_stats["prev_time"] deif = {} with ( TensorboardWriter( self.config.TENSORBOARD_DIR, flush_secs=self.flush_secs ) if self.world_rank == 0 else contextlib.suppress() ) as writer: for update in range(start_update, self.config.NUM_UPDATES): if ppo_cfg.use_linear_lr_decay: lr_scheduler.step() if ppo_cfg.use_linear_clip_decay: self.agent.clip_param = ppo_cfg.clip_param * linear_decay( update, self.config.NUM_UPDATES ) # print("************************************* current_episodes:", type(self.envs.count_episodes())) # print(EXIT.is_set()) if EXIT.is_set(): self.envs.close() if REQUEUE.is_set() and 
self.world_rank == 0: requeue_stats = dict( env_time=env_time, pth_time=pth_time, count_steps=count_steps, count_checkpoints=count_checkpoints, start_update=update, prev_time=(time.time() - t_start) + prev_time, ) save_interrupted_state( dict( state_dict=self.agent.state_dict(), optim_state=self.agent.optimizer.state_dict(), lr_sched_state=lr_scheduler.state_dict(), config=self.config, requeue_stats=requeue_stats, ), "/home/cirlab1/userdir/ybg/projects/habitat-api/data/interrup.pth" ) print("********************EXIT*********************") requeue_job() return count_steps_delta = 0 self.agent.eval() for step in range(ppo_cfg.num_steps): ( delta_pth_time, delta_env_time, delta_steps, ) = self._collect_global_rollout_step( rollouts, current_episode_reward, running_episode_stats ) pth_time += delta_pth_time env_time += delta_env_time count_steps_delta += delta_steps # print("************************************* current_episodes:") for i in range(len(self.envs.current_episodes())): # print(" ", self.envs.current_episodes()[i].episode_id," ", self.envs.current_episodes()[i].scene_id," ", self.envs.current_episodes()[i].object_category) if self.envs.current_episodes()[i].scene_id not in deif: deif[self.envs.current_episodes()[i].scene_id]=[int(self.envs.current_episodes()[i].episode_id)] else: deif[self.envs.current_episodes()[i].scene_id].append(int(self.envs.current_episodes()[i].episode_id)) # This is where the preemption of workers happens. If a # worker detects it will be a straggler, it preempts itself! if ( step >= ppo_cfg.num_steps * self.SHORT_ROLLOUT_THRESHOLD ) and int(num_rollouts_done_store.get("num_done")) > ( self.config.RL.DDPPO.sync_frac * self.world_size ): break num_rollouts_done_store.add("num_done", 1) self.agent.train() if self._static_encoder: self._encoder.eval() ( delta_pth_time, value_loss, action_loss, dist_entropy, ) = self._update_agent(ppo_cfg, rollouts) pth_time += delta_pth_time stats_ordering = list(sorted(running_episode_stats.keys())) stats = torch.stack( [running_episode_stats[k] for k in stats_ordering], 0 ) distrib.all_reduce(stats) for i, k in enumerate(stats_ordering): window_episode_stats[k].append(stats[i].clone()) stats = torch.tensor( [value_loss, action_loss, count_steps_delta], device=self.device, ) distrib.all_reduce(stats) count_steps += stats[2].item() if self.world_rank == 0: num_rollouts_done_store.set("num_done", "0") losses = [ stats[0].item() / self.world_size, stats[1].item() / self.world_size, ] deltas = { k: ( (v[-1] - v[0]).sum().item() if len(v) > 1 else v[0].sum().item() ) for k, v in window_episode_stats.items() } deltas["count"] = max(deltas["count"], 1.0) writer.add_scalar( "reward", deltas["reward"] / deltas["count"], count_steps, ) # Check to see if there are any metrics # that haven't been logged yet metrics = { k: v / deltas["count"] for k, v in deltas.items() if k not in {"reward", "count"} } if len(metrics) > 0: writer.add_scalars("metrics", metrics, count_steps) writer.add_scalars( "losses", {k: l for l, k in zip(losses, ["value", "policy"])}, count_steps, ) # log stats if update > 0 and update % self.config.LOG_INTERVAL == 0: logger.info( "update: {}\tfps: {:.3f}\t".format( update, count_steps / ((time.time() - t_start) + prev_time), ) ) logger.info( "update: {}\tenv-time: {:.3f}s\tpth-time: {:.3f}s\t" "frames: {}".format( update, env_time, pth_time, count_steps ) ) logger.info( "Average window size: {} {}".format( len(window_episode_stats["count"]), " ".join( "{}: {:.3f}".format(k, v / deltas["count"]) for k, v in 
deltas.items() if k != "count" ), ) ) # for k in deif: # deif[k] = list(set(deif[k])) # deif[k].sort() # print("deif: k", k, " : ", deif[k]) # checkpoint model if update % self.config.CHECKPOINT_INTERVAL == 0: self.save_checkpoint( f"ckpt.{count_checkpoints}.pth", dict(step=count_steps), ) print('=' * 20 + 'Save Model' + '=' * 20) logger.info( "Save Model : {}".format(count_checkpoints) ) count_checkpoints += 1 self.envs.close()
def _eval_checkpoint( self, checkpoint_path: str, writer: TensorboardWriter, checkpoint_index: int = 0, ) -> None: r"""Evaluates a single checkpoint. Args: checkpoint_path: path of checkpoint writer: tensorboard writer object for logging to tensorboard checkpoint_index: index of cur checkpoint for logging Returns: None """ # Map location CPU is almost always better than mapping to a CUDA device. ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu") if self.config.EVAL.USE_CKPT_CONFIG: config = self._setup_eval_config(ckpt_dict["config"]) else: config = self.config.clone() ppo_cfg = config.RL.PPO ans_cfg = config.RL.ANS config.defrost() config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT config.freeze() self.envs = construct_envs(config, get_env_class(config.ENV_NAME)) self._setup_actor_critic_agent(ppo_cfg, ans_cfg) # Convert the state_dict of mapper_agent to mapper mapper_dict = { k.replace("mapper.", ""): v for k, v in ckpt_dict["mapper_state_dict"].items() } # Convert the state_dict of local_agent to just the local_policy. local_dict = { k.replace("actor_critic.", ""): v for k, v in ckpt_dict["local_state_dict"].items() } # strict=False is set to handle the case where the # pose_estimator is not required. self.mapper.load_state_dict(mapper_dict, strict=False) self.local_actor_critic.load_state_dict(local_dict) # Set models to evaluation self.mapper.eval() self.local_actor_critic.eval() number_of_eval_episodes = self.config.TEST_EPISODE_COUNT if number_of_eval_episodes == -1: number_of_eval_episodes = sum(self.envs.number_of_episodes) else: total_num_eps = sum(self.envs.number_of_episodes) if total_num_eps < number_of_eval_episodes: logger.warn( f"Config specified {number_of_eval_episodes} eval episodes" f", dataset only has {total_num_eps}."
) logger.warn(f"Evaluating with {total_num_eps} instead.") number_of_eval_episodes = total_num_eps M = ans_cfg.overall_map_size V = ans_cfg.MAPPER.map_size s = ans_cfg.MAPPER.map_scale imH, imW = ans_cfg.image_scale_hw assert ( self.envs.num_envs == 1 ), "Number of environments needs to be 1 for evaluation" # Define metric accumulators # Navigation metrics navigation_metrics = { "success_rate": Metric(), "spl": Metric(), "distance_to_goal": Metric(), "time": Metric(), "softspl": Metric(), } per_difficulty_navigation_metrics = { "easy": { "success_rate": Metric(), "spl": Metric(), "distance_to_goal": Metric(), "time": Metric(), "softspl": Metric(), }, "medium": { "success_rate": Metric(), "spl": Metric(), "distance_to_goal": Metric(), "time": Metric(), "softspl": Metric(), }, "hard": { "success_rate": Metric(), "spl": Metric(), "distance_to_goal": Metric(), "time": Metric(), "softspl": Metric(), }, } times_per_episode = deque() times_per_step = deque() # Define a simple function to return episode difficulty based on # the geodesic distance def classify_difficulty(gd): if gd < 5.0: return "easy" elif gd < 10.0: return "medium" else: return "hard" eval_start_time = time.time() # Reset environments only for the very first batch observations = self.envs.reset() for ep in range(number_of_eval_episodes): # ============================== Reset agent ============================== # Reset agent states state_estimates = { "pose_estimates": torch.zeros(self.envs.num_envs, 3).to(self.device), "map_states": torch.zeros(self.envs.num_envs, 2, M, M).to(self.device), "recurrent_hidden_states": torch.zeros( 1, self.envs.num_envs, ans_cfg.LOCAL_POLICY.hidden_size ).to(self.device), } # Reset ANS states self.ans_net.reset() self.not_done_masks = torch.zeros(self.envs.num_envs, 1, device=self.device) self.prev_actions = torch.zeros(self.envs.num_envs, 1, device=self.device) self.prev_batch = None self.ep_time = torch.zeros(self.envs.num_envs, 1, device=self.device) # =========================== Episode loop ================================ ep_start_time = time.time() current_episodes = self.envs.current_episodes() for ep_step in range(self.config.T_MAX): step_start_time = time.time() # ============================ Action step ============================ batch = self._prepare_batch(observations) if self.prev_batch is None: self.prev_batch = copy.deepcopy(batch) prev_pose_estimates = state_estimates["pose_estimates"] with torch.no_grad(): ( _, _, mapper_outputs, local_policy_outputs, state_estimates, ) = self.ans_net.act( batch, self.prev_batch, state_estimates, self.ep_time, self.not_done_masks, deterministic=ans_cfg.LOCAL_POLICY.deterministic_flag, ) actions = local_policy_outputs["actions"] # Make masks not done till reset (end of episode) self.not_done_masks = torch.ones( self.envs.num_envs, 1, device=self.device ) self.prev_actions.copy_(actions) if ep_step == 0: state_estimates["pose_estimates"].copy_(prev_pose_estimates) self.ep_time += 1 # Update prev batch for k, v in batch.items(): self.prev_batch[k].copy_(v) # Remap actions from exploration to navigation agent. 
actions_rmp = self._remap_actions(actions) # =========================== Environment step ======================== outputs = self.envs.step([a[0].item() for a in actions_rmp]) observations, _, dones, infos = [list(x) for x in zip(*outputs)] times_per_step.append(time.time() - step_start_time) # ============================ Process metrics ======================== if dones[0]: times_per_episode.append(time.time() - ep_start_time) mins_per_episode = np.mean(times_per_episode).item() / 60.0 eta_completion = mins_per_episode * ( number_of_eval_episodes - ep - 1 ) secs_per_step = np.mean(times_per_step).item() for i in range(self.envs.num_envs): episode_id = int(current_episodes[i].episode_id) curr_metrics = { "spl": infos[i]["spl"], "softspl": infos[i]["softspl"], "success_rate": infos[i]["success"], "time": ep_step + 1, "distance_to_goal": infos[i]["distance_to_goal"], } # Estimate difficulty of episode episode_difficulty = classify_difficulty( current_episodes[i].info["geodesic_distance"] ) for k, v in curr_metrics.items(): navigation_metrics[k].update(v, 1.0) per_difficulty_navigation_metrics[episode_difficulty][ k ].update(v, 1.0) logger.info(f"====> {ep}/{number_of_eval_episodes} done") for k, v in curr_metrics.items(): logger.info(f"{k:25s} : {v:10.3f}") logger.info("{:25s} : {:10d}".format("episode_id", episode_id)) logger.info(f"Time per episode: {mins_per_episode:.3f} mins") logger.info(f"Time per step: {secs_per_step:.3f} secs") logger.info(f"ETA: {eta_completion:.3f} mins") # For navigation, terminate episode loop when dones is called break # done-for if checkpoint_index == 0: try: eval_ckpt_idx = self.config.EVAL_CKPT_PATH_DIR.split("/")[-1].split( "." )[1] logger.add_filehandler( f"{self.config.TENSORBOARD_DIR}/navigation_results_ckpt_final_{eval_ckpt_idx}.txt" ) except: logger.add_filehandler( f"{self.config.TENSORBOARD_DIR}/navigation_results_ckpt_{checkpoint_index}.txt" ) else: logger.add_filehandler( f"{self.config.TENSORBOARD_DIR}/navigation_results_ckpt_{checkpoint_index}.txt" ) logger.info( f"======= Evaluating over {number_of_eval_episodes} episodes =============" ) logger.info(f"=======> Navigation metrics") for k, v in navigation_metrics.items(): logger.info(f"{k}: {v.get_metric():.3f}") writer.add_scalar(f"navigation/{k}", v.get_metric(), checkpoint_index) for diff, diff_metrics in per_difficulty_navigation_metrics.items(): logger.info(f"=============== {diff:^10s} metrics ==============") for k, v in diff_metrics.items(): logger.info(f"{k}: {v.get_metric():.3f}") writer.add_scalar( f"{diff}_navigation/{k}", v.get_metric(), checkpoint_index ) total_eval_time = (time.time() - eval_start_time) / 60.0 logger.info(f"Total evaluation time: {total_eval_time:.3f} mins") self.envs.close()
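# The Metric accumulators used in the evaluation above are only exercised
# through update(value, weight) and get_metric(). A minimal weighted running
# mean with that interface is sketched below; the real Metric class may track
# more state, so treat this as an illustrative assumption.
class MetricSketch:
    def __init__(self):
        self.total = 0.0
        self.weight = 0.0

    def update(self, value, weight=1.0):
        self.total += value * weight
        self.weight += weight

    def get_metric(self):
        # Weighted mean of everything accumulated so far (0.0 if empty).
        return self.total / self.weight if self.weight > 0 else 0.0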
def train(self) -> None: r"""Main method for training VQA (Answering) model of EQA. Returns: None """ config = self.config # env = habitat.Env(config=config.TASK_CONFIG) vqa_dataset = ( EQADataset( config, input_type="vqa", num_frames=config.IL.VQA.num_frames, ) .shuffle(1000) .to_tuple( "episode_id", "question", "answer", *["{0:0=3d}.jpg".format(x) for x in range(0, 5)], ) .map(img_bytes_2_np_array) ) train_loader = DataLoader( vqa_dataset, batch_size=config.IL.VQA.batch_size ) logger.info("train_loader has {} samples".format(len(vqa_dataset))) q_vocab_dict, ans_vocab_dict = vqa_dataset.get_vocab_dicts() model_kwargs = { "q_vocab": q_vocab_dict.word2idx_dict, "ans_vocab": ans_vocab_dict.word2idx_dict, "eqa_cnn_pretrain_ckpt_path": config.EQA_CNN_PRETRAIN_CKPT_PATH, "freeze_encoder": config.IL.VQA.freeze_encoder, } model = VqaLstmCnnAttentionModel(**model_kwargs) lossFn = torch.nn.CrossEntropyLoss() optim = torch.optim.Adam( filter(lambda p: p.requires_grad, model.parameters()), lr=float(config.IL.VQA.lr), ) metrics = VqaMetric( info={"split": "train"}, metric_names=[ "loss", "accuracy", "mean_rank", "mean_reciprocal_rank", ], log_json=os.path.join(config.OUTPUT_LOG_DIR, "train.json"), ) t, epoch = 0, 1 avg_loss = 0.0 avg_accuracy = 0.0 avg_mean_rank = 0.0 avg_mean_reciprocal_rank = 0.0 logger.info(model) model.train().to(self.device) if config.IL.VQA.freeze_encoder: model.cnn.eval() with TensorboardWriter( config.TENSORBOARD_DIR, flush_secs=self.flush_secs ) as writer: while epoch <= config.IL.VQA.max_epochs: start_time = time.time() for batch in train_loader: t += 1 _, questions, answers, frame_queue = batch optim.zero_grad() questions = questions.to(self.device) answers = answers.to(self.device) frame_queue = frame_queue.to(self.device) scores, _ = model(frame_queue, questions) loss = lossFn(scores, answers) # update metrics accuracy, ranks = metrics.compute_ranks( scores.data.cpu(), answers ) metrics.update([loss.item(), accuracy, ranks, 1.0 / ranks]) loss.backward() optim.step() ( metrics_loss, accuracy, mean_rank, mean_reciprocal_rank, ) = metrics.get_stats() avg_loss += metrics_loss avg_accuracy += accuracy avg_mean_rank += mean_rank avg_mean_reciprocal_rank += mean_reciprocal_rank if t % config.LOG_INTERVAL == 0: logger.info("Epoch: {}".format(epoch)) logger.info(metrics.get_stat_string()) writer.add_scalar("loss", metrics_loss, t) writer.add_scalar("accuracy", accuracy, t) writer.add_scalar("mean_rank", mean_rank, t) writer.add_scalar( "mean_reciprocal_rank", mean_reciprocal_rank, t ) metrics.dump_log() # Dataloader length for IterableDataset doesn't take into # account batch size for Pytorch v < 1.6.0 num_batches = math.ceil( len(vqa_dataset) / config.IL.VQA.batch_size ) avg_loss /= num_batches avg_accuracy /= num_batches avg_mean_rank /= num_batches avg_mean_reciprocal_rank /= num_batches end_time = time.time() time_taken = "{:.1f}".format((end_time - start_time) / 60) logger.info( "Epoch {} completed. Time taken: {} minutes.".format( epoch, time_taken ) ) logger.info("Average loss: {:.2f}".format(avg_loss)) logger.info("Average accuracy: {:.2f}".format(avg_accuracy)) logger.info("Average mean rank: {:.2f}".format(avg_mean_rank)) logger.info( "Average mean reciprocal rank: {:.2f}".format( avg_mean_reciprocal_rank ) ) print("-----------------------------------------") if epoch % config.CHECKPOINT_INTERVAL == 0: self.save_checkpoint( model.state_dict(), "epoch_{}.ckpt".format(epoch) ) epoch += 1
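# The VQA loop above calls metrics.compute_ranks(scores, answers) and then
# feeds ranks and 1/ranks into VqaMetric. One way such ranks can be derived
# from the answer logits is sketched below; the exact reduction performed
# inside VqaMetric is an assumption here.
import torch


def compute_ranks_sketch(scores: torch.Tensor, answers: torch.Tensor):
    """scores: (batch, num_answers) logits; answers: (batch,) ground-truth ids."""
    # Rank of the correct answer = 1 + number of answers scored strictly higher.
    gt_scores = scores.gather(1, answers.view(-1, 1))        # (batch, 1)
    ranks = (scores > gt_scores).sum(dim=1).float() + 1.0    # (batch,)
    accuracy = (ranks == 1.0).float().mean().item()          # top-1 accuracy
    return accuracy, ranks
# Mean rank is then ranks.mean() and mean reciprocal rank is (1.0 / ranks).mean().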
def train(self) -> None: r"""Main method for training DAgger. Returns: None """ os.makedirs(self.lmdb_features_dir, exist_ok=True) os.makedirs(self.config.CHECKPOINT_FOLDER, exist_ok=True) if self.config.DAGGER.PRELOAD_LMDB_FEATURES: try: lmdb.open(self.lmdb_features_dir, readonly=True) except lmdb.Error as err: logger.error( "Cannot open database for teacher forcing preload.") raise err else: with lmdb.open(self.lmdb_features_dir, map_size=int(self.config.DAGGER.LMDB_MAP_SIZE) ) as lmdb_env, lmdb_env.begin(write=True) as txn: txn.drop(lmdb_env.open_db()) split = self.config.TASK_CONFIG.DATASET.SPLIT self.config.defrost() self.config.TASK_CONFIG.TASK.NDTW.SPLIT = split self.config.TASK_CONFIG.TASK.SDTW.SPLIT = split # if doing teacher forcing, don't switch the scene until it is complete if self.config.DAGGER.P == 1.0: self.config.TASK_CONFIG.ENVIRONMENT.ITERATOR_OPTIONS.MAX_SCENE_REPEAT_STEPS = ( -1) self.config.freeze() if self.config.DAGGER.PRELOAD_LMDB_FEATURES: # when preloadeding features, its quicker to just load one env as we just # need the observation space from it. single_proc_config = self.config.clone() single_proc_config.defrost() single_proc_config.NUM_PROCESSES = 1 single_proc_config.freeze() self.envs = construct_envs(single_proc_config, get_env_class(self.config.ENV_NAME)) else: self.envs = construct_envs(self.config, get_env_class(self.config.ENV_NAME)) self._setup_actor_critic_agent( self.config.MODEL, self.config.DAGGER.LOAD_FROM_CKPT, self.config.DAGGER.CKPT_TO_LOAD, ) logger.info("agent number of parameters: {}".format( sum(param.numel() for param in self.actor_critic.parameters()))) logger.info("agent number of trainable parameters: {}".format( sum(p.numel() for p in self.actor_critic.parameters() if p.requires_grad))) if self.config.DAGGER.PRELOAD_LMDB_FEATURES: self.envs.close() del self.envs self.envs = None with TensorboardWriter(self.config.TENSORBOARD_DIR, flush_secs=self.flush_secs, purge_step=0) as writer: for dagger_it in range(self.config.DAGGER.ITERATIONS): step_id = 0 if not self.config.DAGGER.PRELOAD_LMDB_FEATURES: self._update_dataset(dagger_it + ( 1 if self.config.DAGGER.LOAD_FROM_CKPT else 0)) if torch.cuda.is_available(): with torch.cuda.device(self.device): torch.cuda.empty_cache() gc.collect() dataset = IWTrajectoryDataset( self.lmdb_features_dir, self.config.DAGGER.USE_IW, inflection_weight_coef=self.config.MODEL. inflection_weight_coef, lmdb_map_size=self.config.DAGGER.LMDB_MAP_SIZE, batch_size=self.config.DAGGER.BATCH_SIZE, ) AuxLosses.activate() for epoch in tqdm.trange(self.config.DAGGER.EPOCHS): diter = torch.utils.data.DataLoader( dataset, batch_size=self.config.DAGGER.BATCH_SIZE, shuffle=False, collate_fn=collate_fn, pin_memory=False, drop_last=True, # drop last batch if smaller num_workers=0, ) for batch in tqdm.tqdm(diter, total=dataset.length // dataset.batch_size, leave=False): ( observations_batch, prev_actions_batch, not_done_masks, corrected_actions_batch, weights_batch, ) = batch observations_batch = { k: v.to(device=self.device, non_blocking=True) for k, v in observations_batch.items() } try: loss, action_loss, aux_loss = self._update_agent( observations_batch, prev_actions_batch.to(device=self.device, non_blocking=True), not_done_masks.to(device=self.device, non_blocking=True), corrected_actions_batch.to(device=self.device, non_blocking=True), weights_batch.to(device=self.device, non_blocking=True), ) except: logger.info( "ERROR: failed to update agent. Updating agent with batch size of 1." 
) loss, action_loss, aux_loss = 0, 0, 0 prev_actions_batch = prev_actions_batch.cpu() not_done_masks = not_done_masks.cpu() corrected_actions_batch = corrected_actions_batch.cpu( ) weights_batch = weights_batch.cpu() observations_batch = { k: v.cpu() for k, v in observations_batch.items() } for i in range(not_done_masks.size(0)): output = self._update_agent( { k: v[i].to(device=self.device, non_blocking=True) for k, v in observations_batch.items() }, prev_actions_batch[i].to( device=self.device, non_blocking=True), not_done_masks[i].to(device=self.device, non_blocking=True), corrected_actions_batch[i].to( device=self.device, non_blocking=True), weights_batch[i].to(device=self.device, non_blocking=True), ) loss += output[0] action_loss += output[1] aux_loss += output[2] logger.info(f"train_loss: {loss}") logger.info(f"train_action_loss: {action_loss}") logger.info(f"train_aux_loss: {aux_loss}") logger.info(f"Batches processed: {step_id}.") logger.info( f"On DAgger iter {dagger_it}, Epoch {epoch}.") writer.add_scalar(f"train_loss_iter_{dagger_it}", loss, step_id) writer.add_scalar( f"train_action_loss_iter_{dagger_it}", action_loss, step_id) writer.add_scalar(f"train_aux_loss_iter_{dagger_it}", aux_loss, step_id) step_id += 1 self.save_checkpoint( f"ckpt.{dagger_it * self.config.DAGGER.EPOCHS + epoch}.pth" ) AuxLosses.deactivate()
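# IWTrajectoryDataset above applies inflection weighting (DAGGER.USE_IW) with
# MODEL.inflection_weight_coef. The usual convention, assumed in this sketch,
# is to up-weight timesteps where the oracle action changes ("inflections"),
# since those steps are rare but decisive for imitation.
import torch


def inflection_weights_sketch(actions: torch.Tensor, coef: float) -> torch.Tensor:
    """actions: (T,) oracle action ids; returns per-timestep loss weights (T,)."""
    weights = torch.ones_like(actions, dtype=torch.float)
    weights[0] = coef  # treat the first step as an inflection
    weights[1:][actions[1:] != actions[:-1]] = coef
    return weights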
def train(self) -> None: if TIME_DEBUG: s = time.time() # self.config.defrost() # self.config.TASK_CONFIG.ENVIRONMENT.ITERATOR_OPTIONS.MAX_SCENE_REPEAT_EPISODES = 10 # self.config.freeze() if torch.cuda.device_count() <= 1: self.config.defrost() self.config.TORCH_GPU_ID = 0 self.config.SIMULATOR_GPU_ID = 0 self.config.freeze() self.envs = construct_envs(self.config, eval(self.config.ENV_NAME)) if ADD_IL: self.il_envs = construct_envs(self.config, eval(self.config.ENV_NAME), no_val=True) self.collect_mode = 'RL' if TIME_DEBUG: s = log_time(s, 'construct envs') ppo_cfg = self.config.RL.PPO self.device = (torch.device("cuda", self.config.TORCH_GPU_ID) if torch.cuda.is_available() else torch.device("cpu")) if not os.path.isdir(self.config.CHECKPOINT_FOLDER): os.makedirs(self.config.CHECKPOINT_FOLDER) self._setup_actor_critic_agent(ppo_cfg) # if 'SMT' in self.config.POLICY: # sd = torch.load('visual_embedding18.pth') # self.actor_critic.net.visual_encoder.load_state_dict(sd['visual_encoder']) # #self.actor_critic.net.prev_action_embedding.load_state_dict(sd['prev_action_embedding']) # self.actor_critic.net.visual_encoder.cuda() # #self.actor_critic.net.prev_action_embedding.cuda() # self.envs.setup_embedding_network(self.actor_critic.net.visual_encoder,None) logger.info("agent number of parameters: {}".format( sum(param.numel() for param in self.agent.parameters()))) num_train_processes, num_val_processes = self.config.NUM_PROCESSES, self.config.NUM_VAL_PROCESSES total_processes = num_train_processes + num_val_processes OBS_LIST = self.config.OBS_TO_SAVE self.num_processes = num_train_processes rollouts = RolloutStorage(ppo_cfg.num_steps, num_train_processes, self.envs.observation_spaces[0], self.envs.action_spaces[0], ppo_cfg.hidden_size, self.actor_critic.net.num_recurrent_layers, OBS_LIST=OBS_LIST) rollouts.to(self.device) batch = self.envs.reset() for sensor in rollouts.observations: try: rollouts.observations[sensor][0].copy_( batch[sensor][:num_train_processes]) except: print('i') self.last_observations = batch self.last_recurrent_hidden_states = torch.zeros( self.actor_critic.net.num_recurrent_layers, total_processes, ppo_cfg.hidden_size).to(self.device) self.last_prev_actions = torch.zeros( total_processes, rollouts.prev_actions.shape[-1]).to(self.device) self.last_masks = torch.zeros(total_processes, 1).to(self.device) # batch and observations may contain shared PyTorch CUDA # tensors. We must explicitly clear them here otherwise # they will be kept in memory for the entire duration of training! 
batch = None observations = None if ADD_IL: rollouts2 = RolloutStorage( ppo_cfg.num_steps, num_train_processes, self.il_envs.observation_spaces[0], self.il_envs.action_spaces[0], ppo_cfg.hidden_size, self.actor_critic.net.num_recurrent_layers, OBS_LIST=OBS_LIST) rollouts2.to(self.device) batch2 = self.il_envs.reset() for sensor in rollouts2.observations: rollouts2.observations[sensor][0].copy_( batch2[sensor][:num_train_processes]) self.saved_last_obs = batch2 self.saved_last_recurrent_hidden_states = torch.zeros( self.actor_critic.net.num_recurrent_layers, total_processes, ppo_cfg.hidden_size).to(self.device) self.saved_last_prev_actions = torch.zeros( total_processes, rollouts2.prev_actions.shape[-1]).to(self.device) self.saved_last_masks = torch.zeros(total_processes, 1).to(self.device) batch2 = None else: rollouts2 = None current_episode_reward = torch.zeros(self.envs.num_envs, 1) running_episode_stats = dict( count=torch.zeros(self.envs.num_envs, 1), reward=torch.zeros(self.envs.num_envs, 1), ) window_episode_stats = defaultdict( lambda: deque(maxlen=ppo_cfg.reward_window_size)) t_start = time.time() env_time = 0 pth_time = 0 count_steps = 0 if not hasattr(self, 'resume_steps') else self.resume_steps start_steps = 0 if not hasattr(self, 'resume_steps') else self.resume_steps count_checkpoints = 0 lr_scheduler = LambdaLR( optimizer=self.agent.optimizer, lr_lambda=lambda x: linear_decay(x, self.config.NUM_UPDATES), ) if TIME_DEBUG: s = log_time(s, 'setup all') with TensorboardWriter(self.config.TENSORBOARD_DIR, flush_secs=self.flush_secs) as writer: for update in range(self.config.NUM_UPDATES): if ppo_cfg.use_linear_lr_decay: lr_scheduler.step() if ppo_cfg.use_linear_clip_decay: self.agent.clip_param = ppo_cfg.clip_param * linear_decay( update, self.config.NUM_UPDATES) if TIME_DEBUG: s = log_time(s, 'collect rollout start') modes = ['RL'] if ADD_IL: modes += ['IL'] for collect_mode in modes: self.collect_mode = collect_mode use_rollouts = rollouts if self.collect_mode == 'RL' else rollouts2 if ADD_IL: self.exchange_lasts() for step in range(ppo_cfg.num_steps): ( delta_pth_time, delta_env_time, delta_steps, ) = self._collect_rollout_step(use_rollouts, current_episode_reward, running_episode_stats) pth_time += delta_pth_time env_time += delta_env_time count_steps += delta_steps #print(delta_env_time, delta_pth_time) if TIME_DEBUG: s = log_time(s, 'collect rollout done') (delta_pth_time, value_loss, action_loss, dist_entropy, otherlosses) = self._update_agent(ppo_cfg, rollouts, rollouts2) #print(delta_pth_time) pth_time += delta_pth_time rollouts.after_update() if ADD_IL: rollouts2.after_update() if TIME_DEBUG: s = log_time(s, 'update agent') for k, v in running_episode_stats.items(): window_episode_stats[k].append(v.clone()) deltas = { k: ((v[-1][:self.num_processes] - v[0][:self.num_processes]).sum().item() if len(v) > 1 else v[0][:self.num_processes].sum().item()) for k, v in window_episode_stats.items() } deltas["count"] = max(deltas["count"], 1.0) losses = [value_loss, action_loss, dist_entropy, otherlosses] self.write_tb('train', writer, deltas, count_steps, losses) eval_deltas = { k: ((v[-1][self.num_processes:] - v[0][self.num_processes:]).sum().item() if len(v) > 1 else v[0][self.num_processes:].sum().item()) for k, v in window_episode_stats.items() } eval_deltas["count"] = max(eval_deltas["count"], 1.0) self.write_tb('val', writer, eval_deltas, count_steps) # log stats if update > 0 and update % self.config.LOG_INTERVAL == 0: logger.info("update: {}\tfps: {:.3f}\t".format( 
update, (count_steps - start_steps) / (time.time() - t_start))) logger.info( "update: {}\tenv-time: {:.3f}s\tpth-time: {:.3f}s\t" "frames: {}".format(update, env_time, pth_time, count_steps)) logger.info("Average window size: {} {}".format( len(window_episode_stats["count"]), " ".join("{}: {:.3f}".format(k, v / deltas["count"]) for k, v in deltas.items() if k != "count"), )) logger.info("validation metrics: {}".format( " ".join("{}: {:.3f}".format(k, v / eval_deltas["count"]) for k, v in eval_deltas.items() if k != "count"), )) # checkpoint model if update % self.config.CHECKPOINT_INTERVAL == 0: self.save_checkpoint(f"ckpt.{count_checkpoints}.pth", dict(step=count_steps)) count_checkpoints += 1 self.envs.close()
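# The train/val logging above turns windowed cumulative stats into per-episode
# averages: each entry in window_episode_stats is a snapshot of cumulative
# reward / episode count per environment, so newest-minus-oldest gives the
# totals accumulated over the window. A tiny worked example (values invented):
import torch
from collections import deque

window = {"reward": deque(maxlen=50), "count": deque(maxlen=50)}
# Two snapshots of cumulative stats for two environments.
window["reward"].extend([torch.tensor([[10.0], [4.0]]), torch.tensor([[16.0], [7.0]])])
window["count"].extend([torch.tensor([[2.0], [1.0]]), torch.tensor([[4.0], [2.0]])])

deltas = {
    k: ((v[-1] - v[0]).sum().item() if len(v) > 1 else v[0].sum().item())
    for k, v in window.items()
}
deltas["count"] = max(deltas["count"], 1.0)
print(deltas["reward"] / deltas["count"])  # 9 reward over 3 finished episodes -> 3.0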
def _eval_checkpoint(self, checkpoint_path: str, writer: TensorboardWriter, checkpoint_index: int = 0) -> None: r"""Evaluates a single checkpoint. Assumes episode IDs are unique. Args: checkpoint_path: path of checkpoint writer: tensorboard writer object for logging to tensorboard checkpoint_index: index of cur checkpoint for logging Returns: None """ logger.info(f"checkpoint_path: {checkpoint_path}") if self.config.EVAL.USE_CKPT_CONFIG: config = self._setup_eval_config( self.load_checkpoint(checkpoint_path, map_location="cpu")["config"]) else: config = self.config.clone() config.defrost() config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT config.TASK_CONFIG.TASK.NDTW.SPLIT = config.EVAL.SPLIT config.TASK_CONFIG.TASK.SDTW.SPLIT = config.EVAL.SPLIT config.TASK_CONFIG.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False config.TASK_CONFIG.ENVIRONMENT.ITERATOR_OPTIONS.MAX_SCENE_REPEAT_STEPS = -1 if len(config.VIDEO_OPTION) > 0: config.defrost() config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP") config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS") config.freeze() # setup agent self.envs = construct_envs_auto_reset_false( config, get_env_class(config.ENV_NAME)) self.device = (torch.device("cuda", config.TORCH_GPU_ID) if torch.cuda.is_available() else torch.device("cpu")) self._setup_actor_critic_agent(config.MODEL, True, checkpoint_path) observations = self.envs.reset() observations = transform_obs( observations, config.TASK_CONFIG.TASK.INSTRUCTION_SENSOR_UUID) batch = batch_obs(observations, self.device) eval_recurrent_hidden_states = torch.zeros( self.actor_critic.net.num_recurrent_layers, config.NUM_PROCESSES, config.MODEL.STATE_ENCODER.hidden_size, device=self.device, ) prev_actions = torch.zeros(config.NUM_PROCESSES, 1, device=self.device, dtype=torch.long) not_done_masks = torch.zeros(config.NUM_PROCESSES, 1, device=self.device) stats_episodes = {} # dict of dicts that stores stats per episode if len(config.VIDEO_OPTION) > 0: os.makedirs(config.VIDEO_DIR, exist_ok=True) rgb_frames = [[] for _ in range(config.NUM_PROCESSES)] self.actor_critic.eval() while (self.envs.num_envs > 0 and len(stats_episodes) < config.EVAL.EPISODE_COUNT): current_episodes = self.envs.current_episodes() with torch.no_grad(): (_, actions, _, eval_recurrent_hidden_states) = self.actor_critic.act( batch, eval_recurrent_hidden_states, prev_actions, not_done_masks, deterministic=True, ) # actions = batch["vln_oracle_action_sensor"].long() prev_actions.copy_(actions) outputs = self.envs.step([a[0].item() for a in actions]) observations, _, dones, infos = [list(x) for x in zip(*outputs)] not_done_masks = torch.tensor( [[0.0] if done else [1.0] for done in dones], dtype=torch.float, device=self.device, ) # reset envs and observations if necessary for i in range(self.envs.num_envs): if len(config.VIDEO_OPTION) > 0: frame = observations_to_image(observations[i], infos[i]) frame = append_text_to_image( frame, current_episodes[i].instruction.instruction_text) rgb_frames[i].append(frame) if not dones[i]: continue stats_episodes[current_episodes[i].episode_id] = infos[i] observations[i] = self.envs.reset_at(i)[0] prev_actions[i] = torch.zeros(1, dtype=torch.long) if len(config.VIDEO_OPTION) > 0: generate_video( video_option=config.VIDEO_OPTION, video_dir=config.VIDEO_DIR, images=rgb_frames[i], episode_id=current_episodes[i].episode_id, checkpoint_idx=checkpoint_index, metrics={ "SPL": round( stats_episodes[current_episodes[i].episode_id] ["spl"], 6) }, tb_writer=writer, ) del stats_episodes[ 
current_episodes[i].episode_id]["top_down_map"] del stats_episodes[ current_episodes[i].episode_id]["collisions"] rgb_frames[i] = [] observations = transform_obs( observations, config.TASK_CONFIG.TASK.INSTRUCTION_SENSOR_UUID) batch = batch_obs(observations, self.device) envs_to_pause = [] next_episodes = self.envs.current_episodes() for i in range(self.envs.num_envs): if next_episodes[i].episode_id in stats_episodes: envs_to_pause.append(i) ( self.envs, eval_recurrent_hidden_states, not_done_masks, prev_actions, batch, ) = self._pause_envs( envs_to_pause, self.envs, eval_recurrent_hidden_states, not_done_masks, prev_actions, batch, ) self.envs.close() aggregated_stats = {} num_episodes = len(stats_episodes) for stat_key in next(iter(stats_episodes.values())).keys(): aggregated_stats[stat_key] = ( sum([v[stat_key] for v in stats_episodes.values()]) / num_episodes) split = config.TASK_CONFIG.DATASET.SPLIT with open(f"stats_ckpt_{checkpoint_index}_{split}.json", "w") as f: json.dump(aggregated_stats, f, indent=4) logger.info(f"Episodes evaluated: {num_episodes}") checkpoint_num = checkpoint_index + 1 for k, v in aggregated_stats.items(): logger.info(f"Average episode {k}: {v:.6f}") writer.add_scalar(f"eval_{split}_{k}", v, checkpoint_num)
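# _pause_envs() is called above (and again in the last _eval_checkpoint further
# down) but is not defined in this file. Its effect can be inferred from the
# inline pausing logic in the older _eval_checkpoint below: environments whose
# episodes are already in stats_episodes get paused, and every batched tensor
# is re-indexed to drop the paused rows. A sketch with that behaviour and this
# call's argument order (an assumption) is:
def _pause_envs_sketch(envs_to_pause, envs, hidden_states, not_done_masks,
                       prev_actions, batch):
    if len(envs_to_pause) > 0:
        state_index = list(range(envs.num_envs))
        # Pause from the highest index down so earlier indices stay valid.
        for idx in reversed(envs_to_pause):
            state_index.pop(idx)
            envs.pause_at(idx)
        # Keep only the still-active environments along the env dimension.
        hidden_states = hidden_states[:, state_index]
        not_done_masks = not_done_masks[state_index]
        prev_actions = prev_actions[state_index]
        batch = {k: v[state_index] for k, v in batch.items()}
    return envs, hidden_states, not_done_masks, prev_actions, batch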
def _eval_checkpoint( self, checkpoint_path: str, writer: TensorboardWriter, cur_ckpt_idx: int = 0, ) -> None: r"""Evaluates a single checkpoint. Args: checkpoint_path: path of checkpoint writer: tensorboard writer object for logging to tensorboard cur_ckpt_idx: index of cur checkpoint for logging Returns: None """ ckpt_dict = self.load_checkpoint(checkpoint_path, map_location=self.device) ckpt_config = ckpt_dict["config"] config = self.config.clone() ckpt_cmd_opts = ckpt_config.CMD_TRAILING_OPTS eval_cmd_opts = config.CMD_TRAILING_OPTS # config merge priority: eval_opts > ckpt_opts > eval_cfg > ckpt_cfg # first line for old checkpoint compatibility config.merge_from_other_cfg(ckpt_config) config.merge_from_other_cfg(self.config) config.merge_from_list(ckpt_cmd_opts) config.merge_from_list(eval_cmd_opts) ppo_cfg = config.TRAINER.RL.PPO config.TASK_CONFIG.defrost() config.TASK_CONFIG.DATASET.SPLIT = "val" agent_sensors = ppo_cfg.sensors.strip().split(",") config.TASK_CONFIG.SIMULATOR.AGENT_0.SENSORS = agent_sensors if self.video_option: config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP") config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS") config.freeze() logger.info(f"env config: {config}") self.envs = construct_envs(config, NavRLEnv) self._setup_actor_critic_agent(ppo_cfg) self.agent.load_state_dict(ckpt_dict["state_dict"]) self.actor_critic = self.agent.actor_critic observations = self.envs.reset() batch = batch_obs(observations) for sensor in batch: batch[sensor] = batch[sensor].to(self.device) current_episode_reward = torch.zeros(self.envs.num_envs, 1, device=self.device) test_recurrent_hidden_states = torch.zeros(ppo_cfg.num_processes, ppo_cfg.hidden_size, device=self.device) not_done_masks = torch.zeros(ppo_cfg.num_processes, 1, device=self.device) stats_episodes = dict() # dict of dicts that stores stats per episode rgb_frames = [[] for _ in range(ppo_cfg.num_processes)] # type: List[List[np.ndarray]] if self.video_option: os.makedirs(ppo_cfg.video_dir, exist_ok=True) while (len(stats_episodes) < ppo_cfg.count_test_episodes and self.envs.num_envs > 0): current_episodes = self.envs.current_episodes() with torch.no_grad(): _, actions, _, test_recurrent_hidden_states = self.actor_critic.act( batch, test_recurrent_hidden_states, not_done_masks, deterministic=False, ) outputs = self.envs.step([a[0].item() for a in actions]) observations, rewards, dones, infos = [ list(x) for x in zip(*outputs) ] batch = batch_obs(observations) for sensor in batch: batch[sensor] = batch[sensor].to(self.device) not_done_masks = torch.tensor( [[0.0] if done else [1.0] for done in dones], dtype=torch.float, device=self.device, ) rewards = torch.tensor(rewards, dtype=torch.float, device=self.device).unsqueeze(1) current_episode_reward += rewards next_episodes = self.envs.current_episodes() envs_to_pause = [] n_envs = self.envs.num_envs for i in range(n_envs): if ( next_episodes[i].scene_id, next_episodes[i].episode_id, ) in stats_episodes: envs_to_pause.append(i) # episode ended if not_done_masks[i].item() == 0: episode_stats = dict() episode_stats["spl"] = infos[i]["spl"] episode_stats["success"] = int(infos[i]["spl"] > 0) episode_stats["reward"] = current_episode_reward[i].item() current_episode_reward[i] = 0 # use scene_id + episode_id as unique id for storing stats stats_episodes[( current_episodes[i].scene_id, current_episodes[i].episode_id, )] = episode_stats if self.video_option: generate_video( ppo_cfg, rgb_frames[i], current_episodes[i].episode_id, cur_ckpt_idx, infos[i]["spl"], writer, )
rgb_frames[i] = [] # episode continues elif self.video_option: frame = observations_to_image(observations[i], infos[i]) rgb_frames[i].append(frame) # pausing self.envs with no new episode if len(envs_to_pause) > 0: state_index = list(range(self.envs.num_envs)) for idx in reversed(envs_to_pause): state_index.pop(idx) self.envs.pause_at(idx) # indexing along the batch dimensions test_recurrent_hidden_states = test_recurrent_hidden_states[ state_index] not_done_masks = not_done_masks[state_index] current_episode_reward = current_episode_reward[state_index] for k, v in batch.items(): batch[k] = v[state_index] if self.video_option: rgb_frames = [rgb_frames[i] for i in state_index] aggregated_stats = dict() for stat_key in next(iter(stats_episodes.values())).keys(): aggregated_stats[stat_key] = sum( [v[stat_key] for v in stats_episodes.values()]) num_episodes = len(stats_episodes) episode_reward_mean = aggregated_stats["reward"] / num_episodes episode_spl_mean = aggregated_stats["spl"] / num_episodes episode_success_mean = aggregated_stats["success"] / num_episodes logger.info( "Average episode reward: {:.6f}".format(episode_reward_mean)) logger.info( "Average episode success: {:.6f}".format(episode_success_mean)) logger.info("Average episode SPL: {:.6f}".format(episode_spl_mean)) writer.add_scalars( "eval_reward", {"average reward": episode_reward_mean}, cur_ckpt_idx, ) writer.add_scalars("eval_SPL", {"average SPL": episode_spl_mean}, cur_ckpt_idx) writer.add_scalars( "eval_success", {"average success": episode_success_mean}, cur_ckpt_idx, )
def _eval_checkpoint( self, checkpoint_path: str, writer: TensorboardWriter, checkpoint_index: int = 0, ) -> None: r"""Evaluates a single checkpoint. Args: checkpoint_path: path of checkpoint writer: tensorboard writer object for logging to tensorboard checkpoint_index: index of cur checkpoint for logging Returns: None """ if self._is_distributed: raise RuntimeError("Evaluation does not support distributed mode") # Map location CPU is almost always better than mapping to a CUDA device. ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu") if self.config.EVAL.USE_CKPT_CONFIG: config = self._setup_eval_config(ckpt_dict["config"]) else: config = self.config.clone() ppo_cfg = config.RL.PPO config.defrost() config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT config.freeze() if len(self.config.VIDEO_OPTION) > 0: config.defrost() config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP") config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS") config.freeze() if config.VERBOSE: logger.info(f"env config: {config}") self._init_envs(config) self._setup_actor_critic_agent(ppo_cfg) self.agent.load_state_dict(ckpt_dict["state_dict"]) self.actor_critic = self.agent.actor_critic observations = self.envs.reset() batch = batch_obs( observations, device=self.device, cache=self._obs_batching_cache ) batch = apply_obs_transforms_batch(batch, self.obs_transforms) current_episode_reward = torch.zeros( self.envs.num_envs, 1, device="cpu" ) test_recurrent_hidden_states = torch.zeros( self.config.NUM_ENVIRONMENTS, self.actor_critic.net.num_recurrent_layers, ppo_cfg.hidden_size, device=self.device, ) prev_actions = torch.zeros( self.config.NUM_ENVIRONMENTS, 1, device=self.device, dtype=torch.long, ) not_done_masks = torch.zeros( self.config.NUM_ENVIRONMENTS, 1, device=self.device, dtype=torch.bool, ) stats_episodes: Dict[ Any, Any ] = {} # dict of dicts that stores stats per episode rgb_frames = [ [] for _ in range(self.config.NUM_ENVIRONMENTS) ] # type: List[List[np.ndarray]] if len(self.config.VIDEO_OPTION) > 0: os.makedirs(self.config.VIDEO_DIR, exist_ok=True) number_of_eval_episodes = self.config.TEST_EPISODE_COUNT if number_of_eval_episodes == -1: number_of_eval_episodes = sum(self.envs.number_of_episodes) else: total_num_eps = sum(self.envs.number_of_episodes) if total_num_eps < number_of_eval_episodes: logger.warn( f"Config specified {number_of_eval_episodes} eval episodes" f", dataset only has {total_num_eps}." ) logger.warn(f"Evaluating with {total_num_eps} instead.") number_of_eval_episodes = total_num_eps pbar = tqdm.tqdm(total=number_of_eval_episodes) self.actor_critic.eval() while ( len(stats_episodes) < number_of_eval_episodes and self.envs.num_envs > 0 ): current_episodes = self.envs.current_episodes() with torch.no_grad(): ( _, actions, _, test_recurrent_hidden_states, ) = self.actor_critic.act( batch, test_recurrent_hidden_states, prev_actions, not_done_masks, deterministic=False, ) prev_actions.copy_(actions) # type: ignore # NB: Move actions to CPU. If CUDA tensors are # sent in to env.step(), that will create CUDA contexts # in the subprocesses.
# For backwards compatibility, we also call .item() to convert to # an int step_data = [a.item() for a in actions.to(device="cpu")] outputs = self.envs.step(step_data) observations, rewards_l, dones, infos = [ list(x) for x in zip(*outputs) ] batch = batch_obs( observations, device=self.device, cache=self._obs_batching_cache, ) batch = apply_obs_transforms_batch(batch, self.obs_transforms) not_done_masks = torch.tensor( [[not done] for done in dones], dtype=torch.bool, device="cpu", ) rewards = torch.tensor( rewards_l, dtype=torch.float, device="cpu" ).unsqueeze(1) current_episode_reward += rewards next_episodes = self.envs.current_episodes() envs_to_pause = [] n_envs = self.envs.num_envs for i in range(n_envs): if ( next_episodes[i].scene_id, next_episodes[i].episode_id, ) in stats_episodes: envs_to_pause.append(i) # episode ended if not not_done_masks[i].item(): pbar.update() episode_stats = {} episode_stats["reward"] = current_episode_reward[i].item() episode_stats.update( self._extract_scalars_from_info(infos[i]) ) current_episode_reward[i] = 0 # use scene_id + episode_id as unique id for storing stats stats_episodes[ ( current_episodes[i].scene_id, current_episodes[i].episode_id, ) ] = episode_stats if len(self.config.VIDEO_OPTION) > 0: generate_video( video_option=self.config.VIDEO_OPTION, video_dir=self.config.VIDEO_DIR, images=rgb_frames[i], episode_id=current_episodes[i].episode_id, checkpoint_idx=checkpoint_index, metrics=self._extract_scalars_from_info(infos[i]), tb_writer=writer, ) rgb_frames[i] = [] # episode continues elif len(self.config.VIDEO_OPTION) > 0: # TODO move normalization / channel changing out of the policy and undo it here frame = observations_to_image( {k: v[i] for k, v in batch.items()}, infos[i] ) rgb_frames[i].append(frame) not_done_masks = not_done_masks.to(device=self.device) ( self.envs, test_recurrent_hidden_states, not_done_masks, current_episode_reward, prev_actions, batch, rgb_frames, ) = self._pause_envs( envs_to_pause, self.envs, test_recurrent_hidden_states, not_done_masks, current_episode_reward, prev_actions, batch, rgb_frames, ) num_episodes = len(stats_episodes) aggregated_stats = {} for stat_key in next(iter(stats_episodes.values())).keys(): aggregated_stats[stat_key] = ( sum(v[stat_key] for v in stats_episodes.values()) / num_episodes ) for k, v in aggregated_stats.items(): logger.info(f"Average episode {k}: {v:.4f}") step_id = checkpoint_index if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]: step_id = ckpt_dict["extra_state"]["step"] writer.add_scalars( "eval_reward", {"average reward": aggregated_stats["reward"]}, step_id, ) metrics = {k: v for k, v in aggregated_stats.items() if k != "reward"} if len(metrics) > 0: writer.add_scalars("eval_metrics", metrics, step_id) self.envs.close()
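# The newer _eval_checkpoint above delegates the bookkeeping for finished
# environments to self._pause_envs(), which is not reproduced in this file.
# The sketch below shows what that helper is assumed to do, mirroring the
# inline pausing logic of the older _eval_checkpoint: pause each finished env
# and drop its slot from every batched tensor. The exact signature and return
# order are inferred from the call site and are assumptions.
from typing import Any, Dict, List, Tuple

import torch


def pause_envs_sketch(
    envs_to_pause: List[int],
    envs,  # vectorized env wrapper exposing num_envs and pause_at()
    test_recurrent_hidden_states: torch.Tensor,
    not_done_masks: torch.Tensor,
    current_episode_reward: torch.Tensor,
    prev_actions: torch.Tensor,
    batch: Dict[str, torch.Tensor],
    rgb_frames: List[List[Any]],
) -> Tuple[Any, ...]:
    if len(envs_to_pause) > 0:
        state_index = list(range(envs.num_envs))
        # Pause from the highest index down so earlier indices stay valid.
        for idx in reversed(envs_to_pause):
            state_index.pop(idx)
            envs.pause_at(idx)

        # Keep only the still-active environments in every batched tensor.
        test_recurrent_hidden_states = test_recurrent_hidden_states[state_index]
        not_done_masks = not_done_masks[state_index]
        current_episode_reward = current_episode_reward[state_index]
        prev_actions = prev_actions[state_index]
        batch = {k: v[state_index] for k, v in batch.items()}
        rgb_frames = [rgb_frames[i] for i in state_index]

    return (
        envs,
        test_recurrent_hidden_states,
        not_done_masks,
        current_episode_reward,
        prev_actions,
        batch,
        rgb_frames,
    )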
def train(self, ckpt_path="", ckpt=-1, start_updates=0) -> None: r"""Main method for training PPO. Returns: None """ self.local_rank, tcp_store = init_distrib_slurm( self.config.RL.DDPPO.distrib_backend) add_signal_handlers() # Stores the number of workers that have finished their rollout num_rollouts_done_store = distrib.PrefixStore("rollout_tracker", tcp_store) num_rollouts_done_store.set("num_done", "0") self.world_rank = distrib.get_rank() self.world_size = distrib.get_world_size() random.seed(self.config.TASK_CONFIG.SEED + self.world_rank) np.random.seed(self.config.TASK_CONFIG.SEED + self.world_rank) self.config.defrost() self.config.TORCH_GPU_ID = self.local_rank self.config.SIMULATOR_GPU_ID = self.local_rank self.config.freeze() if torch.cuda.is_available(): self.device = torch.device("cuda", self.local_rank) torch.cuda.set_device(self.device) else: self.device = torch.device("cpu") self.envs = construct_envs(self.config, get_env_class(self.config.ENV_NAME)) ppo_cfg = self.config.RL.PPO task_cfg = self.config.TASK_CONFIG.TASK observation_space = self.envs.observation_spaces[0] aux_cfg = self.config.RL.AUX_TASKS init_aux_tasks, num_recurrent_memories, aux_task_strings = self._setup_auxiliary_tasks( aux_cfg, ppo_cfg, task_cfg, observation_space) rollouts = RolloutStorage( ppo_cfg.num_steps, self.envs.num_envs, observation_space, self.envs.action_spaces[0], ppo_cfg.hidden_size, num_recurrent_memories=num_recurrent_memories) rollouts.to(self.device) observations = self.envs.reset() batch = batch_obs(observations, device=self.device) for sensor in rollouts.observations: rollouts.observations[sensor][0].copy_(batch[sensor]) # batch and observations may contain shared PyTorch CUDA # tensors. We must explicitly clear them here otherwise # they will be kept in memory for the entire duration of training! batch = None observations = None self._setup_actor_critic_agent(ppo_cfg, task_cfg, aux_cfg, init_aux_tasks) self.agent.init_distributed(find_unused_params=True) if self.world_rank == 0: logger.info("agent number of trainable parameters: {}".format( sum(param.numel() for param in self.agent.parameters() if param.requires_grad))) current_episode_reward = torch.zeros(self.envs.num_envs, 1) running_episode_stats = dict( count=torch.zeros(self.envs.num_envs, 1), reward=torch.zeros(self.envs.num_envs, 1), # including bonus ) window_episode_stats = defaultdict( lambda: deque(maxlen=ppo_cfg.reward_window_size)) t_start = time.time() env_time = 0 pth_time = 0 count_steps = 0 count_checkpoints = 0 prev_time = 0 if ckpt != -1: logger.info( f"Resuming runs at checkpoint {ckpt}. Timing statistics are not tracked properly." 
) assert ppo_cfg.use_linear_lr_decay is False and ppo_cfg.use_linear_clip_decay is False, "Resuming with decay not supported" # This is the checkpoint we start saving at count_checkpoints = ckpt + 1 count_steps = start_updates * ppo_cfg.num_steps * self.config.NUM_PROCESSES ckpt_dict = self.load_checkpoint(ckpt_path, map_location="cpu") self.agent.load_state_dict(ckpt_dict["state_dict"]) if "optim_state" in ckpt_dict: self.agent.optimizer.load_state_dict(ckpt_dict["optim_state"]) else: logger.warn("No optimizer state loaded, results may be funky") if "extra_state" in ckpt_dict and "step" in ckpt_dict[ "extra_state"]: count_steps = ckpt_dict["extra_state"]["step"] lr_scheduler = LambdaLR( optimizer=self.agent.optimizer, lr_lambda=lambda x: linear_decay(x, self.config.NUM_UPDATES), ) interrupted_state = load_interrupted_state() if interrupted_state is not None: self.agent.load_state_dict(interrupted_state["state_dict"]) self.agent.optimizer.load_state_dict( interrupted_state["optim_state"]) lr_scheduler.load_state_dict(interrupted_state["lr_sched_state"]) requeue_stats = interrupted_state["requeue_stats"] env_time = requeue_stats["env_time"] pth_time = requeue_stats["pth_time"] count_steps = requeue_stats["count_steps"] count_checkpoints = requeue_stats["count_checkpoints"] start_updates = requeue_stats["start_update"] prev_time = requeue_stats["prev_time"] with (TensorboardWriter(self.config.TENSORBOARD_DIR, flush_secs=self.flush_secs) if self.world_rank == 0 else contextlib.suppress()) as writer: for update in range(start_updates, self.config.NUM_UPDATES): if ppo_cfg.use_linear_lr_decay: lr_scheduler.step() if ppo_cfg.use_linear_clip_decay: self.agent.clip_param = ppo_cfg.clip_param * linear_decay( update, self.config.NUM_UPDATES) if EXIT.is_set(): self.envs.close() if REQUEUE.is_set() and self.world_rank == 0: requeue_stats = dict( env_time=env_time, pth_time=pth_time, count_steps=count_steps, count_checkpoints=count_checkpoints, start_update=update, prev_time=(time.time() - t_start) + prev_time, ) save_interrupted_state( dict( state_dict=self.agent.state_dict(), optim_state=self.agent.optimizer.state_dict(), lr_sched_state=lr_scheduler.state_dict(), config=self.config, requeue_stats=requeue_stats, )) requeue_job() return count_steps_delta = 0 self.agent.eval() for step in range(ppo_cfg.num_steps): ( delta_pth_time, delta_env_time, delta_steps, ) = self._collect_rollout_step(rollouts, current_episode_reward, running_episode_stats) pth_time += delta_pth_time env_time += delta_env_time count_steps_delta += delta_steps # This is where the preemption of workers happens. If a # worker detects it will be a straggler, it preempts itself!
if (step >= ppo_cfg.num_steps * self.SHORT_ROLLOUT_THRESHOLD ) and int(num_rollouts_done_store.get("num_done")) > ( self.config.RL.DDPPO.sync_frac * self.world_size): break num_rollouts_done_store.add("num_done", 1) self.agent.train() ( delta_pth_time, value_loss, action_loss, dist_entropy, aux_task_losses, aux_dist_entropy, aux_weights, ) = self._update_agent(ppo_cfg, rollouts) pth_time += delta_pth_time stats_ordering = list(sorted(running_episode_stats.keys())) stats = torch.stack( [running_episode_stats[k] for k in stats_ordering], 0).to(self.device) distrib.all_reduce(stats) for i, k in enumerate(stats_ordering): window_episode_stats[k].append(stats[i].clone()) stats = torch.tensor( [ dist_entropy, aux_dist_entropy, ] + [value_loss, action_loss] + aux_task_losses + [count_steps_delta], device=self.device, ) distrib.all_reduce(stats) if aux_weights is not None and len(aux_weights) > 0: distrib.all_reduce( torch.tensor(aux_weights, device=self.device)) count_steps += stats[-1].item() if self.world_rank == 0: num_rollouts_done_store.set("num_done", "0") avg_stats = [ stats[i].item() / self.world_size for i in range(len(stats) - 1) ] losses = avg_stats[2:] dist_entropy, aux_dist_entropy = avg_stats[:2] deltas = { k: ((v[-1] - v[0]).sum().item() if len(v) > 1 else v[0].sum().item()) for k, v in window_episode_stats.items() } deltas["count"] = max(deltas["count"], 1.0) writer.add_scalar( "reward", deltas["reward"] / deltas["count"], count_steps, ) writer.add_scalar( "entropy", dist_entropy, count_steps, ) writer.add_scalar("aux_entropy", aux_dist_entropy, count_steps) # Check to see if there are any metrics # that haven't been logged yet metrics = { k: v / deltas["count"] for k, v in deltas.items() if k not in {"reward", "count"} } if len(metrics) > 0: writer.add_scalars("metrics", metrics, count_steps) writer.add_scalars( "losses", { k: l for l, k in zip(losses, ["value", "policy"] + aux_task_strings) }, count_steps, ) writer.add_scalars( "aux_weights", {k: l for l, k in zip(aux_weights, aux_task_strings)}, count_steps, ) # Log stats formatted_aux_losses = [ "{:.3g}".format(l) for l in aux_task_losses ] if update > 0 and update % self.config.LOG_INTERVAL == 0: logger.info( "update: {}\tvalue_loss: {:.3g}\t action_loss: {:.3g}\taux_task_loss: {} \t aux_entropy {:.3g}\t" .format( update, value_loss, action_loss, formatted_aux_losses, aux_dist_entropy, )) logger.info("update: {}\tfps: {:.3f}\t".format( update, count_steps / ((time.time() - t_start) + prev_time), )) logger.info( "update: {}\tenv-time: {:.3f}s\tpth-time: {:.3f}s\t" "frames: {}".format(update, env_time, pth_time, count_steps)) logger.info("Average window size: {} {}".format( len(window_episode_stats["count"]), " ".join( "{}: {:.3f}".format(k, v / deltas["count"]) for k, v in deltas.items() if k != "count"), )) # checkpoint model if update % self.config.CHECKPOINT_INTERVAL == 0: self.save_checkpoint( f"{self.checkpoint_prefix}.{count_checkpoints}.pth", dict(step=count_steps)) count_checkpoints += 1 self.envs.close()
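# The trainers above schedule the learning rate with
# LambdaLR(optimizer=..., lr_lambda=lambda x: linear_decay(x, NUM_UPDATES))
# and reuse the same function for clip-parameter decay. linear_decay() is not
# defined in this file; a minimal sketch, assuming it anneals the multiplier
# linearly from 1.0 at update 0 to 0.0 at the final update:
def linear_decay_sketch(update: int, total_num_updates: int) -> float:
    """Return the multiplicative decay factor for the given update index."""
    return 1.0 - (update / float(total_num_updates))


# Example: with total_num_updates=10000, update 2500 yields a factor of 0.75,
# so an initial learning rate of 2.5e-4 becomes 1.875e-4 at that point.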
def train(self) -> None: r"""Main method for DD-PPO. Returns: None """ self.local_rank, tcp_store = init_distrib_slurm( self.config.RL.DDPPO.distrib_backend) add_signal_handlers() # Stores the number of workers that have finished their rollout num_rollouts_done_store = distrib.PrefixStore("rollout_tracker", tcp_store) num_rollouts_done_store.set("num_done", "0") self.world_rank = distrib.get_rank() self.world_size = distrib.get_world_size() self.config.defrost() self.config.TORCH_GPU_ID = self.local_rank self.config.SIMULATOR_GPU_ID = self.local_rank # Multiply by the number of simulators to make sure they also get unique seeds self.config.TASK_CONFIG.SEED += (self.world_rank * self.config.NUM_PROCESSES) self.config.freeze() random.seed(self.config.TASK_CONFIG.SEED) np.random.seed(self.config.TASK_CONFIG.SEED) torch.manual_seed(self.config.TASK_CONFIG.SEED) if torch.cuda.is_available(): self.device = torch.device("cuda", self.local_rank) torch.cuda.set_device(self.device) else: self.device = torch.device("cpu") self.envs = construct_envs(self.config, get_env_class(self.config.ENV_NAME)) ppo_cfg = self.config.RL.PPO if (not os.path.isdir(self.config.CHECKPOINT_FOLDER) and self.world_rank == 0): os.makedirs(self.config.CHECKPOINT_FOLDER) self._setup_actor_critic_agent(ppo_cfg) self.agent.init_distributed(find_unused_params=True) if self.world_rank == 0: logger.info("agent number of trainable parameters: {}".format( sum(param.numel() for param in self.agent.parameters() if param.requires_grad))) observations = self.envs.reset() batch = batch_obs(observations, device=self.device) obs_space = self.envs.observation_spaces[0] if self._static_encoder: self._encoder = self.actor_critic.net.visual_encoder obs_space = SpaceDict({ "visual_features": spaces.Box( low=np.finfo(np.float32).min, high=np.finfo(np.float32).max, shape=self._encoder.output_shape, dtype=np.float32, ), **obs_space.spaces, }) with torch.no_grad(): batch["visual_features"] = self._encoder(batch) rollouts = RolloutStorage( ppo_cfg.num_steps, self.envs.num_envs, obs_space, self.envs.action_spaces[0], ppo_cfg.hidden_size, num_recurrent_layers=self.actor_critic.net.num_recurrent_layers, ) rollouts.to(self.device) for sensor in rollouts.observations: rollouts.observations[sensor][0].copy_(batch[sensor]) # batch and observations may contain shared PyTorch CUDA # tensors. We must explicitly clear them here otherwise # they will be kept in memory for the entire duration of training! 
batch = None observations = None current_episode_reward = torch.zeros(self.envs.num_envs, 1, device=self.device) running_episode_stats = dict( count=torch.zeros(self.envs.num_envs, 1, device=self.device), reward=torch.zeros(self.envs.num_envs, 1, device=self.device), ) window_episode_stats = defaultdict( lambda: deque(maxlen=ppo_cfg.reward_window_size)) t_start = time.time() env_time = 0 pth_time = 0 count_steps = 0 count_checkpoints = 0 start_update = 0 prev_time = 0 lr_scheduler = LambdaLR( optimizer=self.agent.optimizer, lr_lambda=lambda x: linear_decay(x, self.config.NUM_UPDATES), ) interrupted_state = load_interrupted_state() if interrupted_state is not None: self.agent.load_state_dict(interrupted_state["state_dict"]) self.agent.optimizer.load_state_dict( interrupted_state["optim_state"]) lr_scheduler.load_state_dict(interrupted_state["lr_sched_state"]) requeue_stats = interrupted_state["requeue_stats"] env_time = requeue_stats["env_time"] pth_time = requeue_stats["pth_time"] count_steps = requeue_stats["count_steps"] count_checkpoints = requeue_stats["count_checkpoints"] start_update = requeue_stats["start_update"] prev_time = requeue_stats["prev_time"] with (TensorboardWriter(self.config.TENSORBOARD_DIR, flush_secs=self.flush_secs) if self.world_rank == 0 else contextlib.suppress()) as writer: for update in range(start_update, self.config.NUM_UPDATES): if ppo_cfg.use_linear_lr_decay: lr_scheduler.step() if ppo_cfg.use_linear_clip_decay: self.agent.clip_param = ppo_cfg.clip_param * linear_decay( update, self.config.NUM_UPDATES) if EXIT.is_set(): self.envs.close() if REQUEUE.is_set() and self.world_rank == 0: requeue_stats = dict( env_time=env_time, pth_time=pth_time, count_steps=count_steps, count_checkpoints=count_checkpoints, start_update=update, prev_time=(time.time() - t_start) + prev_time, ) save_interrupted_state( dict( state_dict=self.agent.state_dict(), optim_state=self.agent.optimizer.state_dict(), lr_sched_state=lr_scheduler.state_dict(), config=self.config, requeue_stats=requeue_stats, )) requeue_job() return count_steps_delta = 0 self.agent.eval() for step in range(ppo_cfg.num_steps): ( delta_pth_time, delta_env_time, delta_steps, ) = self._collect_rollout_step(rollouts, current_episode_reward, running_episode_stats) pth_time += delta_pth_time env_time += delta_env_time count_steps_delta += delta_steps # This is where the preemption of workers happens. If a # worker detects it will be a straggler, it preempts itself! 
if (step >= ppo_cfg.num_steps * self.SHORT_ROLLOUT_THRESHOLD ) and int(num_rollouts_done_store.get("num_done")) > ( self.config.RL.DDPPO.sync_frac * self.world_size): break num_rollouts_done_store.add("num_done", 1) self.agent.train() if self._static_encoder: self._encoder.eval() ( delta_pth_time, value_loss, action_loss, dist_entropy, ) = self._update_agent(ppo_cfg, rollouts) pth_time += delta_pth_time stats_ordering = list(sorted(running_episode_stats.keys())) stats = torch.stack( [running_episode_stats[k] for k in stats_ordering], 0) distrib.all_reduce(stats) for i, k in enumerate(stats_ordering): window_episode_stats[k].append(stats[i].clone()) stats = torch.tensor( [value_loss, action_loss, count_steps_delta], device=self.device, ) distrib.all_reduce(stats) count_steps += stats[2].item() if self.world_rank == 0: num_rollouts_done_store.set("num_done", "0") losses = [ stats[0].item() / self.world_size, stats[1].item() / self.world_size, ] deltas = { k: ((v[-1] - v[0]).sum().item() if len(v) > 1 else v[0].sum().item()) for k, v in window_episode_stats.items() } deltas["count"] = max(deltas["count"], 1.0) writer.add_scalar( "reward", deltas["reward"] / deltas["count"], count_steps, ) # Check to see if there are any metrics # that haven't been logged yet metrics = { k: v / deltas["count"] for k, v in deltas.items() if k not in {"reward", "count"} } if len(metrics) > 0: writer.add_scalars("metrics", metrics, count_steps) writer.add_scalars( "losses", {k: l for l, k in zip(losses, ["value", "policy"])}, count_steps, ) # log stats if update > 0 and update % self.config.LOG_INTERVAL == 0: logger.info("update: {}\tfps: {:.3f}\t".format( update, count_steps / ((time.time() - t_start) + prev_time), )) logger.info( "update: {}\tenv-time: {:.3f}s\tpth-time: {:.3f}s\t" "frames: {}".format(update, env_time, pth_time, count_steps)) logger.info("Average window size: {} {}".format( len(window_episode_stats["count"]), " ".join( "{}: {:.3f}".format(k, v / deltas["count"]) for k, v in deltas.items() if k != "count"), )) # checkpoint model if update % self.config.CHECKPOINT_INTERVAL == 0: self.save_checkpoint( f"ckpt.{count_checkpoints}.pth", dict(step=count_steps), ) count_checkpoints += 1 self.envs.close()
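# Both distributed trainers log "reward" as deltas["reward"] / deltas["count"],
# where window_episode_stats keeps a rolling window of cumulative per-env
# statistics and the delta between the newest and oldest snapshot gives the
# reward and episode count accumulated inside the window. A small
# self-contained example with illustrative numbers (two environments, two
# snapshots):
from collections import defaultdict, deque

import torch

window_episode_stats = defaultdict(lambda: deque(maxlen=50))
window_episode_stats["count"].append(torch.tensor([[3.0], [2.0]]))    # episodes finished so far
window_episode_stats["reward"].append(torch.tensor([[12.0], [8.0]]))  # cumulative return so far
window_episode_stats["count"].append(torch.tensor([[5.0], [4.0]]))
window_episode_stats["reward"].append(torch.tensor([[21.0], [15.0]]))

deltas = {
    k: ((v[-1] - v[0]).sum().item() if len(v) > 1 else v[0].sum().item())
    for k, v in window_episode_stats.items()
}
deltas["count"] = max(deltas["count"], 1.0)
# 4 episodes finished within the window with 16 total reward -> 4.0 per episode.
print(deltas["reward"] / deltas["count"])  # 4.0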
def train(self) -> None: r"""Main method for training Navigation model of EQA. Returns: None """ config = self.config with habitat.Env(config.TASK_CONFIG) as env: nav_dataset = (NavDataset( config, env, self.device, ).shuffle(1000).decode("rgb")) nav_dataset = nav_dataset.map(nav_dataset.map_dataset_sample) train_loader = DataLoader(nav_dataset, batch_size=config.IL.NAV.batch_size) logger.info("train_loader has {} samples".format(len(nav_dataset))) q_vocab_dict, _ = nav_dataset.get_vocab_dicts() model_kwargs = {"q_vocab": q_vocab_dict.word2idx_dict} model = NavPlannerControllerModel(**model_kwargs) planner_loss_fn = MaskedNLLCriterion() controller_loss_fn = MaskedNLLCriterion() optim = torch.optim.Adam( filter(lambda p: p.requires_grad, model.parameters()), lr=float(config.IL.NAV.lr), ) metrics = NavMetric( info={"split": "train"}, metric_names=["planner_loss", "controller_loss"], log_json=os.path.join(config.OUTPUT_LOG_DIR, "train.json"), ) epoch = 1 avg_p_loss = 0.0 avg_c_loss = 0.0 logger.info(model) model.train().to(self.device) with TensorboardWriter( "train_{}/{}".format( config.TENSORBOARD_DIR, datetime.today().strftime("%Y-%m-%d-%H:%M"), ), flush_secs=self.flush_secs, ) as writer: while epoch <= config.IL.NAV.max_epochs: start_time = time.time() for t, batch in enumerate(train_loader): batch = (item.to(self.device, non_blocking=True) for item in batch) ( idx, questions, _, planner_img_feats, planner_actions_in, planner_actions_out, planner_action_lengths, planner_masks, controller_img_feats, controller_actions_in, planner_hidden_idx, controller_outs, controller_action_lengths, controller_masks, ) = batch ( planner_action_lengths, perm_idx, ) = planner_action_lengths.sort(0, descending=True) questions = questions[perm_idx] planner_img_feats = planner_img_feats[perm_idx] planner_actions_in = planner_actions_in[perm_idx] planner_actions_out = planner_actions_out[perm_idx] planner_masks = planner_masks[perm_idx] controller_img_feats = controller_img_feats[perm_idx] controller_actions_in = controller_actions_in[perm_idx] controller_outs = controller_outs[perm_idx] planner_hidden_idx = planner_hidden_idx[perm_idx] controller_action_lengths = controller_action_lengths[ perm_idx] controller_masks = controller_masks[perm_idx] ( planner_scores, controller_scores, planner_hidden, ) = model( questions, planner_img_feats, planner_actions_in, planner_action_lengths.cpu().numpy(), planner_hidden_idx, controller_img_feats, controller_actions_in, controller_action_lengths, ) planner_logprob = F.log_softmax(planner_scores, dim=1) controller_logprob = F.log_softmax(controller_scores, dim=1) planner_loss = planner_loss_fn( planner_logprob, planner_actions_out[:, :planner_action_lengths.max( )].reshape(-1, 1), planner_masks[:, :planner_action_lengths.max()]. 
reshape(-1, 1), ) controller_loss = controller_loss_fn( controller_logprob, controller_outs[:, :controller_action_lengths.max( )].reshape(-1, 1), controller_masks[:, :controller_action_lengths.max( )].reshape(-1, 1), ) # zero grad optim.zero_grad() # update metrics metrics.update( [planner_loss.item(), controller_loss.item()]) (planner_loss + controller_loss).backward() optim.step() (planner_loss, controller_loss) = metrics.get_stats() avg_p_loss += planner_loss avg_c_loss += controller_loss if t % config.LOG_INTERVAL == 0: logger.info("Epoch: {}".format(epoch)) logger.info(metrics.get_stat_string()) writer.add_scalar("planner loss", planner_loss, t) writer.add_scalar("controller loss", controller_loss, t) metrics.dump_log() # Dataloader length for IterableDataset doesn't take into # account batch size for Pytorch v < 1.6.0 num_batches = math.ceil( len(nav_dataset) / config.IL.NAV.batch_size) avg_p_loss /= num_batches avg_c_loss /= num_batches end_time = time.time() time_taken = "{:.1f}".format((end_time - start_time) / 60) logger.info( "Epoch {} completed. Time taken: {} minutes.".format( epoch, time_taken)) logger.info( "Average planner loss: {:.2f}".format(avg_p_loss)) logger.info( "Average controller loss: {:.2f}".format(avg_c_loss)) print("-----------------------------------------") if epoch % config.CHECKPOINT_INTERVAL == 0: self.save_checkpoint(model.state_dict(), "epoch_{}.ckpt".format(epoch)) epoch += 1
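# The navigation trainer above feeds log-softmax scores into
# MaskedNLLCriterion together with masks that zero out padded timesteps. The
# criterion is not defined in this file; the sketch below shows one way such
# a masked NLL loss can be written, assuming log-probabilities of shape
# [N, num_actions], targets of shape [N, 1], and a 0/1 mask of shape [N, 1].
import torch
import torch.nn as nn


class MaskedNLLCriterionSketch(nn.Module):
    def forward(
        self,
        log_probs: torch.Tensor,  # [N, num_actions], output of log_softmax
        target: torch.Tensor,     # [N, 1], ground-truth action indices
        mask: torch.Tensor,       # [N, 1], 1.0 for real steps, 0.0 for padding
    ) -> torch.Tensor:
        # Log-probability assigned to the ground-truth action at each step.
        gathered = log_probs.gather(1, target.long())
        # Negative log-likelihood averaged over unmasked steps only.
        return -(gathered * mask).sum() / mask.sum().clamp(min=1.0)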
def train(self) -> None: r"""Main method for pre-training Encoder-Decoder Feature Extractor for EQA. Returns: None """ config = self.config eqa_cnn_pretrain_dataset = EQACNNPretrainDataset(config) train_loader = DataLoader( eqa_cnn_pretrain_dataset, batch_size=config.IL.EQACNNPretrain.batch_size, shuffle=True, ) logger.info("[ train_loader has {} samples ]".format( len(eqa_cnn_pretrain_dataset))) model = MultitaskCNN() model.train().to(self.device) optim = torch.optim.Adam( filter(lambda p: p.requires_grad, model.parameters()), lr=float(config.IL.EQACNNPretrain.lr), ) depth_loss = torch.nn.SmoothL1Loss() ae_loss = torch.nn.SmoothL1Loss() seg_loss = torch.nn.CrossEntropyLoss() epoch, t = 1, 0 with TensorboardWriter(config.TENSORBOARD_DIR, flush_secs=self.flush_secs) as writer: while epoch <= config.IL.EQACNNPretrain.max_epochs: start_time = time.time() avg_loss = 0.0 for batch in train_loader: t += 1 idx, gt_rgb, gt_depth, gt_seg = batch optim.zero_grad() gt_rgb = gt_rgb.to(self.device) gt_depth = gt_depth.to(self.device) gt_seg = gt_seg.to(self.device) pred_seg, pred_depth, pred_rgb = model(gt_rgb) l1 = seg_loss(pred_seg, gt_seg.long()) l2 = ae_loss(pred_rgb, gt_rgb) l3 = depth_loss(pred_depth, gt_depth) loss = l1 + (10 * l2) + (10 * l3) avg_loss += loss.item() if t % config.LOG_INTERVAL == 0: logger.info( "[ Epoch: {}; iter: {}; loss: {:.3f} ]".format( epoch, t, loss.item())) writer.add_scalar("total_loss", loss, t) writer.add_scalars( "individual_losses", { "seg_loss": l1, "ae_loss": l2, "depth_loss": l3 }, t, ) loss.backward() optim.step() end_time = time.time() time_taken = "{:.1f}".format((end_time - start_time) / 60) avg_loss = avg_loss / len(train_loader) logger.info( "[ Epoch {} completed. Time taken: {} minutes. ]".format( epoch, time_taken)) logger.info("[ Average loss: {:.3f} ]".format(avg_loss)) print("-----------------------------------------") if epoch % config.CHECKPOINT_INTERVAL == 0: self.save_checkpoint(model.state_dict(), "epoch_{}.ckpt".format(epoch)) epoch += 1
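# Both imitation-learning trainers above persist weights through
# self.save_checkpoint(model.state_dict(), "epoch_{}.ckpt"). That helper is
# not shown here; the sketch below captures the assumed behaviour of writing
# the state dict into the configured checkpoint folder. The folder argument
# and any extra bookkeeping (e.g. also storing the config) are assumptions.
import os

import torch


def save_checkpoint_sketch(state_dict, file_name: str, checkpoint_folder: str) -> None:
    os.makedirs(checkpoint_folder, exist_ok=True)
    torch.save(state_dict, os.path.join(checkpoint_folder, file_name))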
def train(self, ckpt_path="", ckpt=-1, start_updates=0) -> None: r"""Main method for training PPO. Returns: None """ self.envs = construct_envs(self.config, get_env_class(self.config.ENV_NAME)) ppo_cfg = self.config.RL.PPO task_cfg = self.config.TASK_CONFIG.TASK self.device = (torch.device("cuda", self.config.TORCH_GPU_ID) if torch.cuda.is_available() else torch.device("cpu")) # Initialize auxiliary tasks observation_space = self.envs.observation_spaces[0] aux_cfg = self.config.RL.AUX_TASKS init_aux_tasks, num_recurrent_memories, aux_task_strings = \ self._setup_auxiliary_tasks(aux_cfg, ppo_cfg, task_cfg, observation_space) rollouts = RolloutStorage( ppo_cfg.num_steps, self.envs.num_envs, observation_space, self.envs.action_spaces[0], ppo_cfg.hidden_size, num_recurrent_memories=num_recurrent_memories) rollouts.to(self.device) observations = self.envs.reset() batch = batch_obs(observations, device=self.device) for sensor in rollouts.observations: rollouts.observations[sensor][0].copy_(batch[sensor]) # batch and observations may contain shared PyTorch CUDA # tensors. We must explicitly clear them here otherwise # they will be kept in memory for the entire duration of training! batch = None observations = None self._setup_actor_critic_agent(ppo_cfg, task_cfg, aux_cfg, init_aux_tasks) logger.info("agent number of parameters: {}".format( sum(param.numel() for param in self.agent.parameters()))) current_episode_reward = torch.zeros(self.envs.num_envs, 1) running_episode_stats = dict( count=torch.zeros(self.envs.num_envs, 1), reward=torch.zeros(self.envs.num_envs, 1), ) window_episode_stats = defaultdict( lambda: deque(maxlen=ppo_cfg.reward_window_size)) t_start = time.time() env_time = 0 pth_time = 0 count_steps = 0 count_checkpoints = 0 if ckpt != -1: logger.info( f"Resuming runs at checkpoint {ckpt}. Timing statistics are not tracked properly." 
) assert ppo_cfg.use_linear_lr_decay is False and ppo_cfg.use_linear_clip_decay is False, "Resuming with decay not supported" # This is the checkpoint we start saving at count_checkpoints = ckpt + 1 count_steps = start_updates * ppo_cfg.num_steps * self.config.NUM_PROCESSES ckpt_dict = self.load_checkpoint(ckpt_path, map_location="cpu") self.agent.load_state_dict(ckpt_dict["state_dict"]) if "optim_state" in ckpt_dict: self.agent.optimizer.load_state_dict(ckpt_dict["optim_state"]) else: logger.warn("No optimizer state loaded, results may be funky") if "extra_state" in ckpt_dict and "step" in ckpt_dict[ "extra_state"]: count_steps = ckpt_dict["extra_state"]["step"] lr_scheduler = LambdaLR( optimizer=self.agent.optimizer, lr_lambda=lambda x: linear_decay(x, self.config.NUM_UPDATES), ) with TensorboardWriter(self.config.TENSORBOARD_DIR, flush_secs=self.flush_secs) as writer: for update in range(start_updates, self.config.NUM_UPDATES): if ppo_cfg.use_linear_lr_decay: lr_scheduler.step() if ppo_cfg.use_linear_clip_decay: self.agent.clip_param = ppo_cfg.clip_param * linear_decay( update, self.config.NUM_UPDATES) for step in range(ppo_cfg.num_steps): ( delta_pth_time, delta_env_time, delta_steps, ) = self._collect_rollout_step(rollouts, current_episode_reward, running_episode_stats) pth_time += delta_pth_time env_time += delta_env_time count_steps += delta_steps delta_pth_time, value_loss, action_loss, dist_entropy, aux_task_losses, aux_dist_entropy, aux_weights = self._update_agent( ppo_cfg, rollouts) pth_time += delta_pth_time for k, v in running_episode_stats.items(): window_episode_stats[k].append(v.clone()) deltas = { k: ((v[-1] - v[0]).sum().item() if len(v) > 1 else v[0].sum().item()) for k, v in window_episode_stats.items() } deltas["count"] = max(deltas["count"], 1.0) writer.add_scalar( "entropy", dist_entropy, count_steps, ) writer.add_scalar("aux_entropy", aux_dist_entropy, count_steps) writer.add_scalar("reward", deltas["reward"] / deltas["count"], count_steps) # Check to see if there are any metrics # that haven't been logged yet metrics = { k: v / deltas["count"] for k, v in deltas.items() if k not in {"reward", "count"} } if len(metrics) > 0: writer.add_scalars("metrics", metrics, count_steps) losses = [value_loss, action_loss] + aux_task_losses writer.add_scalars( "losses", { k: l for l, k in zip(losses, ["value", "policy"] + aux_task_strings) }, count_steps, ) writer.add_scalars( "aux_weights", {k: l for l, k in zip(aux_weights, aux_task_strings)}, count_steps, ) writer.add_scalar( "success", deltas["success"] / deltas["count"], count_steps, ) # Log stats if update > 0 and update % self.config.LOG_INTERVAL == 0: logger.info( "update: {}\tvalue_loss: {}\t action_loss: {}\taux_task_loss: {} \t aux_entropy {}" .format(update, value_loss, action_loss, aux_task_losses, aux_dist_entropy)) logger.info("update: {}\tfps: {:.3f}\t".format( update, count_steps / (time.time() - t_start))) logger.info( "update: {}\tenv-time: {:.3f}s\tpth-time: {:.3f}s\t" "frames: {}".format(update, env_time, pth_time, count_steps)) logger.info("Average window size: {} {}".format( len(window_episode_stats["count"]), " ".join("{}: {:.3f}".format(k, v / deltas["count"]) for k, v in deltas.items() if k != "count"), )) # checkpoint model if update % self.config.CHECKPOINT_INTERVAL == 0: self.save_checkpoint( f"{self.checkpoint_prefix}.{count_checkpoints}.pth", dict(step=count_steps)) count_checkpoints += 1 self.envs.close()
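# Every PPO-style trainer above delegates environment interaction to
# self._collect_rollout_step(rollouts, current_episode_reward,
# running_episode_stats), which is not reproduced in this file. The sketch
# below outlines one rollout step under a habitat-style RolloutStorage
# interface (observations/recurrent_hidden_states/prev_actions/masks indexed
# by rollouts.step, plus an insert() method); the timing bookkeeping and
# recurrent-memory handling of the real implementation are simplified, so
# treat the signature and attribute names as assumptions.
import time

import torch


def collect_rollout_step_sketch(envs, actor_critic, rollouts, device,
                                current_episode_reward, running_episode_stats):
    t_sample = time.time()
    # 1. Sample actions from the policy for the most recently stored step.
    with torch.no_grad():
        step_observation = {
            k: v[rollouts.step] for k, v in rollouts.observations.items()
        }
        values, actions, action_log_probs, hidden_states = actor_critic.act(
            step_observation,
            rollouts.recurrent_hidden_states[rollouts.step],
            rollouts.prev_actions[rollouts.step],
            rollouts.masks[rollouts.step],
        )
    pth_time = time.time() - t_sample

    # 2. Step the vectorized environments with plain Python ints.
    t_step = time.time()
    outputs = envs.step([a[0].item() for a in actions])
    observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
    env_time = time.time() - t_step

    rewards = torch.tensor(rewards, dtype=torch.float,
                           device=current_episode_reward.device).unsqueeze(1)
    masks = torch.tensor([[0.0] if done else [1.0] for done in dones],
                         dtype=torch.float,
                         device=current_episode_reward.device)

    # 3. Track per-episode statistics: accumulate reward, fold finished
    #    episodes into the running count/reward, then reset their accumulator.
    current_episode_reward += rewards
    running_episode_stats["reward"] += (1 - masks) * current_episode_reward
    running_episode_stats["count"] += 1 - masks
    current_episode_reward *= masks

    # 4. Store the transition for the next PPO update.
    batch = batch_obs(observations, device=device)  # collation helper used throughout this file
    rollouts.insert(batch, hidden_states, actions, action_log_probs,
                    values, rewards, masks)

    return pth_time, env_time, envs.num_envs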