def gen_lm_aux_labels(self, env_id, instruction, affine):
    """Build auxiliary landmark labels for one environment and instruction.

    Landmark positions are loaded from the environment config, converted to
    pixel coordinates with ``pos_m_to_px``, transformed with ``affine`` and
    then filtered to those whose transformed x and y both lie strictly
    between 0 and ``self.img_w``.

    Args:
        env_id: Identifier handed to ``load_env_config``.
        instruction: Instruction handed to ``get_mentioned_landmarks``.
        affine: Transform handed to ``apply_affine_on_pts``.

    Returns:
        A dict with keys ``"lm_pos"``, ``"lm_indices"``, ``"lm_mentioned"``
        and ``"lm_visible"``. When no landmark passes the bounds filter,
        every value is an empty list instead of a tensor.
    """
    env_conf_json = load_env_config(env_id)
    _, landmark_indices, landmark_positions = get_landmark_locations_airsim(
        env_conf_json)

    landmark_pos_in_img = pos_m_to_px(
        np.asarray(landmark_positions)[:, 0:2],
        np.array([self.map_w, self.map_h]))
    landmark_pos_in_seg_img = apply_affine_on_pts(landmark_pos_in_img, affine)

    # Add a leading batch dimension of size 1 to both tensors.
    landmark_pos_t = torch.from_numpy(landmark_pos_in_seg_img).unsqueeze(0)
    landmark_indices_t = torch.LongTensor(landmark_indices).unsqueeze(0)

    # A landmark is kept only if BOTH of its coordinates are in bounds.
    # NOTE(review): the same bound (self.img_w) is applied to x and y, which
    # presumably assumes a square image -- confirm.
    in_bounds = torch.gt(landmark_pos_t, 0) & torch.lt(landmark_pos_t,
                                                       self.img_w)
    mask = in_bounds[:, :, 0] & in_bounds[:, :, 1]

    landmark_pos_t = torch.masked_select(
        landmark_pos_t,
        mask.unsqueeze(2).expand_as(landmark_pos_t)).view([-1, 2])
    landmark_indices_t = torch.masked_select(landmark_indices_t,
                                             mask).view([-1])

    _, mentioned_indices = get_mentioned_landmarks(self.thesaurus, instruction)

    # NOTE(review): entries that are not set to 1 below keep whatever
    # empty_float_tensor initialised them to -- presumably zeros; confirm.
    mentioned_labels_t = empty_float_tensor(
        list(landmark_indices_t.size())).long()
    for i, lm_idx in enumerate(landmark_indices_t):
        if int(lm_idx) in mentioned_indices:
            mentioned_labels_t[i] = 1

    if len(landmark_indices_t) > 0:
        return {
            "lm_pos": landmark_pos_t,
            "lm_indices": landmark_indices_t,
            "lm_mentioned": mentioned_labels_t,
            "lm_visible": mask,
        }
    return {
        "lm_pos": [],
        "lm_indices": [],
        "lm_mentioned": [],
        "lm_visible": [],
    }
def forward(self, coverage_masks_w, cam_poses):
    """Add a filled disc around the first camera position to the masks.

    The first entry of ``cam_poses.position`` is converted to pixels, a disc
    of radius 6 px around it is rasterised, and that disc is added to every
    mask in ``coverage_masks_w`` (result clamped to [0, 1]).

    Args:
        coverage_masks_w: Tensor indexed as ``[batch, channel, row, col]``
            (the code reads ``[0, 0]``, ``shape[0]`` and the two trailing
            dimensions).
        cam_poses: Object exposing a ``position`` tensor with at least one
            entry.

    Returns:
        ``coverage_masks_w`` with the disc added, clamped to [0, 1].
    """
    pos_px = pos_m_to_px(cam_poses.position[0:1],
                         img_size_px=self.world_size_px,
                         world_size_px=self.world_size_px,
                         world_size_m=self.world_size_m)
    batch_size = coverage_masks_w.shape[0]

    # TODO: Don't do this at test-time for everything except the first action!
    assert cam_poses.position.shape[
        0] > 0, "Not implemented test-time behavior"

    pos_mask = torch.zeros_like(coverage_masks_w[0, 0])

    radius = 6  # 6 pixels is a bit less than a meter

    x = pos_px[0][0].item()
    y = pos_px[0][1].item()
    xi = int(x)
    yi = int(y)

    # Clamp the bounding box of the disc to the mask extents. The first mask
    # index is bounded by shape[2] and the second by shape[3]; previously both
    # used shape[2], which is only correct for square masks.
    min_x = max(xi - radius, 0)
    min_y = max(yi - radius, 0)
    max_x = min(xi + radius, coverage_masks_w.shape[2])
    max_y = min(yi + radius, coverage_masks_w.shape[3])

    # Pixel (i, j) is inside the disc if its centre (i + 0.5, j + 0.5) is
    # closer than `radius` to the continuous position (x, y).
    indices = [[i, j]
               for i in range(min_x, max_x)
               for j in range(min_y, max_y)
               if (x - i - 0.5)**2 + (y - j - 0.5)**2 < radius**2]
    for i, j in indices:
        pos_mask[i, j] = 1.0

    coverage_masks_w_init_pos = (
        coverage_masks_w + pos_mask[np.newaxis, np.newaxis, :, :]).clamp(0, 1)

    # NOTE(review): this visualisation was guarded by `if True:` and therefore
    # always runs; the second call passes waitkey=True, which presumably
    # blocks -- confirm whether it should stay enabled.
    for i in range(batch_size):
        Presenter().show_image(coverage_masks_w[i, 0],
                               "cov_mask_before",
                               scale=4,
                               waitkey=1)
        Presenter().show_image(coverage_masks_w_init_pos[i, 0],
                               "cov_mask_after",
                               scale=4,
                               waitkey=True)

    return coverage_masks_w_init_pos
def provider_past_trajectory(segment_data, data):
    """Render the trajectory travelled so far as one image per timestep.

    For every timestep (until the first sample whose ``"state"`` is None) the
    segment between the previous and the current position is drawn onto a
    shared 64x64 canvas, and a snapshot of the canvas is recorded.

    Args:
        segment_data: Sequence of samples; each is indexed with ``"state"``.
        data: Unused.

    Returns:
        ``[("past_trajectory_map", canvases)]`` where ``canvases`` stacks the
        per-timestep snapshots along dimension 0. It has shape ``(0, 64, 64)``
        when no timestep has a state.
    """
    canvas = np.zeros((64, 64))
    canvases_t = []
    last_pos = None

    for timestep in range(len(segment_data)):
        state = segment_data[timestep]["state"]
        if state is None:
            break

        # Read the state through the same access path as the None check above
        # (the original used `segment_data.state[timestep]` here).
        pos_as = state.state[9:12]
        # NOTE(review): positions are scaled for img_size_px=32 but drawn on a
        # 64x64 canvas -- confirm this is intended.
        pos_map = pos_m_to_px(pos_as[np.newaxis, :], img_size_px=32)[0]

        # `is not None` instead of `!= None`: comparing a NumPy array with
        # `!=` is element-wise and cannot be used as an `if` condition.
        if last_pos is not None:
            # NOTE(review): the return value is discarded, so this relies on
            # plot_path_on_img drawing into `canvas` in place -- confirm.
            tdd.plot_path_on_img(canvas, [last_pos, pos_map])
            cv2.imshow("past_traje", canvas)
        last_pos = pos_map

        canvases_t.append(torch.from_numpy(canvas.copy()))

    if canvases_t:
        stacked = torch.stack(canvases_t, dim=0)
    else:
        # torch.stack rejects an empty list; return an empty stack instead.
        stacked = torch.from_numpy(np.zeros((0,) + canvas.shape))
    return [("past_trajectory_map", stacked)]
def stop_success(rollout):
    """Return whether the rollout stopped close to the most likely goal.

    The stopping position of the last sample is converted to map pixels and
    compared against the argmax of channel 1 of that sample's ``"v_dist_w"``.

    Args:
        rollout: Non-empty sequence of samples; the last one must provide
            ``"state"`` (with ``get_pos_2d()``) and ``"v_dist_w"``.

    Returns:
        True if the Euclidean pixel distance between the stopping position
        and the argmax location is below 3.2.
    """
    last_sample = rollout[-1]
    stop_pos = last_sample["state"].get_pos_2d()

    # TODO: Grab these from parameter server
    stop_pos_map = pos_m_to_px(stop_pos[np.newaxis, :],
                               img_size_px=32,
                               world_size_px=32,
                               world_size_m=4.7)[0]

    goal_distribution = last_sample["v_dist_w"][1, :, :]
    _, argmax_best_goal = goal_distribution.view(-1).max(0)

    # The flattened index is row * n_cols + col, so it must be decomposed by
    # the number of columns (shape[1]). The original divided by shape[0],
    # which only coincides for square maps.
    best_stop_pos_x, best_stop_pos_y = divmod(int(argmax_best_goal),
                                              goal_distribution.shape[1])
    best_stop_pos = torch.Tensor([best_stop_pos_x, best_stop_pos_y])

    pos = torch.from_numpy(stop_pos_map).float()
    dst_to_best_stop = torch.norm(pos - best_stop_pos)

    return dst_to_best_stop.detach().item() < 3.2
def get_reward(self, v_dist_w, cam_pos, action, done, first):
    """Compute shaped reward terms for the current step.

    Args:
        v_dist_w: Object exposing ``inner_distribution`` (indexed
            ``[0, channel, :, :]``) and ``outer_prob_mass`` (indexed
            ``[0, 1]``).
        cam_pos: Tensor; ``cam_pos[0:1, 0:2]`` is used as the position in
            meters.
        action: Indexable; ``action[3] > 0.5`` is treated as a stop action.
        done: If truthy, the stop terms are computed even without a stop
            action.
        first: Unused.

    Returns:
        Tuple ``(visit_reward, stop_reward, exploration_reward,
        stop_oob_reward, stop_p_reward)``.

    Side effects:
        Updates ``self.last_pos``, ``self.visited_dist``,
        ``self.prev_exploration_potential`` and ``self.prev_visit_potential``.
    """
    # Prepare things
    pos_in_map_m = cam_pos[0:1, 0:2]  # * self.world_size_px / self.
    pos_in_map_px = torch.from_numpy(
        transformations.pos_m_to_px(pos_in_map_m.detach().cpu().numpy(),
                                    self.world_size_px, self.world_size_m,
                                    self.world_size_px))[0]

    if self.last_pos is None:
        self.last_pos = pos_in_map_px

    self.visited_dist = tdd.plot_path_on_img(
        self.visited_dist, [self.last_pos, pos_in_map_px])
    #Presenter().show_image(self.visited_dist, "visited_dist", scale=4, waitkey=1)

    visit_dist = v_dist_w.inner_distribution[0, 0, :, :]
    visit_dist = visit_dist.detach().cpu().numpy()

    goal_unobserved_prob = v_dist_w.outer_prob_mass[0, 1].item()

    # -----------------------------------------------------------------------
    # Calculate exploration reward, using probability that goal is observed as a potential function
    # Don't ever reduce the potential - only increase it
    goal_unobserved_potential = -goal_unobserved_prob
    if self.prev_exploration_potential is None:
        self.prev_exploration_potential = goal_unobserved_potential
    goal_unobserved_potential = max(goal_unobserved_potential,
                                    self.prev_exploration_potential)
    exploration_reward = (
        goal_unobserved_potential -
        self.prev_exploration_potential) * self.exploration_alpha
    self.prev_exploration_potential = goal_unobserved_potential

    # -----------------------------------------------------------------------
    # Calculate visitation reward (potential shaped by visitation probability)
    # Give reward for visiting the high-probability states at next timestep
    visit_potential = -self.wasserstein_distance(self.visited_dist, visit_dist)
    if self.prev_visit_potential is None:
        self.prev_visit_potential = visit_potential
    visit_reward = (visit_potential -
                    self.prev_visit_potential) * self.visit_alpha
    self.prev_visit_potential = visit_potential

    # -----------------------------------------------------------------------
    # Calculate stop reward consisting of EMD(stop,goal), P(stop=goal), and -P(stop_oob)
    if action[3] > 0.5 or done:
        partial_stop_dist = v_dist_w.inner_distribution[
            0, 1, :, :].detach().cpu().numpy()
        stopped_dist = tdd.plot_path_on_img(self.empty_distribution(),
                                            [pos_in_map_px, pos_in_map_px])
        stop_wd = self.wasserstein_distance(partial_stop_dist, stopped_dist)
        stop_reward = -stop_wd * self.stop_alpha

        # Calculate reward proportional to P(p_g = p_stop).
        # The pixel position computed above is reused; the original
        # recomputed it here into the same variable WITHOUT the trailing [0],
        # which changed the shape later stored in self.last_pos on stop steps.
        pos_x = int(pos_in_map_px[0].item() + 0.5)
        pos_y = int(pos_in_map_px[1].item() + 0.5)
        pos_x = min(max(pos_x, 0), partial_stop_dist.shape[0] - 1)
        pos_y = min(max(pos_y, 0), partial_stop_dist.shape[1] - 1)
        stop_prob_at_pos = partial_stop_dist[pos_x, pos_y].item()
        stop_prob_prop = stop_prob_at_pos / (partial_stop_dist.max() + 1e-10)
        stop_p_reward = stop_prob_prop * self.stop_p_alpha

        # Add negative reward for stopping when P(goal oob) is high
        stop_oob_reward = -self.stop_oob_alpha * goal_unobserved_prob
    else:
        stop_reward = 0.0
        stop_p_reward = 0.0
        stop_oob_reward = 0.0

    # -----------------------------------------------------------------------
    self.last_pos = pos_in_map_px

    return visit_reward, stop_reward, exploration_reward, stop_oob_reward, stop_p_reward
def get_reward(self, v_dist_w, goal_oob_prob_w, cam_pos, action):
    """Compute visitation, stop and exploration rewards for the current step.

    Args:
        v_dist_w: Tensor indexed ``[0, channel, :, :]``; channel 0 is used as
            the visitation distribution and channel 1 as the stop
            distribution. It is not modified.
        goal_oob_prob_w: Single-element tensor; ``.item()`` is read as the
            probability that the goal is outside the map.
        cam_pos: Tensor; ``cam_pos[0:1, 0:2]`` is used as the position in
            meters.
        action: Indexable; ``action[3] > 0.5`` is treated as a stop action.

    Returns:
        Tuple ``(visit_reward, stop_reward, exploration_reward)``.

    Side effects:
        Updates ``self.prev_potential``, ``self.start_best_stop_dist`` and
        ``self.prev_goal_visible_prob``.
    """
    # Prepare things
    pos_in_map_m = cam_pos[0:1, 0:2]  # * self.world_size_px / self.
    pos_in_map_px = torch.from_numpy(
        transformations.pos_m_to_px(pos_in_map_m.detach().cpu().numpy(),
                                    self.world_size_px, self.world_size_m,
                                    self.world_size_px))

    pos_x = int(pos_in_map_px[0, 0].item() + 0.5)
    pos_y = int(pos_in_map_px[0, 1].item() + 0.5)

    visit_dist = v_dist_w[0, 0, :, :]
    partial_stop_dist = v_dist_w[0, 1, :, :]
    outside_stop_prob = goal_oob_prob_w.item()
    goal_visible_prob = 1 - outside_stop_prob

    pos_x = min(max(pos_x, 0), visit_dist.shape[0] - 1)
    pos_y = min(max(pos_y, 0), visit_dist.shape[1] - 1)

    # -----------------------------------------------------------------------
    # Calculate visitation reward (potential shaped by visitation probability)
    #TODO: Consider this. This way the total reward that can be collected is 1
    # Normalise out-of-place: the slices above are views of v_dist_w, so the
    # original in-place `-=` / `/=` silently rewrote the caller's tensor.
    visit_dist = visit_dist - visit_dist.min()
    visit_dist = visit_dist / (visit_dist.max() + 1e-10)
    visit_prob = visit_dist[pos_x, pos_y].item()

    # Give reward for visiting the high-probability states at next timestep
    visit_potential = self.visit_alpha * visit_prob
    if self.prev_potential is None:
        self.prev_potential = visit_potential
    visit_reward = visit_potential - self.prev_potential
    self.prev_potential = visit_potential

    # -----------------------------------------------------------------------
    # Calculate stop reward consisting of 2 terms:
    #  Term A: Reward proportional to the goal probability
    #  Term B: Reward proportional to the negative distance to most likely goal location, weighed by the probability that t
    # TODO: Consider this re-normalization approach and if it's any good
    partial_stop_dist = partial_stop_dist - partial_stop_dist.min()
    partial_stop_dist = partial_stop_dist / (partial_stop_dist.max() + 0.01)
    #partial_stop_dist *= goal_visible_prob

    # No batch dimension here:
    stop_prob_at_pos = partial_stop_dist[pos_x, pos_y].item()

    # reshape (not view): the normalised copy above may not be contiguous.
    _, argmax_stop_prob = partial_stop_dist.reshape(-1).max(0)
    # Flattened index = row * n_cols + col, so decompose by shape[1]
    # (the original used shape[0], correct only for square maps).
    best_stop_pos_x, best_stop_pos_y = divmod(int(argmax_stop_prob),
                                              partial_stop_dist.shape[1])

    best_stop_pos = torch.Tensor([best_stop_pos_x, best_stop_pos_y])
    pos = torch.Tensor([pos_x, pos_y])
    dst_to_best_stop = torch.norm(pos - best_stop_pos)

    if self.start_best_stop_dist is None:
        # NOTE(review): min() caps the start distance at
        # MIN_START_STOP_DIST_PX; the constant's name suggests a lower bound
        # (max()) may have been intended -- confirm.
        self.start_best_stop_dist = min(dst_to_best_stop,
                                        MIN_START_STOP_DIST_PX)

    if action[3] > 0.5:
        # Term A
        stop_reward_a = (stop_prob_at_pos -
                         self.stop_offset) * self.stop_alpha
        # Term B
        stop_reward_b_raw = 0.2 - min(
            dst_to_best_stop / (self.start_best_stop_dist + 1e-9), 1)
        #stop_reward_b = stop_reward_b_raw * goal_visible_prob
        stop_reward_b = stop_reward_b_raw
        stop_reward = stop_reward_a + stop_reward_b
    else:
        stop_reward = 0.0

    # -----------------------------------------------------------------------
    # Calculate exploration reward, using probability that goal is observed as a potential function
    if self.prev_goal_visible_prob is None:
        self.prev_goal_visible_prob = goal_visible_prob
    exploration_reward = (goal_visible_prob - self.prev_goal_visible_prob
                          ) * self.exploration_alpha
    self.prev_goal_visible_prob = goal_visible_prob

    # -----------------------------------------------------------------------
    return visit_reward, stop_reward, exploration_reward
def show_landmark_locations(self, loop=True, states=None):
    """Display landmark positions overlaid on stored images.

    Reads ``"images"`` and ``"images_w"`` from ``self.tensor_store`` and
    overlays landmark points on the first image of each entry. During a
    rollout (``rollout.run_metadata.IS_ROLLOUT``) the points are recomputed
    from the environment config and the camera pose derived from ``states``;
    otherwise they are read from the tensor store.

    Args:
        loop: If falsy, only the first entry is shown.
        states: Passed to ``self.cam_poses_from_states`` during a rollout.
    """
    # Show landmark locations in first-person images
    fpv_images = self.tensor_store.get("images")
    map_images = self.tensor_store.get("images_w")

    import rollout.run_metadata as md
    if md.IS_ROLLOUT:
        # TODO: Discard this and move this to PomdpInterface or something
        # (it's got nothing to do with the model)
        # load landmark positions from configs
        from data_io.env import load_env_config
        from learning.datasets.aux_data_providers import get_landmark_locations_airsim
        from learning.models.semantic_map.pinhole_camera_inv import PinholeCameraProjection

        projector = PinholeCameraProjection(
            map_size_px=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"],
            img_x=self.params["img_w"],
            img_y=self.params["img_h"],
            cam_fov=self.params["cam_h_fov"],
            #TODO: Handle correctly
            domain="sim",
            use_depth=False)

        conf_json = load_env_config(md.ENV_ID)
        _, _, landmark_pos = get_landmark_locations_airsim(conf_json)

        cam_poses = self.cam_poses_from_states(states)
        cam_pos = cam_poses.position[0]
        cam_rot = cam_poses.orientation[0]

        points_on_map = []
        points_on_fpv = []
        for landmark_in_world in landmark_pos:
            lm_pos_img, _, _ = projector.world_point_to_image(
                cam_pos, cam_rot, landmark_in_world)
            lm_pos_map = torch.from_numpy(
                transformations.pos_m_to_px(
                    landmark_in_world[np.newaxis, :],
                    self.params["global_map_size"],
                    self.params["world_size_m"],
                    self.params["world_size_px"]))
            points_on_map.append(lm_pos_map[0])
            # Only landmarks for which the projector returned an image
            # position are drawn on the first-person image.
            if lm_pos_img is not None:
                points_on_fpv.append(lm_pos_img)

        # Wrap in a one-element list so both sources are indexed per entry.
        lm_pos_img_all = [points_on_fpv]
        lm_pos_map_all = [points_on_map]
    else:
        lm_pos_img_all = self.tensor_store.get("lm_pos_fpv_img")
        lm_pos_map_all = self.tensor_store.get("lm_pos_map")

    print("Plotting landmark points")

    for idx in range(len(fpv_images)):
        presenter = Presenter()
        fpv_overlay = presenter.overlay_pts_on_image(fpv_images[idx][0],
                                                     lm_pos_img_all[idx])
        map_overlay = presenter.overlay_pts_on_image(map_images[idx][0],
                                                     lm_pos_map_all[idx])
        presenter.show_image(fpv_overlay, "landmarks_on_fpv_img", scale=8)
        presenter.show_image(map_overlay, "landmarks_on_map", scale=20)

        if not loop:
            break
def unbatch(self, batch):
    """Strip the batch dimension from ``batch`` and register auxiliary inputs.

    Element 0 of every batch entry is taken, landmark and goal positions are
    converted to pixels via ``transformations.pos_m_to_px``, label tensors
    are passed through ``self.cuda_var``, and the results are stored in
    ``self.tensor_store``.

    Args:
        batch: Mapping with the keys read below (``"images"``, ``"instr"``,
            ``"instr_len"``, ``"states"``, ``"actions"``, ``"lm_pos_fpv"``,
            ``"lm_pos_map"``, ``"lm_indices"``, ``"goal_loc"``,
            ``"lm_mentioned"``, ``"md"`` and the template keys).

    Returns:
        Tuple ``(images, instructions, instr_lengths, states, actions,
        lm_pos_fpv_img, lm_pos_fpv_features, lm_pos_map, lm_indices,
        goal_pos_map, lm_mentioned, lm_mentioned_tplt, side_mentioned_tplt,
        lang_lm_mentioned, metadata, obs_mask, plan_mask, pos_enc)``.
    """

    def to_map_px(pos_m):
        # Convert a tensor of positions (meters) to a tensor of map pixels.
        return torch.from_numpy(
            transformations.pos_m_to_px(pos_m.numpy(),
                                        self.params["global_map_size"],
                                        self.params["world_size_m"],
                                        self.params["world_size_px"]))

    def to_feature_coords(pos_fpv):
        # Scale first-person pixel coordinates down to feature-map resolution.
        factor = self.img_to_features_w.img_to_features.get_downscale_factor()
        return self.cuda_var((pos_fpv / factor).long())

    def map_present(fn, items):
        # Apply fn to every non-None item; None entries stay None.
        return [fn(item) if item is not None else None for item in items]

    # TODO: Carefully consider this line. This is necessary to reset state between batches (e.g. delete all tensors in the tensor store)
    self.reset()

    # Get rid of the batch dimension for everything
    images = self.maybe_cuda(batch["images"])[0]
    seq_len = images.shape[0]
    instructions = self.maybe_cuda(batch["instr"])[0][:seq_len]
    instr_lengths = batch["instr_len"][0]
    states = self.maybe_cuda(batch["states"])[0]
    actions = self.maybe_cuda(batch["actions"])[0]

    # Auxiliary labels
    lm_pos_fpv = batch["lm_pos_fpv"][0]
    lm_pos_map = batch["lm_pos_map"][0]
    lm_indices = batch["lm_indices"][0]
    goal_pos_map = batch["goal_loc"][0]

    # TODO: Get rid of this. We will have lm_mentioned booleans and lm_mentioned_idx integers and that's it.
    TEMPLATES = True
    if TEMPLATES:
        lm_mentioned_tplt = batch["lm_mentioned_tplt"][0]
        side_mentioned_tplt = batch["side_mentioned_tplt"][0]
        side_mentioned_tplt = self.cuda_var(side_mentioned_tplt)
        lm_mentioned_tplt = self.cuda_var(lm_mentioned_tplt)
        lang_lm_mentioned = None
    else:
        lm_mentioned_tplt = None
        side_mentioned_tplt = None
        lang_lm_mentioned = batch["lang_lm_mentioned"][0]
    lm_mentioned = batch["lm_mentioned"][0]

    # This is the first-timestep metadata
    metadata = batch["md"][0]

    # Meters -> pixels, then move everything through cuda_var.
    lm_pos_map = map_present(lambda p: self.cuda_var(to_map_px(p).long()),
                             lm_pos_map)
    goal_pos_map = to_map_px(goal_pos_map)

    lm_pos_fpv_features = map_present(to_feature_coords, lm_pos_fpv)
    lm_pos_fpv_img = map_present(lambda s: self.cuda_var(s.long()), lm_pos_fpv)
    lm_indices = map_present(self.cuda_var, lm_indices)
    goal_pos_map = self.cuda_var(goal_pos_map)
    if not TEMPLATES:
        lang_lm_mentioned = self.cuda_var(lang_lm_mentioned)
    lm_mentioned = map_present(self.cuda_var, lm_mentioned)

    obs_mask = [True] * seq_len
    plan_mask = [True] * seq_len
    pos_enc = None

    # TODO: Figure out how to keep these properly. Perhaps as a whole batch is best
    store = self.tensor_store
    store.keep_inputs("lm_pos_fpv_img", lm_pos_fpv_img)
    store.keep_inputs("lm_pos_fpv_features", lm_pos_fpv_features)
    store.keep_inputs("lm_pos_map", lm_pos_map)
    store.keep_inputs("lm_indices", lm_indices)
    store.keep_inputs("goal_pos_map", goal_pos_map)
    if TEMPLATES:
        store.keep_inputs("lm_mentioned_tplt", lm_mentioned_tplt)
        store.keep_inputs("side_mentioned_tplt", side_mentioned_tplt)
    else:
        store.keep_inputs("lang_lm_mentioned", lang_lm_mentioned)
    store.keep_inputs("lm_mentioned", lm_mentioned)

    # ----------------------------------------------------------------------------
    # Optional Auxiliary Inputs
    # ----------------------------------------------------------------------------
    #if self.aux_losses.input_required("lm_pos_map"): self.tensor_store.keep_inputs("lm_pos_map", lm_pos_map)
    #if self.aux_losses.input_required("lm_indices"): self.tensor_store.keep_inputs("lm_indices", lm_indices)
    #if self.aux_losses.input_required("lm_mentioned"): self.tensor_store.keep_inputs("lm_mentioned", lm_mentioned)

    return images, instructions, instr_lengths, states, actions, \
        lm_pos_fpv_img, lm_pos_fpv_features, lm_pos_map, lm_indices, goal_pos_map, \
        lm_mentioned, lm_mentioned_tplt, side_mentioned_tplt, lang_lm_mentioned, \
        metadata, obs_mask, plan_mask, pos_enc
def get_reward(self, v_dist_w, cam_pos, action):
    """Compute visitation and stop rewards for the current step.

    Args:
        v_dist_w: Tensor indexed ``[0, channel, :, :]``; channel 0 is used as
            the visitation distribution and channel 1 as the stop
            distribution. It is not modified.
        cam_pos: Tensor; ``cam_pos[0:1, 0:2]`` is used as the position in
            meters.
        action: Indexable; ``action[3] > 0.5`` is treated as a stop action.

    Returns:
        Tuple ``(visit_reward, stop_reward)``.

    Side effects:
        Updates ``self.prev_potential`` and ``self.start_best_stop_dist``.
    """
    # If stopped:
    pos_in_map_m = cam_pos[0:1, 0:2]  # * self.world_size_px / self.
    pos_in_map_px = torch.from_numpy(
        transformations.pos_m_to_px(pos_in_map_m.detach().cpu().numpy(),
                                    self.world_size_px, self.world_size_m,
                                    self.world_size_px))

    pos_x = int(pos_in_map_px[0, 0].item() + 0.5)
    pos_y = int(pos_in_map_px[0, 1].item() + 0.5)

    visit_dist = v_dist_w[0, 0, :, :]
    stop_dist = v_dist_w[0, 1, :, :]

    #TODO: Consider this. This way the total reward that can be collected is 1
    # Normalise out-of-place: the slices above are views of v_dist_w, so the
    # original in-place `-=` / `/=` silently rewrote the caller's tensor.
    visit_dist = visit_dist - visit_dist.min()
    visit_dist = visit_dist / (visit_dist.max() + 1e-10)
    stop_dist = stop_dist - stop_dist.min()
    stop_dist = stop_dist / (stop_dist.max() + 1e-10)

    pos_x = min(max(pos_x, 0), visit_dist.shape[0] - 1)
    pos_y = min(max(pos_y, 0), visit_dist.shape[1] - 1)

    visit_prob = visit_dist[pos_x, pos_y].item()
    stop_prob = stop_dist[pos_x, pos_y].item()

    # No batch dimension here:
    # reshape (not view): the normalised copy above may not be contiguous.
    _, argmax_stop_prob = stop_dist.reshape(-1).max(0)
    # Flattened index = row * n_cols + col, so decompose by shape[1]
    # (the original used shape[0], correct only for square maps).
    best_stop_pos_x, best_stop_pos_y = divmod(int(argmax_stop_prob),
                                              stop_dist.shape[1])

    best_stop_pos = torch.Tensor([best_stop_pos_x, best_stop_pos_y])
    pos = torch.Tensor([pos_x, pos_y])
    dst_to_best_stop = torch.norm(pos - best_stop_pos)

    if self.start_best_stop_dist is None:
        # NOTE(review): min() caps the start distance at
        # MIN_START_STOP_DIST_PX; the constant's name suggests a lower bound
        # (max()) may have been intended -- confirm.
        self.start_best_stop_dist = min(dst_to_best_stop,
                                        MIN_START_STOP_DIST_PX)

    visit_potential = self.visit_alpha * visit_prob
    # THIS IS NOT POTENTIAL NOW
    # TODO: Change terminology
    if self.prev_potential is None:
        self.prev_potential = visit_potential
        # Don't give reward for the first step
        visit_reward = visit_potential * 0
    # Give reward for visiting the high-probability states at next timestep
    else:
        visit_reward = visit_potential - self.prev_potential
        self.prev_potential = visit_potential

    if action[3] > 0.5:
        stop_reward_a = (stop_prob - self.stop_offset) * self.stop_alpha
        stop_reward_b = 0.2 - min(
            dst_to_best_stop / (self.start_best_stop_dist + 1e-9), 1)
        stop_reward = stop_reward_a + stop_reward_b
    else:
        stop_reward = 0.0

    #total_reward = visit_reward + stop_reward
    return visit_reward, stop_reward
def sup_loss_on_batch(self, batch, eval):
    """Compute the supervised loss (action loss plus auxiliary losses).

    Every batch element is processed independently: the model state is
    reset, the element is sliced to its true sequence length, auxiliary
    inputs are registered in ``self.tensor_store``, the model is run, and
    the action loss is accumulated. Losses are logged to ``self.writer``.

    Args:
        batch: Mapping with the keys read below, or None to skip.
        eval: Selects the ``"/eval"`` (truthy) or ``"/train"`` logging prefix.

    Returns:
        ``action_loss_avg + aux_loss``. For a None batch, returns the tensor
        produced by ``empty_float_tensor([1], ...)``.
    """
    self.prof.tick("out")

    action_loss_total = Variable(
        empty_float_tensor([1], self.is_cuda, self.cuda_device))

    if batch is None:
        print("Skipping None Batch")
        return action_loss_total

    images = self.maybe_cuda(batch["images"])

    instructions = self.maybe_cuda(batch["instr"])
    instr_lengths = batch["instr_len"]
    states = self.maybe_cuda(batch["states"])
    actions = self.maybe_cuda(batch["actions"])

    # Auxiliary labels
    lm_pos_fpv = batch["lm_pos_fpv"]
    lm_pos_map = batch["lm_pos_map"]
    lm_indices = batch["lm_indices"]
    goal_pos_map = batch["goal_loc"]

    # TODO: Get rid of this. We will have lm_mentioned booleans and lm_mentioned_idx integers and that's it.
    TEMPLATES = True
    if TEMPLATES:
        lm_mentioned_tplt = batch["lm_mentioned_tplt"]
        side_mentioned_tplt = batch["side_mentioned_tplt"]
    else:
        lang_lm_mentioned = batch["lang_lm_mentioned"]
    lm_mentioned = batch["lm_mentioned"]

    # This is the first-timestep metadata
    metadata = batch["md"]

    batch_size = images.size(0)
    count = 0
    # Running sum of per-element action losses. Kept separate from
    # action_loss_total so the sum does not depend on how
    # empty_float_tensor initialises its output.
    action_loss_sum = None

    # Loop thru batch
    for b in range(batch_size):
        self.reset()

        self.prof.tick("out")
        b_seq_len = len_until_nones(metadata[b])

        # TODO: Generalize this
        # Slice the data according to the sequence length
        b_images = images[b][:b_seq_len]
        b_instructions = instructions[b][:b_seq_len]
        b_instr_len = instr_lengths[b][:b_seq_len]
        b_states = states[b][:b_seq_len]
        b_actions = actions[b][:b_seq_len]
        b_lm_pos_fpv = lm_pos_fpv[b][:b_seq_len]
        b_lm_pos_map = lm_pos_map[b][:b_seq_len]
        b_lm_indices = lm_indices[b][:b_seq_len]
        b_goal_pos = goal_pos_map[b][:b_seq_len]
        if not TEMPLATES:
            b_lang_lm_mentioned = lang_lm_mentioned[b][:b_seq_len]
        b_lm_mentioned = lm_mentioned[b][:b_seq_len]

        # Convert landmark and goal position from meters_and_metrics to pixels
        b_lm_pos_map = [
            torch.from_numpy(
                transformations.pos_m_to_px(p.numpy(),
                                            self.params["global_map_size"],
                                            self.params["world_size_m"],
                                            self.params["world_size_px"]))
            if p is not None else None for p in b_lm_pos_map
        ]
        b_goal_pos = torch.from_numpy(
            transformations.pos_m_to_px(b_goal_pos.numpy(),
                                        self.params["global_map_size"],
                                        self.params["world_size_m"],
                                        self.params["world_size_px"]))

        b_lm_pos_map = [
            self.cuda_var(s.long()) if s is not None else None
            for s in b_lm_pos_map
        ]
        b_lm_pos_fpv = [
            self.cuda_var((s / RESNET_FACTOR).long()) if s is not None else None
            for s in b_lm_pos_fpv
        ]
        b_lm_indices = [
            self.cuda_var(s) if s is not None else None for s in b_lm_indices
        ]
        b_goal_pos = self.cuda_var(b_goal_pos)
        if not TEMPLATES:
            b_lang_lm_mentioned = self.cuda_var(b_lang_lm_mentioned)
        b_lm_mentioned = [
            self.cuda_var(s) if s is not None else None for s in b_lm_mentioned
        ]

        # TODO: Figure out how to keep these properly. Perhaps as a whole batch is best
        # TODO: Introduce a key-value store (encapsulate instead of inherit)
        self.tensor_store.keep_inputs("lm_pos_fpv", b_lm_pos_fpv)
        self.tensor_store.keep_inputs("lm_pos_map", b_lm_pos_map)
        self.tensor_store.keep_inputs("lm_indices", b_lm_indices)
        self.tensor_store.keep_inputs("goal_pos_map", b_goal_pos)
        if not TEMPLATES:
            self.tensor_store.keep_inputs("lang_lm_mentioned",
                                          b_lang_lm_mentioned)
        self.tensor_store.keep_inputs("lm_mentioned", b_lm_mentioned)

        # TODO: Abstract all of these if-elses in a modular way once we know which ones are necessary
        if TEMPLATES:
            b_lm_mentioned_tplt = lm_mentioned_tplt[b][:b_seq_len]
            b_side_mentioned_tplt = side_mentioned_tplt[b][:b_seq_len]
            b_side_mentioned_tplt = self.cuda_var(b_side_mentioned_tplt)
            b_lm_mentioned_tplt = self.cuda_var(b_lm_mentioned_tplt)
            self.tensor_store.keep_inputs("lm_mentioned_tplt",
                                          b_lm_mentioned_tplt)
            self.tensor_store.keep_inputs("side_mentioned_tplt",
                                          b_side_mentioned_tplt)

            #b_lm_mentioned = b_lm_mentioned_tplt

        b_obs_mask = [True for _ in range(b_seq_len)]
        b_plan_mask = [True for _ in range(b_seq_len)]
        b_pos_enc = None

        # ----------------------------------------------------------------------------
        # Optional Auxiliary Inputs
        # ----------------------------------------------------------------------------
        if self.aux_losses.input_required("lm_pos_map_select"):
            b_lm_pos_map_select = [
                lm_pos for i, lm_pos in enumerate(b_lm_pos_map)
                if b_plan_mask[i]
            ]
            self.tensor_store.keep_inputs("lm_pos_map_select",
                                          b_lm_pos_map_select)
        if self.aux_losses.input_required("lm_indices_select"):
            b_lm_indices_select = [
                lm_idx for i, lm_idx in enumerate(b_lm_indices)
                if b_plan_mask[i]
            ]
            self.tensor_store.keep_inputs("lm_indices_select",
                                          b_lm_indices_select)
        if self.aux_losses.input_required("lm_mentioned_select"):
            b_lm_mentioned_select = [
                lm_m for i, lm_m in enumerate(b_lm_mentioned)
                if b_plan_mask[i]
            ]
            self.tensor_store.keep_inputs("lm_mentioned_select",
                                          b_lm_mentioned_select)

        # ----------------------------------------------------------------------------

        self.prof.tick("inputs")

        # Use a dedicated name for the predictions: the original assigned
        # them to `actions`, clobbering the ground-truth batch tensor that
        # the next loop iteration slices b_actions from.
        pred_actions = self(b_images,
                            b_states,
                            b_instructions,
                            b_instr_len,
                            has_obs=b_obs_mask,
                            plan=b_plan_mask,
                            pos_enc=b_pos_enc)

        action_losses, _ = self.action_loss(b_actions,
                                            pred_actions,
                                            batchreduce=False)

        self.prof.tick("call")

        action_losses = self.action_loss.batch_reduce_loss(action_losses)
        action_loss = self.action_loss.reduce_loss(action_losses)

        # Accumulate over batch elements. The original overwrote the total
        # on every iteration, so only the last element's loss survived while
        # `count` kept growing.
        if action_loss_sum is None:
            action_loss_sum = action_loss
        else:
            action_loss_sum = action_loss_sum + action_loss
        count += b_seq_len

        self.prof.tick("loss")

    if action_loss_sum is not None:
        action_loss_total = action_loss_sum
    action_loss_avg = action_loss_total / (count + 1e-9)

    self.prof.tick("out")

    # Doing this in the end (outside of the per-element loop)
    aux_losses = self.aux_losses.calculate_aux_loss(self.tensor_store,
                                                    reduce_average=True)
    aux_loss = self.aux_losses.combine_losses(aux_losses, self.aux_weights)

    prefix = self.model_name + ("/eval" if eval else "/train")

    self.writer.add_dict(prefix, get_current_meters(), self.get_iter())
    self.writer.add_dict(prefix, aux_losses, self.get_iter())
    self.writer.add_scalar(prefix + "/action_loss",
                           action_loss_avg.data.cpu().item(), self.get_iter())
    # TODO: Log value here
    self.writer.add_scalar(prefix + "/goal_accuracy",
                           self.goal_acc_meter.get(), self.get_iter())

    self.prof.tick("auxiliaries")

    total_loss = action_loss_avg + aux_loss

    self.inc_iter()

    self.prof.tick("summaries")
    self.prof.loop()
    self.prof.print_stats(1)

    return total_loss
def unbatch(self, batch, halfway=False):
    """Strip the batch dimension from ``batch`` and register auxiliary inputs.

    Element 0 of every batch entry is used. Unless ``halfway`` is set, label
    tensors are converted (meters to pixels, ``cuda_var``) and stored in
    ``self.tensor_store`` together with the ground-truth visitation
    distribution transformed by ``self.map_transform_w_to_s``. The
    environment id is always recorded as a tensor-store flag.

    Args:
        batch: Mapping with the keys read below.
        halfway: If truthy, skip all label handling; the mask and pose
            entries of the result are then None.

    Returns:
        Tuple ``(images, states, instructions, instr_lengths, plan_mask,
        firstseg_mask, start_poses, noisy_start_poses, metadata)``.
    """

    def to_map_px(pos_m):
        # Convert a tensor of positions (meters) to a tensor of map pixels.
        return torch.from_numpy(
            transformations.pos_m_to_px(pos_m.numpy(),
                                        self.params["global_map_size"],
                                        self.params["world_size_m"],
                                        self.params["world_size_px"]))

    def map_present(fn, items):
        # Apply fn to every non-None item; None entries stay None.
        return [fn(item) if item is not None else None for item in items]

    def select_planned(items, mask):
        # Keep the items at timesteps where the plan mask is set.
        return [item for i, item in enumerate(items) if mask[i]]

    # Inputs
    images = self.maybe_cuda(batch["images"][0])
    seq_len = len(images)
    instructions = self.maybe_cuda(batch["instr"][0][:seq_len])
    instr_lengths = batch["instr_len"][0][:seq_len]
    states = self.maybe_cuda(batch["states"][0])

    if halfway:
        # We won't need this extra information
        start_poses, noisy_start_poses = None, None
        plan_mask, firstseg_mask = None, None
    else:
        # True for every timestep that we do visitation prediction
        plan_mask = batch["plan_mask"][0]
        # True for every timestep that is a new instruction segment
        firstseg_mask = batch["firstseg_mask"][0]

        # Labels (including for auxiliary losses)
        # All object 2D coordinates in the first-person image
        lm_pos_fpv = batch["lm_pos_fpv"][0]
        # All object 2D coordinates in the semantic map
        lm_pos_map_m = batch["lm_pos_map"][0]
        # All object class indices
        lm_indices = batch["lm_indices"][0]
        # Goal location in the world in meters_and_metrics
        goal_pos_map_m = batch["goal_loc"][0]
        # 1/0 labels whether object was mentioned/not mentioned in template instruction
        lm_mentioned = batch["lm_mentioned"][0]
        # TODO: We're taking the FIRST label here. SINGLE SEGMENT ASSUMPTION
        # integer labels as to which object was mentioned
        lang_lm_mentioned = batch["lang_lm_mentioned"][0][0]

        start_poses = batch["start_poses"][0]
        noisy_start_poses = get_noisy_poses_torch(
            start_poses.numpy(),
            self.params["pos_variance"],
            self.params["rot_variance"],
            cuda=False,
            cuda_device=None)

        # Ground truth visitation distributions (in start and global frames)
        v_dist_w_ground_truth_select = self.maybe_cuda(
            batch["traj_ground_truth"][0])
        start_poses_select = self.batch_select.one(
            start_poses, plan_mask, v_dist_w_ground_truth_select.device)
        v_dist_s_ground_truth_select, _ = self.map_transform_w_to_s(
            v_dist_w_ground_truth_select, None, start_poses_select)
        #self.tensor_store.keep_inputs("v_dist_w_ground_truth_select", v_dist_w_ground_truth_select)
        self.tensor_store.keep_inputs("v_dist_s_ground_truth_select",
                                      v_dist_s_ground_truth_select)
        #Presenter().show_image(v_dist_s_ground_truth_select.detach().cpu()[0,0], "v_dist_s_ground_truth_select", waitkey=1, scale=4)
        #Presenter().show_image(v_dist_w_ground_truth_select.detach().cpu()[0,0], "v_dist_w_ground_truth_select", waitkey=1, scale=4)

        # Meters -> pixels for landmark and goal positions.
        lm_pos_map_px = map_present(to_map_px, lm_pos_map_m)
        goal_pos_map_px = to_map_px(goal_pos_map_m)

        resnet_factor = self.img_to_features_w.img_to_features.get_downscale_factor(
        )
        lm_pos_fpv = map_present(
            lambda s: self.cuda_var((s / resnet_factor).long()), lm_pos_fpv)
        lm_indices = map_present(self.cuda_var, lm_indices)
        lm_mentioned = map_present(self.cuda_var, lm_mentioned)
        lang_lm_mentioned = self.cuda_var(lang_lm_mentioned)
        lm_pos_map_px = map_present(lambda s: self.cuda_var(s.long()),
                                    lm_pos_map_px)
        goal_pos_map_px = self.cuda_var(goal_pos_map_px)

        store = self.tensor_store
        store.keep_inputs("lm_pos_fpv", lm_pos_fpv)
        store.keep_inputs("lm_pos_map", lm_pos_map_px)
        store.keep_inputs("lm_indices", lm_indices)
        store.keep_inputs("lm_mentioned", lm_mentioned)
        store.keep_inputs("lang_lm_mentioned", lang_lm_mentioned)
        store.keep_inputs("goal_pos_map", goal_pos_map_px)

        # Same labels restricted to the timesteps flagged in plan_mask.
        lm_pos_map_select = select_planned(lm_pos_map_px, plan_mask)
        lm_indices_select = select_planned(lm_indices, plan_mask)
        lm_mentioned_select = select_planned(lm_mentioned, plan_mask)
        goal_pos_map_select = select_planned(goal_pos_map_px, plan_mask)

        store.keep_inputs("lm_pos_map_select", lm_pos_map_select)
        store.keep_inputs("lm_indices_select", lm_indices_select)
        store.keep_inputs("lm_mentioned_select", lm_mentioned_select)
        store.keep_inputs("goal_pos_map_select", goal_pos_map_select)

    metadata = batch["md"][0][0]
    self.tensor_store.set_flag("env_id", metadata["env_id"])

    return images, states, instructions, instr_lengths, plan_mask, firstseg_mask, start_poses, noisy_start_poses, metadata