def get_viz(self):
    """Return the stored visualization images captioned with their instructions.

    For every image in ``self.viz_images`` the instruction at the same index
    of ``self.instructions`` is converted to text with
    ``debug_untokenize_instruction`` and drawn onto the image with
    ``Presenter.overlay_text``.

    Returns:
        dict: ``{"viz_img": [...]}`` holding one captioned image per entry of
        ``self.viz_images``, in the same order.
    """
    text_renderer = Presenter()
    captioned = []
    for idx, frame in enumerate(self.viz_images):
        tokens = self.instructions[idx]
        if len(tokens.view([-1])) >= 2:
            tokens = list(tokens.data.cpu().numpy().squeeze())
        else:
            # Fewer than two tokens: fall back to the single token id 0.
            tokens = [0]
        caption = debug_untokenize_instruction(tokens)
        captioned.append(text_renderer.overlay_text(frame, caption))
    return {"viz_img": captioned}
def _wait_for_typed_instruction(env, seg_idx):
    """Poll the instruction file until the operator submits something actionable.

    Handles the two control strings understood by the UI:
      * ``"CMD: Next"``  - clears the instruction file and returns ``None``.
      * ``"CMD: Reset"`` - resets ``env`` to ``seg_idx``, clears the file and
        keeps polling.
    Any other text containing at least one space is treated as an instruction
    to execute and is returned unchanged.

    Args:
        env: environment interface exposing ``reset(seg_idx)``.
        seg_idx: index of the segment currently being worked on.

    Returns:
        The instruction string to execute, or ``None`` to advance to the next
        segment.
    """
    while True:
        # Gives the OpenCV windows time to process events between polls.
        cv2.waitKey(200)
        instruction = read_instruction_file()
        if instruction == "CMD: Next":
            print("Advancing")
            write_empty_instruction()
            return None
        if instruction == "CMD: Reset":
            print("Resetting")
            env.reset(seg_idx)
            write_empty_instruction()
        elif len(instruction.split(" ")) > 1:
            print("Executing: ", instruction)
            return instruction


def train_top_down_pred():
    """Run the interactive top-down prediction / execution loop.

    Loads the top-down prediction model and the wrapper (execution) model
    named in the ``Setup`` parameters, then iterates over the supervised
    evaluation dataset one environment at a time. For every segment it:

      1. publishes the ground-truth instruction through
         ``write_real_instruction``;
      2. waits for the operator to type an instruction (or a ``CMD:`` control
         string) into the instruction file;
      3. runs the top-down model on the segment image and the typed
         instruction, warps the predicted mask with the inverse of the
         segment's ``affines_g_to_s`` entry and hands it to the execution
         model via ``set_ground_truth_visitation_d``;
      4. shows the prediction and rolls out the execution model in ``env``.

    The function takes no arguments and returns nothing; all configuration
    comes from ``P.get_current_parameters()["Setup"]``.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    launch_ui()

    env = PomdpInterface()

    print("model_name:", setup["top_down_model"])
    print("model_file:", setup["top_down_model_file"])

    model, _ = load_model(
        model_name_override=setup["top_down_model"],
        model_file_override=setup["top_down_model_file"])
    exec_model, _ = load_model(
        model_name_override=setup["wrapper_model"],
        model_file_override=setup["wrapper_model_file"])

    # NOTE(review): affine2d is only referenced by commented-out code in the
    # original; kept so that module construction / CUDA placement is unchanged.
    affine2d = Affine2D()
    if model.is_cuda:
        affine2d.cuda()

    eval_envs = get_correct_eval_env_id_list()
    print("eval_envs:", eval_envs)

    train_instructions, dev_instructions, test_instructions, corpus = \
        get_all_instructions(max_size=setup["max_envs"])
    # Merge all three splits. The original merged the train split twice and
    # never included the test split, so test environments were missing from
    # the lookup below.
    all_instr = {
        **train_instructions,
        **dev_instructions,
        **test_instructions,
    }
    token2term, word2token = get_word_to_token_map(corpus)

    dataset = model.get_dataset(envs=eval_envs,
                                dataset_name="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True)

    # Iterate lazily: wrapping the loader in list() would load every batch
    # into memory before the first interaction.
    for batch in dataloader:
        print("batch:", batch)
        images = batch["images"]
        instructions = batch["instr"]
        affines = batch["affines_g_to_s"]
        env_ids = batch["env_id"]
        set_idxs = batch["set_idx"]
        seg_idxs = batch["seg_idx"]

        # batch_size is 1, so index 0 selects the only element of the batch.
        env_id = env_ids[0][0]
        set_idx = set_idxs[0][0]
        print("env_id of this batch:", env_id)
        env.set_environment(
            env_id,
            instruction_set=all_instr[env_id][set_idx]["instructions"])
        env.reset(0)

        num_segments = len(instructions[0])
        print("num_segments in this batch:", num_segments)
        write_instruction("")
        write_real_instruction("None")
        print("Initial instruction: ", read_instruction_file())

        # TODO: Reset model state here if we keep any temporal memory etc
        for s in range(num_segments):
            env.reset(s)

            real_instruction = cuda_var(instructions[0][s], setup["cuda"], 0)
            real_tokens = list(real_instruction.data.cpu()[0].numpy())
            real_instruction_str = debug_untokenize_instruction(real_tokens)
            write_real_instruction(real_instruction_str)

            image = cuda_var(images[0][s], setup["cuda"], 0)
            affine_g_to_s = affines[0][s]

            print("Your current environment:")
            # NOTE(review): machine-specific absolute path; it should come
            # from the Setup parameters.
            config_path = (
                "/storage/dxsun/unreal_config_nl/configs/configs/random_config_"
                f"{env_id}.json")
            with open(config_path) as fp:
                config = json.load(fp)
            print(config)

            # Keep executing operator instructions on this segment until the
            # operator sends "CMD: Next".
            while True:
                write_real_instruction(real_instruction_str)

                instruction_str = _wait_for_typed_instruction(env, s)
                if instruction_str is None:
                    break

                # TODO: Load instruction from file
                tok_instruction = tokenize_instruction(instruction_str,
                                                       word2token)
                instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)
                instruction_v = cuda_var(instruction_t, setup["cuda"], 0)
                instruction_mask = torch.ones_like(instruction_v)
                instruction_dbg_str = debug_untokenize_instruction(
                    list(instruction_t[0].numpy()), token2term)

                res = model(image, instruction_v, instruction_mask)
                mask_pred = res[0]
                shp = mask_pred.shape
                # Softmax over the flattened remainder of each of the two
                # rows produced by the view.
                mask_pred = F.softmax(mask_pred.view([2, -1]), 1).view(shp)

                # TODO: Rotate the mask_pred to the global frame
                affine_s_to_g = np.linalg.inv(affine_g_to_s)
                # Conjugate the inverse affine with a uniform scale of S.
                # NOTE(review): presumably the prediction is 1/8 of the
                # resolution the affines are expressed in - confirm.
                S = 8.0
                affine_scale_up = np.asarray([[S, 0, 0],
                                              [0, S, 0],
                                              [0, 0, 1]])
                affine_scale_down = np.linalg.inv(affine_scale_up)
                affine_pred_to_g = np.dot(
                    affine_scale_down,
                    np.dot(affine_s_to_g, affine_scale_up))

                mask_pred_np = mask_pred.data.cpu().numpy()[0].transpose(
                    1, 2, 0)
                mask_pred_g_np = apply_affine(mask_pred_np, affine_pred_to_g,
                                              32, 32)
                print("Sum of global mask: ", mask_pred_g_np.sum())

                # Channels-first plus a leading batch axis for pytorch.
                mask_pred_g = torch.from_numpy(
                    mask_pred_g_np.transpose(2, 0, 1)).float()[
                        np.newaxis, :, :, :]
                exec_model.set_ground_truth_visitation_d(mask_pred_g)

                # Stretch each channel to start at 0 and peak near 1 for
                # display; channel 0 is additionally doubled.
                mask_pred_np[:, :, 0] -= mask_pred_np[:, :, 0].min()
                mask_pred_np[:, :, 0] /= (mask_pred_np[:, :, 0].max() + 1e-9)
                mask_pred_np[:, :, 0] *= 2.0
                mask_pred_np[:, :, 1] -= mask_pred_np[:, :, 1].min()
                mask_pred_np[:, :, 1] /= (mask_pred_np[:, :, 1].max() + 1e-9)

                presenter = Presenter()
                presenter.show_image(mask_pred_g_np,
                                     "mask_pred_g",
                                     torch=False,
                                     waitkey=1,
                                     scale=4)

                # Only the prediction overlay is shown. The original also
                # built a label overlay but overwrote it on the next line,
                # so that dead work has been removed.
                pred_viz_np = presenter.overlaid_image(image.data,
                                                       mask_pred_np,
                                                       channel=0)
                viz_img = presenter.overlay_text(pred_viz_np,
                                                 instruction_dbg_str)
                cv2.imshow("interactive viz", viz_img)
                cv2.waitKey(100)

                rollout_model(exec_model, env, env_ids[0][s], set_idxs[0][s],
                              seg_idxs[0][s], tok_instruction)
                # Clear the typed instruction so the next poll waits for
                # fresh input instead of re-running the same one.
                write_instruction("")
class RolloutVisualizer:
    """Turns a recorded rollout (a sequence of per-timestep sample dicts)
    into lists of image frames.

    Drawing is delegated to a ``Presenter`` instance. ``world_size_m`` is read
    once from ``P.get_current_parameters()["Setup"]`` at construction time.
    """

    def __init__(self, resolution=512):
        """
        Args:
            resolution: side length in pixels used for the square frames
                produced by the visualization methods.
        """
        self.presenter = Presenter()
        self.world_size_m = P.get_current_parameters()["Setup"]["world_size_m"]
        self.resolution = resolution
        # clear() initialises all per-rollout state.
        self.clear()

    def clear(self):
        """Reset all per-rollout state to its empty value."""
        self.current_rollout = {}
        self.current_rollout_name = None
        self.env_image = None
        self.current_timestep = None

    def _auto_contrast(self, image):
        """Return a copy of ``image`` with its HSV saturation scaled by 1.2.

        The input is clipped to [0, 1] before the colour conversion and the
        result is clipped to [0, 1] again.
        NOTE(review): presumably expects a float32 RGB array, as required by
        cv2.cvtColor for floating-point input - confirm against callers.
        """
        import cv2
        clipped = np.clip(image, 0.0, 1.0)
        hsv_image = cv2.cvtColor(clipped, cv2.COLOR_RGB2HSV)
        hsv_image[:, :, 1] *= 1.2
        image_out = cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB)
        # The original returned the untouched input here, discarding the
        # result computed above.
        return np.clip(image_out, 0.0, 1.0)

    def _integrate_mask(self, frames):
        """Return the running element-wise maximum over ``frames``.

        Output element ``k`` is the maximum of ``frames[0..k]``. An empty
        input yields an empty list.
        """
        if len(frames) == 0:
            return []
        frames_out = [frames[0]]
        for frame in frames[1:]:
            frames_out.append(np.maximum(frames_out[-1], frame))
        return frames_out

    def _draw_landmarks(self, image, env_id):
        """Draw the landmarks of environment ``env_id`` onto ``image``.

        Landmark names and positions come from
        ``get_landmark_locations_airsim``; the drawn image is returned.
        """
        lm_names, lm_idx, lm_pos = get_landmark_locations_airsim(env_id=env_id)
        return self.presenter.draw_landmarks(image, lm_names, lm_pos,
                                             self.world_size_m)

    def load_video_clip(self, env_id, seg_idx, rollout, domain, cam_name,
                        rollout_dir):
        """Load ``rollout_{cam_name}_{env_id}-0-{seg_idx}.mkv`` from ``rollout_dir``.

        ``rollout`` and ``domain`` are accepted but not used.

        Returns:
            The ``mpy.VideoFileClip``, or ``None`` if it could not be opened.
        """
        video_path = os.path.join(
            rollout_dir, f"rollout_{cam_name}_{env_id}-0-{seg_idx}.mkv")
        print("Loading video: ", video_path)
        try:
            clip = mpy.VideoFileClip(video_path)
        except Exception as e:
            # Best effort: a missing or broken clip must not abort the caller,
            # but report it instead of failing silently.
            print(f"Could not load video {video_path}: {e}")
            return None
        return clip

    def grab_frames(self, env_id, seg_idx, rollout, domain, frame_name,
                    scale=1):
        """Extract one normalised frame per rollout sample.

        ``env_id``, ``seg_idx`` and ``domain`` are accepted but not used.

        Args:
            rollout: iterable of sample dicts.
            frame_name: which quantity to render. ``"image"``, ``"action"``,
                ``"v_dist_r_inner"``, ``"map_struct"`` and ``"ego_obs_mask"``
                get dedicated handling; any other value is used as a key into
                the sample and the first three channels of its first entry
                are taken.
            scale: if not 1, each frame is resized with
                ``Presenter.scale_image``.

        Returns:
            list of frames, one per sample. The arrays stored in ``rollout``
            are not modified.
        """
        frames = []
        for sample in rollout:
            if frame_name == "image":
                frame = sample["state"].image
            elif frame_name == "action":
                action = sample["action"]
                bg = np.zeros((400, 400, 3))
                frame = self.presenter.draw_action(bg, offset=(0, 0),
                                                   action=action)
            elif frame_name == "v_dist_r_inner":
                frame_t = sample[frame_name][:3, :, :].transpose((1, 2, 0))
                # TODO: These should come from params
                map_size = 64
                crop_size = 16
                gap = int((map_size - crop_size) / 2)
                crop_l = gap
                crop_r = map_size - gap
                # Copy the crop: the in-place divisions below would otherwise
                # write through the view into the rollout's own array.
                frame_t = frame_t[crop_l:crop_r, crop_l:crop_r, :].copy()
                frame_t[:, :, 0] /= (np.percentile(frame_t[:, :, 0], 99) + 1e-9)
                frame_t[:, :, 1] /= (np.percentile(frame_t[:, :, 1], 99) + 1e-9)
                frame_t = np.clip(frame_t, 0.0, 1.0)
                shp = list(frame_t.shape)
                shp[2] = 3
                frame = np.zeros(shp)
                frame[:, :, :2] = frame_t
                frame = cv2.resize(frame,
                                   dsize=(self.resolution, self.resolution))
            elif frame_name == "map_struct":
                frame_t = sample[frame_name][:3, :, :].transpose((1, 2, 0))
                shp = list(frame_t.shape)
                shp[2] = 3
                frame = np.zeros(shp)
                frame[:, :, :2] = frame_t
                frame = cv2.resize(frame,
                                   dsize=(self.resolution, self.resolution))
            elif frame_name == "ego_obs_mask":
                frame_t = sample["map_struct"][:3, :, :].transpose((1, 2, 0))
                shp = list(frame_t.shape)
                shp[2] = 3
                # White canvas minus both channels of map_struct.
                canvas = np.zeros(shp)
                canvas[:, :, :] = 1 - frame_t[:, :, 0:1]
                canvas[:, :, :] -= frame_t[:, :, 1:2]
                canvas = np.clip(canvas, 0.0, 1.0)
                frame = cv2.resize(canvas,
                                   dsize=(self.resolution, self.resolution))
            else:
                frame = sample[frame_name][0, :3, :, :].transpose((1, 2, 0))

            if frame_name in ["image", "v_dist_r_inner"]:
                # Min-max normalisation. Out-of-place so that the image held
                # by the sample's state is left untouched.
                frame = frame - frame.min()
                frame = frame / (frame.max() + 1e-9)
            else:
                # Percentile normalisation on a private copy so the sample's
                # array is not modified in place.
                frame = np.array(frame)
                frame -= np.percentile(frame, 0)
                frame /= (np.percentile(frame, 95) + 1e-9)
                frame = np.clip(frame, 0.0, 1.0)

            if scale != 1:
                frame = self.presenter.scale_image(frame, scale)
            frames.append(frame)
        return frames

    def action_visualization(self, env_id, seg_idx, rollout, domain,
                             frame_name="action"):
        """Return one 200x200 uint8 frame per sample with the action drawn on it.

        ``env_id``, ``seg_idx`` and ``domain`` are accepted but not used.
        NOTE(review): the return value of ``draw_action`` is discarded here,
        whereas ``grab_frames`` uses it; this relies on ``draw_action``
        drawing in place - confirm.
        """
        frames = []
        for sample in rollout:
            action = sample[frame_name]
            frame = np.ones((200, 200, 3), dtype=np.uint8)
            self.presenter.draw_action(frame, (1, 159), action)
            frames.append(frame)
        return frames

    def overlay_frames(self, under_frames, over_frames, strength=0.5):
        """Overlay each frame of ``over_frames`` on the matching ``under_frames``
        entry with ``Presenter.overlaid_image`` and return the list of results.
        """
        return [
            self.presenter.overlaid_image(under, over, strength=strength)
            for under, over in zip(under_frames, over_frames)
        ]

    def top_down_visualization(self, env_id, seg_idx, rollout, domain, params):
        """Render a top-down frame for every sample of ``rollout``.

        ``seg_idx`` is accepted but not used.

        Args:
            env_id: environment whose image / landmarks are drawn.
            rollout: sequence of sample dicts; each must provide ``"state"``
                (with ``get_drone_pose()``) and the first must provide
                ``"instruction"``.
            domain: ``"real"`` selects the real-drone environment image;
                ``"simulator"`` / ``"sim"`` select a bottom-left origin.
            params: dict of switches read by this method: ``draw_topdown``,
                ``draw_landmarks``, ``include_vdist``, ``ego_vdist``,
                ``include_layer``, ``include_instr``, ``draw_trajectory``,
                ``draw_drone``, ``draw_fov``.

        Returns:
            list of frames, one per sample. The arrays stored in ``rollout``
            are not modified.
        """
        obl = domain in ["simulator", "sim"]
        print(domain, obl)
        if params["draw_topdown"]:
            bg_image = load_env_img(env_id,
                                    self.resolution,
                                    self.resolution,
                                    real_drone=(domain == "real"),
                                    origin_bottom_left=obl,
                                    flipdiag=False,
                                    alpha=True)
        else:
            bg_image = np.zeros((self.resolution, self.resolution, 3))
        if params["draw_landmarks"]:
            bg_image = self._draw_landmarks(bg_image, env_id)

        # One background copy and one pose (metres and pixels) per timestep.
        frames = []
        poses_m = []
        poses_px = []
        for sample in rollout:
            frames.append(bg_image.copy())
            pose_m = sample["state"].get_drone_pose()
            pose_px = poses_m_to_px(pose_m,
                                    self.resolution,
                                    self.resolution,
                                    self.world_size_m,
                                    batch_dim=False)
            poses_px.append(pose_px)
            poses_m.append(pose_m)

        instruction = rollout[0]["instruction"]
        print("Instruction: ")
        print(instruction)

        # Draw visitation distributions if requested:
        if params["include_vdist"]:
            print("Drawing visitation distributions")
            if params["ego_vdist"]:
                inner_key = "v_dist_r_inner"
                outer_key = "v_dist_r_outer"
            else:
                inner_key = "v_dist_w_inner"
                outer_key = "v_dist_w_outer"
            # Blend onto a real background; overlay at full strength on a
            # constant one. The background is the same for every frame.
            bg_has_content = bg_image.max() - bg_image.min() > 1e-9
            for i, sample in enumerate(rollout):
                # Copy: transpose/flipud return views, and the in-place
                # scaling below must not alter the rollout's own data.
                v_dist_inner = np.flipud(
                    sample[inner_key].transpose((2, 1, 0))).copy()
                # Expand range of each channel separately so that stop
                # entropy doesn't affect how trajectory looks
                v_dist_inner[:, :, 0] /= (
                    np.percentile(v_dist_inner[:, :, 0], 99.5) + 1e-9)
                v_dist_inner[:, :, 1] /= (
                    np.percentile(v_dist_inner[:, :, 1], 99.5) + 1e-9)
                v_dist_inner = np.clip(v_dist_inner, 0.0, 1.0)
                v_dist_outer = sample[outer_key]
                if bg_has_content:
                    f = self.presenter.blend_image(frames[i], v_dist_inner)
                else:
                    f = self.presenter.overlaid_image(frames[i],
                                                      v_dist_inner,
                                                      strength=1.0)
                frames[i] = self.presenter.draw_prob_bars(f, v_dist_outer)

        if params["include_layer"]:
            layer_name = params["include_layer"]
            print(f"Drawing first 3 channels of layer {layer_name}")
            # Suffixes on the requested name select post-processing of the
            # underlying layer.
            accumulate = False
            invert = False
            gray = False
            if layer_name == "M_W_accum":
                accumulate = True
                layer_name = "M_W"
            if layer_name == "M_W_accum_inv":
                invert = True
                accumulate = True
                layer_name = "M_W"
            if layer_name.endswith("_Gray"):
                gray = True
                layer_name = layer_name[:-len("_Gray")]

            prev_layer = None
            for i, sample in enumerate(rollout):
                layer = sample[layer_name]
                if len(layer.shape) == 4:
                    layer = layer[0, :, :, :]
                layer = np.flipud(layer.transpose((2, 1, 0)))
                layer = layer[:, :, :3]

                if layer_name in ["S_W", "R_W", "F_W"]:
                    # Copy before normalising in place so the rollout's own
                    # array stays untouched.
                    layer = layer.copy()
                    if gray:
                        layer -= np.percentile(layer, 1)
                        layer /= (np.percentile(layer, 99) + 1e-9)
                    else:
                        layer /= (np.percentile(layer, 97) + 1e-9)
                    layer = np.clip(layer, 0.0, 1.0)

                if layer_name in ["M_W"]:
                    # NOTE(review): the original comment said noise is added
                    # because a 0-1 mask does not encode properly with the
                    # codec, but no noise is added by this code.
                    layer = layer.astype(np.float32)
                    layer = np.tile(layer, (1, 1, 3))

                if accumulate and i > 0:
                    layer = np.maximum(layer, prev_layer)
                prev_layer = layer

                if invert:
                    layer = 1 - layer

                if frames[i].max() > 0.01:
                    frames[i] = self.presenter.blend_image(
                        frames[i], layer[:, :, :3])
                else:
                    # Empty background: show the layer itself, scaled up by
                    # the integer ratio of resolution to layer size.
                    scale = (int(self.resolution / layer.shape[0]),
                             int(self.resolution / layer.shape[1]))
                    frames[i] = self.presenter.prep_image(layer[:, :, :3],
                                                          scale=scale)

        if params["include_instr"]:
            print("Drawing instruction")
            for i, sample in enumerate(rollout):
                frames[i] = self.presenter.overlay_text(
                    frames[i], sample["instruction"])

        # Draw trajectory history
        if params["draw_trajectory"]:
            print("Drawing trajectory")
            # Extract positions once instead of once per frame.
            positions_px = [pose.position for pose in poses_px]
            for i in range(len(frames)):
                frames[i] = self.presenter.draw_trajectory(
                    frames[i], positions_px[:i + 1], self.world_size_m)

        # Draw drone
        if params["draw_drone"]:
            print("Drawing drone")
            for i in range(len(frames)):
                frames[i] = self.presenter.draw_drone(frames[i], poses_m[i],
                                                      self.world_size_m)

        # Draw observability mask:
        if params["draw_fov"]:
            print("Drawing FOV")
            for i in range(len(frames)):
                # NOTE(review): 84 is presumably the field of view in
                # degrees - confirm against Presenter.draw_observability.
                frames[i] = self.presenter.draw_observability(
                    frames[i], poses_m[i], self.world_size_m, 84)

        return frames

    def start_rollout(self, env_id, set_idx, seg_idx, domain, dataset,
                      suffix=""):
        """Begin recording a new rollout.

        Stores a name built from all arguments, resets the ``"top-down"``
        frame list and loads the environment image used as the background of
        every timestep.
        """
        rollout_name = f"{env_id}:{set_idx}:{seg_idx}:{domain}:{dataset}:{suffix}"
        self.current_rollout = {"top-down": []}
        self.current_rollout_name = rollout_name
        # env_id comes first, matching the load_env_img call in
        # top_down_visualization; the original omitted it.
        self.env_image = load_env_img(env_id, 512, 512, alpha=True)

    def start_timestep(self, timestep):
        """Record ``timestep`` as current and append a fresh copy of the
        environment image to the ``"top-down"`` frame list.
        """
        self.current_timestep = timestep
        # Add top-down view image for the new timestep
        self.current_rollout["top-down"].append(self.env_image.copy())

    def set_drone_state(self, timestep, state):
        """Draw the drone, posed at ``state.get_cam_pose()``, onto the stored
        top-down image for ``timestep`` and store the result back.
        """
        drone_pose = state.get_cam_pose()
        # Draw drone sprite on top_down image
        tdimg = self.current_rollout["top-down"][timestep]
        tdimg_n = self.presenter.draw_drone(
            tdimg, drone_pose,
            P.get_current_parameters()["Setup"]["world_size_m"])
        self.current_rollout["top-down"][timestep] = tdimg_n