def plot_path(self, env_id, paths, interactive=False, show=True, bg=True):
    """
    Plot one or more paths, optionally on top of the environment image.

    :param env_id: id of the environment whose image is used as background
    :param paths: iterable of paths, each an iterable of (x, y) points
    :param interactive: if True, turn matplotlib interactive mode on and clear the
        current figure; otherwise turn interactive mode off
    :param show: if True, call plt.show() followed by a short plt.pause()
    :param bg: if True, try to draw the environment image behind the paths
    :return: None
    """
    # Function-scope imports so this block does not depend on module-level imports.
    import itertools

    import numpy as np

    if interactive:
        plt.ion()
        plt.clf()
    else:
        plt.ioff()

    # NOTE(review): the local assignment below was already commented out, so
    # `config_size` has to come from the enclosing module - TODO confirm it exists.
    #config_size = UnrealUnits().get_config_size()
    if bg:
        try:
            img = load_env_img(env_id)
            plt.imshow(img, extent=(0, config_size[0], 0, config_size[1]))
        except Exception as e:
            # Best effort: a missing background must not prevent plotting the paths.
            print("Error in loading and plotting path!")
            print(e)

    colors = ["r", "g", "b", "y", "c", "m"]
    # Cycle through the palette so that paths beyond the sixth are still drawn
    # instead of being silently dropped by zip().
    for path, color in zip(paths, itertools.cycle(colors)):
        # Note that x and y are swapped
        x_targets, y_targets = zip(*path)
        # zip() produces tuples; convert so the subtraction is element-wise.
        y_targets = config_size[1] - np.asarray(y_targets)
        plt.plot(y_targets, x_targets, color)
        plt.plot(y_targets[-1], x_targets[-1], color + "o")

    plt.axis([0, config_size[0], 0, config_size[1]])
    if show:
        plt.show()
        plt.pause(0.0001)
def get_top_down_image(self, env_id, set_idx, seg_idx):
    """
    To be called externally to retrieve a top-down environment image oriented with the
    start of the requested segment.

    :param env_id: environment id
    :param set_idx: instruction set number
    :param seg_idx: segment index
    :return: None if get_start_pt_and_yaw returns no start point; otherwise a tuple
        (seg_img_t, img_pose_2d_t) where img_pose_2d_t is a 1x3 FloatTensor holding
        [pos_x, pos_y, yaw]
    """
    # TODO: Revise the bazillion versions of poses - get rid of this specific one
    path = load_path(env_id)
    env_image = load_env_img(env_id, self.map_w, self.map_h)

    # Draw the full path on the environment image before it is transformed.
    # (The return value is ignored, so this presumably draws in place - TODO confirm.)
    path_img = cf_to_img(path, [env_image.shape[0], env_image.shape[1]])
    plot_path_on_img(env_image, path_img)

    seg = self.all_instr[env_id][set_idx]["instructions"][seg_idx]
    start_idx = seg["start_idx"]

    start_pt, dir_yaw = get_start_pt_and_yaw(path, start_idx, self.map_w, self.map_h, self.yaw_rand_range)
    if start_pt is None:
        return None

    affine = get_affine_matrix(start_pt, dir_yaw)
    seg_img_t = self.gen_top_down_image(env_image, affine)
    #seg_img_t = seg_img_t.permute(0, 1, 3, 2)

    # A 2D pose is specified as [pos_x, pos_y, yaw]
    # A 3D pose would be [pos_x, pos_y, pos_z, r_x, r_y, r_z, r_w]
    img_pose_2d_t = torch.FloatTensor([start_pt[0], start_pt[1], dir_yaw]).unsqueeze(0)
    return seg_img_t, img_pose_2d_t
def get_top_down_image_env(self, env_id, egocentric=False):
    """
    To be called externally to retrieve the top-down image of an environment as a tensor.

    The image is loaded at (map_w, map_h). When the output size (img_w, img_h) differs,
    the loaded image is copied into the [0:map_h, 0:map_w] region of a zero-filled
    (img_h, img_w) array. The result is standardized and returned with an extra
    leading dimension.

    :param env_id: environment id
    :param egocentric: not used by this method
    :return: float tensor of the standardized image with a leading dimension of size 1
    """
    # The loaded path is not used below; the call is kept as in the original.
    path = load_path(env_id)
    loaded_image = load_env_img(env_id, self.map_w, self.map_h)

    sizes_match = self.map_w == self.img_w and self.map_h == self.img_h
    if sizes_match:
        canvas = loaded_image
    else:
        # We need to return a bigger image resolution than we loaded
        canvas = np.zeros([self.img_h, self.img_w, loaded_image.shape[2]])
        canvas[0:self.map_h, 0:self.map_w, :] = loaded_image

    standardized = standardize_image(canvas)
    return torch.from_numpy(standardized).unsqueeze(0).float()
def start_rollout(self, env_id, set_idx, seg_idx, domain, dataset, suffix=""):
    """
    Reset the per-rollout state and load the environment image for a new rollout.

    :param env_id: environment id
    :param set_idx: instruction set number
    :param seg_idx: segment index
    :param domain: domain identifier, used only in the rollout name
    :param dataset: dataset identifier, used only in the rollout name
    :param suffix: optional trailing component of the rollout name
    :return: None
    """
    rollout_name = f"{env_id}:{set_idx}:{seg_idx}:{domain}:{dataset}:{suffix}"
    self.current_rollout = {"top-down": []}
    self.current_rollout_name = rollout_name
    # env_id has to be the first argument; without it 512 was taken as the environment id.
    self.env_image = load_env_img(env_id, 512, 512, alpha=True)
def set_current_env_id(self, env_id, instance_id=None):
    """
    Switch to environment env_id and store an environment image for it.

    The config for env_id is loaded and published. If configs_equal reports that env_id
    differs from the current environment and the "dont_place_landmarks" setup parameter
    is not set, the drone is landed (unless already landed) and the landmark configurator
    provides the image. Otherwise the image of the currently stored environment is loaded.
    The image is then saved under env_id.

    NOTE(review): the nesting here was reconstructed from whitespace-collapsed source;
    confirm the placement of publish_env_config and save_env_img.

    :param env_id: id of the environment to switch to
    :param instance_id: not used by this method
    :return: None
    """
    # If the new environment has a different arrangement of landmarks, we need the user to physically move them
    new_env_config = load_and_convert_env_config(env_id)
    if self.env_config is None:
        self.env_config = new_env_config
    self.rviz.publish_env_config("env_config", new_env_config)
    if not configs_equal(env_id, self.env_id) and \
            not P.get_current_parameters()["Setup"].get("dont_place_landmarks"):
        # Land the drone first, otherwise it's unsafe to walk inside the cage
        if not self.landed:
            self.land(new_env_config)
        # Wait for the user to place the landmarks
        env_img = self.landmark_configurator.configure_landmarks(env_id)
    else:
        # Note: this loads the image of the previously stored environment (self.env_id), not env_id.
        env_img = load_env_img(self.env_id, real_drone=True, origin_bottom_left=False)
    save_env_img(env_id, env_img, real_drone=True)
    self.env_id = env_id
    self.env_config = new_env_config
def configure_landmarks(self, env_id):
    """
    Show the camera image next to the simulated environment image until configuration ends.

    Subscribes to self.img_topic, then loops showing self.image_to_show and the simulator
    image of env_id until the enter monitor reports a tap or SKIP_CONFIGURATION is set.
    Afterwards the windows are closed and the subscriber is unregistered.

    NOTE(review): the nesting here was reconstructed from whitespace-collapsed source;
    confirm whether sleep(1) belongs after the loop (as written here) or inside it.

    :param env_id: id of the environment whose landmarks are being placed
    :return: the last value of self.image_to_show
    """
    self.env_config = load_and_convert_env_config(env_id)
    self.state_positioning = True
    self.state_instructions_printed = False
    self.subscriber = rospy.Subscriber(self.img_topic, Image, self._image_callback)
    self.enter_monitor = EnterMonitor()
    self.monitor_runner = MonitorRunner(self.enter_monitor)
    env_sim_img = load_env_img(env_id, width=400, height=400, real_drone=False, origin_bottom_left=False)
    # True until the windows have been positioned once
    new = True
    while True:
        if self.new_image:
            Presenter().show_image(self.image_to_show, "Landmark Positioning", scale=2, waitkey=10)
            Presenter().show_image(env_sim_img, "Sim Image", scale=2, waitkey=10)
            if new:
                # Place the two windows side by side the first time they are shown
                cv2.moveWindow("Landmark Positioning", 20, 20)
                cv2.moveWindow("Sim Image", 1000, 20)
                new = False
        if self.enter_monitor.tapped or SKIP_CONFIGURATION:
            break
    sleep(1)
    cv2.destroyWindow('Landmark Positioning')
    cv2.destroyWindow("Sim Image")
    self.subscriber.unregister()
    return self.image_to_show
def save_path_overlays(self, metadata):
    """
    Save the environment image overlaid with the predicted visitation distribution.

    The last entry of the "log_v_dist_w_select" batch is passed through
    self.visitation_softmax, channels 0 and 1 are each scaled by their own maximum, and
    the result is overlaid on the 256x256 environment image together with the
    instruction text.

    :param metadata: dict providing "env_id", "set_idx", "seg_idx" and "instruction"
    :return: None
    """
    from data_io.env import load_env_img
    from data_io.results import save_results_extra_image
    import cv2
    import numpy as np

    env_id = metadata["env_id"]
    is_real = self.domain == "real"
    background = load_env_img(env_id, width=256, height=256, real_drone=is_real, flipdiag=True)
    background = cv2.cvtColor(background.astype(np.float32), cv2.COLOR_RGB2BGR)

    log_dist = self.tensor_store.get_inputs_batch("log_v_dist_w_select")[0]
    dist_w, goal_oob_prob = self.visitation_softmax(
        log_dist.inner_distribution, log_dist.outer_prob_mass[:, 1])
    dist_w = dist_w.contiguous()

    # Squish distributions in 0-1 range, each channel by its own maximum
    last = -1
    for channel in (0, 1):
        peak = torch.max(dist_w[last, channel].view(-1)).detach().item()
        dist_w[last, channel] /= (peak + 1e-10)

    presenter_img = Presenter().overlaid_image(background, dist_w[last], gray_bg=False)
    presenter_img = Presenter().overlay_text(presenter_img, metadata["instruction"])

    # The original assigned a "<domain>_pathpred_overlay" name and immediately replaced it
    # with an empty string; the empty name is what gets used.
    name = ""
    save_results_extra_image(self.run_name, env_id, metadata["set_idx"], metadata["seg_idx"],
                             name, presenter_img, extra=False)
def provider_rot_top_down_images(segment_data, data):
    """
    Build rotated top-down images and trajectory labels for the segments in segment_data.

    The environment is taken from the first metadata entry. Iteration stops at the first
    metadata entry that is None. Labels are max-pooled with kernel size 8.

    :param segment_data: object whose `metadata` is a sequence of dicts with "env_id",
        "set_idx" and "seg_idx" (entries may be None)
    :param data: not used by this provider
    :return: [("top_down_images", tensor), ("traj_ground_truth", tensor)], each tensor
        being the per-segment results concatenated along dimension 0
    """
    all_metadata = segment_data.metadata
    env_id = all_metadata[0]["env_id"]
    path = load_path(env_id)
    env_image = load_env_img(env_id, 256, 256)

    images = []
    labels = []
    for md in all_metadata:
        if md is None:
            break

        instr_seg = get_instruction_segment(env_id, md["set_idx"], md["seg_idx"])
        first, last = instr_seg["start_idx"], instr_seg["end_idx"]

        start_pt, dir_yaw = tdd.get_start_pt_and_yaw(path, first, 256, 256, 0)
        affine = tdd.get_affine_matrix(start_pt, dir_yaw, 512, 512)

        images.append(tdd.gen_top_down_image(env_image, affine, 512, 512, 256, 256))

        segment_labels = tdd.gen_top_down_labels(path[first:last], affine, 512, 512, 256, 256, True, True)
        labels.append(F.max_pool2d(Variable(segment_labels), 8).data)

    return [
        ("top_down_images", torch.cat(images, dim=0)),
        ("traj_ground_truth", torch.cat(labels, dim=0)),
    ]
def plot_paths(self, segment_dataset, world_size, segment_path=None, file=None, interactive=False, bg=True, texts=[], entire_trajectory=False, real_drone=False):
    """
    Plot the reference path (red) and the executed path (blue) in a 512x512 pixel frame.

    :param segment_dataset: sequence of samples; each provides "instruction" and "state",
        and the first one provides the metadata (under "metadata" or directly)
    :param world_size: world extent used to scale positions to the 512 pixel frame
    :param segment_path: reference path; taken from the metadata when None
    :param file: not used by this method
    :param interactive: turn matplotlib interactive mode on (and show the figure) or off
    :param bg: if True, try to draw the environment image as background
    :param texts: extra text lines drawn on the plot
    :param entire_trajectory: if True, use the "path" metadata entry and join the distinct
        consecutive instructions; otherwise use "seg_path" and the first instruction
    :param real_drone: forwarded to load_env_img
    :return: None
    """
    if not interactive:
        plt.ioff()
    else:
        plt.ion()

    if len(segment_dataset) == 0:
        print("Empty segment. Not plotting!")
        return

    first_sample = segment_dataset[0]
    md = first_sample["metadata"] if "metadata" in first_sample else first_sample
    env_id = md["env_id"]
    if segment_path is None:
        segment_path = md["path" if entire_trajectory else "seg_path"]

    segment_path_px = (segment_path * 512 / world_size).astype(np.int32)
    #segment_path_px[:,0] = 512 - segment_path_px[:,0]

    if entire_trajectory:
        # Collapse runs of identical consecutive instructions into one entry
        unique_instructions = []
        for sample in segment_dataset:
            text = sample["instruction"]
            if not unique_instructions or text != unique_instructions[-1]:
                unique_instructions.append(text)
        instruction = "; ".join(unique_instructions)
    else:
        instruction = first_sample["instruction"]

    if bg:
        try:
            img = load_env_img(env_id, width=512, height=512, real_drone=real_drone, origin_bottom_left=True)
            plt.imshow(img, extent=(0, 512, 0, 512))
        except Exception as e:
            print("Error in plotting paths!")
            print(e)

    # Reference path: column 1 on the horizontal axis, column 0 on the vertical axis
    plt.plot(segment_path_px[:, 1], segment_path_px[:, 0], "r")
    plt.plot(segment_path_px[-1, 1], segment_path_px[-1, 0], "ro")

    actual_path = [sample["state"].state[0:2] for sample in segment_dataset]
    actual_path_px = (np.asarray(actual_path) * 512 / world_size).astype(np.int32)
    #actual_path_px[:,0] = 512 - actual_path_px[:,0]
    plt.plot(actual_path_px[:, 1], actual_path_px[:, 0], "b")
    plt.plot(actual_path_px[-1, 1], actual_path_px[-1, 0], "bo")

    plt.axis([0, 512, 0, 512])

    instruction_split = self.split_lines(instruction, maxchars=40)
    plt.title("env: " + str(env_id) + " - " + "\n".join(instruction_split))

    x, y, gap = 10, 5, 20
    for text in texts:
        if not DONT_DRAW_TEXT:
            plt.text(x, y, text)
        y += gap

    # Instruction lines start above the extra texts and step back down line by line
    y += len(instruction_split) * gap
    for line in instruction_split:
        if not DONT_DRAW_TEXT:
            plt.text(x, y, line)
        y -= gap

    if interactive:
        plt.show()
        plt.pause(0.0001)
def get_item(self, env_id, set_idx, seg_idx):
    """
    Build the data for one environment (or one segment of it when self.seg_level is set).

    For each instruction segment with a valid start point, this generates the top-down
    image, the trajectory labels, the instruction tensor and the landmark auxiliary
    labels, and appends them to per-key lists.

    :param env_id: environment id
    :param set_idx: instruction set number (selects the set only when self.seg_level is set)
    :param seg_idx: segment index (selects the segment only when self.seg_level is set)
    :return: dict of lists with one entry per processed segment, extended with the
        auxiliary labels via dictlist_append
    """
    path = load_path(env_id)
    env_image = load_env_img(env_id, self.map_w, self.map_h)
    self.latest_img_dbg = env_image

    data = {
        "images": [],
        "instr": [],
        "traj_labels": [],
        "affines_g_to_s": [],
        "lm_pos": [],
        "lm_indices": [],
        "lm_mentioned": [],
        "lm_visible": [],
        "set_idx": [],
        "seg_idx": [],
        "env_id": []
    }
    if self.include_instr_negatives:
        data["neg_instr"] = []

    # Somehow load the instruction with the start and end indices for each of the N segments.
    # Each segment is paired with its real index: enumerating the single-element list
    # would otherwise replace the requested seg_idx with 0 in seg-level mode.
    if self.seg_level:
        indexed_segments = [(seg_idx, self.all_instr[env_id][set_idx]["instructions"][seg_idx])]
    else:
        indexed_segments = enumerate(self.all_instr[env_id][0]["instructions"])

    for seg_idx, seg in indexed_segments:
        start_idx = seg["start_idx"]
        end_idx = seg["end_idx"]
        instruction = seg["instruction"]

        start_pt, dir_yaw = get_start_pt_and_yaw(path, start_idx, self.map_w, self.map_h, self.yaw_rand_range)
        if start_pt is None:
            continue
        affine = get_affine_matrix(start_pt, dir_yaw, self.img_w, self.img_h)

        if DEBUG:
            # Draw this segment's path on a fresh copy of the originally loaded image
            env_image = self.latest_img_dbg
            print("Start Pt: ", start_pt)
            print("Start Yaw: ", dir_yaw)
            path_img = cf_to_img(path, [env_image.shape[0], env_image.shape[1]])
            seg_path = path_img[start_idx:end_idx]
            env_image = env_image.copy()
            plot_path_on_img(env_image, seg_path)

        seg_img_t = gen_top_down_image(env_image, affine, self.img_w, self.img_h, self.map_w, self.map_h)
        seg_labels_t = gen_top_down_labels(path[start_idx:end_idx], affine, self.img_w, self.img_h,
                                           self.map_w, self.map_h, self.incl_path, self.incl_endpoint)
        instruction_t = self.gen_instruction(instruction)
        aux_label = self.gen_lm_aux_labels(env_id, instruction, affine)

        if DEBUG:
            cv2.waitKey(0)

        if self.include_instr_negatives:
            neg_instruction_t = self.gen_neg_instructions(env_id, seg_idx)
            data["neg_instr"].append(neg_instruction_t)

        data["images"].append(seg_img_t)
        data["instr"].append(instruction_t)
        data["traj_labels"].append(seg_labels_t)
        data["affines_g_to_s"].append(affine)
        data["env_id"].append(env_id)
        data["set_idx"].append(set_idx)
        data["seg_idx"].append(seg_idx)
        data = dictlist_append(data, aux_label)

    return data
def plot_paths(self, segment_dataset, segment_path=None, file=None, interactive=False, bg=True, texts=[], entire_trajectory=False):
    """
    Plot the reference path (red) and the executed path (blue) in config-size coordinates.

    :param segment_dataset: sequence of samples; each provides "instruction" and "state",
        and the first one provides "metadata"
    :param segment_path: reference path; taken from the first sample's metadata when None
    :param file: not used by this method
    :param interactive: turn matplotlib interactive mode on (and show the figure) or off
    :param bg: if True, try to draw the environment image as background
    :param texts: extra text lines drawn on the plot
    :param entire_trajectory: if True, use the "path" metadata entry and join the distinct
        consecutive instructions; otherwise use "seg_path" and the first instruction
    :return: None
    """
    if not interactive:
        plt.ioff()
    else:
        plt.ion()

    if len(segment_dataset) == 0:
        print("Empty segment. Not plotting!")
        return

    first_sample = segment_dataset[0]
    env_id = first_sample["metadata"]["env_id"]
    if segment_path is None:
        segment_path = first_sample["metadata"]["path" if entire_trajectory else "seg_path"]

    config_size = UnrealUnits().get_config_size()

    # cf_to_img yields pairs; the first component becomes y and the second x.
    first_px, second_px = zip(*cf_to_img(segment_path, [512, 512]))
    x_targets = np.asarray(second_px) * config_size[0] / 512
    # The y axis is flipped with respect to the image coordinates.
    y_targets = config_size[1] - np.asarray(first_px) * config_size[1] / 512

    if entire_trajectory:
        # Collapse runs of identical consecutive instructions into one entry
        unique_instructions = []
        for sample in segment_dataset:
            text = sample["instruction"]
            if not unique_instructions or text != unique_instructions[-1]:
                unique_instructions.append(text)
        instruction = "; ".join(unique_instructions)
    else:
        instruction = first_sample["instruction"]

    if bg:
        try:
            img = load_env_img(env_id)
            plt.imshow(img, extent=(0, config_size[0], 0, config_size[1]))
        except Exception as e:
            print("Error in plotting paths!")
            print(e)

    plt.plot(x_targets, y_targets, "r")
    plt.plot(x_targets[-1], y_targets[-1], "ro")

    states = [sample["state"].state for sample in segment_dataset]
    x_actual = np.asarray([state[0] for state in states])
    y_actual = np.asarray([state[1] for state in states])
    plt.plot(x_actual, y_actual, "b")
    plt.plot(x_actual[-1], y_actual[-1], "bo")

    plt.axis([0, config_size[0], 0, config_size[1]])

    instruction_split = self.split_lines(instruction, maxchars=40)
    plt.title("env: " + str(env_id) + " - " + "\n".join(instruction_split))

    x, y, gap = 10, 5, 40
    for text in texts:
        if not DONT_DRAW_TEXT:
            plt.text(x, y, text)
        y += gap

    # Instruction lines start above the extra texts and step back down line by line
    y += len(instruction_split) * gap + gap
    for line in instruction_split:
        if not DONT_DRAW_TEXT:
            plt.text(x, y, line)
        y -= gap

    if interactive:
        plt.show()
        plt.pause(0.0001)
def get_map_overlaid(img, env_id):
    """
    Overlay img on the 256x256 image of environment env_id.

    :param img: image passed to Presenter.overlaid_image as the overlay
    :param env_id: environment id whose image is the background
    :return: the result of Presenter().overlaid_image
    """
    background = load_env_img(env_id, width=256, height=256)
    return Presenter().overlaid_image(background, img)
def affine_2d_test():
    """
    Visual check of Affine2D: a rotation followed by a translation is compared against
    the composed matrix (computed with torch.bmm and with numpy), and the composed
    transform is also rendered at two output sizes. All results are shown on screen.
    """
    img = torch.from_numpy(standardize_image(load_env_img(2, 128, 128))).float().unsqueeze(0)

    px, py = 64, 64
    theta = 0.5
    c, s = math.cos(theta), math.sin(theta)

    t_p = torch.FloatTensor([[1, 0, px], [0, 1, py], [0, 0, 1]]).unsqueeze(0)
    t_r = torch.FloatTensor([[c, -s, 0], [s, c, 0], [0, 0, 1]]).unsqueeze(0)

    # Same composition computed through numpy
    mat_np = np.dot(t_p.squeeze().numpy(), t_r.squeeze().numpy())
    mat_np_t = torch.from_numpy(mat_np).unsqueeze(0)

    # For some forsaken reason rightmultiplying seems to mean applying the transformation second
    mat = torch.bmm(t_p, t_r)

    affine_2d = Affine2D()
    res1 = affine_2d(Variable(img), Variable(t_r))
    res2 = affine_2d(res1, Variable(t_p))
    res3 = affine_2d(img, Variable(mat))
    res4 = affine_2d(img, Variable(mat_np_t))
    res3_big = affine_2d(img, Variable(mat), out_size=[512, 512])
    res3_small = affine_2d(img, Variable(mat), out_size=[128, 128])

    # (result, window title, wait for key press) - only the last window waits
    windows = [
        (res1, "res_1", False),
        (res2, "res_2", False),
        (res3, "res_3", False),
        (res3_big, "res3_big", False),
        (res3_small, "res3_small", False),
        (res4, "res_4", True),
    ]
    for result, title, wait in windows:
        Presenter().show_image(result.data[0], title, torch=True, waitkey=wait, scale=4)

    print("res2 should be the same as res_3 and res_4")
def top_down_visualization(self, env_id, seg_idx, rollout, domain, params):
    """
    Render one top-down frame per rollout sample, with optional overlays.

    Every frame starts as a copy of the background (environment image or zeros). The
    overlays are then applied in this order, each controlled by a key of params:
    visitation distributions, a named layer, instruction text, trajectory history,
    drone, and field of view.

    NOTE(review): the nesting here was reconstructed from whitespace-collapsed source;
    confirm the placement of the draw_landmarks check, of np.clip in the layer branch,
    and of draw_prob_bars.

    :param env_id: environment id
    :param seg_idx: not used by this method
    :param rollout: sequence of samples; each provides "state" and "instruction", plus the
        distribution / layer entries requested through params
    :param domain: "real", "simulator" or "sim"; selects image loading options
    :param params: dict with keys "draw_topdown", "draw_landmarks", "include_vdist",
        "ego_vdist", "include_layer", "include_instr", "draw_trajectory", "draw_drone",
        "draw_fov"
    :return: list of frames, one per rollout sample
    """
    # fd is not used below
    fd = domain == "real"
    obl = domain in ["simulator", "sim"]
    print(domain, obl)
    if params["draw_topdown"]:
        bg_image = load_env_img(
            env_id,
            self.resolution,
            self.resolution,
            real_drone=True if domain == "real" else False,
            origin_bottom_left=obl,
            flipdiag=False,
            alpha=True)
    else:
        bg_image = np.zeros((self.resolution, self.resolution, 3))
    if params["draw_landmarks"]:
        bg_image = self._draw_landmarks(bg_image, env_id)

    # Initialize stuff
    frames = []
    poses_m = []
    poses_px = []
    for sample in rollout:
        # Each frame gets its own copy of the background
        sample_image = bg_image.copy()
        frames.append(sample_image)
        state = sample["state"]
        pose_m = state.get_drone_pose()
        pose_px = poses_m_to_px(pose_m, self.resolution, self.resolution, self.world_size_m, batch_dim=False)
        poses_px.append(pose_px)
        poses_m.append(pose_m)

    instruction = rollout[0]["instruction"]
    print("Instruction: ")
    print(instruction)

    # Draw visitation distributions if requested:
    if params["include_vdist"]:
        print("Drawing visitation distributions")
        if params["ego_vdist"]:
            inner_key = "v_dist_r_inner"
            outer_key = "v_dist_r_outer"
        else:
            inner_key = "v_dist_w_inner"
            outer_key = "v_dist_w_outer"
        for i, sample in enumerate(rollout):
            # NOTE(review): transpose/flipud presumably return views, in which case the
            # in-place divisions below also modify sample[inner_key] - TODO confirm.
            v_dist_w_inner = np.flipud(sample[inner_key].transpose((2, 1, 0)))
            # Expand range of each channel separately so that stop entropy doesn't affect how trajectory looks
            v_dist_w_inner[:, :, 0] /= (np.percentile(v_dist_w_inner[:, :, 0], 99.5) + 1e-9)
            v_dist_w_inner[:, :, 1] /= (np.percentile(v_dist_w_inner[:, :, 1], 99.5) + 1e-9)
            v_dist_w_inner = np.clip(v_dist_w_inner, 0.0, 1.0)
            v_dist_w_outer = sample[outer_key]
            # Blend when the background has any contrast, otherwise overlay at full strength
            if bg_image.max() - bg_image.min() > 1e-9:
                f = self.presenter.blend_image(frames[i], v_dist_w_inner)
            else:
                f = self.presenter.overlaid_image(frames[i], v_dist_w_inner, strength=1.0)
            f = self.presenter.draw_prob_bars(f, v_dist_w_outer)
            frames[i] = f

    if params["include_layer"]:
        layer_name = params["include_layer"]
        print(f"Drawing first 3 channels of layer {layer_name}")
        accumulate = False
        invert = False
        gray = False
        # "M_W_accum", "M_W_accum_inv" and the "_Gray" suffix are display variants of a stored layer
        if layer_name == "M_W_accum":
            accumulate = True
            layer_name = "M_W"
        if layer_name == "M_W_accum_inv":
            invert = True
            accumulate = True
            layer_name = "M_W"
        if layer_name.endswith("_Gray"):
            gray = True
            layer_name = layer_name[:-len("_Gray")]

        for i, sample in enumerate(rollout):
            layer = sample[layer_name]
            if len(layer.shape) == 4:
                # Drop the leading dimension
                layer = layer[0, :, :, :]
            layer = layer.transpose((2, 1, 0))
            layer = np.flipud(layer)
            # Both branches keep the first 3 channels
            if layer_name in ["S_W", "F_W"]:
                layer = layer[:, :, :3]
            else:
                layer = layer[:, :, :3]
            if layer_name in ["S_W", "R_W", "F_W"]:
                # Percentile-based range normalization
                if gray:
                    layer -= np.percentile(layer, 1)
                    layer /= (np.percentile(layer, 99) + 1e-9)
                else:
                    layer /= (np.percentile(layer, 97) + 1e-9)
                layer = np.clip(layer, 0.0, 1.0)
            if layer_name in ["M_W"]:
                # Having a 0-1 mask does not encode properly with the codec. Add a bit of imperceptible gaussian noise.
                # NOTE(review): no noise is added here; the mask is only cast to float32 and tiled.
                layer = layer.astype(np.float32)
                layer = np.tile(layer, (1, 1, 3))
            if accumulate and i > 0:
                # Running element-wise maximum over the frames seen so far
                layer = np.maximum(layer, prev_layer)
            prev_layer = layer
            if invert:
                layer = 1 - layer
            if frames[i].max() > 0.01:
                frames[i] = self.presenter.blend_image(frames[i], layer[:, :, :3])
                #frames[i] = self.presenter.overlaid_image(frames[i], layer[:, :, :3])
            else:
                # Frame is (nearly) empty: show the layer alone, scaled to the output resolution
                scale = (int(self.resolution / layer.shape[0]), int(self.resolution / layer.shape[1]))
                frames[i] = self.presenter.prep_image(layer[:, :, :3], scale=scale)

    if params["include_instr"]:
        print("Drawing instruction")
        for i, sample in enumerate(rollout):
            frames[i] = self.presenter.overlay_text(frames[i], sample["instruction"])

    # Draw trajectory history
    if params["draw_trajectory"]:
        print("Drawing trajectory")
        for i, sample in enumerate(rollout):
            # Positions of all poses up to and including frame i
            history = poses_px[:i + 1]
            position_history = [h.position for h in history]
            frames[i] = self.presenter.draw_trajectory(frames[i], position_history, self.world_size_m)

    # Draw drone
    if params["draw_drone"]:
        print("Drawing drone")
        for i, sample in enumerate(rollout):
            frames[i] = self.presenter.draw_drone(frames[i], poses_m[i], self.world_size_m)

    # Draw observability mask:
    if params["draw_fov"]:
        print("Drawing FOV")
        for i, sample in enumerate(rollout):
            frames[i] = self.presenter.draw_observability(frames[i], poses_m[i], self.world_size_m, 84)

    # Visualize (disabled; change the condition to step through the frames)
    if False:
        for i, sample in enumerate(rollout):
            self.presenter.show_image(frames[i], "sample_image", scale=1, waitkey=True)

    return frames
def setEnvContext(self, context):
    """
    Store the environment id from context and load its 256x256 image with the first and
    third channels swapped.

    :param context: mapping that provides "env_id"
    :return: None
    """
    print(f"Set env context to: {context!s}")
    self.env_id = context["env_id"]
    loaded = env.load_env_img(self.env_id, 256, 256)
    # Reverse the order of the three channels
    self.env_img = loaded[:, :, [2, 1, 0]]
def __getitem__(self, idx):
    """
    Build the training examples for one dataset entry.

    In seg-level mode the entry is one (env_id, set_idx, seg_idx) triple from
    self.seg_list; otherwise it is a whole environment from self.env_list and all
    segments of instruction set 0 are used. For each segment with a valid affine
    transform, the rotated image, the path label image, the tokenized instruction and
    the landmark auxiliary labels are produced.

    :param idx: index into self.seg_list (seg-level mode) or self.env_list
    :return: [input_images, input_instructions, label_images, aux_labels], four lists
        with one entry per processed segment
    """
    if self.seg_level:
        env_id = self.seg_list[idx][0]
        set_idx = self.seg_list[idx][1]
        seg_idx = self.seg_list[idx][2]
    else:
        env_id = self.env_list[idx]

    print("top_down_dataset_sm __getitem__ load_env_config")
    env_conf_json = load_env_config(env_id)
    landmark_names, landmark_indices, landmark_positions = get_landmark_locations_airsim(env_conf_json)

    top_down_image = load_env_img(env_id)
    path = load_path(env_id)

    img_x = top_down_image.shape[0]
    img_y = top_down_image.shape[1]

    path_in_img_coords = self.cf_to_img(img_x, path)
    landmark_pos_in_img = self.as_to_img(img_x, np.asarray(landmark_positions)[:, 0:2])
    self.pos_rand_image = self.pos_rand_range * img_x

    #self.plot_path_on_img(top_down_image, path_in_img_coords)
    #self.plot_path_on_img(top_down_image, landmark_pos_in_img)
    #cv2.imshow("top_down", top_down_image)
    #cv2.waitKey()

    input_images = []
    input_instructions = []
    label_images = []
    aux_labels = []

    # Somehow load the instruction with the start and end indices for each of the N segments.
    # Each segment is paired with its real index: enumerating the single-element list
    # would otherwise replace the requested seg_idx with 0 in seg-level mode, and the
    # similar-instruction lookup below would use the wrong segment.
    if self.seg_level:
        indexed_segments = [(seg_idx, self.all_instr[env_id][set_idx]["instructions"][seg_idx])]
    else:
        indexed_segments = enumerate(self.all_instr[env_id][0]["instructions"])

    for seg_idx, seg in indexed_segments:
        start_idx = seg["start_idx"]
        end_idx = seg["end_idx"]
        instruction = seg["instruction"]

        # TODO: Check for overflowz
        seg_path = path_in_img_coords[start_idx:end_idx]
        seg_img = top_down_image.copy()

        #test_plot = self.plot_path_on_img(seg_img, seg_path)
        # TODO: Validate the 0.5 choice, should it be 2?
        affine, cropsize = self.get_affine_matrix(seg_path, 0, [int(img_x / 2), int(img_y / 2)], 0.5)
        if affine is None:
            continue
        seg_img_rot = self.apply_affine(seg_img, affine, cropsize)

        # Label image: the segment path drawn on an empty single-channel image, blurred,
        # transformed like the input image and normalized to the 0-1 range
        seg_labels = np.zeros_like(seg_img[:, :, 0:1]).astype(float)
        seg_labels = self.plot_path_on_img(seg_labels, seg_path)
        seg_labels = gaussian_filter(seg_labels, 4)
        seg_labels_rot = self.apply_affine(seg_labels, affine, cropsize)

        #seg_labels_rot = gaussian_filter(seg_labels_rot, 4)
        seg_labels_rot = self.normalize_0_1(seg_labels_rot)

        # Change to true to visualize the paths / labels
        if False:
            cv2.imshow("rot_img", seg_img_rot)
            cv2.imshow("seg_labels", seg_labels_rot)
            rot_viz = seg_img_rot.astype(np.float64) / 512
            rot_viz[:, :, 0] += seg_labels_rot.squeeze()
            cv2.imshow("rot_viz", rot_viz)
            cv2.waitKey(0)

        tok_instruction = tokenize_instruction(instruction, self.word2token)
        instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)

        # Get landmark classification labels
        landmark_pos_in_seg_img = self.apply_affine_on_pts(landmark_pos_in_img, affine)

        # Down-size images and labels if requested by the model
        if self.img_scale != 1.0:
            seg_img_rot = transform.resize(
                seg_img_rot,
                [seg_img_rot.shape[0] * self.img_scale, seg_img_rot.shape[1] * self.img_scale],
                mode="constant")
            seg_labels_rot = transform.resize(
                seg_labels_rot,
                [seg_labels_rot.shape[0] * self.img_scale, seg_labels_rot.shape[1] * self.img_scale],
                mode="constant")
            landmark_pos_in_seg_img = landmark_pos_in_seg_img * self.img_scale

        seg_img_rot = standardize_image(seg_img_rot)
        seg_labels_rot = standardize_image(seg_labels_rot)
        seg_img_t = torch.from_numpy(seg_img_rot).unsqueeze(0).float()
        seg_labels_t = torch.from_numpy(seg_labels_rot).unsqueeze(0).float()
        landmark_pos_t = torch.from_numpy(landmark_pos_in_seg_img).unsqueeze(0)
        landmark_indices_t = torch.LongTensor(landmark_indices).unsqueeze(0)

        # Keep only landmarks whose two coordinates are both > 0 and < seg_img_t.size(2)
        mask1 = torch.gt(landmark_pos_t, 0)
        mask2 = torch.lt(landmark_pos_t, seg_img_t.size(2))
        mask = mask1 * mask2
        mask = mask[:, :, 0] * mask[:, :, 1]

        landmark_pos_t = torch.masked_select(
            landmark_pos_t, mask.unsqueeze(2).expand_as(landmark_pos_t)).view([-1, 2])
        landmark_indices_t = torch.masked_select(landmark_indices_t, mask).view([-1])

        mentioned_names, mentioned_indices = get_mentioned_landmarks(self.thesaurus, instruction)
        mentioned_labels_t = empty_float_tensor(list(landmark_indices_t.size())).long()
        for i, landmark_idx_present in enumerate(landmark_indices_t):
            if landmark_idx_present in mentioned_indices:
                mentioned_labels_t[i] = 1

        aux_label = {
            "landmark_pos": landmark_pos_t,
            "landmark_indices": landmark_indices_t,
            "landmark_mentioned": mentioned_labels_t,
            "visible_mask": mask,
        }

        if self.include_instr_negatives:
            # If we are to be using similar instructions according to the json file, then
            # initialize choices with similar instructions. Otherwise let choices be empty, and they will
            # be filled in the following lines.
            if self.instr_negatives_similar_only:
                choices = self.similar_instruction_map[str(env_id)][str(seg_idx)]
            else:
                choices = []

            # If there are no similar instructions to this instruction, pick a completely random instruction
            while len(choices) == 0:
                env_options = list(self.similar_instruction_map.keys())
                random_env = random.choice(env_options)
                seg_options = list(self.similar_instruction_map[random_env].keys())
                if len(seg_options) == 0:
                    continue
                random_seg = random.choice(seg_options)
                choices = self.similar_instruction_map[random_env][random_seg]

            pick = random.choice(choices)
            picked_env = pick["env_id"]
            picked_seg = pick["seg_idx"]
            picked_set = pick["set_idx"]
            picked_instruction = self.all_instr[picked_env][picked_set]["instructions"][picked_seg]["instruction"]
            tok_fake_instruction = tokenize_instruction(picked_instruction, self.word2token)
            aux_label["negative_instruction"] = torch.LongTensor(tok_fake_instruction).unsqueeze(0)

        input_images.append(seg_img_t)
        input_instructions.append(instruction_t)
        label_images.append(seg_labels_t)
        aux_labels.append(aux_label)

    return [input_images, input_instructions, label_images, aux_labels]
def map_affine_test():
    """
    Visual check and timing of MapAffine: transforming pose0 -> pose1 -> pose2 in two
    steps is compared against the direct pose0 -> pose2 transform, at the original and
    at a larger size. A smaller CUDA variant is also run and timed. Results are shown
    on screen.
    """
    def _make_pose(x, y, yaw):
        # Position and orientation each get a leading axis of size 1
        position = np.asarray([x, y, 0])
        orientation = euler.euler2quat(0, 0, yaw)
        return Pose(position[np.newaxis, :], orientation[np.newaxis, :])

    img = torch.from_numpy(standardize_image(load_env_img(2, 128, 128))).float().unsqueeze(0)

    theta1 = 0.5
    D = 10.0
    pose0 = _make_pose(15, 15, 0)
    pose1 = _make_pose(15, 15, theta1)
    pose2 = _make_pose(15 + D * math.cos(theta1), 15 + D * math.sin(theta1), theta1)

    affine = MapAffine(128, 128, 128)
    res1 = affine(img, pose0, pose1)
    res2 = affine(res1, pose1, pose2)
    res3 = affine(img, pose0, pose2)

    prof = SimpleProfiler(torch_sync=True, print=True)

    affinebig = MapAffine(128, 256, 128)
    prof.tick("init")
    res3big = affinebig(img, pose0, pose2)
    prof.tick("affinebig")

    # From here on `img` is the 32x32 image on the GPU; it is also the one displayed below.
    img = torch.from_numpy(standardize_image(load_env_img(2, 32, 32))).float().unsqueeze(0).cuda()
    affines = MapAffine(32, 64, 32).cuda()
    torch.cuda.synchronize()
    prof.tick("init")
    res3s = affines(img, pose0, pose2)
    prof.tick("affines")

    prof.print_stats()

    print("Start pose: ", pose0)
    print(" Pose 1: ", pose1)
    print(" Pose 2: ", pose2)

    print("Res2, Res3 and Res3Big should align!")

    # (image, window title, wait for key press) - only the last window waits
    windows = [
        (img[0], "img", False),
        (res1.data[0], "res_1", False),
        (res2.data[0], "res_2", False),
        (res3.data[0], "res_3", False),
        (res3big.data[0], "res3big", True),
    ]
    for image, title, wait in windows:
        Presenter().show_image(image, title, torch=True, waitkey=wait, scale=2)