def draw_agent(self, map, agent_center_coord, agent_rotation, agent_radius_px=5):
    # Thin wrapper around maps.draw_agent, which stamps the agent sprite
    # onto the map in place and returns the same array.
    maps.draw_agent(
        map, agent_center_coord, agent_rotation, agent_radius_px=agent_radius_px
    )
    return map
def draw_top_down_map(info, heading, output_size):
    """Generates a map that displays the state of the agent in the given
    environment, for the current frame.

    Args:
        info: environment info for the current frame.
        heading: the agent's heading, in radians.
        output_size: height of the output map, in pixels.

    Returns:
        the output_size x width x 3 colorized map.
    """
    top_down_map = maps.colorize_topdown_map(
        info["top_down_map"]["map"], info["top_down_map"]["fog_of_war_mask"]
    )
    original_map_size = top_down_map.shape[:2]
    map_scale = np.array(
        (1, original_map_size[1] * 1.0 / original_map_size[0])
    )
    new_map_size = np.round(output_size * map_scale).astype(np.int32)
    # OpenCV expects (w, h) but the map size is (h, w)
    top_down_map = cv2.resize(top_down_map, (new_map_size[1], new_map_size[0]))

    map_agent_pos = info["top_down_map"]["agent_map_coord"]
    map_agent_pos = np.round(
        map_agent_pos * new_map_size / original_map_size
    ).astype(np.int32)
    top_down_map = maps.draw_agent(
        top_down_map,
        map_agent_pos,
        heading - np.pi / 2,
        agent_radius_px=top_down_map.shape[0] / 40,
    )
    return top_down_map
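# Usage sketch for draw_top_down_map above, assuming a habitat-lab Env whose
# task config enables the TOP_DOWN_MAP measure and the HEADING_SENSOR; the
# config path is a placeholder.
import habitat

env = habitat.Env(config=habitat.get_config("configs/tasks/pointnav.yaml"))
observations = env.reset()
while not env.episode_over:
    observations = env.step(env.action_space.sample())
    info = env.get_metrics()
    if "top_down_map" in info:
        # the heading sensor returns a 1-element array of radians
        map_frame = draw_top_down_map(
            info, observations["heading"][0], output_size=256
        )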
def generate_map_frame(env, obs_dim):
    """Generates a map that displays the state of the agent in the given
    environment, for the current frame.

    Args:
        env: sim environment.
        obs_dim: dimension of observations.

    Returns:
        the height x width x 1 map.
    """
    # draw map
    top_down_map = maps.get_topdown_map(
        sim=env.sim, map_resolution=(1250, 1250, 1), num_samples=200
    )

    # draw agent
    # TODO: figure out how to convert quaternion to angles to get agent_rotation
    top_down_map = maps.draw_agent(
        image=top_down_map,
        agent_center_coord=(1, 1),
        agent_rotation=0.0,
        agent_radius_px=8,
    )

    # scale top down map to align with rgb view
    old_h, old_w, _ = top_down_map.shape
    top_down_height = obs_dim[0]
    top_down_width = int(float(top_down_height) / old_h * old_w)
    # cv2 resize (dsize is width first)
    top_down_map = cv2.resize(
        top_down_map,
        (top_down_width, top_down_height),
        interpolation=cv2.INTER_CUBIC,
    )
    top_down_map = np.expand_dims(top_down_map, axis=2)
    return top_down_map
def observations_to_image(observation: Dict, info: Dict) -> np.ndarray:
    r"""Generate image of single frame from observation and info
    returned from a single environment step().

    Args:
        observation: observation returned from an environment step().
        info: info returned from an environment step().

    Returns:
        generated image of a single frame.
    """
    egocentric_view = []
    if "rgb" in observation:
        observation_size = observation["rgb"].shape[0]
        egocentric_view.append(observation["rgb"][:, :, :3])

    # draw depth map if observation has depth info
    if "depth" in observation:
        observation_size = observation["depth"].shape[0]
        depth_map = (observation["depth"].squeeze() * 255).astype(np.uint8)
        depth_map = np.stack([depth_map for _ in range(3)], axis=2)
        egocentric_view.append(depth_map)

    assert len(egocentric_view) > 0, "Expected at least one visual sensor enabled."
    egocentric_view = np.concatenate(egocentric_view, axis=1)

    # draw collision
    if "collisions" in info and info["collisions"]["is_collision"]:
        egocentric_view = draw_collision(egocentric_view)

    frame = egocentric_view
    if "top_down_map" in info:
        top_down_map = info["top_down_map"]["map"]
        top_down_map = maps.colorize_topdown_map(
            top_down_map, info["top_down_map"]["fog_of_war_mask"]
        )
        map_agent_pos = info["top_down_map"]["agent_map_coord"]
        top_down_map = maps.draw_agent(
            image=top_down_map,
            agent_center_coord=map_agent_pos,
            agent_rotation=info["top_down_map"]["agent_angle"],
            agent_radius_px=top_down_map.shape[0] // 16,
        )

        if top_down_map.shape[0] > top_down_map.shape[1]:
            top_down_map = np.rot90(top_down_map, 1)

        # scale top down map to align with rgb view
        old_h, old_w, _ = top_down_map.shape
        top_down_height = observation_size
        top_down_width = int(float(top_down_height) / old_h * old_w)
        # cv2 resize (dsize is width first)
        top_down_map = cv2.resize(
            top_down_map,
            (top_down_width, top_down_height),
            interpolation=cv2.INTER_CUBIC,
        )
        frame = np.concatenate((egocentric_view, top_down_map), axis=1)
    return frame
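# Usage sketch for observations_to_image above: collect one frame per step and
# write a video with habitat's images_to_video helper. The output names are
# placeholders; env is assumed to be a habitat.Env with the TOP_DOWN_MAP
# measure enabled.
from habitat.utils.visualizations.utils import images_to_video

frames = []
observations = env.reset()
while not env.episode_over:
    observations = env.step(env.action_space.sample())
    frames.append(observations_to_image(observations, env.get_metrics()))
images_to_video(frames, output_dir="videos", video_name="example_episode")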
def draw_top_down_map(info):
    top_down_map = info["top_down_map"]["map"]
    top_down_map = maps.colorize_topdown_map(top_down_map)
    map_agent_pos = info["top_down_map"]["agent_map_coord"]
    top_down_map = maps.draw_agent(
        image=top_down_map,
        agent_center_coord=map_agent_pos,
        agent_rotation=info["top_down_map"]["agent_angle"],
        agent_radius_px=top_down_map.shape[0] // 25,
    )
    return top_down_map
def plot_top_down_map(info, dataset='replica', pred=None):
    top_down_map = info["top_down_map"]["map"]
    top_down_map = maps.colorize_topdown_map(
        top_down_map, info["top_down_map"]["fog_of_war_mask"]
    )
    map_agent_pos = info["top_down_map"]["agent_map_coord"]
    if dataset == 'replica':
        agent_radius_px = top_down_map.shape[0] // 16
    else:
        agent_radius_px = top_down_map.shape[0] // 50
    top_down_map = maps.draw_agent(
        image=top_down_map,
        agent_center_coord=map_agent_pos,
        agent_rotation=info["top_down_map"]["agent_angle"],
        agent_radius_px=agent_radius_px,
    )

    if pred is not None:
        from habitat.utils.geometry_utils import quaternion_rotate_vector

        # rotate the predicted goal offset from the agent frame into the
        # world frame, then convert to grid cells
        source_rotation = info["top_down_map"]["agent_rotation"]
        rounded_pred = np.round(pred[1])
        direction_vector_agent = np.array([rounded_pred[1], 0, -rounded_pred[0]])
        direction_vector = quaternion_rotate_vector(
            source_rotation, direction_vector_agent
        )
        grid_size = (
            (maps.COORDINATE_MAX - maps.COORDINATE_MIN) / 10000,
            (maps.COORDINATE_MAX - maps.COORDINATE_MIN) / 10000,
        )
        delta_x = int(-direction_vector[0] / grid_size[0])
        delta_y = int(direction_vector[2] / grid_size[1])

        x = np.clip(map_agent_pos[0] + delta_x, a_min=0, a_max=top_down_map.shape[0])
        y = np.clip(map_agent_pos[1] + delta_y, a_min=0, a_max=top_down_map.shape[1])
        point_padding = 20
        # stamp a filled circle of radius point_padding at the predicted goal
        for m in range(x - point_padding, x + point_padding + 1):
            for n in range(y - point_padding, y + point_padding + 1):
                if np.linalg.norm(np.array([m - x, n - y])) <= point_padding and \
                        0 <= m < top_down_map.shape[0] and 0 <= n < top_down_map.shape[1]:
                    top_down_map[m, n] = (0, 255, 255)
        if np.linalg.norm(rounded_pred) < 1:
            assert delta_x == 0 and delta_y == 0

    if top_down_map.shape[0] > top_down_map.shape[1]:
        top_down_map = np.rot90(top_down_map, 1)
    return top_down_map
def plot_top_down_map(info, dataset='replica'):
    top_down_map = info["top_down_map"]["map"]
    top_down_map = maps.colorize_topdown_map(
        top_down_map, info["top_down_map"]["fog_of_war_mask"]
    )
    map_agent_pos = info["top_down_map"]["agent_map_coord"]
    if dataset == 'replica':
        agent_radius_px = top_down_map.shape[0] // 16
    else:
        agent_radius_px = top_down_map.shape[0] // 50
    top_down_map = maps.draw_agent(
        image=top_down_map,
        agent_center_coord=map_agent_pos,
        agent_rotation=info["top_down_map"]["agent_angle"],
        agent_radius_px=agent_radius_px,
    )
    if top_down_map.shape[0] > top_down_map.shape[1]:
        top_down_map = np.rot90(top_down_map, 1)
    return top_down_map
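# Quick inspection of plot_top_down_map above with matplotlib (assumed
# available); env is a habitat.Env with the TOP_DOWN_MAP measure enabled.
import matplotlib.pyplot as plt

info = env.get_metrics()
plt.imshow(plot_top_down_map(info, dataset='replica'))
plt.axis("off")
plt.show()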
def draw_top_down_map(info, heading, output_size):
    top_down_map = maps.colorize_topdown_map(info["top_down_map"]["map"])
    original_map_size = top_down_map.shape[:2]
    map_scale = np.array(
        (1, original_map_size[1] * 1.0 / original_map_size[0])
    )
    new_map_size = np.round(output_size * map_scale).astype(np.int32)
    # OpenCV expects (w, h) but the map size is (h, w)
    top_down_map = cv2.resize(top_down_map, (new_map_size[1], new_map_size[0]))

    map_agent_pos = info["top_down_map"]["agent_map_coord"]
    map_agent_pos = np.round(
        map_agent_pos * new_map_size / original_map_size
    ).astype(np.int32)
    top_down_map = maps.draw_agent(
        top_down_map,
        map_agent_pos,
        heading - np.pi / 2,
        agent_radius_px=top_down_map.shape[0] / 40,
    )
    return top_down_map
def topdown_to_image(topdown_info: np.ndarray) -> np.ndarray:
    r"""Convert topdown map to an RGB image."""
    top_down_map = topdown_info["map"]
    fog_of_war_mask = topdown_info["fog_of_war_mask"]
    top_down_map = maps.colorize_topdown_map(top_down_map, fog_of_war_mask)
    map_agent_pos = topdown_info["agent_map_coord"]

    # pad to a square with white borders, shifting the agent coordinate
    # by the same offset
    min_map_size = 200
    if top_down_map.shape[0] != top_down_map.shape[1]:
        H = top_down_map.shape[0]
        W = top_down_map.shape[1]
        if H > W:
            pad_value = (H - W) // 2
            padding = ((0, 0), (pad_value, pad_value), (0, 0))
            map_agent_pos = (map_agent_pos[0], map_agent_pos[1] + pad_value)
        else:
            pad_value = (W - H) // 2
            padding = ((pad_value, pad_value), (0, 0), (0, 0))
            map_agent_pos = (map_agent_pos[0] + pad_value, map_agent_pos[1])
        top_down_map = np.pad(
            top_down_map, padding, mode="constant", constant_values=255
        )

    # upscale small maps, rescaling the agent coordinate accordingly
    if top_down_map.shape[0] < min_map_size:
        H, W = top_down_map.shape[:2]
        top_down_map = cv2.resize(top_down_map, (min_map_size, min_map_size))
        map_agent_pos = (
            int(map_agent_pos[0] * min_map_size // H),
            int(map_agent_pos[1] * min_map_size // W),
        )

    top_down_map = maps.draw_agent(
        image=top_down_map,
        agent_center_coord=map_agent_pos,
        agent_rotation=topdown_info["agent_angle"],
        agent_radius_px=top_down_map.shape[0] // 16,
    )
    return top_down_map
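# Sanity-check sketch for the padding bookkeeping in topdown_to_image: a
# synthetic wide map should come back square, with the agent coordinate
# shifted by the same pad. The dict keys mirror habitat's TopDownMap measure
# output; all values here are made up.
dummy_info = {
    "map": np.zeros((100, 300), dtype=np.uint8),
    "fog_of_war_mask": np.ones((100, 300), dtype=np.uint8),
    "agent_map_coord": (50, 150),
    "agent_angle": 0.0,
}
img = topdown_to_image(dummy_info)
assert img.shape[0] == img.shape[1]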
trajectory = [
    maps.to_grid(
        path_point[2],
        path_point[0],
        grid_dimensions,
        pathfinder=sim.pathfinder,
    )
    for path_point in path_points
]
grid_tangent = mn.Vector2(
    trajectory[1][1] - trajectory[0][1], trajectory[1][0] - trajectory[0][0]
)
path_initial_tangent = grid_tangent / grid_tangent.length()
initial_angle = math.atan2(path_initial_tangent[0], path_initial_tangent[1])
# draw the agent and trajectory on the map
maps.draw_path(top_down_map, trajectory)
maps.draw_agent(
    top_down_map, trajectory[0], initial_angle, agent_radius_px=8
)
print("\nDisplay the map with agent and path overlay:")
display_map(top_down_map)

# @markdown 4. (optional) Place agent and render images at trajectory points (if found).
display_path_agent_renders = True  # @param{type:"boolean"}
if display_path_agent_renders:
    print("Rendering observations at path points:")
    tangent = path_points[1] - path_points[0]
    agent_state = habitat_sim.AgentState()
    for ix, point in enumerate(path_points):
        if ix < len(path_points) - 1:
            tangent = path_points[ix + 1] - point
        agent_state.position = point
        tangent_orientation_matrix = mn.Matrix4.look_at(
            point, point + tangent, np.array([0, 1.0, 0])
        )
def observations_to_image(observation: Dict, info: Dict) -> np.ndarray:
    r"""Generate image of single frame from observation and info
    returned from a single environment step().

    Args:
        observation: observation returned from an environment step().
        info: info returned from an environment step().

    Returns:
        generated image of a single frame.
    """
    observation_size = observation["rgb"].shape[0]
    egocentric_view = observation["rgb"][:, :, -3:]

    # draw collision
    if "collisions" in info and info["collisions"]["is_collision"]:
        egocentric_view = draw_collision(egocentric_view)

    if "goal_coord_in_camera" in observation:
        _, _, _, xpx, ypx = observation["goal_coord_in_camera"]
        if xpx != -1 and ypx != -1:
            # convert normalized, center-origin coordinates to pixels
            xpx = int(xpx * observation_size + observation_size / 2)
            ypx = int(ypx * observation_size + observation_size / 2)
            egocentric_view = cv2.circle(
                egocentric_view, (xpx, ypx), 15, (0, 0, 255), 5
            )

    # draw depth map if observation has depth info
    if "depth" in observation:
        depth_map = (observation["depth"][:, :, -1] * 255).astype(np.uint8)
        depth_map = np.stack([depth_map for _ in range(3)], axis=2)
        egocentric_view = np.concatenate((egocentric_view, depth_map), axis=1)

    if "goalclass" in observation:
        from habitat.tasks.nav.nav_task_multi_goal import CLASSES

        index = np.nonzero(observation["goalclass"])[0][0]
        classes = list(CLASSES.keys())
        class_name = classes[index]
        cv2.putText(
            egocentric_view, class_name, (15, 15),
            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255),
        )

    frame = egocentric_view
    if "top_down_map" in info:
        top_down_map = info["top_down_map"]["map"]
        top_down_map = maps.colorize_topdown_map(
            top_down_map, info["top_down_map"]["fog_of_war_mask"]
        )
        map_agent_pos = info["top_down_map"]["agent_map_coord"]
        top_down_map = maps.draw_agent(
            image=top_down_map,
            agent_center_coord=map_agent_pos,
            agent_rotation=info["top_down_map"]["agent_angle"],
            agent_radius_px=top_down_map.shape[0] // 16,
        )
        if top_down_map.shape[0] > top_down_map.shape[1]:
            top_down_map = np.rot90(top_down_map, 1)

        # scale top down map to align with rgb view
        old_h, old_w, _ = top_down_map.shape
        top_down_height = observation_size
        top_down_width = int(float(top_down_height) / old_h * old_w)
        # cv2 resize (dsize is width first)
        top_down_map = cv2.resize(
            top_down_map,
            (top_down_width, top_down_height),
            interpolation=cv2.INTER_CUBIC,
        )
        frame = np.concatenate((egocentric_view, top_down_map), axis=1)
    return frame
def obs_to_images(obs):
    img = obs["rgb"].copy()
    images = [img.transpose(0, 2, 3, 1)]

    # Draw top down view
    if "visited_grid" in obs:
        top_down_map = obs["visited_grid"][0, ...]
    elif "top_down_map" in obs:
        top_down_map = maps.colorize_topdown_map(obs["top_down_map"]["map"])
        map_size = 1024
        original_map_size = top_down_map.shape[:2]
        if original_map_size[0] > original_map_size[1]:
            map_scale = np.array(
                (1, original_map_size[1] * 1.0 / original_map_size[0])
            )
        else:
            map_scale = np.array(
                (original_map_size[0] * 1.0 / original_map_size[1], 1)
            )
        new_map_size = np.round(map_size * map_scale).astype(np.int32)
        # OpenCV expects (w, h) but the map size is (h, w)
        top_down_map = cv2.resize(top_down_map, (new_map_size[1], new_map_size[0]))

        map_agent_pos = obs["top_down_map"]["agent_map_coord"]
        map_agent_pos = np.round(
            map_agent_pos * new_map_size / original_map_size
        ).astype(np.int32)
        top_down_map = maps.draw_agent(
            top_down_map,
            map_agent_pos,
            obs["heading"] - np.pi / 2,
            agent_radius_px=top_down_map.shape[0] / 40,
        )
    else:
        top_down_map = None

    normalize = [True]
    titles = [(
        ("Method: %s" % obs["method"].replace("_", " ")),
        ("Step: %03d Reward: %.3f" % (obs["step"][0], obs.get("reward", [0])[0])),
        ("Action: %s" % string.capwords(obs["action_taken_name"].replace("_", " "))),
    )]
    images.append(top_down_map)
    if "visited" in obs:
        titles.append((("Visited Cube Count: %d" % obs["visited"][0]),))
    elif "distance_from_start" in obs:
        titles.append("Geo Dist From Origin: %.3f" % obs["distance_from_start"][0])
    elif "pointgoal" in obs:
        titles.append((
            ("Euc Dist: %.3f" % obs["pointgoal"][0, 0]),
            ("Geo Dist: %.3f" % obs["goal_geodesic_distance"][0]),
        ))
    normalize.append(False)

    for key, val in obs.items():
        if key == "depth" or key == "output_depth":
            normalize.append(False)
            val = val[:, 0, ...]
            depth = np.clip(val, -0.5, 0.5)
            depth += 0.5
            depth *= 255
            titles.append(key)
            depth = depth.astype(np.uint8)
            depth = np.reshape(depth, (-1, depth.shape[-1]))
            images.append(depth)
        elif key == "surface_normals" or key == "output_surface_normals":
            titles.append(key)
            normalize.append(False)
            val = val.copy()
            if key == "output_surface_normals":
                # Still need to be normalized
                val /= np.sqrt(np.sum(val ** 2, axis=1, keepdims=True))
            surfnorm = (
                np.clip((val + 1), 0, 2) * 127
            ).astype(np.uint8).transpose((0, 2, 3, 1))
            images.append(surfnorm)
        elif key == "semantic":
            titles.append(key)
            normalize.append(False)
            seg = (val * 314.159 % 255).astype(np.uint8)
            seg = np.reshape(seg, (-1, seg.shape[-1]))
            images.append(seg)
        elif key == "output_reconstruction":
            titles.append(key)
            normalize.append(False)
            val = np.clip(val, -0.5, 0.5)
            val += 0.5
            val *= 255
            val = val.astype(np.uint8).transpose((0, 2, 3, 1))
            images.append(val)
        elif key in {"action_prob", "action_taken", "egomotion_pred", "best_next_action"}:
            if key == "action_prob":
                titles.append(("Output Distribution", "p(Forward) p(Left) p(Right)"))
            else:
                titles.append(key)
            if val is not None:
                normalize.append(True)
                prob_hists = np.concatenate(
                    [draw_probability_hist(pi) for pi in val.copy()], axis=0
                )
                images.append(prob_hists)
            else:
                images.append(None)
                normalize.append(False)

    images.append(top_down_map)
    normalize.append(True)
    titles = [
        string.capwords(title.replace("_", " ")) if isinstance(title, str) else title
        for title in titles
    ]
    return images, titles, normalize
def observations_to_image(
    observation: Dict, info: Dict, reward, weights_output=None, aux_tasks=[]
) -> np.ndarray:
    r"""Generate image of single frame from observation and info
    returned from a single environment step().

    Args:
        observation: observation returned from an environment step().
        info: info returned from an environment step().
        reward: float to append.
        weights_output: attention weights for viz.

    Returns:
        generated image of a single frame.
    """
    egocentric_view = []
    if "rgb" in observation:
        observation_size = observation["rgb"].shape[0]
        rgb = observation["rgb"]
        if not isinstance(rgb, np.ndarray):
            rgb = rgb.cpu().numpy()
        egocentric_view.append(rgb)

    # draw depth map if observation has depth info
    if "depth" in observation:
        observation_size = observation["depth"].shape[0]
        depth_map = observation["depth"].squeeze() * 255.0
        if not isinstance(depth_map, np.ndarray):
            depth_map = depth_map.cpu().numpy()
        depth_map = depth_map.astype(np.uint8)
        depth_map = np.stack([depth_map for _ in range(3)], axis=2)
        egocentric_view.append(depth_map)

    assert len(egocentric_view) > 0, "Expected at least one visual sensor enabled."
    egocentric_view = np.concatenate(egocentric_view, axis=1)

    # draw collision
    if "collisions" in info and info["collisions"]["is_collision"]:
        egocentric_view = draw_collision(egocentric_view)

    frame = egocentric_view
    if "top_down_map" in info:
        top_down_map = info["top_down_map"]["map"]
        top_down_map = maps.colorize_topdown_map(
            top_down_map, info["top_down_map"]["fog_of_war_mask"]
        )
        map_agent_pos = info["top_down_map"]["agent_map_coord"]
        top_down_map = maps.draw_agent(
            image=top_down_map,
            agent_center_coord=map_agent_pos,
            agent_rotation=info["top_down_map"]["agent_angle"],
            agent_radius_px=top_down_map.shape[0] // 16,
        )

        if top_down_map.shape[0] > top_down_map.shape[1]:
            top_down_map = np.rot90(top_down_map, 1)

        # scale top down map to align with rgb view
        old_h, old_w, _ = top_down_map.shape
        top_down_height = observation_size
        top_down_width = int(float(top_down_height) / old_h * old_w)
        # cv2 resize (dsize is width first)
        top_down_map = cv2.resize(
            top_down_map,
            (top_down_width, top_down_height),
            interpolation=cv2.INTER_CUBIC,
        )
        frame = np.concatenate((egocentric_view, top_down_map), axis=1)

    if weights_output is not None and len(aux_tasks) > 1:
        # add a strip to the right of the video
        strip_height = observation_size  # ~256 -> we'll have 5-10 tasks, let's do 24 pixels each
        strip_gap = 24
        strip_width = strip_gap + 12
        strip = np.ones((strip_height, strip_width, 3), dtype=np.uint8) * 255  # white bg

        num_tasks = weights_output.size(0)
        total_height = num_tasks * strip_gap
        offset = int((strip_height - total_height) / 2)
        assert offset > 0, "too many aux tasks to visualize"
        for i in range(num_tasks):
            start_height = i * strip_gap + offset
            strength = int(255 * weights_output[i])
            color = np.array([strength, 0, 0])
            if weights_output[i] > 1.001:
                raise Exception(f"weights is {weights_output}, that's too big")
            strip[start_height: start_height + strip_gap] = color
            task_name = AUX_ABBREV.get(aux_tasks[i], aux_tasks[i])
            task_abbrev = task_name[:3]
            cv2.putText(
                img=strip,
                text=f"{task_abbrev}",
                org=(2, int(start_height + strip_gap / 2)),
                fontFace=2,
                fontScale=0.4,
                color=(255, 255, 255),
                thickness=1,
            )
        frame = np.concatenate((frame, strip), axis=1)
    return frame
def observations_to_image(observation: Dict, info: Dict, pred=None) -> np.ndarray:
    r"""Generate image of single frame from observation and info
    returned from a single environment step().

    Args:
        observation: observation returned from an environment step().
        info: info returned from an environment step().

    Returns:
        generated image of a single frame.
    """
    egocentric_view = []
    if "rgb" in observation:
        observation_size = observation["rgb"].shape[0]
        rgb = observation["rgb"]
        if not isinstance(rgb, np.ndarray):
            rgb = rgb.cpu().numpy()
        egocentric_view.append(rgb)

    # draw depth map if observation has depth info
    if "depth" in observation:
        observation_size = observation["depth"].shape[0]
        depth_map = observation["depth"].squeeze() * 255.0
        if not isinstance(depth_map, np.ndarray):
            depth_map = depth_map.cpu().numpy()
        depth_map = depth_map.astype(np.uint8)
        depth_map = np.stack([depth_map for _ in range(3)], axis=2)
        egocentric_view.append(depth_map)

    assert len(egocentric_view) > 0, "Expected at least one visual sensor enabled."
    egocentric_view = np.concatenate(egocentric_view, axis=1)

    # draw collision
    if "collisions" in info and info["collisions"]["is_collision"]:
        egocentric_view = draw_collision(egocentric_view)

    frame = egocentric_view
    if "top_down_map" in info:
        top_down_map = info["top_down_map"]["map"]
        top_down_map = maps.colorize_topdown_map(
            top_down_map, info["top_down_map"]["fog_of_war_mask"]
        )
        map_agent_pos = info["top_down_map"]["agent_map_coord"]
        top_down_map = maps.draw_agent(
            image=top_down_map,
            agent_center_coord=map_agent_pos,
            agent_rotation=info["top_down_map"]["agent_angle"],
            agent_radius_px=top_down_map.shape[0] // 16,
        )

        if pred is not None:
            from habitat.utils.geometry_utils import quaternion_rotate_vector

            # current_position = sim.get_agent_state().position
            # agent_state = sim.get_agent_state()
            source_rotation = info["top_down_map"]["agent_rotation"]
            rounded_pred = np.round(pred[1])
            direction_vector_agent = np.array([rounded_pred[1], 0, -rounded_pred[0]])
            direction_vector = quaternion_rotate_vector(
                source_rotation, direction_vector_agent
            )
            # pred_goal_location = source_position + direction_vector.astype(np.float32)
            grid_size = (
                (maps.COORDINATE_MAX - maps.COORDINATE_MIN) / 10000,
                (maps.COORDINATE_MAX - maps.COORDINATE_MIN) / 10000,
            )
            delta_x = int(-direction_vector[0] / grid_size[0])
            delta_y = int(direction_vector[2] / grid_size[1])

            x = np.clip(map_agent_pos[0] + delta_x, a_min=0, a_max=top_down_map.shape[0])
            y = np.clip(map_agent_pos[1] + delta_y, a_min=0, a_max=top_down_map.shape[1])
            point_padding = 12
            # stamp a filled circle of radius point_padding at the predicted goal
            for m in range(x - point_padding, x + point_padding + 1):
                for n in range(y - point_padding, y + point_padding + 1):
                    if np.linalg.norm(np.array([m - x, n - y])) <= point_padding and \
                            0 <= m < top_down_map.shape[0] and 0 <= n < top_down_map.shape[1]:
                        top_down_map[m, n] = (0, 255, 255)
            if np.linalg.norm(rounded_pred) < 1:
                assert delta_x == 0 and delta_y == 0

        if top_down_map.shape[0] > top_down_map.shape[1]:
            top_down_map = np.rot90(top_down_map, 1)

        # scale top down map to align with rgb view
        if pred is None:
            old_h, old_w, _ = top_down_map.shape
            top_down_height = observation_size
            top_down_width = int(float(top_down_height) / old_h * old_w)
            # cv2 resize (dsize is width first)
            top_down_map = cv2.resize(
                top_down_map.astype(np.float32),
                (top_down_width, top_down_height),
                interpolation=cv2.INTER_CUBIC,
            )
        else:
            # draw the predicted category label above the map
            CATEGORY_INDEX_MAPPING = {
                'chair': 0, 'table': 1, 'picture': 2, 'cabinet': 3,
                'cushion': 4, 'sofa': 5, 'bed': 6, 'chest_of_drawers': 7,
                'plant': 8, 'sink': 9, 'toilet': 10, 'stool': 11,
                'towel': 12, 'tv_monitor': 13, 'shower': 14, 'bathtub': 15,
                'counter': 16, 'fireplace': 17, 'gym_equipment': 18,
                'seating': 19, 'clothes': 20,
            }
            index2label = {v: k for k, v in CATEGORY_INDEX_MAPPING.items()}
            pred_label = index2label[pred[0]]
            text_height = int(observation_size * 0.1)

            old_h, old_w, _ = top_down_map.shape
            top_down_height = observation_size - text_height
            top_down_width = int(float(top_down_height) / old_h * old_w)
            # cv2 resize (dsize is width first)
            top_down_map = cv2.resize(
                top_down_map.astype(np.float32),
                (top_down_width, top_down_height),
                interpolation=cv2.INTER_CUBIC,
            )
            top_down_map = np.concatenate(
                [
                    np.ones([text_height, top_down_map.shape[1], 3], dtype=np.int32) * 255,
                    top_down_map,
                ],
                axis=0,
            )
            top_down_map = cv2.putText(
                top_down_map,
                'C_t: ' + pred_label.replace('_', ' '),
                (10, text_height - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                1.4,
                (0, 0, 0),
                2,
                cv2.LINE_AA,
            )
        frame = np.concatenate((egocentric_view, top_down_map), axis=1)
    return frame