def execute(self, min_resolution=1080):
    """
    Run inference on the single image at `self.input_file` and write the
    predicted label map to `self.output_path`.

    The model is switched to eval mode, the image is read with
    `imread_rgb`, and `self.execute_on_img` produces the label map. When
    the short side of the input image is below `min_resolution`, the label
    map is passed through `resize_img_by_short_side` before being written.

    Args:
        min_resolution: short-side threshold (in pixels) below which the
            predicted label map is resized to this short-side length.
    """
    logger.info('>>>>>>>>>>>>>>>> Start inference task >>>>>>>>>>>>>>>>')
    self.model.eval()

    logger.info(f'Write image prediction to {self.output_path}')
    rgb_img = imread_rgb(self.input_file)
    pred_label_img = self.execute_on_img(rgb_img)

    # Only the label map is written to disk below, so upsampling the RGB
    # image as well would be wasted work; the input's own resolution is
    # still what decides whether the prediction gets upsampled.
    if np.amin(rgb_img.shape[:2]) < min_resolution:
        pred_label_img = resize_img_by_short_side(
            pred_label_img, min_resolution, 'label')

    imageio.imwrite(self.output_path, pred_label_img)
    logger.info(
        '<<<<<<<<<<<<<<<<< Inference task completed <<<<<<<<<<<<<<<<<')
def test_save_pred_vs_label_7tuple_short_side_60_img():
    """
    Exercise `save_pred_vs_label_7tuple` on a very small image.

    With a short side of only 60 px the image is too low in resolution for
    cv2's text rendering, so an upsampled copy of the visualization is
    expected on disk next to the regular one. Both files are removed at the
    end; a missing file makes `os.remove` raise and the test fail.
    """
    target_short_side = 60  # pixels
    camvid_dir = f'{TEST_DATA_ROOT}/Camvid_test_data'

    # Load the RGB frame and its label map, then shrink both of them.
    raw_rgb = imageio.imread(f'{camvid_dir}/images/0016E5_08159.png')
    raw_label = imageio.imread(f'{camvid_dir}/preds/0016E5_08159.png')
    small_rgb = resize_img_by_short_side(
        raw_rgb, target_short_side, img_type='rgb')
    small_label = resize_img_by_short_side(
        raw_label, target_short_side, img_type='label')

    # Random "prediction" with the same spatial extent as the label map.
    height, width = small_label.shape
    random_pred = np.random.randint(0, 200, (height, width)).astype(np.uint16)

    class_names = get_dataloader_id_to_classname_map('pascal-context-460')
    out_fpath = f'{TEST_DATA_ROOT}/rand_549_temp_small.png'
    save_pred_vs_label_7tuple(
        small_rgb, random_pred, small_label, class_names, out_fpath)

    # Clean up: upsampled variant first, then the native-resolution one.
    expected_outputs = (
        f'{TEST_DATA_ROOT}/rand_549_temp_small_upsample_pred_labels_palette.png',
        f'{TEST_DATA_ROOT}/rand_549_temp_small_pred_labels_palette.png',
    )
    for written_fpath in expected_outputs:
        os.remove(written_fpath)
def render_single_img_pred(self, min_resolution: int = 1080):
    """
    Predict labels for the image at `self.input_file` and save two files
    named after the input's stem: `<stem>_overlaid_classes.jpg` (RGB image
    with the classes overlaid) and `<stem>_gray.jpg` (the raw label map).

    Overlaid class text is difficult to read below 1080p, so when the short
    side of the input is under `min_resolution` both the RGB image and the
    prediction are resized before the overlay is drawn.

    Args:
        min_resolution: short-side threshold, in pixels, that triggers
            the resize.
    """
    in_fname_stem = Path(self.input_file).stem
    output_gray_fpath = f'{in_fname_stem}_gray.jpg'
    output_demo_fpath = f'{in_fname_stem}_overlaid_classes.jpg'
    logger.info(f'Write image prediction to {output_demo_fpath}')

    frame = imread_rgb(self.input_file)
    label_map = self.execute_on_img(frame)

    # Upsample RGB and labels together so the overlaid text is not blurry.
    too_small = np.amin(frame.shape[:2]) < min_resolution
    if too_small:
        frame = resize_img_by_short_side(frame, min_resolution, 'rgb')
        label_map = resize_img_by_short_side(
            label_map, min_resolution, 'label')

    # No metadata is supplied to the visualizer.
    visualizer = Visualizer(frame, None)
    overlay = visualizer.overlay_instances(
        label_map=label_map,
        id_to_class_name_map=self.id_to_class_name_map,
    )

    imageio.imwrite(output_demo_fpath, overlay)
    imageio.imwrite(output_gray_fpath, label_map)
def _tag_png_fpath(fpath: str, tag: str) -> str:
    """Insert `tag` right before the LAST '.png' in `fpath`; no-op if there is none."""
    head, ext, tail = fpath.rpartition('.png')
    if not ext:
        return fpath
    return f'{head}{tag}{ext}{tail}'


def save_pred_vs_label_7tuple(img_rgb: np.ndarray, pred_img: np.ndarray, label_img: np.ndarray, id_to_class_name_map: Mapping[int,str], save_fpath: str) -> None:
    """
    Save a side-by-side visualization of ground truth vs. prediction.

    7-tuple consists of
        (1-3) rgb mask 3-sequence for label,
        (4-6) rgb mask 3-sequence for predictions,
        (7) color palette

    The result is written with `cv2.imwrite` to `save_fpath` with
    '_pred_labels_palette' inserted before its '.png' extension. When the
    short side of the image is below `MIN_DISCERNABLE_RES_FOR_TEXT`, an
    additional upsampled rendering is written first, with '_upsample'
    inserted as well.

    Args:
        img_rgb: array of shape (H, W, C) holding the RGB image.
        pred_img: array of shape (H, W) holding predicted class ids.
        label_img: array of shape (H, W) holding ground-truth class ids.
        id_to_class_name_map: maps a class id to its name for the palette.
        save_fpath: base output path; expected to end in '.png'.

    Returns:
        None
    """
    img_h, img_w, _ = img_rgb.shape
    assert pred_img.shape == (img_h, img_w)
    assert label_img.shape == (img_h, img_w)

    if min(img_h, img_w) < MIN_DISCERNABLE_RES_FOR_TEXT:
        # Too small for legible text: also render an upsampled version.
        # NOTE(review): termination of this recursion relies on
        # resize_img_by_short_side returning an image whose short side is
        # at least MIN_DISCERNABLE_RES_FOR_TEXT -- confirm.
        save_pred_vs_label_7tuple(
            img_rgb=resize_img_by_short_side(
                img_rgb.copy(),
                short_side_len=MIN_DISCERNABLE_RES_FOR_TEXT,
                img_type='rgb'),
            pred_img=resize_img_by_short_side(
                pred_img.copy(),
                short_side_len=MIN_DISCERNABLE_RES_FOR_TEXT,
                img_type='label'),
            label_img=resize_img_by_short_side(
                label_img.copy(),
                short_side_len=MIN_DISCERNABLE_RES_FOR_TEXT,
                img_type='label'),
            id_to_class_name_map=id_to_class_name_map,
            # Only the trailing extension is tagged; a plain str.replace
            # would also rewrite any '.png' inside a directory name.
            save_fpath=_tag_png_fpath(save_fpath, '_upsample')
        )

    NUM_HSTACKED_IMGS = 3
    # Top row: label triple; bottom row: prediction triple.
    hstack_img1 = form_mask_triple(img_rgb, label_img, save_fpath, save_to_disk=False)
    hstack_img2 = form_mask_triple(img_rgb, pred_img, save_fpath, save_to_disk=False)
    vstack_img1 = np.vstack([hstack_img1, hstack_img2])

    # The palette lists every class id present in either the label or the prediction.
    present_color_ids = np.union1d(np.unique(label_img), np.unique(pred_img))
    num_present_colors = len(present_color_ids)
    max_colors_per_col = int(math.ceil(num_present_colors / NUM_HSTACKED_IMGS))
    palette_img = form_contained_classes_color_guide(
        present_color_ids,
        id_to_class_name_map,
        '',
        '',
        save_to_disk=False,
        max_colors_per_col=max_colors_per_col
    )

    vstack_img2 = vstack_img_with_palette(vstack_img1, palette_img)

    save_fpath = _tag_png_fpath(save_fpath, '_pred_labels_palette')
    # Reverse the channel axis (RGB -> BGR) because cv2.imwrite expects BGR.
    cv2.imwrite(save_fpath, vstack_img2[:,:,::-1])
def execute_on_video(self, max_num_frames: int = 5000, min_resolution: int = 1080) -> None:
    """
    input_file is a path to a video file.
    Read frames from an RGB video file, and write overlaid predictions
    into a new video file under `{_ROOT}/temp_files/`.

    Args:
        max_num_frames: upper bound on the number of frames that are
            processed and written; at most this many frames are handled.
        min_resolution: short-side threshold, in pixels. Frames below it
            are resized (together with their predictions) before the
            class text is overlaid, to avoid blurry output.

    Returns:
        None
    """
    in_fname_stem = Path(self.input_file).stem
    out_fname = f'{in_fname_stem}_{self.args.model_name}_universal'
    out_fname += f'_scales_{self.scales_str}_base_sz_{self.args.base_size}.mp4'
    output_video_fpath = f'{_ROOT}/temp_files/{out_fname}'
    create_leading_fpath_dirs(output_video_fpath)
    logger.info(f'Write video to {output_video_fpath}')
    writer = VideoWriter(output_video_fpath)
    reader = VideoReader(self.input_file)

    try:
        # Bound the loop up front so exactly `max_num_frames` frames (not
        # one more) are handled and no frame is decoded just to be dropped.
        num_frames_to_process = min(reader.num_frames, max_num_frames)
        for frame_idx in range(num_frames_to_process):
            logger.info(f'On image {frame_idx}/{reader.num_frames}')
            rgb_img = reader.get_frame()
            pred_label_img = self.execute_on_img(rgb_img)

            # avoid blurry images by upsampling RGB before overlaying text
            if np.amin(rgb_img.shape[:2]) < min_resolution:
                rgb_img = resize_img_by_short_side(
                    rgb_img, min_resolution, 'rgb')
                pred_label_img = resize_img_by_short_side(
                    pred_label_img, min_resolution, 'label')

            metadata = None
            frame_visualizer = Visualizer(rgb_img, metadata)
            output_img = frame_visualizer.overlay_instances(
                label_map=pred_label_img,
                id_to_class_name_map=self.id_to_class_name_map)
            writer.add_frame(output_img)
    finally:
        # Presumably complete() finalizes/releases the underlying video
        # handles, so call it even when a frame fails mid-way.
        reader.complete()
        writer.complete()
def test_resize_img_by_short_side_label2():
    """
    Downsample a random uint8 label map of shape (200, 100) so that its
    short side becomes 10, and check that the output has shape (20, 10)
    and is still uint8.
    """
    random_label = np.random.randint(0, 255, size=(200, 100)).astype(np.uint8)

    shrunk_label = resize_img_by_short_side(
        random_label,
        short_side_len=10,
        img_type='label',
    )

    expected_shape = (20, 10)
    assert shrunk_label.shape == expected_shape
    assert shrunk_label.dtype == np.uint8
def test_resize_img_by_short_side_rgb():
    """
    Downsample a random uint8 RGB image of shape (800, 200, 3) so that its
    short side becomes 10, and check that the output has shape (40, 10, 3)
    and is still uint8.
    """
    # Gaussian noise scaled to the 8-bit range, clipped, then cast to uint8.
    noise = np.random.randn(800, 200, 3) * 255
    random_rgb = np.clip(noise, 0, 255).astype(np.uint8)

    shrunk_rgb = resize_img_by_short_side(
        random_rgb,
        short_side_len=10,
        img_type='rgb',
    )

    expected_shape = (40, 10, 3)
    assert shrunk_rgb.shape == expected_shape
    assert shrunk_rgb.dtype == np.uint8