def draw_predictions_dual(
    input: dict,
    output: dict,
    image_id_key="image_id",
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    class_colors=(
        (0, 0, 0),  # 0=background
        (0, 255, 0),  # no damage (or just 'building' for localization) (green)
        (255, 255, 0),  # minor damage (yellow)
        (255, 128, 0),  # major damage (orange)
        (255, 0, 0),  # destroyed (red)
    ),
):
    """Render GT-vs-prediction mosaics for pre/post disaster image pairs.

    For each sample the result is a 2x2 mosaic: top row shows the pre/post
    images overlaid with ground-truth masks, bottom row shows the same images
    overlaid with predictions; the mosaic is downscaled so its longest side
    is 1024 px and captioned with the sample id.

    :param input: Model input batch (images, masks, ids).
    :param output: Model output batch (localization and damage logits).
    :param image_id_key: Key in ``input`` holding per-sample ids.
    :param mean: Mean used during normalization.
    :param std: Std used during normalization.
    :param class_colors: Per-class overlay colors. The default is a tuple to
        avoid the shared-mutable-default-argument pitfall.
    :return: List of BGR images, one per sample.
    """
    images = []
    num_images = len(input[image_id_key])
    for i in range(num_images):
        # Caption with the actual sample id. The original drew the bare batch
        # index and used image_id_key only for the batch length.
        image_id = input[image_id_key][i]

        image_pre = rgb_image_from_tensor(input[INPUT_IMAGE_PRE_KEY][i], mean, std)
        image_pre = cv2.cvtColor(image_pre, cv2.COLOR_RGB2BGR)
        image_post = rgb_image_from_tensor(input[INPUT_IMAGE_POST_KEY][i], mean, std)
        image_post = cv2.cvtColor(image_post, cv2.COLOR_RGB2BGR)

        image_pre_gt = image_pre.copy()
        image_post_gt = image_post.copy()

        localization_target = to_numpy(input[INPUT_MASK_PRE_KEY][i].squeeze(0))
        damage_target = to_numpy(input[INPUT_MASK_POST_KEY][i])
        image_pre_gt = overlay_image_and_mask(image_pre_gt, localization_target, class_colors)
        image_post_gt = overlay_image_and_mask(image_post_gt, damage_target, class_colors)

        # Localization head is single-channel: sigmoid + 0.5 threshold.
        localization_predictions = to_numpy(
            output[OUTPUT_MASK_PRE_KEY][i].squeeze(0).sigmoid() > 0.5).astype(np.uint8)
        # Damage head is multi-class: argmax over the channel axis.
        damage_predictions = to_numpy(output[OUTPUT_MASK_POST_KEY][i]).argmax(axis=0)

        image_pre = overlay_image_and_mask(image_pre, localization_predictions, class_colors)
        image_post = overlay_image_and_mask(image_post, damage_predictions, class_colors)

        overlay_gt = np.column_stack([image_pre_gt, image_post_gt])
        overlay = np.column_stack([image_pre, image_post])
        overlay = np.row_stack([overlay_gt, overlay])
        overlay = longest_max_size(overlay, 1024, cv2.INTER_LINEAR)

        cv2.putText(overlay, str(image_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))
        images.append(overlay)
    return images
def draw_predictions(
    input: dict,
    output: dict,
    image_id_key="image_id",
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    class_colors=(
        (0, 0, 0),  # 0=background
        (0, 255, 0),  # no damage (or just 'building' for localization) (green)
        (255, 255, 0),  # minor damage (yellow)
        (255, 128, 0),  # major damage (orange)
        (255, 0, 0),  # destroyed (red)
        (127, 127, 127),  # extra class (gray)
    ),
    max_images=32,
):
    """Render GT-vs-prediction mosaics for 6-channel (pre+post) input batches.

    The input image tensor packs pre-disaster (channels 0:3) and post-disaster
    (channels 3:6) images. Each result is a 2x2 mosaic (GT on top, predictions
    on the bottom) scaled so the longest side is 1024 px.

    :param input: Model input batch (images, masks, ids).
    :param output: Model output batch with predicted mask logits.
    :param image_id_key: Key in ``input`` used to determine the batch size.
    :param mean: Mean used during normalization.
    :param std: Std used during normalization.
    :param class_colors: Per-class overlay colors. The default is a tuple to
        avoid the shared-mutable-default-argument pitfall.
    :param max_images: Cap on the number of rendered samples.
    :return: List of BGR images, at most ``max_images`` of them.
    """
    images = []
    num_images = len(input[image_id_key])
    for i in range(num_images):
        image_id = input[INPUT_IMAGE_ID_KEY][i]
        image_pre = rgb_image_from_tensor(input[INPUT_IMAGE_KEY][i, 0:3, ...], mean, std)
        image_pre = cv2.cvtColor(image_pre, cv2.COLOR_RGB2BGR)
        image_post = rgb_image_from_tensor(input[INPUT_IMAGE_KEY][i, 3:6, ...], mean, std)
        image_post = cv2.cvtColor(image_post, cv2.COLOR_RGB2BGR)

        image_pre_gt = image_pre.copy()
        image_post_gt = image_post.copy()

        # The same damage mask is drawn on both the pre and post images.
        damage_target = to_numpy(input[INPUT_MASK_KEY][i])
        image_pre_gt = overlay_image_and_mask(image_pre_gt, damage_target, class_colors)
        image_post_gt = overlay_image_and_mask(image_post_gt, damage_target, class_colors)

        # NOTE(review): predictions are read from `output` under the *input*
        # mask key — verify the pipeline really stores them under that key.
        damage_predictions = to_numpy(output[INPUT_MASK_KEY][i]).argmax(axis=0)
        image_pre = overlay_image_and_mask(image_pre, damage_predictions, class_colors)
        image_post = overlay_image_and_mask(image_post, damage_predictions, class_colors)

        overlay_gt = np.column_stack([image_pre_gt, image_post_gt])
        overlay = np.column_stack([image_pre, image_post])
        overlay = np.row_stack([overlay_gt, overlay])
        overlay = longest_max_size(overlay, 1024, cv2.INTER_LINEAR)

        cv2.putText(overlay, str(image_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))
        images.append(overlay)
        if len(images) >= max_images:
            break
    return images
def draw_binary_segmentation_predictions(input: dict,
                                         output: dict,
                                         image_key='features',
                                         image_id_key='image_id',
                                         targets_key='targets',
                                         outputs_key='logits',
                                         mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225)):
    """Render hit/miss/false-alarm overlays for a binary segmentation batch.

    Pixels where prediction and target agree are painted green (hits),
    missed target pixels red, and spurious predictions yellow; the painted
    mask is alpha-blended onto the input image at 50%.

    :param input: Model input batch (images, targets, optional ids).
    :param output: Model output batch with raw logits.
    :param image_key: Key for the image tensors.
    :param image_id_key: Key for sample ids, or None to skip captions.
    :param targets_key: Key for ground-truth masks.
    :param outputs_key: Key for predicted logits.
    :param mean: Mean used during normalization.
    :param std: Std used during normalization.
    :return: List of rendered images.
    """
    if image_id_key is not None:
        ids = input[image_id_key]
    else:
        ids = [None] * len(input[image_key])

    rendered = []
    for img_t, tgt_t, sample_id, logit_t in zip(input[image_key], input[targets_key], ids, output[outputs_key]):
        img = rgb_image_from_tensor(img_t, mean, std)
        gt = to_numpy(tgt_t).squeeze(0) > 0
        pred = to_numpy(logit_t).squeeze(0) > 0  # raw logits: 0 corresponds to p=0.5

        painted = img.copy()
        painted[gt & pred] = np.array([0, 250, 0], dtype=painted.dtype)  # hits -> green
        painted[gt & ~pred] = np.array([250, 0, 0], dtype=painted.dtype)  # misses -> red
        painted[~gt & pred] = np.array([250, 250, 0], dtype=painted.dtype)  # false alarms -> yellow

        blended = cv2.addWeighted(img, 0.5, painted, 0.5, 0, dtype=cv2.CV_8U)
        if sample_id is not None:
            cv2.putText(blended, str(sample_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))
        rendered.append(blended)
    return rendered
def visualize_inria_predictions(input: dict, output: dict, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Render hit/miss/false-alarm overlays for an Inria segmentation batch.

    Green marks correct building pixels, red marks missed ones, yellow marks
    false alarms; the color-coded mask is blended 50/50 with the input image
    and captioned with the sample id.

    :param input: Batch with 'features', 'targets' and 'image_id' entries.
    :param output: Batch with 'logits' predictions.
    :param mean: Mean used during normalization.
    :param std: Std used during normalization.
    :return: List of rendered images.
    """
    rendered = []
    samples = zip(input['features'], input['targets'], input['image_id'], output['logits'])
    for img_t, tgt_t, sample_id, logit_t in samples:
        img = rgb_image_from_tensor(img_t, mean, std)
        gt = to_numpy(tgt_t).squeeze(0) > 0
        pred = to_numpy(logit_t).squeeze(0) > 0

        color_coded = np.zeros_like(img)
        color_coded[gt & pred] = np.array([0, 250, 0], dtype=color_coded.dtype)  # hits -> green
        color_coded[gt & ~pred] = np.array([250, 0, 0], dtype=color_coded.dtype)  # misses -> red
        color_coded[~gt & pred] = np.array([250, 250, 0], dtype=color_coded.dtype)  # false alarms -> yellow

        blended = cv2.addWeighted(img, 0.5, color_coded, 0.5, 0, dtype=cv2.CV_8U)
        cv2.putText(blended, str(sample_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))
        rendered.append(blended)
    return rendered
def draw_semantic_segmentation_predictions(input: dict,
                                           output: dict,
                                           class_colors,
                                           mode='overlay',
                                           image_key='features',
                                           image_id_key='image_id',
                                           targets_key='targets',
                                           outputs_key='logits',
                                           mean=(0.485, 0.456, 0.406),
                                           std=(0.229, 0.224, 0.225)):
    """Render multi-class segmentation predictions.

    In 'overlay' mode the predicted class colors are alpha-blended onto the
    image and the sample id is drawn; in 'side-by-side' mode the image,
    a colorized ground truth, and a colorized prediction are stacked
    horizontally.

    :param input: Model input batch (images, targets, optional ids).
    :param output: Model output batch with per-class logits.
    :param class_colors: Color per class index.
    :param mode: 'overlay' or 'side-by-side'.
    :param image_key: Key for the image tensors.
    :param image_id_key: Key for sample ids, or None to skip captions.
    :param targets_key: Key for ground-truth label maps.
    :param outputs_key: Key for predicted logits.
    :param mean: Mean used during normalization.
    :param std: Std used during normalization.
    :return: List of rendered images.
    """
    assert mode in {'overlay', 'side-by-side'}

    def paint(canvas, labels):
        # Stamp each class color wherever labels equals that class index.
        for class_index, color in enumerate(class_colors):
            canvas[labels == class_index, :] = color
        return canvas

    ids = input[image_id_key] if image_id_key is not None else [None] * len(input[image_key])

    rendered = []
    for img_t, tgt_t, sample_id, logit_t in zip(input[image_key], input[targets_key], ids, output[outputs_key]):
        img = rgb_image_from_tensor(img_t, mean, std)
        pred_labels = to_numpy(logit_t).argmax(axis=0)
        true_labels = to_numpy(tgt_t)

        if mode == 'overlay':
            result = cv2.addWeighted(img, 0.5, paint(img.copy(), pred_labels), 0.5, 0, dtype=cv2.CV_8U)
            if sample_id is not None:
                cv2.putText(result, str(sample_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))
        elif mode == 'side-by-side':
            result = np.hstack((img,
                                paint(np.zeros_like(img), true_labels),
                                paint(np.zeros_like(img), pred_labels)))
        else:
            # Unreachable under normal execution (assert above), kept as a
            # safety net when asserts are stripped with -O.
            raise ValueError(mode)

        rendered.append(result)
    return rendered
def draw_regression_predictions(input: dict,
                                output: dict,
                                class_names,
                                image_key='image',
                                image_id_key='image_id',
                                targets_key='targets',
                                outputs_key='regression',
                                unsupervised_label=None,
                                mean=(0.485, 0.456, 0.406),
                                std=(0.229, 0.224, 0.225)):
    """Render regression predictions as captioned images.

    Draws the sample id, the ground-truth class (or 'Unlabeled' when the
    target equals ``unsupervised_label``), and the predicted class obtained
    by discretizing the regression output. If the output batch contains an
    'stn' entry, that view is shown next to the input image.

    :param input: Model input batch (images, targets, ids).
    :param output: Model output batch with regression values.
    :param class_names: Human-readable name per class index.
    :param image_key: Key for the image tensors.
    :param image_id_key: Key for sample ids.
    :param targets_key: Key for ground-truth labels.
    :param outputs_key: Key for regression predictions.
    :param unsupervised_label: Target value marking unlabeled samples.
    :param mean: Mean used during normalization.
    :param std: Std used during normalization.
    :return: List of rendered images.
    """
    rendered = []
    samples = zip(input[image_key], input[targets_key], input[image_id_key])
    for i, (img_t, tgt_t, image_id) in enumerate(samples):
        diagnosis = output[outputs_key][i]
        img = rgb_image_from_tensor(img_t, mean, std)
        target = int(to_numpy(tgt_t).squeeze(0))
        predicted_target = int(regression_to_class(diagnosis))

        overlay = img.copy()
        if 'stn' in output:
            # Show the spatial-transformer view side by side with the input.
            overlay = np.hstack((overlay, rgb_image_from_tensor(output['stn'][i], mean, std)))

        cv2.putText(overlay, str(image_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))

        if target == unsupervised_label:
            caption = f'Unlabeled ({target})'
        else:
            caption = f'{class_names[target]} ({target})'
        cv2.putText(overlay, caption, (10, 30), cv2.FONT_HERSHEY_PLAIN, 1, (0, 250, 0))

        cv2.putText(overlay,
                    f'{class_names[predicted_target]} ({predicted_target}/{float(diagnosis)})',
                    (10, 45), cv2.FONT_HERSHEY_PLAIN, 1, (0, 250, 250))

        rendered.append(overlay)
    return rendered
def visualize_canny_predictions(input, output, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Overlay predicted edges (red) and ground-truth edges (green).

    Colors are accumulated, so pixels that are both predicted and true show
    as yellow; the painted mask is blended 50/50 with the input image.

    :param input: Batch with 'features' and 'targets' entries.
    :param output: Batch with 'logits' predictions.
    :param mean: Mean used during normalization.
    :param std: Std used during normalization.
    :return: List of rendered images.
    """
    rendered = []
    for img_t, tgt_t, logit_t in zip(input['features'], input['targets'], output['logits']):
        img = rgb_image_from_tensor(img_t, mean, std)
        gt = to_numpy(tgt_t).squeeze(0)
        prob = to_numpy(logit_t.sigmoid()).squeeze(0)

        painted = np.zeros_like(img)
        # += (not =) so overlapping prediction and target sum to yellow.
        painted[prob > 0.5] += np.array([255, 0, 0], dtype=painted.dtype)
        painted[gt > 0] += np.array([0, 255, 0], dtype=painted.dtype)

        rendered.append(cv2.addWeighted(img, 0.5, painted, 0.5, 0, dtype=cv2.CV_8U))
    return rendered
def draw_classification_predictions(input: dict,
                                    output: dict,
                                    class_names,
                                    image_key='image',
                                    image_id_key='image_id',
                                    targets_key='targets',
                                    outputs_key='logits',
                                    mean=(0.485, 0.456, 0.406),
                                    std=(0.229, 0.224, 0.225)):
    """Render classification predictions as captioned images.

    Each image is captioned with its id, the ground-truth class name (or
    'Unlabeled' for UNLABELED_CLASS targets) in green, and the predicted
    class name — green when correct, red when wrong.

    :param input: Model input batch (images, targets, ids).
    :param output: Model output batch with class logits.
    :param class_names: Human-readable name per class index.
    :param image_key: Key for the image tensors.
    :param image_id_key: Key for sample ids.
    :param targets_key: Key for ground-truth labels.
    :param outputs_key: Key for predicted logits.
    :param mean: Mean used during normalization.
    :param std: Std used during normalization.
    :return: List of rendered images.
    """
    rendered = []
    samples = zip(input[image_key], input[targets_key], input[image_id_key], output[outputs_key])
    for img_t, tgt_t, image_id, logit_t in samples:
        img = rgb_image_from_tensor(img_t, mean, std)
        target = int(to_numpy(tgt_t).squeeze(0))

        # Single-logit heads are thresholded at 0; multi-class heads use argmax.
        if logit_t.size(0) == 1:
            predicted = int(to_numpy(logit_t).squeeze(0) > 0)
        else:
            predicted = np.argmax(to_numpy(logit_t))

        target_name = 'Unlabeled' if target == UNLABELED_CLASS else class_names[target]

        overlay = img.copy()
        cv2.putText(overlay, str(image_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))
        cv2.putText(overlay, target_name, (10, 30), cv2.FONT_HERSHEY_PLAIN, 1, (0, 250, 0))
        prediction_color = (0, 250, 0) if target == predicted else (250, 0, 0)
        cv2.putText(overlay, class_names[predicted], (10, 45), cv2.FONT_HERSHEY_PLAIN, 1, prediction_color)

        rendered.append(overlay)
    return rendered
def visualize_predictions(input: dict, output: dict, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Blend predicted class colors (from the global COLORS palette) onto images.

    :param input: Batch with 'features', 'targets' and 'image_id' entries.
    :param output: Batch with per-class 'logits' predictions.
    :param mean: Mean used during normalization.
    :param std: Std used during normalization.
    :return: List of rendered images, captioned with their sample ids.
    """
    images = []
    for image, target, image_id, logits in zip(input['features'], input['targets'],
                                               input['image_id'], output['logits']):
        image = rgb_image_from_tensor(image, mean, std)
        logits = to_numpy(logits).argmax(axis=0)

        # Paint class colors into a copy and blend it with the untouched image.
        # The original painted into `image` and blended against the copy; with
        # equal 0.5 weights the output is identical, but the inverted naming
        # was misleading and inconsistent with the sibling
        # draw_semantic_segmentation_predictions.
        overlay = image.copy()
        for class_index, class_color in enumerate(COLORS):
            overlay[logits == class_index, :] = class_color
        overlay = cv2.addWeighted(image, 0.5, overlay, 0.5, 0, dtype=cv2.CV_8U)

        cv2.putText(overlay, str(image_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))
        images.append(overlay)
    return images
def test_tiles_split_merge_non_dividable_cuda():
    """Round-trip a non-tile-dividable image through GPU split/merge.

    The image dimensions (5632x5120) are deliberately not multiples of the
    1280 tile size/step, exercising ImageSlicer's padding and crop-back path.
    """
    # Bug fix: np.random.random() yields floats in [0, 1), so the original
    # .astype(np.uint8) truncated virtually every pixel to 0 and the final
    # equality check compared all-zero images — a vacuous test. Generate real
    # 8-bit content instead.
    image = np.random.randint(0, 256, (5632, 5120, 3), dtype=np.uint8)
    tiler = ImageSlicer(image.shape, tile_size=(1280, 1280), tile_step=(1280, 1280), weight='mean')
    tiles = tiler.split(image)

    merger = CudaTileMerger(tiler.target_shape, channels=image.shape[2], weight=tiler.weight)
    for tile, coordinates in zip(tiles, tiler.crops):
        # Integrate as batch of size 1
        merger.integrate_batch(tensor_from_rgb_image(tile).unsqueeze(0).float().cuda(), [coordinates])

    merged = merger.merge()
    merged = rgb_image_from_tensor(merged, mean=0, std=1, max_pixel_value=1)
    # (sic) "orignal" is the project API's spelling; not renamed here.
    merged = tiler.crop_to_orignal_size(merged)

    np.testing.assert_equal(merged, image)
def draw_inria_predictions(
    input: dict,
    output: dict,
    inputs_to_labels: Callable,
    outputs_to_labels: Callable,
    image_key="features",
    image_id_key: Optional[str] = "image_id",
    targets_key="targets",
    outputs_key="logits",
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    max_images=None,
    image_format: Union[str, Callable] = "bgr",
) -> List[np.ndarray]:
    """
    Render visualization of model's prediction for binary segmentation problem.
    This function draws a color-coded overlay on top of the image, with color codes meaning:
    - green: True positives
    - red: False-negatives
    - yellow: False-positives

    Below the overlay, extra panels are stacked when the corresponding keys
    are present in `output`: a color-coded offset map (OUTPUT_OFFSET_KEY) and
    grayscale deep-supervision probability maps (OUTPUT_MASK_{2,4,8,16,32}_KEY).

    :param input: Input batch (model's input batch)
    :param output: Output batch (model predictions)
    :param inputs_to_labels: Callable converting `input[targets_key]` to a boolean-castable mask batch
    :param outputs_to_labels: Callable converting `output[outputs_key]` to a boolean-castable mask batch
    :param image_key: Key for getting image
    :param image_id_key: Key for getting image id/fname
    :param targets_key: Key for getting ground-truth mask
    :param outputs_key: Key for getting model logits for predicted mask
    :param mean: Mean vector used during normalization
    :param std: Std vector used during normalization
    :param max_images: Maximum number of images to visualize from batch
        (If you have huge batch, saving hundreds of images may make TensorBoard slow)
    :param image_format: Source format of the image tensor to convert to RGB representation.
        Can be string ("gray", "bgr") or function `convert(np.ndarray)->np.ndarray`;
        any other string leaves the image unchanged.
    :return: List of images
    """
    images = []
    num_samples = len(input[image_key])
    if max_images is not None:
        num_samples = min(num_samples, max_images)

    # Labels are computed for the whole batch up-front via the caller-supplied converters.
    true_masks = to_numpy(inputs_to_labels(input[targets_key])).astype(bool)
    pred_masks = to_numpy(outputs_to_labels(output[outputs_key])).astype(bool)

    for i in range(num_samples):
        image = rgb_image_from_tensor(input[image_key][i], mean, std)
        if image_format == "bgr":
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        elif image_format == "gray":
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        elif hasattr(image_format, "__call__"):
            image = image_format(image)

        overlay = image.copy()
        true_mask = true_masks[i]
        pred_mask = pred_masks[i]

        overlay[true_mask & pred_mask] = np.array(
            [0, 250, 0], dtype=overlay.dtype
        )  # Correct predictions (Hits) painted with green
        overlay[true_mask & ~pred_mask] = np.array(
            [250, 0, 0], dtype=overlay.dtype)  # Misses painted with red
        overlay[~true_mask & pred_mask] = np.array(
            [250, 250, 0], dtype=overlay.dtype)  # False alarm painted with yellow

        overlay = cv2.addWeighted(image, 0.5, overlay, 0.5, 0, dtype=cv2.CV_8U)

        if OUTPUT_OFFSET_KEY in output:
            # Visualize the 2-channel offset prediction as a color map:
            # positive/negative x and y components each get their own color,
            # scaled by 32 and clipped to the displayable range.
            # NOTE(review): assumes offset has 2 leading channels (x, y) — confirm.
            offset = to_numpy(output[OUTPUT_OFFSET_KEY][i]) * 32
            offset = np.expand_dims(offset, -1)
            x = offset[0, ...].clip(min=0, max=1) * np.array([255, 0, 0]) + (
                -offset[0, ...]).clip(min=0, max=1) * np.array([0, 0, 255])
            y = offset[1, ...].clip(min=0, max=1) * np.array([255, 0, 255]) + (
                -offset[1, ...]).clip(min=0, max=1) * np.array([0, 255, 0])
            offset = (x + y).clip(0, 255).astype(np.uint8)
            offset = cv2.resize(offset, (image.shape[1], image.shape[0]))
            overlay = np.row_stack([overlay, offset])

        # Deep-supervision outputs at strides 2..32, stacked below as grayscale panels.
        dsv_inputs = [
            OUTPUT_MASK_2_KEY, OUTPUT_MASK_4_KEY, OUTPUT_MASK_8_KEY,
            OUTPUT_MASK_16_KEY, OUTPUT_MASK_32_KEY
        ]
        for dsv_input_key in dsv_inputs:
            if dsv_input_key in output:
                dsv_p = to_numpy(output[dsv_input_key][i].detach().float().sigmoid().squeeze(0))
                dsv_p = cv2.resize((dsv_p * 255).astype(np.uint8),
                                   (image.shape[1], image.shape[0]))
                dsv_p = cv2.cvtColor(dsv_p, cv2.COLOR_GRAY2RGB)
                overlay = np.row_stack([overlay, dsv_p])

        if image_id_key is not None and image_id_key in input:
            image_id = input[image_id_key][i]
            cv2.putText(overlay, str(image_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))

        images.append(overlay)
    return images
def draw_predictions(input: dict, output: dict, mean=0.0, std=1.0, max_images=16):
    """Render steganalysis predictions with a caption header per sample.

    Each rendered image carries a 40 px header showing the sample id plus
    true/predicted modification type and flag. When modification masks are
    present in both batches, a hit(green)/miss(red)/false-alarm(yellow)
    mask panel is appended to the right.

    :param input: Model input batch (images, ids, true type/flag, optional mask).
    :param output: Model output batch (predicted type/flag, optional mask).
    :param mean: Mean used during normalization.
    :param std: Std used during normalization.
    :param max_images: Cap on the number of rendered samples.
    :return: List of BGR images, at most ``max_images`` of them.
    :raises KeyError: If the batch contains no recognized image tensor.
    """
    images = []
    num_images = len(input[INPUT_IMAGE_ID_KEY])
    for i in range(num_images):
        image_id = input[INPUT_IMAGE_ID_KEY][i]

        if INPUT_IMAGE_KEY in input:
            image = rgb_image_from_tensor(input[INPUT_IMAGE_KEY][i], mean, std, max_pixel_value=1)
        elif INPUT_FEATURES_JPEG_FLOAT in input:
            image = rgb_image_from_tensor(input[INPUT_FEATURES_JPEG_FLOAT][i], mean, std, max_pixel_value=1)
        else:
            # Fail fast: the original fell through with `image` unbound and
            # crashed below with a confusing NameError.
            raise KeyError(f"Input batch contains neither {INPUT_IMAGE_KEY} nor {INPUT_FEATURES_JPEG_FLOAT}")
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        overlay = image.copy()

        true_type = int(input[INPUT_TRUE_MODIFICATION_TYPE][i])
        true_flag = float(input[INPUT_TRUE_MODIFICATION_FLAG][i])
        pred_type = int(output[OUTPUT_PRED_MODIFICATION_TYPE][i].argmax())
        pred_flag = float(output[OUTPUT_PRED_MODIFICATION_FLAG][i].sigmoid())

        # Dark-gray caption strip stacked above the image.
        header = np.zeros((40, overlay.shape[1], 3), dtype=np.uint8) + 40
        cv2.putText(header, str(image_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))
        cv2.putText(
            header,
            f"true_type={true_type}/pred_type={pred_type} true_flag={true_flag:.2f}/pred_flag={pred_flag:.2f}",
            (10, 30),
            cv2.FONT_HERSHEY_PLAIN,
            1,
            (250, 250, 250),
        )
        overlay = np.row_stack([header, overlay])

        if INPUT_TRUE_MODIFICATION_MASK in input and OUTPUT_PRED_MODIFICATION_MASK in output:
            true_mask = to_numpy(input[INPUT_TRUE_MODIFICATION_MASK][i, 0] > 0)
            pred_mask = to_numpy(output[OUTPUT_PRED_MODIFICATION_MASK][i, 0] > 0)

            mask_overlay = image.copy()
            mask_overlay[true_mask & pred_mask] = np.array(
                [0, 250, 0], dtype=mask_overlay.dtype
            )  # Correct predictions (Hits) painted with green
            mask_overlay[true_mask & ~pred_mask] = np.array(
                [250, 0, 0], dtype=mask_overlay.dtype)  # Misses painted with red
            mask_overlay[~true_mask & pred_mask] = np.array(
                [250, 250, 0], dtype=mask_overlay.dtype)  # False alarm painted with yellow
            mask_overlay = cv2.addWeighted(image, 0.5, mask_overlay, 0.5, 0, dtype=cv2.CV_8U)
            mask_overlay = np.row_stack([header, mask_overlay])
            overlay = np.column_stack([overlay, mask_overlay])

        images.append(overlay)
        if len(images) >= max_images:
            break
    return images