def crop_object_predictions(
    image: np.ndarray,
    object_prediction_list,
    output_dir: str = "",
    file_name: str = "prediction_visual",
):
    """
    Crops the predicted bounding boxes from the source image and exports them to the output folder.

    Arguments:
        object_prediction_list: a list of prediction.ObjectPrediction
        output_dir: directory for resulting crops to be exported
        file_name: exported files will be saved as: output_dir+file_name+"_box"+ind+"_class"+category_id+".png"
    """
    # create output folder if not present
    create_dir(output_dir)
    # add bbox and mask to image if present
    for ind, object_prediction in enumerate(object_prediction_list):
        # deepcopy object_prediction_list so that original is not altered
        object_prediction = object_prediction.deepcopy()

        bbox = object_prediction.bbox.to_voc_bbox()
        category_id = object_prediction.category.id

        # crop detections
        # deepcopy crops so that original is not altered
        cropped_img = copy.deepcopy(
            image[
                int(bbox[1]): int(bbox[3]),
                int(bbox[0]): int(bbox[2]),
                :,
            ]
        )
        save_path = os.path.join(
            output_dir,
            file_name + "_box" + str(ind) + "_class" + str(category_id) + ".png",
        )
        cv2.imwrite(save_path, cv2.cvtColor(cropped_img, cv2.COLOR_RGB2BGR))
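
# Illustrative usage sketch (not part of the original source). It builds a synthetic
# RGB image and a single hand-made ObjectPrediction; the ObjectPrediction constructor
# arguments used below (bbox, category_id, category_name, score) and the import path
# are assumptions about sahi's API and may differ between versions.
def _demo_crop_object_predictions():
    import numpy as np
    from sahi.prediction import ObjectPrediction  # assumed import path

    demo_image = np.zeros((480, 640, 3), dtype=np.uint8)  # synthetic RGB image
    demo_predictions = [
        ObjectPrediction(bbox=[100, 120, 300, 360], category_id=0, category_name="car", score=0.9),
    ]
    # exports outputs/crops/demo_box0_class0.png
    crop_object_predictions(
        image=demo_image,
        object_prediction_list=demo_predictions,
        output_dir="outputs/crops/",
        file_name="demo",
    )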
def download_mmdet_cascade_mask_rcnn_model():
    import urllib.request
    from os import path

    from sahi.utils.file import create_dir

    create_dir("tests/data/models/mmdet_cascade_mask_rcnn/")

    if not path.exists(mmdet_cascade_mask_rcnn_model_path):
        urllib.request.urlretrieve(
            mmdet_cascade_mask_rcnn_model_url,
            mmdet_cascade_mask_rcnn_model_path,
        )
def visualize_object_predictions(
    image: np.ndarray,
    object_prediction_list,
    rect_th: float = 1,
    text_size: float = 0.3,
    text_th: float = 1,
    color: tuple = (0, 0, 0),
    output_dir: Optional[str] = None,
    file_name: str = "prediction_visual",
    export_format: str = "png",
):
    """
    Visualizes prediction category names and bounding boxes over the source image
    and exports the result to the output folder.

    Arguments:
        object_prediction_list: a list of prediction.ObjectPrediction
        rect_th: rectangle thickness
        text_size: size of the category name over box
        text_th: text thickness
        color: annotation color in the form: (0, 255, 0)
        output_dir: directory for resulting visualization to be exported
        file_name: exported file will be saved as: output_dir+file_name+"."+export_format
        export_format: can be specified as 'jpg' or 'png'
    """
    elapsed_time = time.time()
    # deepcopy image so that original is not altered
    image = copy.deepcopy(image)
    # select random color if not specified
    if color == (0, 0, 0):
        color = select_random_color()
    # add bbox and mask to image if present
    for object_prediction in object_prediction_list:
        # deepcopy object_prediction_list so that original is not altered
        object_prediction = object_prediction.deepcopy()

        bbox = object_prediction.bbox.to_voc_bbox()
        category_name = object_prediction.category.name
        score = object_prediction.score.value

        # visualize masks if present
        if object_prediction.mask is not None:
            # get mask (object_prediction was already deepcopied above)
            mask = object_prediction.mask.bool_mask
            # draw mask
            rgb_mask = apply_color_mask(mask, color)
            image = cv2.addWeighted(image, 1, rgb_mask, 0.4, 0)

        # visualize boxes
        cv2.rectangle(
            image,
            tuple(bbox[0:2]),
            tuple(bbox[2:4]),
            color=color,
            thickness=rect_th,
        )
        # arrange bounding box text location
        if bbox[1] - 5 > 5:
            bbox[1] -= 5
        else:
            bbox[1] += 5
        # add bounding box text
        label = "%s %.2f" % (category_name, score)
        cv2.putText(
            image,
            label,
            tuple(bbox[0:2]),
            cv2.FONT_HERSHEY_SIMPLEX,
            text_size,
            color,
            thickness=text_th,
        )
    if output_dir:
        # create output folder if not present
        create_dir(output_dir)
        # save inference result
        save_path = os.path.join(output_dir, file_name + "." + export_format)
        cv2.imwrite(save_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
    elapsed_time = time.time() - elapsed_time
    return {"image": image, "elapsed_time": elapsed_time}
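
# Illustrative usage sketch (not part of the original source). As in the crop demo
# above, the ObjectPrediction constructor arguments are assumptions about sahi's API;
# in practice object_prediction_list usually comes from sahi's prediction utilities.
def _demo_visualize_object_predictions():
    import numpy as np
    from sahi.prediction import ObjectPrediction  # assumed import path

    source_image = np.full((480, 640, 3), 255, dtype=np.uint8)  # blank white RGB image
    predictions = [
        ObjectPrediction(bbox=[50, 60, 200, 220], category_id=1, category_name="person", score=0.82),
    ]
    visual = visualize_object_predictions(
        image=source_image,
        object_prediction_list=predictions,
        rect_th=2,
        text_size=0.5,
        output_dir="outputs/",
        file_name="demo_visual",
        export_format="jpg",
    )
    print("visualization took", visual["elapsed_time"], "seconds")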
def visualize_prediction(
    image: np.ndarray,
    boxes: List[List],
    classes: List[str],
    masks: Optional[List[np.ndarray]] = None,
    rect_th: float = 3,
    text_size: float = 3,
    text_th: float = 3,
    color: tuple = (0, 0, 0),
    output_dir: Optional[str] = None,
    file_name: Optional[str] = "prediction_visual",
):
    """
    Visualizes prediction classes and bounding boxes over the source image
    and exports the result to the output folder.
    """
    elapsed_time = time.time()
    # deepcopy image so that original is not altered
    image = copy.deepcopy(image)
    # select random color if not specified
    if color == (0, 0, 0):
        color = select_random_color()
    # add bbox and mask to image if present
    for i in range(len(boxes)):
        # deepcopy box so that original is not altered
        box = copy.deepcopy(boxes[i])
        class_ = classes[i]

        # visualize masks if present
        if masks is not None:
            # deepcopy mask so that original is not altered
            mask = copy.deepcopy(masks[i])
            # draw mask
            rgb_mask = apply_color_mask(np.squeeze(mask), color)
            image = cv2.addWeighted(image, 1, rgb_mask, 0.7, 0)

        # visualize boxes
        cv2.rectangle(
            image,
            tuple(box[0:2]),
            tuple(box[2:4]),
            color=color,
            thickness=rect_th,
        )
        # arrange bounding box text location
        if box[1] - 10 > 10:
            box[1] -= 10
        else:
            box[1] += 10
        # add bounding box text
        cv2.putText(
            image,
            class_,
            tuple(box[0:2]),
            cv2.FONT_HERSHEY_SIMPLEX,
            text_size,
            color,
            thickness=text_th,
        )
    if output_dir:
        # create output folder if not present
        create_dir(output_dir)
        # save inference result
        save_path = os.path.join(output_dir, file_name + ".png")
        cv2.imwrite(save_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
    elapsed_time = time.time() - elapsed_time
    return {"image": image, "elapsed_time": elapsed_time}
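
# Illustrative usage sketch (not part of the original source): visualize_prediction
# operates on raw box coordinates and class names, so it can be exercised with plain
# numpy inputs; the box values and output paths below are arbitrary placeholders.
def _demo_visualize_prediction():
    import numpy as np

    canvas = np.zeros((400, 400, 3), dtype=np.uint8)  # synthetic RGB canvas
    demo_result = visualize_prediction(
        image=canvas,
        boxes=[[40, 50, 180, 220], [200, 100, 350, 300]],
        classes=["car", "person"],
        rect_th=2,
        text_size=1,
        text_th=2,
        output_dir="outputs/",
        file_name="raw_visual",
    )
    print("elapsed:", demo_result["elapsed_time"], "seconds")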
def slice_image(
    image: Union[str, Image.Image],
    coco_annotation_list: Optional[List[CocoAnnotation]] = None,
    output_file_name: Optional[str] = None,
    output_dir: Optional[str] = None,
    slice_height: int = 512,
    slice_width: int = 512,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    min_area_ratio: float = 0.1,
    out_ext: Optional[str] = None,
    verbose: bool = False,
) -> SliceImageResult:
    """Slice a large image into smaller windows. If output_file_name is given, export the sliced images.

    Args:
        image (str or PIL.Image): File path of image or Pillow Image to be sliced.
        coco_annotation_list (List[CocoAnnotation], optional): List of CocoAnnotation objects.
        output_file_name (str, optional): Root name of output files (coordinates will be appended to this).
        output_dir (str, optional): Output directory.
        slice_height (int): Height of each slice. Default 512.
        slice_width (int): Width of each slice. Default 512.
        overlap_height_ratio (float): Fractional overlap in height of each slice (e.g. an overlap of 0.2 for a
            slice of size 100 yields an overlap of 20 pixels). Default 0.2.
        overlap_width_ratio (float): Fractional overlap in width of each slice (e.g. an overlap of 0.2 for a
            slice of size 100 yields an overlap of 20 pixels). Default 0.2.
        min_area_ratio (float): If the cropped annotation area to original annotation ratio is smaller than this
            value, the annotation is filtered out. Default 0.1.
        out_ext (str, optional): Extension of saved images. Default is the original suffix.
        verbose (bool, optional): Switch to print relevant values to screen. Default False.

    Returns:
        sliced_image_result (SliceImageResult):
            sliced_image_list: list of SlicedImage
            image_dir (str): Directory of the sliced image exports.
            original_image_size (list of int): Size of the unsliced original image in [height, width].
    """

    # define verboseprint
    verboseprint = print if verbose else lambda *a, **k: None

    def _export_single_slice(image: np.ndarray, output_dir: str, slice_file_name: str):
        image_pil = read_image_as_pil(image)
        slice_file_path = str(Path(output_dir) / slice_file_name)
        # export sliced image
        image_pil.save(slice_file_path)
        verboseprint("sliced image path:", slice_file_path)

    # create outdir if not present
    if output_dir:
        create_dir(output_dir)

    # read image
    image_pil = read_image_as_pil(image)
    verboseprint("image.shape:", image_pil.size)

    image_width, image_height = image_pil.size
    assert image_width != 0 and image_height != 0, f"invalid image size: {image_pil.size} for 'slice_image'."
    slice_bboxes = get_slice_bboxes(
        image_height=image_height,
        image_width=image_width,
        slice_height=slice_height,
        slice_width=slice_width,
        overlap_height_ratio=overlap_height_ratio,
        overlap_width_ratio=overlap_width_ratio,
    )

    t0 = time.time()
    n_ims = 0

    # init images and annotations lists
    sliced_image_result = SliceImageResult(original_image_size=[image_height, image_width], image_dir=output_dir)

    # iterate over slices
    for slice_bbox in slice_bboxes:
        n_ims += 1

        # extract image
        image_pil_slice = image_pil.crop(slice_bbox)

        # process annotations if coco_annotation_list is given
        if coco_annotation_list is not None:
            sliced_coco_annotation_list = process_coco_annotations(coco_annotation_list, slice_bbox, min_area_ratio)

        # set image file suffixes
        slice_suffixes = "_".join(map(str, slice_bbox))
        if out_ext:
            suffix = out_ext
        else:
            try:
                suffix = Path(image_pil.filename).suffix
            except AttributeError:
                suffix = ".jpg"

        # set image file name and path
        slice_file_name = f"{output_file_name}_{slice_suffixes}{suffix}"

        # create coco image
        slice_width = slice_bbox[2] - slice_bbox[0]
        slice_height = slice_bbox[3] - slice_bbox[1]
        coco_image = CocoImage(file_name=slice_file_name, height=slice_height, width=slice_width)

        # append coco annotations (if present) to coco image
        if coco_annotation_list:
            for coco_annotation in sliced_coco_annotation_list:
                coco_image.add_annotation(coco_annotation)

        # create sliced image and append to sliced_image_result
        sliced_image = SlicedImage(
            image=np.asarray(image_pil_slice),
            coco_image=coco_image,
            starting_pixel=[slice_bbox[0], slice_bbox[1]],
        )
        sliced_image_result.add_sliced_image(sliced_image)

    # export slices if output directory is provided
    if output_file_name and output_dir:
        conc_exec = concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
        conc_exec.map(
            _export_single_slice,
            sliced_image_result.images,
            [output_dir] * len(sliced_image_result),
            sliced_image_result.filenames,
        )

    verboseprint(
        "Num slices:",
        n_ims,
        "slice_height",
        slice_height,
        "slice_width",
        slice_width,
    )
    verboseprint("Time to slice", image, time.time() - t0, "seconds")

    return sliced_image_result
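
# Illustrative usage sketch (not part of the original source): slices a local image
# into 512x512 windows with 20% overlap and writes the crops to disk. The image path
# and output directory are placeholders.
def _demo_slice_image():
    slice_result = slice_image(
        image="demo.jpg",
        output_file_name="demo_sliced",
        output_dir="outputs/slices/",
        slice_height=512,
        slice_width=512,
        overlap_height_ratio=0.2,
        overlap_width_ratio=0.2,
        verbose=True,
    )
    print("number of slices:", len(slice_result))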
def slice_image(
    image,
    coco_annotation_list=None,
    output_file_name: str = "",
    output_dir: str = "",
    slice_height: int = 256,
    slice_width: int = 256,
    max_allowed_zeros_ratio: float = 0.2,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    slice_sep: str = "_",
    out_ext: str = ".png",
    verbose: bool = False,
) -> (SliceImageResult, int):
    """
    Slice a large image into smaller windows. If output_file_name is given, export sliced images.

    Args:
        image: str or np.ndarray
            Location of image or numpy image matrix to slice.
        coco_annotation_list: list of CocoAnnotation
            List of CocoAnnotation objects that belong to given COCO image.
        output_file_name: str
            Root name of output files (coordinates will be appended to this).
        output_dir: str
            Output directory.
        slice_height: int
            Height of each slice. Defaults to ``256``.
        slice_width: int
            Width of each slice. Defaults to ``256``.
        max_allowed_zeros_ratio: float
            Maximum fraction of window that is allowed to be zeros or null.
            Defaults to ``0.2``.
        overlap_height_ratio: float
            Fractional overlap in height of each window (e.g. an overlap of 0.2 for a
            window of size 256 yields an overlap of 51 pixels). Defaults to ``0.2``.
        overlap_width_ratio: float
            Fractional overlap in width of each window (e.g. an overlap of 0.2 for a
            window of size 256 yields an overlap of 51 pixels). Defaults to ``0.2``.
        slice_sep: str
            Character used to separate outname from coordinates in the saved windows.
            Defaults to ``_``.
        out_ext: str
            Extension of saved images. Defaults to ``.png``.
        verbose: bool
            Switch to print relevant values to screen. Defaults to ``False``.

    Returns:
        sliced_image_result: SliceImageResult:
            sliced_image_list: list of SlicedImage
            image_dir: str
                Directory of the sliced image exports.
            original_image_size: list of int
                Size of the unsliced original image in [height, width].
        num_total_invalid_segmentation: int
            Number of invalid segmentation annotations.
    """
    # define verboseprint
    verboseprint = print if verbose else lambda *a, **k: None

    # create outdir if not present
    if output_dir:
        create_dir(output_dir)

    # read image if str image path is provided
    if isinstance(image, str):
        # read in image, cv2 fails on large files
        verboseprint("Read in image:", image)
        image0, use_cv2 = read_large_image(image)
    else:
        image0 = image
        use_cv2 = True
    verboseprint("image.shape:", image0.shape)

    if len(out_ext) == 0:
        ext = "." + image.split(".")[-1]
    else:
        ext = out_ext

    win_h, win_w = image0.shape[:2]

    # if slice sizes are larger than the image, pad the edges
    pad = 0
    if slice_height > win_h:
        pad = slice_height - win_h
    if slice_width > win_w:
        pad = max(pad, slice_width - win_w)
    # pad the edge of the image with black pixels
    if pad > 0:
        border_color = (0, 0, 0)
        image0 = cv2.copyMakeBorder(image0, 0, pad, 0, pad, cv2.BORDER_CONSTANT, value=border_color)

    win_size = slice_height * slice_width

    t0 = time.time()
    n_ims = 0
    n_ims_nonull = 0
    dx = int((1.0 - overlap_width_ratio) * slice_width)
    dy = int((1.0 - overlap_height_ratio) * slice_height)

    # init images and annotations lists
    sliced_image_result = SliceImageResult(original_image_size=[win_h, win_w], image_dir=output_dir)
    num_total_invalid_segmentation = 0

    # iterate over slices
    for y0 in range(0, image0.shape[0], dy):
        for x0 in range(0, image0.shape[1], dx):
            n_ims += 1

            if (n_ims % 50) == 0:
                verboseprint(n_ims)

            # make sure we don't have a tiny image on the edge
            if y0 + slice_height > image0.shape[0]:
                y = image0.shape[0] - slice_height
            else:
                y = y0
            if x0 + slice_width > image0.shape[1]:
                x = image0.shape[1] - slice_width
            else:
                x = x0

            # extract image
            window_c = image0[y:y + slice_height, x:x + slice_width]

            # process annotations if coco_annotation_list is given
            if coco_annotation_list:
                slice_box = [x, y, slice_width, slice_height]
                (
                    sliced_coco_annotation_list,
                    num_invalid_segmentation,
                ) = slice_coco_annotations_by_box(coco_annotation_list, box=slice_box)
                num_total_invalid_segmentation += num_invalid_segmentation

            # get black and white image
            window = cv2.cvtColor(window_c, cv2.COLOR_RGB2GRAY)

            # find threshold that's not black
            # https://opencv-python-tutroals.readthedocs.org/en/latest/py_tutorials/py_imgproc/py_thresholding/py_thresholding.html?highlight=threshold
            ret, thresh1 = cv2.threshold(window, 2, 255, cv2.THRESH_BINARY)
            non_zero_counts = cv2.countNonZero(thresh1)
            zero_counts = win_size - non_zero_counts
            zero_frac = float(zero_counts) / win_size
            # skip if image is mostly empty
            if zero_frac >= max_allowed_zeros_ratio:
                verboseprint("Zero frac too high at:", zero_frac)
                continue
            else:
                # save if out_name is given
                if output_file_name and output_dir:
                    outpath = os.path.join(
                        output_dir,
                        output_file_name
                        + slice_sep
                        + str(y)
                        + "_"
                        + str(x)
                        + "_"
                        + str(slice_height)
                        + "_"
                        + str(slice_width)
                        + "_"
                        + str(pad)
                        + "_"
                        + str(win_w)
                        + "_"
                        + str(win_h)
                        + ext,
                    )
                    verboseprint("outpath:", outpath)
                    # large images were read with skimage, save them with skimage;
                    # otherwise convert to bgr and save with cv2
                    if not use_cv2:
                        skimage.io.imsave(outpath, window_c)
                    else:
                        window_c = cv2.cvtColor(window_c, cv2.COLOR_RGB2BGR)
                        cv2.imwrite(outpath, window_c)
                    n_ims_nonull += 1
                    file_name = outpath.split(output_dir)[-1].replace(os.sep, "")
                else:
                    file_name = ""

                # create coco image
                coco_image = CocoImage(file_name=file_name, height=slice_height, width=slice_width)

                # append coco annotations (if present) to coco image
                if coco_annotation_list:
                    for coco_annotation in sliced_coco_annotation_list:
                        coco_image.add_annotation(coco_annotation)

                # create sliced image and append to sliced_image_result
                sliced_image = SlicedImage(image=window_c, coco_image=coco_image, starting_pixel=[x, y])
                sliced_image_result.add_sliced_image(sliced_image)

    verboseprint(
        "Num slices:",
        n_ims,
        "Num non-null slices:",
        n_ims_nonull,
        "slice_height",
        slice_height,
        "slice_width",
        slice_width,
    )
    verboseprint("Time to slice", image, time.time() - t0, "seconds")

    return (
        sliced_image_result,
        num_total_invalid_segmentation,
    )
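
# Illustrative usage sketch for the legacy variant above (not part of the original
# source): this version returns a (SliceImageResult, num_total_invalid_segmentation)
# tuple and skips mostly-black windows via max_allowed_zeros_ratio. Paths are placeholders.
def _demo_slice_image_legacy():
    sliced_result, num_invalid = slice_image(
        image="satellite_scene.png",
        output_file_name="scene_sliced",
        output_dir="outputs/legacy_slices/",
        slice_height=256,
        slice_width=256,
        max_allowed_zeros_ratio=0.2,
        verbose=True,
    )
    print("num invalid segmentations:", num_invalid)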