def add_bbox_and_area_to_coco(
    source_coco_path: str = "",
    target_coco_path: str = "",
    add_bbox: bool = True,
    add_area: bool = True,
) -> dict:
    """
    Takes single coco dataset file path, calculates and fills bbox and area
    fields of the annotations and exports the updated coco dict.

    Args:
        source_coco_path: Path of the coco file to be read.
        target_coco_path: Path the updated coco file is exported to.
        add_bbox: If True, (re)compute the ``bbox`` field of each annotation
            from its segmentation polygons.
        add_area: If True, (re)compute the ``area`` field of each annotation
            from its segmentation polygons.

    Returns:
        coco_dict : dict
            Updated coco dict
    """
    coco_dict = load_json(source_coco_path)
    # work on a deep copy so the freshly loaded dict is not mutated in place
    coco_dict = copy.deepcopy(coco_dict)

    annotations = coco_dict["annotations"]
    for ind, annotation in enumerate(annotations):
        # assign annotation bbox
        if add_bbox:
            # flatten all polygons of the segmentation into one flat
            # [x1, y1, x2, y2, ...] coordinate list
            coco_polygons: list = []
            for coco_polygon in annotation["segmentation"]:
                coco_polygons.extend(coco_polygon)
            # even indices hold x coordinates, odd indices hold y coordinates
            x_coords = coco_polygons[0::2]
            y_coords = coco_polygons[1::2]
            minx, miny = min(x_coords), min(y_coords)
            maxx, maxy = max(x_coords), max(y_coords)
            # COCO bbox format is [x, y, width, height]
            annotations[ind]["bbox"] = [
                int(minx),
                int(miny),
                int(maxx - minx),
                int(maxy - miny),
            ]
        # assign annotation area
        if add_area:
            # area comes from the polygon geometry, not the bbox
            shapely_multipolygon = get_shapely_multipolygon(
                coco_segmentation=annotation["segmentation"])
            annotations[ind]["area"] = shapely_multipolygon.area
    coco_dict["annotations"] = annotations

    save_json(coco_dict, target_coco_path)
    return coco_dict
def update_categories_from_file(desired_name2id: dict, coco_path: str, save_path: str) -> None:
    """
    Rearranges the category mapping of the COCO dictionary stored at
    coco_path according to desired_name2id and writes the result to
    save_path. Can also be used to filter out some of the categories.

    Arguments:
    ---------
        desired_name2id : dict
            {"human": 1, "car": 2, "big_vehicle": 3}
        coco_path : str
            "dirname/coco.json"
        save_path : str
            Path the updated coco file is written to.
    """
    # read the source coco dict, remap its categories, then export
    source_dict = load_json(coco_path)
    remapped_dict = update_categories(desired_name2id, source_dict)
    save_json(remapped_dict, save_path)
def merge_from_file(coco_path1: str, coco_path2: str, save_path: str):
    """
    Combines two coco formatted annotation files given their paths and
    saves the combined file to save_path.

    Arguments:
    ---------
        coco_path1 : str
            Path for the first coco file.
        coco_path2 : str
            Path for the second coco file.
        save_path : str
            "dirname/coco.json"
    """
    # load both coco dicts, merge them, and export the result
    first_dict = load_json(coco_path1)
    second_dict = load_json(coco_path2)
    combined_dict = merge(first_dict, second_dict)
    save_json(combined_dict, save_path)
def slice_coco(
    coco_annotation_file_path: str,
    image_dir: str,
    output_coco_annotation_file_name: str,
    output_dir: Optional[str] = None,
    ignore_negative_samples: bool = False,
    slice_height: int = 512,
    slice_width: int = 512,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    min_area_ratio: float = 0.1,
    out_ext: Optional[str] = None,
    verbose: bool = False,
) -> List[Union[Dict, str]]:
    """
    Slice large images given in a directory, into smaller windows. If out_name is given export
    sliced images and coco file.

    Args:
        coco_annotation_file_path (str): Location of the coco annotation file
        image_dir (str): Base directory for the images
        output_coco_annotation_file_name (str): File name of the exported coco
            datatset json.
        output_dir (str, optional): Output directory
        ignore_negative_samples (bool): If True, images without annotations
            are ignored. Defaults to False.
        slice_height (int): Height of each slice. Default 512.
        slice_width (int): Width of each slice. Default 512.
        overlap_height_ratio (float): Fractional overlap in height of each
            slice (e.g. an overlap of 0.2 for a slice of size 100 yields an
            overlap of 20 pixels). Default 0.2.
        overlap_width_ratio (float): Fractional overlap in width of each
            slice (e.g. an overlap of 0.2 for a slice of size 100 yields an
            overlap of 20 pixels). Default 0.2.
        min_area_ratio (float): If the cropped annotation area to original annotation
            ratio is smaller than this value, the annotation is filtered out. Default 0.1.
        out_ext (str, optional): Extension of saved images. Default is the
            original suffix.
        verbose (bool, optional): Switch to print relevant values to screen.
            Default 'False'.

    Returns:
        coco_dict: dict
            COCO dict for sliced images and annotations
        save_path: str
            Path to the saved coco file ("" when no file was exported)
    """
    # read coco file
    coco_dict: Dict = load_json(coco_annotation_file_path)
    # create Coco object so each image carries its own annotation list
    coco = Coco.from_coco_dict_or_path(coco_dict)
    # collect CocoImage objects produced for every slice of every image
    sliced_coco_images: List = []

    # iterate over images and slice each one into windows
    for coco_image in tqdm(coco.images):
        image_path: str = os.path.join(image_dir, coco_image.file_name)
        # slice image; coco_image.annotations provides the annotation list
        slice_image_result = slice_image(
            image=image_path,
            coco_annotation_list=coco_image.annotations,
            output_file_name=Path(coco_image.file_name).stem,
            output_dir=output_dir,
            slice_height=slice_height,
            slice_width=slice_width,
            overlap_height_ratio=overlap_height_ratio,
            overlap_width_ratio=overlap_width_ratio,
            min_area_ratio=min_area_ratio,
            out_ext=out_ext,
            verbose=verbose,
        )
        # append slice outputs
        sliced_coco_images.extend(slice_image_result.coco_images)

    # create coco dict for the sliced dataset
    coco_dict = create_coco_dict(
        sliced_coco_images,
        coco_dict["categories"],
        ignore_negative_samples=ignore_negative_samples,
    )
    # export only when both a file name and an output dir were supplied
    save_path = ""
    if output_coco_annotation_file_name and output_dir:
        save_path = os.path.join(
            output_dir, output_coco_annotation_file_name + "_coco.json")
        save_json(coco_dict, save_path)

    return coco_dict, save_path
# slice each split (e.g. train/val) of the dataset
for split_type in coco_dict_paths.keys():
    coco_annotation_file_path = coco_dict_paths[split_type]
    # derive the sliced coco file name from the source annotation file name
    sliced_coco_name = get_base_filename(coco_annotation_file_path)[0].replace(
        ".json", "_sliced_" + str(slice_size)
    )
    # slice images and annotations; the coco json is exported manually below,
    # so no output_coco_annotation_file_name is passed (slice_coco then
    # returns "" as its save path)
    coco_dict, _ = slice_coco(
        coco_annotation_file_path=coco_annotation_file_path,
        image_dir=image_dir,
        output_coco_annotation_file_name="",
        output_dir=sliced_output_dir,
        ignore_negative_samples=ignore_negative_samples,
        slice_height=slice_size,
        slice_width=slice_size,
        max_allowed_zeros_ratio=0.2,
        overlap_height_ratio=0.2,
        overlap_width_ratio=0.2,
        slice_sep="_",
        out_ext=".png",
        verbose=True,
    )
    output_sliced_coco_annotation_file_path = os.path.join(
        output_coco_annotation_directory, sliced_coco_name + ".json"
    )
    # save to the announced path (previously this saved to the empty string
    # returned by slice_coco, so the printed path was never written)
    save_json(coco_dict, output_sliced_coco_annotation_file_path)
    print(
        "Sliced",
        split_type,
        "coco file is saved to",
        output_sliced_coco_annotation_file_path,
    )
# slice coco dataset images and annotations
print("Slicing step is starting...")
for slice_size in slice_size_list:
    # folder that will hold the sliced images for this slice size
    output_images_folder_name = "{}_sliced_images_{}/".format(
        get_base_filename(args.coco_json_path)[1], slice_size
    )
    output_images_dir = os.path.join(output_dir, output_images_folder_name)
    # sliced coco json name derived from the source annotation file name
    sliced_coco_name = get_base_filename(args.coco_json_path)[0].replace(
        ".json", f"_sliced_{slice_size}"
    )
    # slice images + annotations; the coco json itself is exported below
    coco_dict, coco_path = slice_coco(
        coco_annotation_file_path=args.coco_json_path,
        image_dir=args.coco_image_dir,
        output_coco_annotation_file_name="",
        output_dir=output_images_dir,
        ignore_negative_samples=args.ignore_negative_samples,
        slice_height=slice_size,
        slice_width=slice_size,
        min_area_ratio=0.1,
        overlap_height_ratio=args.overlap_ratio,
        overlap_width_ratio=args.overlap_ratio,
        out_ext=".jpg",
        verbose=False,
    )
    output_coco_annotation_file_path = os.path.join(
        output_dir, sliced_coco_name + ".json"
    )
    save_json(coco_dict, output_coco_annotation_file_path)
    print(
        f"Sliced 'slice_size: {slice_size}' coco file is saved to",
        output_coco_annotation_file_path,
    )
def slice_coco(
    coco_annotation_file_path: str,
    image_dir: str,
    output_coco_annotation_file_name: str = "",
    output_dir: str = "",
    ignore_negative_samples: bool = True,
    slice_height: int = 256,
    slice_width: int = 256,
    max_allowed_zeros_ratio: float = 0.2,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    slice_sep: str = "_",
    out_ext: str = ".png",
    verbose: bool = False,
):
    """
    Slice large images given in a directory, into smaller windows. If out_name is given export
    sliced images and coco file.

    Args:
        coco_annotation_file_path: str
            Location of the coco annotation file
        image_dir: str
            Base diectory for the images
        output_coco_annotation_file_name : str
            Root name of the exported coco datatset file
        output_dir: str
            Output directory
        ignore_negative_samples: bool
            If True, images without annotations are ignored. Defaults to True.
        slice_height: int
            Height of each slice. Defaults to ``256``.
        slice_width: int
            Width of each slice. Defaults to ``256``.
        max_allowed_zeros_ratio: float
            Maximum fraction of window that is allowed to be zeros or null.
            Defaults to ``0.2``.
        overlap_height_ratio: float
            Fractional overlap in height of each window (e.g. an overlap of 0.2 for a window
            of size 256 yields an overlap of 51 pixels).
            Default to ``0.2``.
        overlap_width_ratio: float
            Fractional overlap in width of each window (e.g. an overlap of 0.2 for a window
            of size 256 yields an overlap of 51 pixels).
            Default to ``0.2``.
        slice_sep: str
            Character used to separate outname from coordinates in the saved
            windows. Defaults to ``_``.
        out_ext: str
            Extension of saved images. Defaults to ``.png``.
        verbose: bool
            Switch to print relevant values to screen.
            Defaults to ``False``

    Returns:
        coco_dict: dict
            COCO dict for sliced images and annotations
        save_path: str
            Path to the saved coco file ("" when no file was exported)
    """
    # define verboseprint
    verboseprint = print if verbose else lambda *a, **k: None

    # read coco file
    coco_dict = load_json(coco_annotation_file_path)
    # create coco_utils.Coco object
    coco = Coco(coco_dict)
    # init sliced coco_utils.CocoImage list
    sliced_coco_images = []
    num_total_invalid_segmentation = 0

    # iterate over images and slice
    for coco_image in tqdm(coco.images):
        # get image path
        image_path = os.path.join(image_dir, coco_image.file_name)
        # get coco_utils.CocoAnnotation list corresponding to selected coco_utils.CocoImage
        coco_annotation_list = coco_image.annotations
        # slice image
        # bug fix: forward slice_sep/out_ext instead of hardcoding "_"/".png",
        # which silently ignored both parameters
        # NOTE(review): verbose is intentionally not forwarded here, matching
        # previous behavior — slice_image stays quiet regardless of verbose
        slice_image_result, num_invalid_segmentation = slice_image(
            image=image_path,
            coco_annotation_list=coco_annotation_list,
            output_file_name=os.path.basename(coco_image.file_name),
            output_dir=output_dir,
            slice_height=slice_height,
            slice_width=slice_width,
            max_allowed_zeros_ratio=max_allowed_zeros_ratio,
            overlap_height_ratio=overlap_height_ratio,
            overlap_width_ratio=overlap_width_ratio,
            slice_sep=slice_sep,
            out_ext=out_ext,
            verbose=False,
        )
        num_total_invalid_segmentation += num_invalid_segmentation
        # append slice outputs
        sliced_coco_images.extend(slice_image_result.coco_images)

    # create and save coco dict
    coco_dict = create_coco_dict(
        sliced_coco_images,
        coco_dict["categories"],
        ignore_negative_samples=ignore_negative_samples,
    )
    save_path = ""
    if output_coco_annotation_file_name and output_dir:
        save_path = os.path.join(
            output_dir, output_coco_annotation_file_name + "_coco.json")
        save_json(coco_dict, save_path)

    verboseprint("There are", num_total_invalid_segmentation, "invalid segmentations")
    return coco_dict, save_path
def split_coco_as_train_val(
    coco_file_path: str,
    target_dir: str = "",
    train_split_rate: float = 0.9,
    numpy_seed: int = 0,
):
    """
    Takes single coco dataset file path, splits images into train-val and
    saves them as separate coco dataset files.

    Args:
        coco_file_path: Path of the coco file to split.
        target_dir: Directory the split files are written to; defaults to
            the directory of coco_file_path.
        train_split_rate: Fraction of images assigned to the train split.
        numpy_seed: Seed for the numpy RNG so the split is reproducible.

    Returns:
        coco_dict_paths : dict
            Contains exported coco file paths as {"train": "", "val": ""}
    """
    # fix numpy seed so the same split is produced on every run
    np.random.seed(numpy_seed)

    # read coco dict
    coco_dict = load_json(coco_file_path)

    # shuffle image positions and divide them into train/val
    images = coco_dict["images"]
    num_images = len(images)
    shuffled_positions = np.random.permutation(num_images).tolist()
    num_train = int(num_images * train_split_rate)
    train_images = [images[pos] for pos in shuffled_positions[:num_train]]
    val_images = [images[pos] for pos in shuffled_positions[num_train:]]

    # split annotations by the image id they reference
    # bug fix: the previous logic compared indices taken from an arbitrary
    # set-iteration order (0-based) against 1-based shuffled positions, so
    # annotations could land in the wrong split or be dropped entirely
    train_image_ids = {image["id"] for image in train_images}
    val_image_ids = {image["id"] for image in val_images}
    train_annotations = []
    val_annotations = []
    for annotation in tqdm(coco_dict["annotations"]):
        if annotation["image_id"] in train_image_ids:
            train_annotations.append(annotation)
        elif annotation["image_id"] in val_image_ids:
            val_annotations.append(annotation)

    # form train/val coco dicts; categories are shared by both splits
    train_coco_dict = {
        "images": train_images,
        "annotations": train_annotations,
        "categories": coco_dict["categories"],
    }
    val_coco_dict = {
        "images": val_images,
        "annotations": val_annotations,
        "categories": coco_dict["categories"],
    }

    # get filename of the base coco file
    base_coco_filename = os.path.basename(coco_file_path).replace(".json", "")
    # save train/val coco files next to the source file by default
    if not target_dir:
        target_dir = os.path.dirname(coco_file_path)
    train_coco_dict_path = os.path.join(target_dir, base_coco_filename + "_train.json")
    save_json(train_coco_dict, train_coco_dict_path)
    val_coco_dict_path = os.path.join(target_dir, base_coco_filename + "_val.json")
    save_json(val_coco_dict, val_coco_dict_path)

    coco_dict_paths = {
        "train": train_coco_dict_path,
        "val": val_coco_dict_path,
    }
    return coco_dict_paths