Exemple #1
0
    def test_merge(self):
        """Check ``merge`` on two single-image COCO dicts, with and without a category remap."""
        # load both source files and verify their sizes before merging
        loaded = []
        for path, num_annotations in (
            ("tests/data/coco_utils/terrain1_coco.json", 7),
            ("tests/data/coco_utils/terrain2_coco.json", 5),
        ):
            source = load_json(path)
            self.assertEqual(len(source["images"]), 1)
            self.assertEqual(len(source["annotations"]), num_annotations)
            self.assertEqual(len(source["categories"]), 1)
            loaded.append(source)
        first, second = loaded

        # merge without desired_name2id
        plain_merge = merge(first, second)
        self.assertEqual(len(plain_merge["images"]), 2)
        self.assertEqual(len(plain_merge["annotations"]), 7)
        self.assertEqual(len(plain_merge["categories"]), 1)

        # merge with desired_name2id
        remapped_merge = merge(
            first, second, {"human": 1, "car": 2, "big_vehicle": 3})
        self.assertEqual(len(remapped_merge["images"]), 2)
        self.assertEqual(len(remapped_merge["annotations"]), 12)
        self.assertEqual(len(remapped_merge["categories"]), 3)

        # spot-check the last annotation of the first file and the first of the second
        for index, category_id, image_id, annotation_id in (
            (6, 1, 1, 7),
            (7, 2, 2, 8),
        ):
            annotation = remapped_merge["annotations"][index]
            self.assertEqual(annotation["category_id"], category_id)
            self.assertEqual(annotation["image_id"], image_id)
            self.assertEqual(annotation["id"], annotation_id)
Exemple #2
0
    def test_merge_from_list(self):
        """Check ``merge_from_list`` on three single-image COCO dicts."""
        from sahi.utils.coco import merge_from_list

        # load every source file and verify its size before merging
        loaded = []
        for path, num_annotations in (
            ("tests/data/coco_utils/terrain1_coco.json", 7),
            ("tests/data/coco_utils/terrain2_coco.json", 5),
            ("tests/data/coco_utils/terrain3_coco.json", 10),
        ):
            source = load_json(path)
            self.assertEqual(len(source["images"]), 1)
            self.assertEqual(len(source["annotations"]), num_annotations)
            self.assertEqual(len(source["categories"]), 1)
            loaded.append(source)
        third = loaded[2]

        # merge without desired_name2id
        merged = merge_from_list(loaded)
        self.assertEqual(len(merged["images"]), 3)
        self.assertEqual(len(merged["annotations"]), 22)
        self.assertEqual(len(merged["categories"]), 2)

        # annotation 12 of the merged dict is the first annotation of the third file
        thirteenth = merged["annotations"][12]
        self.assertEqual(thirteenth["bbox"], third["annotations"][0]["bbox"])
        self.assertEqual(thirteenth["id"], 13)
        self.assertEqual(thirteenth["image_id"], 3)

        tenth = merged["annotations"][9]
        self.assertEqual(tenth["category_id"], 1)
        self.assertEqual(tenth["image_id"], 2)
Exemple #3
0
    def test_coco(self):
        """Check that ``Coco`` built from a dict and from a path expose the same content."""
        from sahi.utils.coco import Coco

        expected_mapping = {"1": "human", "2": "car"}
        expected_segmentation = [[501, 451, 622, 451, 622, 543, 501, 543]]

        # init coco both ways from the same file
        coco_path = "tests/data/coco_utils/terrain_all_coco.json"
        from_dict = Coco(load_json(coco_path))
        from_path = Coco.from_coco_path(coco_path)
        both = (from_dict, from_path)

        # compare: each property is checked on both objects in turn
        for coco in both:
            self.assertEqual(len(coco.images), 3)
        for coco in both:
            self.assertEqual(coco.images[2].annotations[1].category_name, "human")
        for coco in both:
            self.assertEqual(
                coco.images[1].annotations[1].segmentation,
                expected_segmentation,
            )
        for coco in both:
            self.assertEqual(coco.category_mapping, expected_mapping)
Exemple #4
0
    def test_slice_image(self):
        """Slice the terrain1 image in memory and check slice count and one annotation."""
        # read coco file and wrap it in a coco_utils.Coco object
        coco = Coco(load_json("tests/data/coco_utils/terrain1_coco.json"))
        source_image = coco.images[0]

        # no output name / directory is passed for this call
        result, _num_invalid_segmentation = slice_image(
            image="tests/data/coco_utils/" + source_image.file_name,
            coco_annotation_list=source_image.annotations,
            output_file_name=None,
            output_dir=None,
            slice_height=512,
            slice_width=512,
            max_allowed_zeros_ratio=0.2,
            overlap_height_ratio=0.1,
            overlap_width_ratio=0.4,
            slice_sep="|",
            out_ext=".png",
            verbose=False,
        )

        self.assertEqual(len(result.images), 21)
        self.assertEqual(len(result.coco_images), 21)
        self.assertEqual(result.coco_images[0].annotations, [])

        sixteenth_slice = result.coco_images[15]
        self.assertEqual(sixteenth_slice.annotations[1].area, 12483)
        self.assertEqual(
            sixteenth_slice.annotations[1].bbox,
            [341, 204, 73, 171],
        )
Exemple #5
0
def count_images_with_category(coco_file_path):
    """Count, for each category, how many images of a coco dataset contain it.

    Args:
        coco_file_path : str
            path to coco dataset file

    Returns:
        DatasetClassCounts object built from a ``{category_id: image_count}``
        dict and the number of distinct image ids referenced by annotations.
    """
    # first pass: image_id -> {category_id: number of annotations}
    per_image_counts = {}
    for annotation in load_json(coco_file_path)["annotations"]:
        category_counts = per_image_counts.setdefault(annotation["image_id"], {})
        category_id = annotation["category_id"]
        category_counts[category_id] = category_counts.get(category_id, 0) + 1

    # second pass: every (image, category) pair recorded above has a count of
    # at least one, so each pair contributes exactly one image to its category
    images_per_category = {}
    for category_counts in per_image_counts.values():
        for category_id in category_counts:
            images_per_category[category_id] = (
                images_per_category.get(category_id, 0) + 1)

    return DatasetClassCounts(images_per_category, len(per_image_counts))
Exemple #6
0
    def from_coco_path(cls, coco_path):
        """
        Alternate constructor: builds the object from a COCO json file on disk.

        Args:
            coco_path: str
                Location of the coco dataset file.

        Returns:
            The result of calling ``cls`` with the loaded coco dict.
        """
        return cls(load_json(coco_path))
Exemple #7
0
def merge_from_file(coco_path1: str, coco_path2: str, save_path: str):
    """
    Loads two coco formatted annotation files, merges them with ``merge`` and
    writes the merged dict to save_path.

    Arguments:
    ---------
        coco_path1 : str
            Path for the first coco file.
        coco_path2 : str
            Path for the second coco file.
        save_path : str
            "dirname/coco.json"
    """
    # both files are loaded (first, then second) before merging
    first_coco_dict = load_json(coco_path1)
    second_coco_dict = load_json(coco_path2)

    # merge and export in one step
    save_json(merge(first_coco_dict, second_coco_dict), save_path)
Exemple #8
0
def add_bbox_and_area_to_coco(
    source_coco_path: str = "",
    target_coco_path: str = "",
    add_bbox: bool = True,
    add_area: bool = True,
) -> dict:
    """
    Loads a coco dataset file, fills the bbox and/or area field of every
    annotation from its segmentation, saves the result to target_coco_path
    and returns it.

    Args:
        source_coco_path : str
            Path of the coco file to read.
        target_coco_path : str
            Path the updated coco dict is saved to.
        add_bbox : bool
            If True, "bbox" is set to the [x, y, width, height] extent of the
            segmentation coordinates, truncated to ints.
        add_area : bool
            If True, "area" is set to the area of the shapely multipolygon
            built from the segmentation.

    Returns:
    coco_dict : dict
        Updated coco dict
    """
    coco_dict = copy.deepcopy(load_json(source_coco_path))

    for annotation in coco_dict["annotations"]:
        segmentation = annotation["segmentation"]

        # assign annotation bbox
        if add_bbox:
            # flatten all polygons into one [x0, y0, x1, y1, ...] list
            flat_coords = [value for polygon in segmentation for value in polygon]
            xs = flat_coords[0::2]
            ys = flat_coords[1::2]
            minx = min(xs)
            miny = min(ys)
            maxx = max(xs)
            maxy = max(ys)
            annotation["bbox"] = [
                int(minx),
                int(miny),
                int(maxx - minx),
                int(maxy - miny),
            ]

        # assign annotation area
        if add_area:
            annotation["area"] = get_shapely_multipolygon(
                coco_segmentation=segmentation).area

    save_json(coco_dict, target_coco_path)
    return coco_dict
Exemple #9
0
    def test_update_categories(self):
        """Check that ``update_categories`` rewrites categories and annotation category ids."""
        source = load_json("tests/data/coco_utils/terrain2_coco.json")

        # sanity-check the source file
        self.assertEqual(len(source["annotations"]), 5)
        self.assertEqual(len(source["images"]), 1)
        self.assertEqual(len(source["categories"]), 1)
        self.assertEqual(
            source["categories"],
            [{"id": 1, "name": "car", "supercategory": "car"}],
        )
        self.assertEqual(source["annotations"][1]["category_id"], 1)

        # update categories
        desired_name2id = {"human": 1, "car": 2, "big_vehicle": 3}
        target = update_categories(desired_name2id=desired_name2id,
                                   coco_dict=source)

        # expected categories: one entry per desired name, supercategory == name
        expected_categories = [
            {"id": category_id, "name": name, "supercategory": name}
            for name, category_id in (
                ("human", 1),
                ("car", 2),
                ("big_vehicle", 3),
            )
        ]

        self.assertEqual(len(target["annotations"]), 5)
        self.assertEqual(len(target["images"]), 1)
        self.assertEqual(len(target["categories"]), 3)
        self.assertEqual(target["categories"], expected_categories)
        self.assertEqual(target["annotations"][1]["category_id"], 2)
Exemple #10
0
    def test_get_imageid2annotationlist_mapping(self):
        """Check that every annotation is grouped under its own image id."""
        mapping = get_imageid2annotationlist_mapping(
            load_json("tests/data/coco_utils/combined_coco.json"))
        self.assertEqual(len(mapping), 2)

        # each list must only hold annotations whose image_id matches its key
        for image_id in (1, 2):
            grouped_image_ids = []
            for annotation in mapping[image_id]:
                grouped_image_ids.append(annotation["image_id"])
            self.assertEqual(
                grouped_image_ids, [image_id] * len(grouped_image_ids))
Exemple #11
0
def update_categories_from_file(desired_name2id: dict, coco_path: str,
                                save_path: str) -> None:
    """
    Loads the COCO dictionary at coco_path, passes it through
    ``update_categories`` with desired_name2id and saves the result to
    save_path.
    Can also be used to filter some of the categories.
    Arguments:
    ---------
        desired_name2id : dict
            {"human": 1, "car": 2, "big_vehicle": 3}
        coco_path : str
            "dirname/coco.json"
        save_path : str
            Path the modified coco file is written to.
    """
    # load -> remap categories -> save
    remapped = update_categories(desired_name2id, load_json(coco_path))
    save_json(remapped, save_path)
Exemple #12
0
def slice_coco(
    coco_annotation_file_path: str,
    image_dir: str,
    output_coco_annotation_file_name: str,
    output_dir: Optional[str] = None,
    ignore_negative_samples: bool = False,
    slice_height: int = 512,
    slice_width: int = 512,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    min_area_ratio: float = 0.1,
    out_ext: Optional[str] = None,
    verbose: bool = False,
) -> List[Union[Dict, str]]:
    """
    Slice every image of a coco dataset into smaller windows and build a coco
    dict for the slices. The coco dict is written to disk only when both
    output_coco_annotation_file_name and output_dir are given.

    Args:
        coco_annotation_file_path (str): Location of the coco annotation file
        image_dir (str): Base directory for the images
        output_coco_annotation_file_name (str): File name of the exported coco
            dataset json; "_coco.json" is appended to it.
        output_dir (str, optional): Output directory
        ignore_negative_samples (bool): If True, images without annotations
            are ignored. Defaults to False.
        slice_height (int): Height of each slice. Default 512.
        slice_width (int): Width of each slice. Default 512.
        overlap_height_ratio (float): Fractional overlap in height of each
            slice (e.g. an overlap of 0.2 for a slice of size 100 yields an
            overlap of 20 pixels). Default 0.2.
        overlap_width_ratio (float): Fractional overlap in width of each
            slice (e.g. an overlap of 0.2 for a slice of size 100 yields an
            overlap of 20 pixels). Default 0.2.
        min_area_ratio (float): If the cropped annotation area to original annotation
            ratio is smaller than this value, the annotation is filtered out. Default 0.1.
        out_ext (str, optional): Extension of saved images. Default is the
            original suffix.
        verbose (bool, optional): Switch to print relevant values to screen.
            Default 'False'.

    Returns:
        A ``(coco_dict, save_path)`` pair:
        coco_dict: dict
            COCO dict for sliced images and annotations
        save_path: str
            Path to the saved coco file, or "" when nothing was saved
    """
    # read coco file and wrap it so images carry their annotation lists
    source_coco_dict: Dict = load_json(coco_annotation_file_path)
    coco = Coco.from_coco_dict_or_path(source_coco_dict)

    # collect the coco images produced for every slice of every source image
    sliced_coco_images: List = []
    for coco_image in tqdm(coco.images):
        slice_image_result = slice_image(
            image=os.path.join(image_dir, coco_image.file_name),
            coco_annotation_list=coco_image.annotations,
            output_file_name=Path(coco_image.file_name).stem,
            output_dir=output_dir,
            slice_height=slice_height,
            slice_width=slice_width,
            overlap_height_ratio=overlap_height_ratio,
            overlap_width_ratio=overlap_width_ratio,
            min_area_ratio=min_area_ratio,
            out_ext=out_ext,
            verbose=verbose,
        )
        sliced_coco_images.extend(slice_image_result.coco_images)

    # create coco dict, reusing the categories of the source file
    sliced_coco_dict = create_coco_dict(
        sliced_coco_images,
        source_coco_dict["categories"],
        ignore_negative_samples=ignore_negative_samples,
    )

    # nothing to export without both a file name and a directory
    if not (output_coco_annotation_file_name and output_dir):
        return sliced_coco_dict, ""

    save_path = os.path.join(
        output_dir, output_coco_annotation_file_name + "_coco.json")
    save_json(sliced_coco_dict, save_path)
    return sliced_coco_dict, save_path
Exemple #13
0
def slice_coco(
    coco_annotation_file_path: str,
    image_dir: str,
    output_coco_annotation_file_name: str = "",
    output_dir: str = "",
    ignore_negative_samples: bool = True,
    slice_height: int = 256,
    slice_width: int = 256,
    max_allowed_zeros_ratio: float = 0.2,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    slice_sep: str = "_",
    out_ext: str = ".png",
    verbose: bool = False,
):
    """
    Slice large images given in a directory, into smaller windows. The coco
    file is exported only when both output_coco_annotation_file_name and
    output_dir are given.

    Args:
        coco_annotation_file_path: str
            Location of the coco annotation file
        image_dir: str
            Base directory for the images
        output_coco_annotation_file_name : str
            Root name of the exported coco dataset file; "_coco.json" is
            appended to it.
        output_dir: str
            Output directory
        ignore_negative_samples: bool
            If True, images without annotations are ignored. Defaults to True.
        slice_height: int
            Height of each slice.  Defaults to ``256``.
        slice_width: int
            Width of each slice.  Defaults to ``256``.
        max_allowed_zeros_ratio: float
            Maximum fraction of window that is allowed to be zeros or null.
            Defaults to ``0.2``.
        overlap_height_ratio: float
            Fractional overlap in height of each window (e.g. an overlap of 0.2 for a window
            of size 256 yields an overlap of 51 pixels).
            Default to ``0.2``.
        overlap_width_ratio: float
            Fractional overlap in width of each window (e.g. an overlap of 0.2 for a window
            of size 256 yields an overlap of 51 pixels).
            Default to ``0.2``.
        slice_sep: str
            Character used to separate outname from coordinates in the saved
            windows. Defaults to ``_``
        out_ext: str
            Extension of saved images. Defaults to ``.png``.
        verbose: bool
            Switch to print relevant values to screen. Defaults to ``False``

    Returns:
        coco_dict: dict
            COCO dict for sliced images and annotations
        save_path: str
            Path to the saved coco file, or "" when nothing was saved
    """
    # define verboseprint
    verboseprint = print if verbose else lambda *a, **k: None

    # read coco file
    coco_dict = load_json(coco_annotation_file_path)
    # create coco_utils.Coco object
    coco = Coco(coco_dict)
    # init sliced coco_utils.CocoImage list
    sliced_coco_images = []

    num_total_invalid_segmentation = 0

    # iterate over images and slice
    for coco_image in tqdm(coco.images):
        # get image path
        image_path = os.path.join(image_dir, coco_image.file_name)
        # slice image; slice_sep, out_ext and verbose are forwarded from the
        # caller instead of being pinned to their default values, so that
        # non-default arguments actually take effect
        slice_image_result, num_invalid_segmentation = slice_image(
            image=image_path,
            coco_annotation_list=coco_image.annotations,
            output_file_name=os.path.basename(coco_image.file_name),
            output_dir=output_dir,
            slice_height=slice_height,
            slice_width=slice_width,
            max_allowed_zeros_ratio=max_allowed_zeros_ratio,
            overlap_height_ratio=overlap_height_ratio,
            overlap_width_ratio=overlap_width_ratio,
            slice_sep=slice_sep,
            out_ext=out_ext,
            verbose=verbose,
        )
        num_total_invalid_segmentation += num_invalid_segmentation
        # append slice outputs
        sliced_coco_images.extend(slice_image_result.coco_images)

    # create and save coco dict
    coco_dict = create_coco_dict(
        sliced_coco_images,
        coco_dict["categories"],
        ignore_negative_samples=ignore_negative_samples,
    )
    save_path = ""
    if output_coco_annotation_file_name and output_dir:
        save_path = os.path.join(
            output_dir, output_coco_annotation_file_name + "_coco.json")
        save_json(coco_dict, save_path)
    verboseprint("There are", num_total_invalid_segmentation,
                 "invalid segmentations")

    return coco_dict, save_path
Exemple #14
0
def split_coco_as_train_val(
    coco_file_path: str,
    target_dir: str = "",
    train_split_rate: float = 0.9,
    numpy_seed: int = 0,
):
    """
    Takes single coco dataset file path, splits images into train-val and saves as separate coco dataset files.

    Each annotation follows the image it belongs to: it is placed in the split
    that contains the image whose "id" equals the annotation's "image_id".
    Annotations that reference an image id missing from "images" are dropped.

    Args:
        coco_file_path: str
            Path of the coco file to split.
        target_dir: str
            Directory the split files are written to. Defaults to the
            directory of coco_file_path.
        train_split_rate: float
            Fraction of images assigned to the train split (rounded down).
        numpy_seed: int
            Seed passed to ``np.random.seed`` so the split is reproducible.

    Returns:
    coco_dict_paths : dict
        Contains exported coco file paths as {"train": "", "val": ""}
    """
    # fix numpy seed (note: this reseeds numpy's global random state)
    np.random.seed(numpy_seed)

    # read coco dict
    coco_dict = load_json(coco_file_path)

    # shuffle image positions and cut the permutation into train / val
    images = coco_dict["images"]
    num_images = len(images)
    shuffled_positions = np.random.permutation(num_images).tolist()
    num_train = int(num_images * train_split_rate)

    # divide images
    train_images = [images[pos] for pos in shuffled_positions[:num_train]]
    val_images = [images[pos] for pos in shuffled_positions[num_train:]]

    # divide annotations by the actual ids of the images in each split, so an
    # annotation can never end up in a different split than its image
    train_image_ids = {image["id"] for image in train_images}
    val_image_ids = {image["id"] for image in val_images}
    train_annotations = []
    val_annotations = []
    for annotation in tqdm(coco_dict["annotations"]):
        image_id = annotation["image_id"]
        if image_id in train_image_ids:
            train_annotations.append(annotation)
        elif image_id in val_image_ids:
            val_annotations.append(annotation)

    # form train val coco dicts
    train_coco_dict = {
        "images": train_images,
        "annotations": train_annotations,
        "categories": coco_dict["categories"],
    }
    val_coco_dict = {
        "images": val_images,
        "annotations": val_annotations,
        "categories": coco_dict["categories"],
    }

    # get filename of the base coco file
    base_coco_filename = os.path.basename(coco_file_path).replace(".json", "")
    # save train val coco files next to the source file unless told otherwise
    if not target_dir:
        target_dir = os.path.dirname(coco_file_path)
    train_coco_dict_path = os.path.join(target_dir,
                                        base_coco_filename + "_train.json")
    save_json(train_coco_dict, train_coco_dict_path)
    val_coco_dict_path = os.path.join(target_dir,
                                      base_coco_filename + "_val.json")
    save_json(val_coco_dict, val_coco_dict_path)

    return {
        "train": train_coco_dict_path,
        "val": val_coco_dict_path,
    }