Example no. 1
0
def load_image(image_path):
    """Read an image from *image_path* and resize it to 224x224.

    Returns:
        tuple: (CHW image tensor resized to (3, 224, 224),
                (original width, original height)).
    """
    raw = read_image(image_path, format="BGR")
    orig_h, orig_w = raw.shape[0], raw.shape[1]
    transforms = TransformList(
        [ResizeTransform(h=orig_h, w=orig_w, new_h=224, new_w=224)])
    resized = transforms.apply_image(raw)
    # .copy() makes the array writeable before torch wraps it
    tensor = torch.tensor(resized.copy()).permute(2, 0, 1)
    return tensor, (orig_w, orig_h)
    def get_transform(self, img):
        """Return a transform that upsamples *img* so its shorter edge reaches
        ``self.short_edge_length[0]``; a no-op if it is already large enough.
        """
        height, width = img.shape[:2]
        target = self.short_edge_length[0]
        shorter = min(height, width)
        if shorter >= target:
            return NoOpTransform()

        factor = target * 1.0 / shorter
        # Round half-up to whole pixels.
        new_h = int(height * factor + 0.5)
        new_w = int(width * factor + 0.5)
        return ResizeTransform(height, width, new_h, new_w, self.interp)
    def __call__(self, dataset_dict):
        """
        Args:
            dict: a dict in standard model input format. See tutorials for details.

        Returns:
            list[dict]:
                a list of dicts, which contain augmented version of the input image.
                The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``.
                Each dict has field "transforms" which is a TransformList,
                containing the transforms that are used to generate this image.
        """
        image_hwc = dataset_dict["image"].permute(1, 2, 0).numpy()
        cur_shape = image_hwc.shape
        orig_shape = (dataset_dict["height"], dataset_dict["width"])
        # pre_tfm maps the "original" dataset image onto the current input image.
        if cur_shape[:2] == orig_shape:
            pre_tfm = NoOpTransform()
        else:
            pre_tfm = ResizeTransform(orig_shape[0], orig_shape[1],
                                      cur_shape[0], cur_shape[1])

        # Enumerate every augmentation combination to apply.
        candidates = []  # each element is a list[Augmentation]
        for min_size in self.min_sizes:
            resize = ResizeShortestEdge(min_size, self.max_size)
            candidates.append([resize])  # resize only
            if self.flip:
                candidates.append(
                    [resize, RandomFlip(prob=1.0)])  # resize + flip

        # Run each combination on a fresh copy of the input image.
        results = []
        for augs in candidates:
            aug_image, tfms = apply_augmentations(augs, np.copy(image_hwc))
            chw = np.ascontiguousarray(aug_image.transpose(2, 0, 1))

            out = copy.deepcopy(dataset_dict)
            out["transforms"] = pre_tfm + tfms
            out["image"] = torch.from_numpy(chw)

            if self.proposal_topk is not None:
                transform_proposals(out,
                                    aug_image.shape[:2],  # h, w
                                    tfms,
                                    proposal_topk=self.proposal_topk)

            results.append(out)
        return results
Example no. 4
0
    def __call__(self, dataset_dict):
        """
        Args:
            dict: a detection dataset dict in standard format

        Returns:
            list[dict]:
                a list of dataset dicts, which contain augmented version of the input image.
                The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``.
                Each dict has field "transforms" which is a TransformList,
                containing the transforms that are used to generate this image.
        """
        image_hwc = dataset_dict["image"].permute(1, 2, 0).numpy()
        cur_shape = image_hwc.shape
        orig_shape = (dataset_dict["height"], dataset_dict["width"])
        # pre_tfm maps the "original" dataset image onto the current input image.
        if cur_shape[:2] == orig_shape:
            pre_tfm = NoOpTransform()
        else:
            pre_tfm = ResizeTransform(orig_shape[0], orig_shape[1],
                                      cur_shape[0], cur_shape[1])

        # Enumerate every augmentation combination to apply.
        candidates = []  # each element is a list[TransformGen]
        for min_size in self.min_sizes:
            resize = ResizeShortestEdge(min_size, self.max_size)
            candidates.append([resize])  # resize only
            if self.flip:
                candidates.append(
                    [resize, RandomFlip(prob=1.0)])  # resize + horizontal flip
            if self.vertical_flip:
                candidates.append([
                    resize,
                    RandomFlip(prob=1.0, horizontal=False, vertical=True)
                ])  # resize + vertical flip

            # resize + one 90-degree rotation
            candidates.append([resize, Rotation90Gen(prob=1.0)])
            # candidates.append([resize, CLAHEGen(prob=1.0, img_format=self.image_format)])
            # resize + three successive 90-degree rotations (i.e. 270 degrees)
            candidates.append([
                resize,
                Rotation90Gen(prob=1.0),
                Rotation90Gen(prob=1.0),
                Rotation90Gen(prob=1.0)
            ])

        # Run each combination on a fresh copy of the input image.
        results = []
        for tfm_gen in candidates:
            aug_image, tfms = apply_transform_gens(tfm_gen,
                                                   np.copy(image_hwc))
            chw = np.ascontiguousarray(aug_image.transpose(2, 0, 1))

            out = copy.deepcopy(dataset_dict)
            out["transforms"] = pre_tfm + tfms
            out["image"] = torch.from_numpy(chw)

            # Drop original width/height so the detector's postprocess does not
            # rescale boxes back to the original resolution.
            out.pop("width")
            out.pop("height")

            results.append(out)
        return results