def load_image(image_path, new_size=224):
    """Load an image from disk and resize it to a square model input.

    Args:
        image_path: path to the image file.
        new_size: target side length of the square output image.
            Defaults to 224 for backward compatibility (previously
            hard-coded); pass another value to target a different
            backbone input resolution.

    Returns:
        tuple: ``(image, (w, h))`` where ``image`` is a CHW
        ``torch.Tensor`` of shape ``(C, new_size, new_size)`` and
        ``(w, h)`` is the original image size in pixels.
    """
    # read_image returns an HWC array in BGR channel order.
    image = read_image(image_path, format="BGR")
    h, w = image.shape[:2]
    # A one-element TransformList keeps the API uniform with pipelines
    # that chain several transforms.
    img_trans = TransformList(
        [ResizeTransform(h=h, w=w, new_h=new_size, new_w=new_size)])
    # .copy() makes the resized array writeable before torch wraps it.
    image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1)
    return image, (w, h)
def get_transform(self, img):
    """Return a transform that scales *img* so its shorter edge reaches
    ``self.short_edge_length[0]``; images already at least that large
    are left untouched (NoOpTransform).
    """
    h, w = img.shape[:2]
    shortest = min(h, w)
    target = self.short_edge_length[0]
    if shortest >= target:
        # Short edge is already big enough -- nothing to do.
        return NoOpTransform()
    scale = target * 1.0 / shortest
    # Round to nearest integer pixel dimensions.
    new_h = int(h * scale + 0.5)
    new_w = int(w * scale + 0.5)
    return ResizeTransform(h, w, new_h, new_w, self.interp)
def __call__(self, dataset_dict):
    """
    Args:
        dict: a dict in standard model input format. See tutorials for details.

    Returns:
        list[dict]: a list of dicts, which contain augmented version of the input image.
            The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``.
            Each dict has field "transforms" which is a TransformList,
            containing the transforms that are used to generate this image.
    """
    image_hwc = dataset_dict["image"].permute(1, 2, 0).numpy()
    in_h, in_w = image_hwc.shape[:2]
    orig_hw = (dataset_dict["height"], dataset_dict["width"])
    # pre_tfm maps the "original" image in the dataset to the input image.
    if (in_h, in_w) == orig_hw:
        pre_tfm = NoOpTransform()
    else:
        pre_tfm = ResizeTransform(orig_hw[0], orig_hw[1], in_h, in_w)

    # Enumerate every augmentation combination; each entry is a
    # list[Augmentation] applied in order.
    candidates = []
    for min_size in self.min_sizes:
        resize = ResizeShortestEdge(min_size, self.max_size)
        candidates.append([resize])  # resize only
        if self.flip:
            candidates.append([resize, RandomFlip(prob=1.0)])  # resize + flip

    # Apply each combination to a fresh copy of the image.
    outputs = []
    for augs in candidates:
        aug_image, tfms = apply_augmentations(augs, np.copy(image_hwc))
        chw_tensor = torch.from_numpy(
            np.ascontiguousarray(aug_image.transpose(2, 0, 1)))
        out = copy.deepcopy(dataset_dict)
        out["transforms"] = pre_tfm + tfms
        out["image"] = chw_tensor
        if self.proposal_topk is not None:
            transform_proposals(out, aug_image.shape[:2], tfms,
                                proposal_topk=self.proposal_topk)
        outputs.append(out)
    return outputs
def __call__(self, dataset_dict):
    """
    Args:
        dict: a detection dataset dict in standard format

    Returns:
        list[dict]: a list of dataset dicts, which contain augmented version of the input image.
            The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``.
            Each dict has field "transforms" which is a TransformList,
            containing the transforms that are used to generate this image.
    """
    image_hwc = dataset_dict["image"].permute(1, 2, 0).numpy()
    in_h, in_w = image_hwc.shape[:2]
    orig_hw = (dataset_dict["height"], dataset_dict["width"])
    # pre_tfm maps the "original" image in the dataset to the input image.
    if (in_h, in_w) == orig_hw:
        pre_tfm = NoOpTransform()
    else:
        pre_tfm = ResizeTransform(orig_hw[0], orig_hw[1], in_h, in_w)

    # Build every augmentation combination; each entry is a
    # list[TransformGen] applied in order.
    candidates = []
    for min_size in self.min_sizes:
        resize = ResizeShortestEdge(min_size, self.max_size)
        candidates.append([resize])  # resize only
        if self.flip:
            candidates.append([resize, RandomFlip(prob=1.0)])  # + horizontal flip
        if self.vertical_flip:
            candidates.append(
                [resize, RandomFlip(prob=1.0, horizontal=False, vertical=True)])
        candidates.append([resize, Rotation90Gen(prob=1.0)])  # + 90-degree rotation
        # CLAHE variant intentionally disabled:
        # candidates.append([resize, CLAHEGen(prob=1.0, img_format=self.image_format)])
        # Three chained 90-degree rotations (i.e. 270 degrees total); the
        # generators must be distinct instances.
        candidates.append([
            resize,
            Rotation90Gen(prob=1.0),
            Rotation90Gen(prob=1.0),
            Rotation90Gen(prob=1.0),
        ])

    # Apply each combination to a fresh copy of the image.
    outputs = []
    for gens in candidates:
        aug_image, tfms = apply_transform_gens(gens, np.copy(image_hwc))
        chw_tensor = torch.from_numpy(
            np.ascontiguousarray(aug_image.transpose(2, 0, 1)))
        out = copy.deepcopy(dataset_dict)
        out["transforms"] = pre_tfm + tfms
        out["image"] = chw_tensor
        # Drop the original width/height so the detector's postprocess
        # does not rescale boxes back to the original resolution.
        del out["width"], out["height"]
        outputs.append(out)
    return outputs