def __call__(self, src, bbox):
        # resize to `scale` times the target size with random interpolation;
        # the extra margin is consumed by the random crop below
        h, w, _ = src.shape
        interp = np.random.randint(1, 5)
        scale = 1.2
        src = timage.imresize(src,
                              int(self._width * scale),
                              int(self._height * scale),
                              interp=interp)
        bbox = tbbox.resize(
            bbox, (w, h),
            (int(self._width * scale), int(self._height * scale)))
        # random color jittering
        img = experimental.image.random_color_distort(src)

        # random cropping
        h, w, _ = img.shape
        bbox, crop = random_crop_with_constraints(bbox, (w, h),
                                                  self._height,
                                                  self._width,
                                                  min_scale=0.95,
                                                  max_scale=1.05,
                                                  max_trial=50)
        x0, y0, w, h = crop
        img = mx.image.fixed_crop(img, x0, y0, w, h)
        h, w, _ = img.shape
        # first random horizontal flip (a second flip is applied after the final resize)
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

        # resize with random interpolation
        h, w, _ = img.shape
        interp = np.random.randint(1, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

        # random horizontal flip
        h, w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

        # to tensor
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # generate training target so cpu workers can help reduce the workload on gpu
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
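The transform above mirrors the shape of GluonCV's SSDDefaultTrainTransform, so the usual way to consume it is to wrap a detection dataset and batch the (image, class-target, box-target) tuples. The sketch below is an assumption based on standard GluonCV training scripts; `TrainTransform` stands in for whatever class owns the `__call__` above, and `anchors` comes from a dry run of the detector (see the sketch after Example 3).

from mxnet import gluon
from gluoncv.data import VOCDetection
from gluoncv.data.batchify import Tuple, Stack

# hypothetical: TrainTransform is the class that owns the __call__ above
train_dataset = VOCDetection(splits=[(2007, 'trainval'), (2012, 'trainval')])
batchify_fn = Tuple(Stack(), Stack(), Stack())  # image, cls_targets, box_targets
train_loader = gluon.data.DataLoader(
    train_dataset.transform(TrainTransform(512, 512, anchors)),
    batch_size=16, shuffle=True, batchify_fn=batchify_fn,
    last_batch='rollover', num_workers=4)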
Example 2
    def __call__(self, src, bbox):
        img = src

        # resize with random interpolation
        h, w, _ = img.shape
        interp = np.random.randint(1, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

        if not self._val:
            h, w, _ = img.shape
            img, flips = timage.random_flip(img, px=0.5)
            bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

        # to tensor
        img = mx.nd.image.to_tensor(img)
        img[0, :, :] = mx.nd.subtract(img[0, :, :] * 256, self._mean[0])
        img[1, :, :] = mx.nd.subtract(img[1, :, :] * 256, self._mean[1])
        img[2, :, :] = mx.nd.subtract(img[2, :, :] * 256, self._mean[2])

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # generate training target so cpu workers can help reduce the workload on gpu
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
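Note the normalization in Example 2: `to_tensor` scales pixels to [0, 1], so each channel is rescaled by 256 and a per-channel mean is subtracted, with no division by a standard deviation. Assuming `self._mean` is a length-3 tuple in pixel units, the three per-channel assignments can be rewritten as one call:

# drop-in rewrite of the three per-channel assignments above
# (assumes self._mean is a length-3 tuple of pixel-scale means)
img = mx.nd.image.normalize(img * 256, mean=self._mean, std=(1.0, 1.0, 1.0))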
Example 3
    def __call__(self, src, label):
        """Apply transform to training image/label."""
        # random color jittering
        img = random_color_distort(src)

        # random cropping, keeping aspect ratio = 1
        h, w, _ = img.shape
        bbox, crop = random_crop_with_constraints(label, (w, h))
        x0, y0, w, h = crop
        img = mx.image.fixed_crop(img, x0, y0, w, h)

        # resize with random interpolation
        h, w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = gimage.imresize(img, self._width, self._height, interp=interp)
        bbox = gbbox.resize(bbox, (w, h), (self._width, self._height))

        # random horizontal flip
        h, w, _ = img.shape
        img, flips = gimage.random_flip(img, px=0.5)
        bbox = gbbox.flip(bbox, (w, h), flip_x=flips[0])

        # to tensor
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox

        # generate training target so cpu workers can help reduce the workload on gpu
        gt_bboxes = mx.nd.array(bbox[:, :4]).expand_dims(0)
        gt_ids = mx.nd.zeros((1, gt_bboxes.shape[1], 1), dtype=gt_bboxes.dtype)
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
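In all of these transforms, `self._anchors` is expected to be the anchor set of the detector itself. The usual way to obtain it (this is the pattern from GluonCV's `train_ssd.py`, taken here as an assumption about how these classes are constructed) is a dry forward pass in train mode over a dummy input of the training size:

import mxnet as mx
from mxnet import autograd
from gluoncv import model_zoo

net = model_zoo.get_model('ssd_512_resnet50_v1_voc', pretrained_base=True)
net.initialize()
with autograd.train_mode():
    # in train mode the SSD network also returns its anchor boxes
    _, _, anchors = net(mx.nd.zeros((1, 3, 512, 512)))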
    def __call__(self, src, label, segm):
        """Apply transform to training image/label."""
        # resize shorter side but keep in max_size
        h, w, _ = src.shape
        if self._random_resize:
            short = randint(self._short[0], self._short[1])
        else:
            short = self._short
        img = timage.resize_short_within(src, short, self._max_size, interp=1)
        bbox = tbbox.resize(label, (w, h), (img.shape[1], img.shape[0]))
        # segm = [tmask.resize(polys, (w, h), (img.shape[1], img.shape[0])) for polys in segm]

        # random horizontal flip
        h, w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])
        # segm = [tmask.flip(polys, (w, h), flip_x=flips[0]) for polys in segm]

        # gt_masks (n, im_height, im_width) of uint8 -> float32 (cannot take uint8)
        # masks = [mx.nd.array(tmask.to_mask(polys, (w, h))) for polys in segm]
        masks = cocomask.decode(segm)  # uint8 array of shape (h, w, n)
        mask_list = []
        for i in range(masks.shape[-1]):
            mask = cv2.resize(masks[:, :, i], (img.shape[1], img.shape[0]),
                              interpolation=cv2.INTER_NEAREST)
            mask_list.append(mx.nd.array(mask))
        # n * (im_height, im_width) -> (n, im_height, im_width)
        masks = mx.nd.stack(*mask_list, axis=0)
        if flips[0]:
            masks = mx.nd.flip(masks, axis=2)
        # to tensor
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype), masks

        # generate RPN target so cpu workers can help reduce the workload
        # feat_h, feat_w = (img.shape[1] // self._stride, img.shape[2] // self._stride)
        gt_bboxes = mx.nd.array(bbox[:, :4])
        if self._multi_stage:
            oshapes = []
            anchor_targets = []
            for feat_sym in self._feat_sym:
                oshapes.append(feat_sym.infer_shape(data=(1, 3, img.shape[1], img.shape[2]))[1][0])
            for anchor, oshape in zip(self._anchors, oshapes):
                anchor = anchor[:, :, :oshape[2], :oshape[3], :].reshape((-1, 4))
                anchor_targets.append(anchor)
            anchor_targets = mx.nd.concat(*anchor_targets, dim=0)
            cls_target, box_target, box_mask = self._target_generator(
                gt_bboxes, anchor_targets, img.shape[2], img.shape[1])
        else:
            oshape = self._feat_sym.infer_shape(data=(1, 3, img.shape[1], img.shape[2]))[1][0]
            anchor = self._anchors[:, :, :oshape[2], :oshape[3], :].reshape((-1, 4))

            cls_target, box_target, box_mask = self._target_generator(
                gt_bboxes, anchor, img.shape[2], img.shape[1])
        return img, bbox.astype(img.dtype), masks, cls_target, box_target, box_mask
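`cocomask.decode` above is pycocotools' RLE decoder: given a list of run-length-encoded masks it returns a single uint8 array of shape (height, width, num_instances), which is why the loop iterates over the last axis. A small sketch of the encode/decode round trip, assuming `polygons` holds COCO-style polygon coordinates for one instance:

from pycocotools import mask as cocomask

# hypothetical inputs: polygons is a list of flat [x0, y0, x1, y1, ...] lists
# describing one instance, (h, w) is the image size
rles = cocomask.frPyObjects(polygons, h, w)  # polygons -> run-length encodings
rle = cocomask.merge(rles)                   # union of the instance's parts
binary_mask = cocomask.decode(rle)           # uint8 array of shape (h, w)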
Example 5
    def __call__(self, src, label):
        """Apply transform to training image/label."""
        # random color jittering
        img = experimental.image.random_color_distort(src)

        # random expansion with prob 0.5
        if np.random.uniform(0, 1) > 0.5:
            img, expand = timage.random_expand(
                img, fill=[m * 255 for m in self._mean])
            bbox = tbbox.translate(label,
                                   x_offset=expand[0],
                                   y_offset=expand[1])
        else:
            img, bbox = img, label

        # random cropping
        h, w, _ = img.shape
        bbox, crop = experimental.bbox.random_crop_with_constraints(
            bbox, (w, h))
        x0, y0, w, h = crop
        img = mx.image.fixed_crop(img, x0, y0, w, h)

        # resize with random interpolation
        h, w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

        # random horizontal flip
        h, w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

        if ((self._bilateral_kernel_size is not None and self._sigma_vals is not None)
                or self._grayscale):
            img = img.asnumpy()
            if self._bilateral_kernel_size is not None and self._sigma_vals is not None:
                # edge-preserving smoothing; the same sigma is used for colour and space
                img = cv2.bilateralFilter(img, self._bilateral_kernel_size,
                                          self._sigma_vals, self._sigma_vals)
            if self._grayscale:
                # ITU-R BT.601 luma weights, replicated back to 3 channels
                img = np.dot(img[..., :3], [0.299, 0.587, 0.114])
                img = np.repeat(img[:, :, None], 3, axis=2)
            img = mx.nd.array(img)

        # to tensor
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # generate training target so cpu workers can help reduce the workload on gpu
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
Example 6
    def __call__(self, src, label):
        """Apply transform to training image/label."""
        """color distort"""
        # img = random_color_distort(src)

        # print("previous label shape = ", label.shape)
        target = np.zeros(shape=(label.shape[0], ))
        """Pyramid Anchor sampling"""
        img, boxes, label = self.random_baiducrop(src, label[:, :4], target)
        # print("label shape = ", label.shape)
        # print('boxes shape =', boxes.shape)
        bbox = boxes
        # img = mx.nd.array(img)
        """color distort"""
        img = mx.nd.array(img)
        img = random_color_distort(img)

        # """random crop, keep aspect ration=1"""
        # h, w, _ = img.shape
        # bbox, crop_size = random_crop_with_constraints(label, (w, h))
        # x_offset, y_offset, new_width, new_height = crop_size
        # img = mx.image.fixed_crop(img, x_offset, y_offset, new_width, new_height)
        """resize with random interpolation"""
        h, w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = gimage.imresize(img, self._width, self._height, interp=interp)
        bbox = gbbox.resize(bbox, (w, h), (self._width, self._height))
        """random horizontal flip"""
        h, w, _ = img.shape
        img, flips = gimage.random_flip(img, px=0.5)
        bbox = gbbox.flip(bbox, (w, h), flip_x=flips[0])
        """To Tensor & Normalization"""
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox

        # TODO: generate training targets so cpu workers can help reduce the workload on gpu
        face_anchors, head_anchors, body_anchors = self._anchors
        gt_bboxes = mx.nd.array(bbox[:, :4]).expand_dims(0)
        gt_ids = mx.nd.zeros((1, gt_bboxes.shape[1], 1), dtype=gt_bboxes.dtype)

        face_cls_targets, face_box_targets, _ = self._target_generator(
            face_anchors, None, gt_bboxes, gt_ids)

        head_cls_targets, head_box_targets, _ = self._target_generator(
            head_anchors, None, gt_bboxes, gt_ids)

        body_cls_targets, body_box_targets, _ = self._target_generator(
            body_anchors, None, gt_bboxes, gt_ids)

        return (img,
                face_cls_targets[0], head_cls_targets[0], body_cls_targets[0],
                face_box_targets[0], head_box_targets[0], body_box_targets[0])
Example 7
    def __call__(self, src, label):
        """Apply transform to training image/label."""
        # random color jittering
        img = experimental.image.random_color_distort(src)

        # random expansion with prob 0.5
        if np.random.uniform(0, 1) > 0.5:
            img, expand = timage.random_expand(
                img, fill=[m * 255 for m in self._mean])
            bbox = tbbox.translate(label,
                                   x_offset=expand[0],
                                   y_offset=expand[1])
        else:
            img, bbox = img, label

        # random cropping
        h, w, _ = img.shape
        bbox, crop = experimental.bbox.random_crop_with_constraints(
            bbox, (w, h))
        x0, y0, w, h = crop
        img = mx.image.fixed_crop(img, x0, y0, w, h)

        # resize with random interpolation
        h, w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

        # random horizontal flip
        h, w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

        # to tensor
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._target_generator is None:
            return img, bbox.astype(img.dtype)

        # generate training target so cpu workers can help reduce the workload on gpu
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])  # make the one-hot here
        if self._mixup:
            gt_mixratio = mx.nd.array(bbox[np.newaxis, :, -1:])
        else:
            gt_mixratio = None
        objectness, center_targets, scale_targets, weights, class_targets = self._target_generator(
            self._fake_x, self._feat_maps, self._anchors, self._offsets,
            gt_bboxes, gt_ids, gt_mixratio)
        return (img, objectness[0], center_targets[0], scale_targets[0],
                weights[0], class_targets[0], gt_bboxes[0])
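Example 7 returns seven arrays per sample (image, objectness, centre, scale and weight targets, class targets, and the ground-truth boxes), so batching needs to stack the first six and pad the variable-length box list. The line below mirrors the batchify used in GluonCV's `train_yolo3.py`; treat it as an assumption about how this transform is consumed:

from gluoncv.data.batchify import Tuple, Stack, Pad

# stack the image and the five target arrays; pad the per-image gt boxes with -1
batchify_fn = Tuple(*([Stack() for _ in range(6)] + [Pad(axis=0, pad_val=-1)]))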
Example 8
    def __call__(self, src, label):
        """Apply transform to training image/label."""
        # random color jittering
        img = experimental.image.random_color_distort(src)

        # random expansion with prob 0.5
        if np.random.uniform(0, 1) > 0.5:
            img, expand = timage.random_expand(
                img, fill=[m * 255 for m in self._mean])
            bbox = tbbox.translate(label,
                                   x_offset=expand[0],
                                   y_offset=expand[1])
        else:
            img, bbox = img, label

        # random cropping
        h, w, _ = img.shape
        bbox, crop = experimental.bbox.random_crop_with_constraints(
            bbox, (w, h))
        x0, y0, w, h = crop
        img = mx.image.fixed_crop(img, x0, y0, w, h)

        # resize with random interpolation
        h, w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

        # random horizontal flip
        h, w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

        # to tensor
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # generate training target so cpu workers can help reduce the workload on gpu
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        anchor_cls_targets, anchor_box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        # positive anchors become 1, negative anchors stay 0, ignored anchors stay -1
        anchor_cls_targets = mx.nd.where(anchor_cls_targets > 0,
                                         mx.nd.ones_like(anchor_cls_targets),
                                         anchor_cls_targets)
        return (img, anchor_cls_targets[0], anchor_box_targets[0],
                bbox[:, :5].astype(img.dtype))
    def __call__(self, src, bbox):
        """Apply transform to training image/label."""
        if not self._val:
            # random color jittering
            src = experimental.image.random_color_distort(src)

        img = src
        # random cropping
        h, w, _ = img.shape
        bbox, crop = random_crop_with_constraints(bbox, (w, h),
                                                  self._height,
                                                  self._width,
                                                  min_scale=0.9,
                                                  max_scale=1,
                                                  max_trial=50)
        x0, y0, w, h = crop
        img = mx.image.fixed_crop(img, x0, y0, w, h)

        # resize with random interpolation
        h, w, _ = img.shape
        interp = np.random.randint(1, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

        if not self._val:
            # random horizontal flip
            h, w, _ = img.shape
            img, flips = timage.random_flip(img, px=0.5)
            bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

        # to tensor
        img = mx.nd.image.to_tensor(img)
        img[0, :, :] = mx.nd.subtract(img[0, :, :] * 256, self._mean[0])
        img[1, :, :] = mx.nd.subtract(img[1, :, :] * 256, self._mean[1])
        img[2, :, :] = mx.nd.subtract(img[2, :, :] * 256, self._mean[2])

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # generate training target so cpu workers can help reduce the workload on gpu
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
    def __call__(self, src, label):
        """Apply transform to training image/label."""
        # random color jittering
        img = experimental.image.random_color_distort(src)

        # random expansion with prob 0.5
        if np.random.uniform(0, 1) > 0.5:
            img, expand = timage.random_expand(img, fill=[m * 255 for m in self._mean])
            bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
        else:
            img, bbox = img, label

        # random cropping
        h, w, _ = img.shape
        bbox, crop = experimental.bbox.random_crop_with_constraints(bbox, (w, h))
        x0, y0, w, h = crop
        img = mx.image.fixed_crop(img, x0, y0, w, h)

        # resize with random interpolation
        h, w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

        # random horizontal flip
        h, w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

        # to tensor
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)
        
        # If anchors were provided, run the target computation below.
        # Before adding the batch axis:
        #   gt_bboxes holds the box coordinates of one image in pixels, shape (num_boxes, 4)
        #   gt_ids holds the corresponding labels, shape (num_boxes, 1)
        # np.newaxis below makes room for the batch dimension.
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
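The `cls_targets`/`box_targets` pairs returned by these SSD-style transforms are aligned with the anchor set, so on the training side they feed directly into GluonCV's `SSDMultiBoxLoss` together with the network outputs. A minimal sketch of one training step, assuming `net`, `trainer` and the `train_loader` from the DataLoader sketch after the first example:

from mxnet import autograd
from gluoncv.loss import SSDMultiBoxLoss

mbox_loss = SSDMultiBoxLoss()
for img, cls_targets, box_targets in train_loader:
    with autograd.record():
        # in train/record mode the SSD network returns class preds, box preds, anchors
        cls_preds, box_preds, _ = net(img)
        sum_loss, cls_loss, box_loss = mbox_loss(
            cls_preds, box_preds, cls_targets, box_targets)
        autograd.backward(sum_loss)
    trainer.step(1)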