def __call__(self, src, bbox):
        """Augment one training image with its boxes and optionally build targets.

        Steps: upscale to 1.2x the configured size, colour-distort, take a
        constrained random crop, resize to ``(self._width, self._height)``,
        flip horizontally with probability 0.5, then convert to a normalised
        tensor.

        Parameters
        ----------
        src : image whose ``shape`` unpacks as (height, width, channels).
        bbox : box array; columns 0-3 feed the box targets and column 4 the
            class ids when targets are generated.

        Returns
        -------
        tuple
            ``(img, bbox)`` when ``self._anchors`` is None, otherwise
            ``(img, cls_targets[0], box_targets[0])``.
        """
        # upscale with a randomly chosen interpolation method (codes 1..4)
        h, w, _ = src.shape
        scale = 1.2
        scaled_size = (int(self._width * scale), int(self._height * scale))
        interp = np.random.randint(1, 5)
        src = timage.imresize(src, scaled_size[0], scaled_size[1],
                              interp=interp)
        bbox = tbbox.resize(bbox, (w, h), scaled_size)

        # random color jittering
        img = experimental.image.random_color_distort(src)

        # random cropping
        h, w, _ = img.shape
        bbox, crop = random_crop_with_constraints(bbox, (w, h),
                                                  self._height,
                                                  self._width,
                                                  min_scale=0.95,
                                                  max_scale=1.05,
                                                  max_trial=50)
        x0, y0, w, h = crop
        img = mx.image.fixed_crop(img, x0, y0, w, h)

        # resize to the final size with a random interpolation method
        h, w, _ = img.shape
        interp = np.random.randint(1, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

        # Random horizontal flip. The previous version flipped twice, each
        # with p=0.5; two independent flips still net to a 50% flip chance,
        # so a single flip gives the same augmentation for half the work.
        h, w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

        # to tensor
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # generate training target so cpu workers can help reduce the workload on gpu
        # np.newaxis adds a leading axis of length 1 in front of the boxes
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
Ejemplo n.º 2
0
 def test_gluon_cv(self):
     """Check that ``imresize`` turns a 300x300x3 image into a 200x200x3 one."""
     # Fake RGB image laid out as Height x Width x Channel, as OpenCV expects.
     source = mx.random.uniform(0, 255, (300, 300, 3)).astype('uint8')
     # The resize goes through OpenCV; GluonCV is of little use without it.
     resized = imresize(source, 200, 200)
     self.assertEqual((200, 200, 3), resized.shape)
Ejemplo n.º 3
0
 def test_gluon_cv(self):
     """Smoke-test ``imresize``: a 300x300 RGB input must come back 200x200."""
     in_shape = (300, 300, 3)  # Height x Width x Channel, as OpenCV expects
     out_shape = (200, 200, 3)
     fake_rgb = mx.random.uniform(0, 255, in_shape).astype('uint8')
     # This call uses OpenCV, so it fails if OpenCV is missing or broken.
     shrunk = imresize(fake_rgb, 200, 200)
     self.assertEqual(out_shape, shrunk.shape)
Ejemplo n.º 4
0
    def __call__(self, src, label):
        """Augment a training image/label pair and optionally build targets.

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``. Class ids passed to the
        target generator are all zero.
        """
        out_size = (self._width, self._height)

        # photometric augmentation
        distorted = random_color_distort(src)

        # constrained random crop (original note: keeps aspect ratio = 1)
        full_h, full_w, _ = distorted.shape
        bbox, crop_box = random_crop_with_constraints(label, (full_w, full_h))
        left, top, crop_w, crop_h = crop_box
        img = mx.image.fixed_crop(distorted, left, top, crop_w, crop_h)

        # resize to the configured size, interpolation code drawn from 0..4
        cur_h, cur_w, _ = img.shape
        method = np.random.randint(0, 5)
        img = gimage.imresize(img, out_size[0], out_size[1], interp=method)
        bbox = gbbox.resize(bbox, (cur_w, cur_h), out_size)

        # horizontal flip with probability 0.5
        cur_h, cur_w, _ = img.shape
        img, flipped = gimage.random_flip(img, px=0.5)
        bbox = gbbox.flip(bbox, (cur_w, cur_h), flip_x=flipped[0])

        # tensor conversion followed by mean/std normalisation
        img = mx.nd.image.normalize(mx.nd.image.to_tensor(img),
                                    mean=self._mean,
                                    std=self._std)

        if self._anchors is None:
            return img, bbox

        # Build targets here so CPU workers take that load off the GPU.
        coords = mx.nd.array(bbox[:, :4])
        gt_bboxes = coords.expand_dims(0)
        id_shape = (1, gt_bboxes.shape[1], 1)
        gt_ids = mx.nd.zeros(id_shape, dtype=gt_bboxes.dtype)
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
Ejemplo n.º 5
0
    def __call__(self, src, bbox):
        """Resize (and in training mode flip) a sample, then mean-subtract it.

        The image is resized to ``(self._width, self._height)``, flipped
        horizontally with probability 0.5 unless ``self._val`` is truthy,
        converted with ``to_tensor`` and brought back to the 0-255 range
        before the per-channel mean in ``self._mean`` is subtracted.

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``.
        """
        img = src

        # resize with random interpolation
        h, w, _ = img.shape
        interp = np.random.randint(1, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

        if not self._val:
            # random horizontal flip (training only)
            h, w, _ = img.shape
            img, flips = timage.random_flip(img, px=0.5)
            bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

        # to tensor
        img = mx.nd.image.to_tensor(img)
        # to_tensor divides pixel values by 255, so multiplying by 255 (the
        # previous code used 256) restores the original 0-255 scale before
        # the mean is removed.
        for channel in range(3):
            img[channel, :, :] = mx.nd.subtract(img[channel, :, :] * 255,
                                                self._mean[channel])

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # generate training target so cpu workers can help reduce the workload on gpu
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
Ejemplo n.º 6
0
 def __call__(self, src):
     """Resize a validation image and return it as a normalised tensor."""
     # The values are unused, but the unpacking fails unless ``src.shape``
     # has exactly three entries.
     h, w, _ = src.shape
     resized = timage.imresize(src, self._width, self._height, interp=9)
     # to_tensor converts pixel values from 0-255 to 0-1
     tensor = mx.nd.image.to_tensor(resized)
     return mx.nd.image.normalize(tensor, mean=self._mean, std=self._std)
Ejemplo n.º 7
0
    def __call__(self, src, label):
        """Augment a training sample, optionally filter it, and build targets.

        After colour distortion, random expansion, constrained cropping,
        resizing and a random horizontal flip, the image may be passed through
        ``cv2.bilateralFilter`` (when both ``self._bilateral_kernel_size`` and
        ``self._sigma_vals`` are set) and/or converted to a 3-channel
        grayscale image (when ``self._grayscale`` is truthy).

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``.
        """
        target_size = (self._width, self._height)

        # colour jitter
        img = experimental.image.random_color_distort(src)

        # expand the canvas when the draw exceeds 0.5; padding is mean * 255
        bbox = label
        if np.random.uniform(0, 1) > 0.5:
            pad_value = [m * 255 for m in self._mean]
            img, offsets = timage.random_expand(img, fill=pad_value)
            bbox = tbbox.translate(label,
                                   x_offset=offsets[0],
                                   y_offset=offsets[1])

        # constrained random crop
        img_h, img_w, _ = img.shape
        bbox, region = experimental.bbox.random_crop_with_constraints(
            bbox, (img_w, img_h))
        rx, ry, rw, rh = region
        img = mx.image.fixed_crop(img, rx, ry, rw, rh)

        # resize, interpolation code drawn from 0..4
        img_h, img_w, _ = img.shape
        method = np.random.randint(0, 5)
        img = timage.imresize(img, target_size[0], target_size[1],
                              interp=method)
        bbox = tbbox.resize(bbox, (img_w, img_h), target_size)

        # horizontal flip with probability 0.5
        img_h, img_w, _ = img.shape
        img, flip_flags = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (img_w, img_h), flip_x=flip_flags[0])

        # optional OpenCV / numpy post-processing on a host copy of the image
        use_bilateral = (self._bilateral_kernel_size is not None
                         and self._sigma_vals is not None)
        if use_bilateral or self._grayscale:
            pixels = img.asnumpy()
            if use_bilateral:
                pixels = cv2.bilateralFilter(pixels,
                                             self._bilateral_kernel_size,
                                             self._sigma_vals,
                                             self._sigma_vals)
            if self._grayscale:
                # weighted sum of the first three channels, repeated 3 times
                luma = np.dot(pixels[..., :3], [0.299, 0.587, 0.114])
                pixels = np.repeat(luma[:, :, None], 3, axis=2)
            img = nd.array(pixels)

        # tensor conversion and normalisation
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # Build targets here so CPU workers take that load off the GPU.
        batched = bbox[np.newaxis]
        gt_bboxes = mx.nd.array(batched[:, :, :4])
        gt_ids = mx.nd.array(batched[:, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
 def preprocess(raw_image_buf,
                size=480,
                mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)):
     """Decode an image buffer and prepare a normalised single-image batch.

     Returns ``(batch, decoded)``: ``batch`` is the resized, normalised tensor
     with a leading axis added by ``expand_dims(0)``; ``decoded`` is the image
     as returned by ``imdecode``, before resizing.
     """
     decoded = mx.img.imdecode(raw_image_buf)
     square = timage.imresize(decoded, size, size)
     tensor = mx.nd.image.to_tensor(square)
     normalised = mx.nd.image.normalize(tensor, mean=mean, std=std)
     batch = normalised.expand_dims(0)
     return batch, decoded
Ejemplo n.º 9
0
    def __call__(self, src, label):
        """Resize a validation image and its boxes, then normalise the image.

        Returns ``(img, bbox)`` with ``bbox`` cast to the image tensor's dtype.
        """
        src_h, src_w, _ = src.shape
        dst_size = (self._width, self._height)

        # fixed interpolation code 9 for validation
        resized = timage.imresize(src, dst_size[0], dst_size[1], interp=9)
        boxes = tbbox.resize(label, in_size=(src_w, src_h), out_size=dst_size)

        tensor = mx.nd.image.to_tensor(resized)
        tensor = mx.nd.image.normalize(tensor, mean=self._mean, std=self._std)
        return tensor, boxes.astype(tensor.dtype)
Ejemplo n.º 10
0
    def __call__(self, src, label):
        """Apply the training transform and build targets for three anchor sets.

        The sample is cropped by ``self.random_baiducrop``, colour-distorted,
        resized to ``(self._width, self._height)``, randomly flipped and
        normalised.

        Returns ``(img, bbox)`` when ``self._anchors`` is None. Otherwise
        returns a 7-tuple: the image, then the class targets for the face,
        head and body anchors, then the box targets in the same order.
        """
        # One zero entry per input box, passed along to the crop helper.
        target = np.zeros(shape=(label.shape[0], ))

        # Pyramid anchor sampling; the third return value is not used here.
        cropped, bbox, _ = self.random_baiducrop(src, label[:, :4], target)

        # colour distortion on an NDArray copy of the crop
        img = random_color_distort(mx.nd.array(cropped))

        # resize, interpolation code drawn from 0..4
        out_size = (self._width, self._height)
        cur_h, cur_w, _ = img.shape
        method = np.random.randint(0, 5)
        img = gimage.imresize(img, out_size[0], out_size[1], interp=method)
        bbox = gbbox.resize(bbox, (cur_w, cur_h), out_size)

        # horizontal flip with probability 0.5
        cur_h, cur_w, _ = img.shape
        img, flipped = gimage.random_flip(img, px=0.5)
        bbox = gbbox.flip(bbox, (cur_w, cur_h), flip_x=flipped[0])

        # tensor conversion and normalisation
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox

        # Build targets here so CPU workers take that load off the GPU.
        face_anchors, head_anchors, body_anchors = self._anchors
        gt_bboxes = mx.nd.array(bbox[:, :4]).expand_dims(0)
        gt_ids = mx.nd.zeros((1, gt_bboxes.shape[1], 1), dtype=gt_bboxes.dtype)

        cls_out = []
        box_out = []
        for anchors in (face_anchors, head_anchors, body_anchors):
            cls_t, box_t, _ = self._target_generator(
                anchors, None, gt_bboxes, gt_ids)
            cls_out.append(cls_t[0])
            box_out.append(box_t[0])

        return (img,) + tuple(cls_out) + tuple(box_out)
Ejemplo n.º 11
0
    def __call__(self, src, label):
        """Augment a training sample and optionally pre-compute its targets.

        Returns ``(img, bbox)`` when ``self._target_generator`` is None.
        Otherwise returns ``(img, objectness, center_targets, scale_targets,
        weights, class_targets, gt_bboxes)``, each target indexed at ``[0]``.
        """
        size = (self._width, self._height)

        # colour jitter
        img = experimental.image.random_color_distort(src)

        # expand the canvas when the draw exceeds 0.5
        bbox = label
        if np.random.uniform(0, 1) > 0.5:
            fill = [m * 255 for m in self._mean]
            img, shift = timage.random_expand(img, fill=fill)
            bbox = tbbox.translate(label, x_offset=shift[0], y_offset=shift[1])

        # constrained random crop
        height, width, _ = img.shape
        bbox, window = experimental.bbox.random_crop_with_constraints(
            bbox, (width, height))
        wx, wy, ww, wh = window
        img = mx.image.fixed_crop(img, wx, wy, ww, wh)

        # resize, interpolation code drawn from 0..4
        height, width, _ = img.shape
        interp_code = np.random.randint(0, 5)
        img = timage.imresize(img, size[0], size[1], interp=interp_code)
        bbox = tbbox.resize(bbox, (width, height), size)

        # horizontal flip with probability 0.5
        height, width, _ = img.shape
        img, did_flip = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (width, height), flip_x=did_flip[0])

        # tensor conversion and normalisation
        img = mx.nd.image.normalize(mx.nd.image.to_tensor(img),
                                    mean=self._mean,
                                    std=self._std)

        generator = self._target_generator
        if generator is None:
            return img, bbox.astype(img.dtype)

        # Build targets here so CPU workers take that load off the GPU.
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        # with mixup enabled, the last column is passed as the mix ratio
        gt_mixratio = (mx.nd.array(bbox[np.newaxis, :, -1:])
                       if self._mixup else None)
        targets = generator(self._fake_x, self._feat_maps, self._anchors,
                            self._offsets, gt_bboxes, gt_ids, gt_mixratio)
        objectness, center_targets, scale_targets, weights, class_targets = targets
        return (img, objectness[0], center_targets[0], scale_targets[0],
                weights[0], class_targets[0], gt_bboxes[0])
Ejemplo n.º 12
0
    def __call__(self, src, label):
        """Augment a training sample and build binarised anchor targets.

        Returns ``(img, bbox)`` when ``self._anchors`` is None. Otherwise
        returns ``(img, anchor_cls_targets[0], anchor_box_targets[0], boxes)``,
        where class targets greater than 0 are replaced by 1 and ``boxes`` is
        the first five columns of ``bbox`` cast to the image dtype.
        """
        # colour jitter
        img = experimental.image.random_color_distort(src)

        # expand the canvas when the draw exceeds 0.5
        expand_roll = np.random.uniform(0, 1)
        if expand_roll > 0.5:
            img, expand = timage.random_expand(
                img, fill=[m * 255 for m in self._mean])
            bbox = tbbox.translate(label,
                                   x_offset=expand[0],
                                   y_offset=expand[1])
        else:
            bbox = label

        # constrained random crop
        rows, cols, _ = img.shape
        bbox, crop_rect = experimental.bbox.random_crop_with_constraints(
            bbox, (cols, rows))
        cx, cy, cw, ch = crop_rect
        img = mx.image.fixed_crop(img, cx, cy, cw, ch)

        # resize, interpolation code drawn from 0..4
        rows, cols, _ = img.shape
        interp = np.random.randint(0, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (cols, rows), (self._width, self._height))

        # horizontal flip with probability 0.5
        rows, cols, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (cols, rows), flip_x=flips[0])

        # tensor conversion and normalisation
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # Build targets here so CPU workers take that load off the GPU.
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        anchor_cls_targets, anchor_box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)

        # Collapse every positive class id to 1; other values pass through.
        # Original note: positive anchor is 1, negative is 0, ignored is -1.
        is_positive = anchor_cls_targets > 0
        ones = mx.nd.ones_like(anchor_cls_targets)
        anchor_cls_targets = mx.nd.where(is_positive, ones, anchor_cls_targets)

        labelled_boxes = bbox[:, :5].astype(img.dtype)
        return (img, anchor_cls_targets[0], anchor_box_targets[0],
                labelled_boxes)
    def __call__(self, src, label):
        """Augment a training image/label pair and optionally build targets.

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``.
        """
        final_size = (self._width, self._height)

        # colour jitter
        img = experimental.image.random_color_distort(src)

        # expand the canvas when the draw exceeds 0.5
        bbox = label
        if np.random.uniform(0, 1) > 0.5:
            mean_fill = [m * 255 for m in self._mean]
            img, expand = timage.random_expand(img, fill=mean_fill)
            bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])

        # constrained random crop
        im_h, im_w, _ = img.shape
        bbox, crop = experimental.bbox.random_crop_with_constraints(bbox, (im_w, im_h))
        crop_x, crop_y, crop_w, crop_h = crop
        img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

        # resize, interpolation code drawn from 0..4
        im_h, im_w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = timage.imresize(img, final_size[0], final_size[1], interp=interp)
        bbox = tbbox.resize(bbox, (im_w, im_h), final_size)

        # horizontal flip with probability 0.5
        im_h, im_w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (im_w, im_h), flip_x=flips[0])

        # tensor conversion and normalisation
        tensor = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(tensor, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # Anchors were supplied, so compute the training targets here.
        # Original author's notes on the inputs:
        #   gt_bboxes: box positions on this image in pixel coordinates,
        #              (number of boxes in the image) x 4
        #   gt_ids:    the matching labels, (number of boxes in the image) x 1
        # np.newaxis makes room for the batch dimension.
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
    def __call__(self, src, bbox):
        """Transform an image and its boxes for training or validation.

        Colour jitter and the random horizontal flip are applied only when
        ``self._val`` is falsy. The constrained random crop and the resize to
        ``(self._width, self._height)`` always run. The image is finally
        converted with ``to_tensor``, brought back to the 0-255 range and has
        the per-channel mean in ``self._mean`` subtracted.

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``.
        """
        if not self._val:
            # random color jittering
            src = experimental.image.random_color_distort(src)

        img = src
        # random cropping
        h, w, _ = img.shape
        bbox, crop = random_crop_with_constraints(bbox, (w, h),
                                                  self._height,
                                                  self._width,
                                                  min_scale=0.9,
                                                  max_scale=1,
                                                  max_trial=50)
        x0, y0, w, h = crop
        img = mx.image.fixed_crop(img, x0, y0, w, h)

        # resize with random interpolation
        h, w, _ = img.shape
        interp = np.random.randint(1, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

        if not self._val:
            # random horizontal flip
            h, w, _ = img.shape
            img, flips = timage.random_flip(img, px=0.5)
            bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

        # to tensor
        img = mx.nd.image.to_tensor(img)
        # to_tensor divides pixel values by 255, so multiplying by 255 (the
        # previous code used 256) restores the original 0-255 scale before
        # the mean is removed.
        for channel in range(3):
            img[channel, :, :] = mx.nd.subtract(img[channel, :, :] * 255,
                                                self._mean[channel])

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # generate training target so cpu workers can help reduce the workload on gpu
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
Ejemplo n.º 15
0
    def __call__(self, src, label, idx=None):
        """Apply the validation transform to one frame or a stack of frames.

        A 3-axis ``src`` is treated as a single frame: a leading axis is added
        for processing and removed again before returning. Each frame is
        resized to ``(self._width, self._height)`` and normalised.

        When the resized ``bbox`` is a list, its entries are packed into one
        array padded with -1, with room for at most 100 boxes per entry, and
        trimmed to the largest box count seen.

        Returns ``(img, bbox)``, or ``(img, bbox, idx)`` when ``idx`` is given.
        """
        was_three = len(src.shape) == 3
        if was_three:
            src = mx.nd.expand_dims(src, axis=0)

        # resize every frame into a pre-allocated output buffer
        k, h, w, c = src.shape
        tmp = mx.nd.ones((k, self._height, self._width, c), ctx=src.context)
        for i in range(k):
            tmp[i] = timage.imresize(src[i],
                                     self._width,
                                     self._height,
                                     interp=9)
        img = tmp
        bbox = tbbox.resize(label,
                            in_size=(w, h),
                            out_size=(self._width, self._height))

        # to tensor, also transforms from k,h,w,c to k,c,h,w
        img = mx.nd.image.to_tensor(img)
        # normalise frame by frame
        for i in range(k):
            img[i] = mx.nd.image.normalize(img[i],
                                           mean=self._mean,
                                           std=self._std)

        if was_three:
            # Drop only the frame axis added above. A bare squeeze() would
            # also remove any other length-1 axis, e.g. a single channel.
            img = mx.nd.squeeze(img, axis=0)

        # if multiple temporal outputs
        if isinstance(bbox, list):
            max_boxes = 0
            gt_bboxes_t = mx.nd.ones((len(bbox), 100, 5)) * -1  # max is 100
            for t, frame_boxes in enumerate(bbox):
                count = frame_boxes.shape[0]
                max_boxes = max(max_boxes, count)
                gt_bboxes_t[t, :count, :] = frame_boxes.astype(
                    gt_bboxes_t.dtype)
            bbox = gt_bboxes_t[:, :max_boxes, :]

        if idx is not None:
            return img, bbox.astype(img.dtype), idx
        return img, bbox.astype(img.dtype)
    def __call__(self, src, label):
        """Resize and normalise a validation sample, optionally building targets.

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``.
        """
        # resize with the fixed interpolation code 9
        in_h, in_w, _ = src.shape
        out_size = (self._width, self._height)
        resized = timage.imresize(src, out_size[0], out_size[1], interp=9)
        bbox = tbbox.resize(label, (in_w, in_h), out_size)

        # tensor conversion and normalisation
        img = mx.nd.image.normalize(mx.nd.image.to_tensor(resized),
                                    mean=self._mean,
                                    std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # Build targets here so CPU workers take that load off the GPU.
        batched = bbox[np.newaxis]
        gt_bboxes = mx.nd.array(batched[:, :, :4])
        gt_ids = mx.nd.array(batched[:, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
Ejemplo n.º 17
0
def transform(src, label, size=640):
    """Crop an image/label pair and resize the crop to ``size`` x ``size``.

    ``try_crop`` is attempted up to four times, stopping at the first result
    that is not None: with ``size``, with the shorter image side, and then
    both again with ``last_chance=True``.

    Parameters
    ----------
    src : image; ``src.shape[:2]`` supplies the two side lengths.
    label : box array handed to ``try_crop`` and ``bbox_crop``.
    size : side length of the returned image.

    Returns
    -------
    tuple
        ``(im, bbox)``. When no box survives the crop, ``bbox`` is a single
        all-zero row of 7 float32 values.

    Raises
    ------
    AssertionError
        If every crop attempt returns None.
    """
    # (use the shorter image side?, extra keyword arguments for try_crop)
    attempts = (
        (False, {}),
        (True, {}),
        (False, {'last_chance': True}),
        (True, {'last_chance': True}),
    )
    crop = None
    for use_short_side, extra in attempts:
        crop_size = min(src.shape[:2]) if use_short_side else size
        crop = try_crop(label, src.shape, crop_size, **extra)
        if crop is not None:
            break
    if crop is None:
        # Raised explicitly because a bare ``assert`` is stripped under
        # ``python -O``; the exception type and message are unchanged.
        raise AssertionError('transform failure type: crop')

    bbox = bbox_crop(label, crop)
    if bbox.shape[0] == 0:
        bbox = np.zeros((1, 7), dtype=np.float32)
    im = mx.image.fixed_crop(src, *crop)
    if crop[2] != size:
        im = gimage.imresize(im, size, size)
        # Rescale the first four columns by the same factor as the image.
        # float() forces true division even where int / int would truncate.
        bbox[:, :4] = bbox[:, :4] * (float(size) / crop[2])
    return im, bbox
Ejemplo n.º 18
0
def new_trainloader_call(self, src, label):
    """Replacement train-loader call with a changed augmentation pipeline.

    The steps are colour jitter, random expansion, constrained random crop,
    resize and normalisation; no horizontal flip is applied.

    Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
    ``(img, cls_targets[0], box_targets[0])``.
    """
    dst = (self._width, self._height)

    # colour jitter
    img = experimental.image.random_color_distort(src)

    # expand the canvas when the draw exceeds 0.5
    bbox = label
    if np.random.uniform(0, 1) > 0.5:
        fill_value = [m * 255 for m in self._mean]
        img, offset = timage.random_expand(img, fill=fill_value)
        bbox = tbbox.translate(label, x_offset=offset[0], y_offset=offset[1])

    # constrained random crop
    height, width, _ = img.shape
    bbox, box = experimental.bbox.random_crop_with_constraints(
        bbox, (width, height))
    bx, by, bw, bh = box
    img = mx.image.fixed_crop(img, bx, by, bw, bh)

    # resize, interpolation code drawn from 0..4
    height, width, _ = img.shape
    mode = np.random.randint(0, 5)
    img = timage.imresize(img, dst[0], dst[1], interp=mode)
    bbox = tbbox.resize(bbox, (width, height), dst)

    # tensor conversion and normalisation
    img = mx.nd.image.normalize(mx.nd.image.to_tensor(img),
                                mean=self._mean,
                                std=self._std)
    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build targets here so CPU workers take that load off the GPU.
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)
    return img, cls_targets[0], box_targets[0]
Ejemplo n.º 19
0
    def __call__(self, src, label):
        """Resize, optionally filter, and normalise a validation sample.

        The image may be passed through ``cv2.bilateralFilter`` (when both
        ``self._bilateral_kernel_size`` and ``self._sigma_vals`` are set)
        and/or converted to a 3-channel grayscale image (when
        ``self._grayscale`` is truthy).

        Returns ``(img, bbox)`` with ``bbox`` cast to the image tensor's dtype.
        """
        # resize with the fixed interpolation code 9
        src_h, src_w, _ = src.shape
        dst_size = (self._width, self._height)
        img = timage.imresize(src, dst_size[0], dst_size[1], interp=9)
        bbox = tbbox.resize(label, in_size=(src_w, src_h), out_size=dst_size)

        # optional OpenCV / numpy post-processing on a host copy of the image
        use_bilateral = (self._bilateral_kernel_size is not None
                         and self._sigma_vals is not None)
        if use_bilateral or self._grayscale:
            pixels = img.asnumpy()
            if use_bilateral:
                pixels = cv2.bilateralFilter(pixels,
                                             self._bilateral_kernel_size,
                                             self._sigma_vals,
                                             self._sigma_vals)
            if self._grayscale:
                # weighted sum of the first three channels, repeated 3 times
                luma = np.dot(pixels[..., :3], [0.299, 0.587, 0.114])
                pixels = np.repeat(luma[:, :, None], 3, axis=2)
            img = nd.array(pixels)

        tensor = mx.nd.image.to_tensor(img)
        tensor = mx.nd.image.normalize(tensor, mean=self._mean, std=self._std)
        return tensor, bbox.astype(tensor.dtype)
Ejemplo n.º 20
0
    def __call__(self, src, label):
        """Apply the training transform to one frame or a stack of frames.

        A 3-axis ``src`` is treated as a single frame: a leading axis is added
        for processing and removed again after normalisation. All frames share
        the same random expansion, crop, interpolation method and flip.

        Returns
        -------
        tuple
            ``(img, bbox)`` when ``self._target_generator`` is None.
            Otherwise ``(img, objectness, center_targets, scale_targets,
            weights, class_targets, gt_bboxes)``. With a single entry in
            ``bbox`` each target is indexed at ``[0]``; with several entries
            the targets are concatenated along axis 0 and the boxes are
            returned padded with -1 (at most 100 boxes per entry).
        """
        img = src
        was_three = len(img.shape) == 3
        if was_three:
            img = mx.nd.expand_dims(img, axis=0)

        # random color jittering
        img = tvideo.random_color_distort(img)

        # random expansion with prob 0.5
        if np.random.uniform(0, 1) > 0.5:
            img, expand = tvideo.random_expand(
                img, fill=[m * 255 for m in self._mean])
            bbox = tbbox.translate(label,
                                   x_offset=expand[0],
                                   y_offset=expand[1])
        else:
            bbox = label

        # random cropping, same window for every frame
        k, h, w, c = img.shape
        bbox, crop = tbbox.random_crop_with_constraints(bbox, (w, h))
        x0, y0, w, h = crop
        img = img[:, y0:y0 + h, x0:x0 + w, :]

        # resize with random interpolation into a pre-allocated buffer
        k, h, w, c = img.shape
        interp = np.random.randint(0, 5)
        tmp = mx.nd.ones((k, self._height, self._width, c), ctx=img.context)
        for i in range(k):
            tmp[i] = timage.imresize(img[i],
                                     self._width,
                                     self._height,
                                     interp=interp)
        img = tmp
        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

        # random horizontal flip with prob 0.5 (axis 2 is width in k,h,w,c)
        k, h, w, c = img.shape
        if np.random.uniform(0, 1) > 0.5:
            img = mx.nd.flip(img, axis=2)
            bbox = tbbox.flip(bbox, (w, h), flip_x=True)

        # to tensor, also transforms from k,h,w,c to k,c,h,w
        img = mx.nd.image.to_tensor(img)
        # normalise frame by frame
        for i in range(k):
            img[i] = mx.nd.image.normalize(img[i],
                                           mean=self._mean,
                                           std=self._std)

        if was_three:
            # Drop only the frame axis added above. A bare squeeze() would
            # also remove any other length-1 axis, e.g. a single channel.
            img = mx.nd.squeeze(img, axis=0)

        if self._target_generator is None:
            return img, bbox.astype(img.dtype)

        bboxs = bbox

        max_boxes = 0
        gt_bboxes_t = mx.nd.ones((len(bboxs), 100, 4)) * -1  # max is 100
        objectness_t = []
        center_targets_t = []
        scale_targets_t = []
        weights_t = []
        class_targets_t = []
        for ts, frame_boxes in enumerate(bboxs):
            # generate training target so cpu workers can help reduce the workload on gpu
            gt_bboxes = mx.nd.array(frame_boxes[np.newaxis, :, :4])
            if frame_boxes.shape[-1] == 6:  # one class
                gt_ids = mx.nd.array(frame_boxes[np.newaxis, :, 4:5])
            else:
                # allow multiple classes in form of 1-hotish vector
                gt_ids = mx.nd.array(
                    frame_boxes[np.newaxis, :, 4:4 + self._num_classes])

            if self._mixup:
                gt_mixratio = mx.nd.array(frame_boxes[np.newaxis, :, -1:])
            else:
                gt_mixratio = None
            objectness, center_targets, scale_targets, weights, class_targets = self._target_generator(
                self._fake_x, self._feat_maps, self._anchors, self._offsets,
                gt_bboxes, gt_ids, gt_mixratio)

            # single entry: return un-batched targets straight away
            if len(bboxs) == 1:
                return (img, objectness[0], center_targets[0],
                        scale_targets[0], weights[0], class_targets[0],
                        gt_bboxes[0])

            objectness_t.append(objectness)
            center_targets_t.append(center_targets)
            scale_targets_t.append(scale_targets)
            weights_t.append(weights)
            class_targets_t.append(class_targets)

            max_boxes = max(max_boxes, gt_bboxes.shape[1])
            gt_bboxes_t[ts, :gt_bboxes.shape[1], :] = gt_bboxes[0]

        objectness_t = mx.nd.concat(*objectness_t, dim=0)
        center_targets_t = mx.nd.concat(*center_targets_t, dim=0)
        scale_targets_t = mx.nd.concat(*scale_targets_t, dim=0)
        weights_t = mx.nd.concat(*weights_t, dim=0)
        class_targets_t = mx.nd.concat(*class_targets_t, dim=0)

        # trim the padded box buffer to the largest box count seen
        return (img, objectness_t, center_targets_t, scale_targets_t,
                weights_t, class_targets_t, gt_bboxes_t[:, :max_boxes, :])
Ejemplo n.º 21
0
	def network_inference(self):
		"""Detect the requested object in the current frame and publish the result.

		The colour frame (self.color_img) is resized to (self.width, self.height),
		normalised with self.mean / self.std and run through self.net. Predictions
		are filtered by self.filter_predictions; among the detections whose class
		equals self.pipeline_required_class only the highest-scoring one is kept
		and stored in self.labels / self.bboxes.

		Published outputs:
		  * box strictly inside the GG-CNN area: detection_ready=True,
		    reposition_robot_flag=False (and self.receive_bb_status is set).
		  * box outside the area: a [horizontal, vertical] offset on
		    self.reposition_coord, detection_ready=True, reposition_robot_flag=True.
		  * requested class not detected: both flags False.

		In every case the annotated colour frame is blended with the depth image
		and published on self.img_pub.
		"""
		color_img = self.color_img
		depth_image = self.depth_image

		depth_height_res, depth_width_res = depth_image.shape

		# Image pre-processing
		frame = mx.nd.array(cv2.cvtColor(color_img, cv2.COLOR_BGR2RGB)).astype('uint8')
		frame = timage.imresize(frame, self.width, self.height, 1)
		frame_tensor = mx.nd.image.to_tensor(frame)
		frame_tensor = mx.nd.image.normalize(frame_tensor, mean=self.mean, std=self.std)

		# Run frame through network
		class_IDs, scores, bounding_boxes = self.net(frame_tensor.expand_dims(axis=0).as_in_context(self.ctx))

		# Filter bounding boxes by their scores
		fbounding_boxes, fscores, fclass_IDs = self.filter_predictions(bounding_boxes, scores, class_IDs)

		# we need to resize the bounding box back to the original resolution (640, 480) (width, height)
		resized_bbox = tbbox.resize(fbounding_boxes, (self.width, self.height), (self.depth_img_width, self.depth_img_height))
		img = timage.imresize(frame, self.depth_img_width, self.depth_img_height, 1)

		# GG-CNN grasping area as [xmin, ymin, xmax, ymax] and its reference centre.
		# NOTE(review): the x-range 190..480 is 290 px wide and its midpoint is 335,
		# not 320 - confirm these constants against the GG-CNN crop.
		GGCNN_area = [190, 0, 480, 300]
		GGCNN_area_center = [320, 150] # width, height

		img_2 = img.asnumpy()
		img = cv2.rectangle(img_2, (GGCNN_area[0], GGCNN_area[1]), (GGCNN_area[2], GGCNN_area[3]), (255, 0, 0), 1)

		bbox_list, fscores_list, fclass_IDs_list = [], [], [] # bounding boxes of the chosen class

		# If any object is found
		if fclass_IDs.size > 0:
			# If the request object is found
			if self.pipeline_required_class in fclass_IDs:
				print('found obj')
				# Several detections may carry the requested class (false positives
				# included), so keep only the highest-scoring one (first on ties).
				chosen_class_index = [i for i, x in enumerate(fclass_IDs) if x == self.pipeline_required_class]
				best_index = max(chosen_class_index, key=lambda i: fscores[i])

				bbox_list = [resized_bbox[best_index]]
				fscores_list = [fscores[best_index]]
				fclass_IDs_list = [fclass_IDs[best_index]]

				bbox_list = self.resize_bounding_boxes(bbox_list)
				self.labels = fclass_IDs_list
				self.bboxes = bbox_list

				for bbox in bbox_list:
					inside_area = (
						bbox[0] > GGCNN_area[0] and bbox[1] > GGCNN_area[1]
						and bbox[2] < GGCNN_area[2] and bbox[3] < GGCNN_area[3]
					)
					if inside_area:
						print('obj inside ggcnn_area')

						self.receive_bb_status = True

						# Set the flag detection_ready
						self.detection_ready.publish(True)
						self.reposition_robot_flag.publish(False)
					else:
						print('obj outside ggcnn_area')

						bbox_center_point_x = (bbox[2] - bbox[0])/2 + bbox[0] # width
						bbox_center_point_y = (bbox[3] - bbox[1])/2 + bbox[1] # height

						dist_x = bbox_center_point_x - GGCNN_area_center[0] # width
						dist_y = GGCNN_area_center[1] - bbox_center_point_y # height

						# np.sign (rather than dist/abs(dist)) so that a centre aligned
						# with the area centre on one axis yields a zero offset instead
						# of a division by zero.
						dist_x_dir = np.sign(dist_x)
						dist_y_dir = np.sign(dist_y)

						# Depth value sampled at the GG-CNN area centre pixel
						ggcnn_center_area = depth_image[GGCNN_area_center[1], GGCNN_area_center[0]]

						self.horizontal_FOV = 52
						self.vertical_FOV = 60

						# Pixel offset -> angle (fraction of the FOV) -> lateral distance
						# at the sampled depth. The /1000 presumably converts millimetres
						# to metres - confirm against the depth topic's units.
						largura_2 = 2.0 * ggcnn_center_area * np.tan(self.horizontal_FOV * abs(dist_x) / depth_width_res / 2.0 / 180.0 * np.pi) / 1000 * dist_x_dir
						altura_2 = 2.0 * ggcnn_center_area * np.tan(self.vertical_FOV * abs(dist_y) / depth_height_res / 2.0 / 180.0 * np.pi) / 1000 * dist_y_dir

						reposition_points = Float32MultiArray()
						reposition_points.data = [largura_2, altura_2]
						self.reposition_coord.publish(reposition_points)

						self.detection_ready.publish(True)
						self.reposition_robot_flag.publish(True)
			else:
				print('The object ({}) was not found'.format(self.classes[self.pipeline_required_class]))
				self.detection_ready.publish(False)
				self.reposition_robot_flag.publish(False)
		else:
			print('No objects (including the requested one ({})) were found'.format(self.classes[self.pipeline_required_class]))
			self.detection_ready.publish(False)
			self.reposition_robot_flag.publish(False)

		bbox_list = np.array(bbox_list)
		fscores_list = np.array(fscores_list)
		fclass_IDs_list = np.array(fclass_IDs_list)

		# Draw the kept detection (if any) and overlay the result on the depth image
		img = gcv.utils.viz.cv_plot_bbox(img, bbox_list, fscores_list, fclass_IDs_list, class_names=self.net.classes)
		depth_image = cv2.cvtColor(depth_image, cv2.COLOR_GRAY2BGR)
		depth_image = depth_image.astype('uint8')
		img = img.astype('uint8')
		added_image = cv2.addWeighted(depth_image, 0.7, img, 0.8, 0)

		self.img_pub.publish(CvBridge().cv2_to_imgmsg(added_image, 'bgr8'))
Ejemplo n.º 22
0
 def __call__(self, im):
     """Resize an image and return it as a normalised tensor.

     The input is wrapped in an mx.nd.array, resized to width
     int(self._size * self.scale) and height self._size, converted with
     to_tensor and normalised with self._mean / self._std.
     """
     source = mx.nd.array(im)
     target_width = int(self._size * self.scale)
     target_height = self._size
     resized = timage.imresize(source, target_width, target_height)
     tensor = mx.nd.image.to_tensor(resized)
     return mx.nd.image.normalize(tensor, mean=self._mean, std=self._std)
Ejemplo n.º 23
0
    def __call__(self, src, label):
        """Augment one training image/label pair and optionally build targets.

        Pipeline: colour distortion, random expansion (when a uniform draw
        exceeds 0.5), constrained random crop, resize to
        (self._width, self._height) with a random interpolation mode, then a
        clockwise rotation by a random multiple of 90 degrees.

        The label is indexed as columns 0-3 = box corners, column 4 = ids,
        column 5 = a value that is advanced by the number of quarter turns
        modulo 4 (presumably an orientation index - confirm with the dataset).

        Returns (img, bbox) when self._anchors is None, otherwise
        (img, cls_targets[0], ori_targets[0], box_targets[0]) produced by
        self._target_generator.
        """
        # random color jittering
        img = experimental.image.random_color_distort(src)

        # random expansion with prob 0.5
        if np.random.uniform(0, 1) > 0.5:
            fill_value = [m * 255 for m in self._mean]
            img, offsets = timage.random_expand(img, fill=fill_value)
            bbox = tbbox.translate(label, x_offset=offsets[0], y_offset=offsets[1])
        else:
            bbox = label

        # random cropping
        img_h, img_w, _ = img.shape
        bbox, crop = experimental.bbox.random_crop_with_constraints(bbox, (img_w, img_h))
        crop_x, crop_y, crop_w, crop_h = crop
        img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

        # resize with random interpolation
        img_h, img_w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (img_w, img_h), (self._width, self._height))

        # random rotation by 0, 90, 180 or 270 degrees clockwise
        h, w, _ = img.shape
        quarter_turns = np.random.randint(0, 4)
        if quarter_turns != 0:
            xmin, ymin, xmax, ymax = (bbox[:, col] for col in range(4))
            if quarter_turns == 1:
                # 90 degrees clockwise: transpose, then mirror the columns
                img = nd.transpose(img, [1, 0, 2])[:, ::-1, :]
                corners = [h - ymax, xmin, h - ymin, xmax]
                ori_shift = 1
            elif quarter_turns == 2:
                # 180 degrees: mirror both rows and columns
                img = img[::-1, ::-1, :]
                corners = [w - xmax, h - ymax, w - xmin, h - ymin]
                ori_shift = 2
            else:
                # 270 degrees clockwise: transpose, then mirror the rows
                img = nd.transpose(img, [1, 0, 2])[::-1, :, :]
                corners = [ymin, w - xmax, ymax, w - xmin]
                ori_shift = 3
            bbox = np.array(corners + [bbox[:, 4], bbox[:, 5]]).T
            # keep column 5 in step with the rotation applied to the image
            bbox[:, 5] = (bbox[:, 5] + ori_shift) % 4

        # to tensor
        img = mx.nd.image.normalize(
            mx.nd.image.to_tensor(img), mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # generate training target so cpu workers can help reduce the workload on gpu
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        gt_ori = mx.nd.array(bbox[np.newaxis, :, 5:6])
        # The generator runs once per label column; the box targets returned
        # are those of the second call.
        cls_targets, _, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        ori_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ori)
        return img, cls_targets[0], ori_targets[0], box_targets[0]