def __call__(self, src, bbox):
        """Augment one training image with its boxes and optionally build targets.

        The image is enlarged to 1.2x the configured input size, colour
        jittered, randomly cropped, randomly flipped, resized to
        ``(self._width, self._height)``, randomly flipped again, converted to
        a tensor and normalised with ``self._mean`` / ``self._std``.

        Parameters
        ----------
        src : image array indexed as (height, width, channel).
        bbox : array of boxes; columns 0-3 are used as coordinates and
            column 4 as the class id when targets are generated.

        Returns
        -------
        ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``.
        """
        # Enlarge to 1.2x the network input size with a random interpolation.
        src_h, src_w, _ = src.shape
        interp = np.random.randint(1, 5)
        enlarge = 1.2
        big_w = int(self._width * enlarge)
        big_h = int(self._height * enlarge)
        src = timage.imresize(src, big_w, big_h, interp=interp)
        bbox = tbbox.resize(bbox, (src_w, src_h), (big_w, big_h))

        # Random colour jittering.
        img = experimental.image.random_color_distort(src)

        # Random crop around the target size.
        img_h, img_w, _ = img.shape
        bbox, crop = random_crop_with_constraints(
            bbox, (img_w, img_h), self._height, self._width,
            min_scale=0.95, max_scale=1.05, max_trial=50)
        crop_x, crop_y, crop_w, crop_h = crop
        img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

        # First random horizontal flip.
        # NOTE(review): a second random flip follows the resize below, so the
        # sample is flipped twice independently - confirm this is intended.
        img_h, img_w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (img_w, img_h), flip_x=flips[0])

        # Resize to the network input size with a random interpolation.
        img_h, img_w, _ = img.shape
        interp = np.random.randint(1, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (img_w, img_h), (self._width, self._height))

        # Second random horizontal flip.
        img_h, img_w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (img_w, img_h), flip_x=flips[0])

        # Convert to a tensor and normalise.
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # Build the training targets here so the data workers, not the
        # training loop, pay for it. A leading axis of size 1 is added.
        boxes = mx.nd.array(bbox[np.newaxis, :, :4])
        class_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, boxes, class_ids)
        return img, cls_targets[0], box_targets[0]
예제 #2
0
    def __call__(self, src, bbox):
        """Resize (and, outside validation, randomly flip) an image with its boxes.

        The image is resized to ``(self._width, self._height)`` with a random
        interpolation, flipped horizontally with probability 0.5 unless
        ``self._val`` is truthy, converted to a tensor and mean-subtracted
        per channel.

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``.
        """
        # Resize with a random interpolation.
        src_h, src_w, _ = src.shape
        interp = np.random.randint(1, 5)
        img = timage.imresize(src, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (src_w, src_h), (self._width, self._height))

        if not self._val:
            # Random horizontal flip, training only.
            img_h, img_w, _ = img.shape
            img, flips = timage.random_flip(img, px=0.5)
            bbox = tbbox.flip(bbox, (img_w, img_h), flip_x=flips[0])

        # Convert to a tensor, rescale by 256 and subtract the channel mean.
        # NOTE(review): the factor is 256, not 255 - confirm it matches the
        # scaling the network weights were trained with.
        img = mx.nd.image.to_tensor(img)
        for channel in range(3):
            img[channel, :, :] = mx.nd.subtract(img[channel, :, :] * 256,
                                                self._mean[channel])

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # Build the training targets in the data worker; a leading axis of
        # size 1 is added to the boxes and ids.
        boxes = mx.nd.array(bbox[np.newaxis, :, :4])
        class_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, boxes, class_ids)
        return img, cls_targets[0], box_targets[0]
예제 #3
0
    def __call__(self, src, label):
        """Apply the training augmentation to an image and its boxes.

        Steps: colour jitter, constrained random crop, resize to
        ``(self._width, self._height)`` with a random interpolation, random
        horizontal flip, tensor conversion and normalisation.

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``; in that case every box is
        given class id 0.
        """
        # Random colour jittering.
        img = random_color_distort(src)

        # Random cropping.
        img_h, img_w, _ = img.shape
        bbox, crop = random_crop_with_constraints(label, (img_w, img_h))
        crop_x, crop_y, crop_w, crop_h = crop
        img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

        # Resize with a random interpolation.
        img_h, img_w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = gimage.imresize(img, self._width, self._height, interp=interp)
        bbox = gbbox.resize(bbox, (img_w, img_h), (self._width, self._height))

        # Random horizontal flip.
        img_h, img_w, _ = img.shape
        img, flips = gimage.random_flip(img, px=0.5)
        bbox = gbbox.flip(bbox, (img_w, img_h), flip_x=flips[0])

        # Convert to a tensor and normalise.
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox

        # Build the training targets in the data worker. The ids are all
        # zeros, one per box, with a leading axis of size 1.
        boxes = mx.nd.array(bbox[:, :4])
        gt_bboxes = boxes.expand_dims(0)
        gt_ids = mx.nd.zeros((1, gt_bboxes.shape[1], 1), dtype=gt_bboxes.dtype)
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        return img, cls_targets[0], box_targets[0]
예제 #4
0
    def __call__(self, src, label):
        """Apply the training augmentation, with optional filtering, to a sample.

        Steps: colour jitter, random expansion (probability 0.5), constrained
        random crop, resize with a random interpolation, random horizontal
        flip, optional bilateral filter and/or grayscale conversion, tensor
        conversion and normalisation.

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``.
        """
        # Random colour jittering.
        img = experimental.image.random_color_distort(src)

        # Random expansion with probability 0.5; the border is filled with
        # the mean scaled by 255.
        if np.random.uniform(0, 1) > 0.5:
            fill = [m * 255 for m in self._mean]
            img, expand = timage.random_expand(img, fill=fill)
            bbox = tbbox.translate(label,
                                   x_offset=expand[0],
                                   y_offset=expand[1])
        else:
            bbox = label

        # Random cropping.
        img_h, img_w, _ = img.shape
        bbox, crop = experimental.bbox.random_crop_with_constraints(
            bbox, (img_w, img_h))
        crop_x, crop_y, crop_w, crop_h = crop
        img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

        # Resize with a random interpolation.
        img_h, img_w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (img_w, img_h), (self._width, self._height))

        # Random horizontal flip.
        img_h, img_w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (img_w, img_h), flip_x=flips[0])

        # Optional filtering runs on a NumPy copy, then goes back to NDArray.
        use_bilateral = (self._bilateral_kernel_size is not None
                         and self._sigma_vals is not None)
        if use_bilateral or self._grayscale:
            img = img.asnumpy()
            if use_bilateral:
                img = cv2.bilateralFilter(img, self._bilateral_kernel_size,
                                          self._sigma_vals, self._sigma_vals)
            if self._grayscale:
                # Weighted channel sum, replicated back to three channels.
                gray = np.dot(img[..., :3], [0.299, 0.587, 0.114])
                img = np.repeat(gray[:, :, None], 3, axis=2)
            img = nd.array(img)

        # Convert to a tensor and normalise.
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # Build the training targets in the data worker; a leading axis of
        # size 1 is added to the boxes and ids.
        boxes = mx.nd.array(bbox[np.newaxis, :, :4])
        class_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, boxes, class_ids)
        return img, cls_targets[0], box_targets[0]
예제 #5
0
    def __call__(self, src, label, segm):
        """Transform a training image with its boxes and segmentation masks.

        The image is resized so its shorter side equals ``short`` (drawn
        from ``self._short`` when ``self._random_resize`` is set) within
        ``self._max_size``, then randomly flipped horizontally. ``segm`` is
        decoded with ``cocomask.decode`` and each mask is resized to the
        image size and flipped with the image.

        Returns ``(img, bbox, masks)`` when ``self._anchors`` is None,
        otherwise ``(img, bbox, masks, cls_target, box_target, box_mask)``.
        """
        # Resize the shorter side while staying within max_size.
        src_h, src_w, _ = src.shape
        short = (randint(self._short[0], self._short[1])
                 if self._random_resize else self._short)
        img = timage.resize_short_within(src, short, self._max_size, interp=1)
        bbox = tbbox.resize(label, (src_w, src_h), (img.shape[1], img.shape[0]))

        # Random horizontal flip.
        img_h, img_w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (img_w, img_h), flip_x=flips[0])

        # Decode the masks (the last axis indexes instances) and resize each
        # one to the image size with nearest-neighbour interpolation.
        decoded = cocomask.decode(segm)
        target_size = (img.shape[1], img.shape[0])
        mask_list = [
            mx.nd.array(cv2.resize(decoded[:, :, idx], target_size,
                                   interpolation=cv2.INTER_NEAREST))
            for idx in range(decoded.shape[-1])
        ]
        # n * (im_height, im_width) -> (n, im_height, im_width)
        masks = mx.nd.stack(*mask_list, axis=0)
        if flips[0]:
            # Mirror the masks along the width axis to follow the image flip.
            masks = mx.nd.flip(masks, axis=2)

        # Convert to a tensor and normalise.
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype), masks

        # Generate the RPN targets in the data worker.
        gt_bboxes = mx.nd.array(bbox[:, :4])
        data_shape = (1, 3, img.shape[1], img.shape[2])
        if self._multi_stage:
            # One feature symbol per stage: infer each output shape, cut the
            # matching anchors to that size and concatenate them.
            oshapes = [feat_sym.infer_shape(data=data_shape)[1][0]
                       for feat_sym in self._feat_sym]
            stage_anchors = [
                anchor[:, :, :oshape[2], :oshape[3], :].reshape((-1, 4))
                for anchor, oshape in zip(self._anchors, oshapes)
            ]
            anchors = mx.nd.concat(*stage_anchors, dim=0)
        else:
            oshape = self._feat_sym.infer_shape(data=data_shape)[1][0]
            anchors = self._anchors[:, :, :oshape[2], :oshape[3], :].reshape((-1, 4))

        cls_target, box_target, box_mask = self._target_generator(
            gt_bboxes, anchors, img.shape[2], img.shape[1])
        return img, bbox.astype(img.dtype), masks, cls_target, box_target, box_mask
예제 #6
0
    def __call__(self, src, label):
        """Resize a validation image and its boxes, then normalise the image.

        Returns ``(img, bbox)`` with ``bbox`` cast to the image dtype.
        """
        # Resize the image and its boxes to the network input size.
        src_h, src_w, _ = src.shape
        target_size = (self._width, self._height)
        resized = timage.imresize(src, self._width, self._height, interp=9)
        bbox = tbbox.resize(label, in_size=(src_w, src_h), out_size=target_size)

        # Convert to a tensor and normalise.
        tensor = mx.nd.image.to_tensor(resized)
        tensor = mx.nd.image.normalize(tensor, mean=self._mean, std=self._std)
        return tensor, bbox.astype(tensor.dtype)
예제 #7
0
    def __call__(self, src, label):
        """Apply the training transform to an image and its boxes.

        Steps: ``self.random_baiducrop`` sampling, colour jitter, resize to
        ``(self._width, self._height)`` with a random interpolation, random
        horizontal flip, tensor conversion and normalisation.

        Parameters
        ----------
        src : source image passed to ``self.random_baiducrop``.
        label : array whose first four columns are box coordinates; other
            columns are ignored.

        Returns
        -------
        ``(img, bbox)`` when ``self._anchors`` is None. Otherwise a 7-tuple
        ``(img, face_cls, head_cls, body_cls, face_box, head_box, body_box)``
        with one target pair per anchor set in ``self._anchors``.
        """
        # Pyramid anchor sampling. Every box gets a zero target; the labels
        # returned by the sampler are not used afterwards.
        target = np.zeros(shape=(label.shape[0], ))
        img, bbox, _ = self.random_baiducrop(src, label[:, :4], target)

        # Colour distortion.
        img = mx.nd.array(img)
        img = random_color_distort(img)

        # Resize with a random interpolation.
        h, w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = gimage.imresize(img, self._width, self._height, interp=interp)
        bbox = gbbox.resize(bbox, (w, h), (self._width, self._height))

        # Random horizontal flip.
        h, w, _ = img.shape
        img, flips = gimage.random_flip(img, px=0.5)
        bbox = gbbox.flip(bbox, (w, h), flip_x=flips[0])

        # Convert to a tensor and normalise.
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox

        # Build the training targets in the data worker. All boxes share
        # class id 0, with a leading axis of size 1.
        face_anchors, head_anchors, body_anchors = self._anchors
        gt_bboxes = mx.nd.array(bbox[:, :4]).expand_dims(0)
        gt_ids = mx.nd.zeros((1, gt_bboxes.shape[1], 1), dtype=gt_bboxes.dtype)

        face_cls_targets, face_box_targets, _ = self._target_generator(
            face_anchors, None, gt_bboxes, gt_ids)

        head_cls_targets, head_box_targets, _ = self._target_generator(
            head_anchors, None, gt_bboxes, gt_ids)

        body_cls_targets, body_box_targets, _ = self._target_generator(
            body_anchors, None, gt_bboxes, gt_ids)

        return img, \
               face_cls_targets[0], head_cls_targets[0], body_cls_targets[0], \
               face_box_targets[0], head_box_targets[0], body_box_targets[0]
def transform_gt_bbox(img_path, model, bbox):
    """Rescale ground-truth boxes to the size the model's resize would produce.

    The image at ``img_path`` is read and resized with
    ``timage.resize_short_within`` using the preset for ``model``; ``bbox``
    is then mapped from the original image size to the resized one.

    Parameters
    ----------
    img_path : path of the image the boxes belong to.
    model : ``'yolo'`` (short side 416, max size 1024) or ``'rcnn'``
        (short side 600, max size 1000).
    bbox : boxes in the coordinates of the original image.

    Returns
    -------
    The boxes rescaled to the resized image.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported names. Previously this
        surfaced as an UnboundLocalError, and only after the image was read.
    """
    # (short side, max size) per supported model family.
    resize_presets = {
        'yolo': (416, 1024),
        'rcnn': (600, 1000),
    }
    if model not in resize_presets:
        raise ValueError(
            "unsupported model {!r}; expected one of {}".format(
                model, sorted(resize_presets)))
    short, max_size = resize_presets[model]

    img = mx.image.imread(img_path)
    h, w, _ = img.shape
    resized_img = timage.resize_short_within(img, short, max_size)
    bbox = tbbox.resize(bbox, (w, h),
                        (resized_img.shape[1], resized_img.shape[0]))
    return bbox
예제 #9
0
def process_frame(image, net, ctx):
    """Run the detector on one frame and map the boxes back to frame size.

    ``image`` is squeezed and converted to a uint8 NDArray, transformed with
    ``transform_test`` (short side 600, max size 1000), copied to ``ctx[0]``
    and passed through ``net``.

    Returns ``(bboxes, scores, ids)`` for the first batch entry as NumPy
    arrays, with the boxes expressed in the input frame's coordinates.
    """
    # TODO: currently only supports batch size 1
    frame = mx.nd.array(np.squeeze(image), dtype='uint8')
    x, _ = transform_test(frame, 600, max_size=1000)
    x = x.copyto(ctx[0])

    # Get the predictions.
    ids, scores, bboxes = net(x)

    # Map the boxes from the network input size back to the frame size.
    frame_h, frame_w, _ = frame.shape
    _, _, net_h, net_w = x.shape
    bboxes[0] = tbbox.resize(bboxes[0],
                             in_size=(net_w, net_h),
                             out_size=(frame_w, frame_h))
    return bboxes[0].asnumpy(), scores[0].asnumpy(), ids[0].asnumpy()
예제 #10
0
    def __call__(self, src, label):
        """Apply the training augmentation and build binary anchor targets.

        Steps: colour jitter, random expansion (probability 0.5), constrained
        random crop, resize with a random interpolation, random horizontal
        flip, tensor conversion and normalisation.

        Returns ``(img, bbox)`` when ``self._anchors`` is None. Otherwise
        ``(img, anchor_cls_targets[0], anchor_box_targets[0], bbox[:, :5])``,
        where every positive class target has been collapsed to 1.
        """
        # Random colour jittering.
        img = experimental.image.random_color_distort(src)

        # Random expansion with probability 0.5; the border is filled with
        # the mean scaled by 255.
        if np.random.uniform(0, 1) > 0.5:
            fill = [m * 255 for m in self._mean]
            img, expand = timage.random_expand(img, fill=fill)
            bbox = tbbox.translate(label,
                                   x_offset=expand[0],
                                   y_offset=expand[1])
        else:
            bbox = label

        # Random cropping.
        img_h, img_w, _ = img.shape
        bbox, crop = experimental.bbox.random_crop_with_constraints(
            bbox, (img_w, img_h))
        crop_x, crop_y, crop_w, crop_h = crop
        img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

        # Resize with a random interpolation.
        img_h, img_w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (img_w, img_h), (self._width, self._height))

        # Random horizontal flip.
        img_h, img_w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (img_w, img_h), flip_x=flips[0])

        # Convert to a tensor and normalise.
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # Build the training targets in the data worker; a leading axis of
        # size 1 is added to the boxes and ids.
        boxes = mx.nd.array(bbox[np.newaxis, :, :4])
        class_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        anchor_cls_targets, anchor_box_targets, _ = self._target_generator(
            self._anchors, None, boxes, class_ids)

        # Collapse every positive class to 1: positive anchor is 1, negative
        # anchor is 0 and ignored is -1.
        is_positive = anchor_cls_targets > 0
        anchor_cls_targets = mx.nd.where(is_positive,
                                         mx.nd.ones_like(anchor_cls_targets),
                                         anchor_cls_targets)
        gt_boxes = bbox[:, :5].astype(img.dtype)
        return img, anchor_cls_targets[0], anchor_box_targets[0], gt_boxes
    def __call__(self, src, bbox):
        """Crop, resize and mean-subtract a sample; jitter and flip when training.

        Colour jitter and the random horizontal flip are skipped when
        ``self._val`` is truthy; the random crop and the randomly
        interpolated resize always run.

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``.
        """
        training = not self._val

        if training:
            # Random colour jittering.
            src = experimental.image.random_color_distort(src)

        # Random cropping.
        src_h, src_w, _ = src.shape
        bbox, crop = random_crop_with_constraints(
            bbox, (src_w, src_h), self._height, self._width,
            min_scale=0.9, max_scale=1, max_trial=50)
        crop_x, crop_y, crop_w, crop_h = crop
        img = mx.image.fixed_crop(src, crop_x, crop_y, crop_w, crop_h)

        # Resize with a random interpolation.
        img_h, img_w, _ = img.shape
        interp = np.random.randint(1, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (img_w, img_h), (self._width, self._height))

        if training:
            # Random horizontal flip.
            img_h, img_w, _ = img.shape
            img, flips = timage.random_flip(img, px=0.5)
            bbox = tbbox.flip(bbox, (img_w, img_h), flip_x=flips[0])

        # Convert to a tensor, rescale by 256 and subtract the channel mean.
        # NOTE(review): the factor is 256, not 255 - confirm it matches the
        # scaling the network weights were trained with.
        img = mx.nd.image.to_tensor(img)
        for channel in range(3):
            img[channel, :, :] = mx.nd.subtract(img[channel, :, :] * 256,
                                                self._mean[channel])

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # Build the training targets in the data worker; a leading axis of
        # size 1 is added to the boxes and ids.
        boxes = mx.nd.array(bbox[np.newaxis, :, :4])
        class_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, boxes, class_ids)
        return img, cls_targets[0], box_targets[0]
    def __call__(self, src, label):
        """Apply the training augmentation to an image and its boxes.

        Steps: colour jitter, random expansion (probability 0.5), constrained
        random crop, resize with a random interpolation, random horizontal
        flip, tensor conversion and normalisation.

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``.
        """
        # Random colour jittering.
        img = experimental.image.random_color_distort(src)

        # Random expansion with probability 0.5; the border is filled with
        # the mean scaled by 255.
        if np.random.uniform(0, 1) > 0.5:
            fill = [m * 255 for m in self._mean]
            img, expand = timage.random_expand(img, fill=fill)
            bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
        else:
            bbox = label

        # Random cropping.
        img_h, img_w, _ = img.shape
        bbox, crop = experimental.bbox.random_crop_with_constraints(
            bbox, (img_w, img_h))
        crop_x, crop_y, crop_w, crop_h = crop
        img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

        # Resize with a random interpolation.
        img_h, img_w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (img_w, img_h), (self._width, self._height))

        # Random horizontal flip.
        img_h, img_w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (img_w, img_h), flip_x=flips[0])

        # Convert to a tensor and normalise.
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # Anchors were supplied, so compute the targets here. Before this
        # step:
        #   - the box columns hold the positions on this image in pixel
        #     coordinates: (number of boxes in the image) x 4
        #   - the id column holds the matching labels:
        #     (number of boxes in the image) x 1
        # np.newaxis makes room for the batch dimension.
        boxes = mx.nd.array(bbox[np.newaxis, :, :4])
        class_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, boxes, class_ids)
        return img, cls_targets[0], box_targets[0]
    def __call__(self, src, label):
        """Resize and normalise a validation sample, optionally building targets.

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], box_targets[0])``.
        """
        # Resize with a fixed interpolation (interp=9).
        src_h, src_w, _ = src.shape
        target_size = (self._width, self._height)
        img = timage.imresize(src, self._width, self._height, interp=9)
        bbox = tbbox.resize(label, (src_w, src_h), target_size)

        # Convert to a tensor and normalise.
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # Build the targets in the data worker; a leading axis of size 1 is
        # added to the boxes and ids.
        boxes = mx.nd.array(bbox[np.newaxis, :, :4])
        class_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, boxes, class_ids)
        return img, cls_targets[0], box_targets[0]
예제 #14
0
def new_trainloader_call(self, src, label):
    """Replacement training ``__call__`` that omits the horizontal flip.

    Steps: colour jitter, random expansion (probability 0.5), constrained
    random crop, resize with a random interpolation, tensor conversion and
    normalisation.

    Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
    ``(img, cls_targets[0], box_targets[0])``.
    """
    # Random colour jittering.
    img = experimental.image.random_color_distort(src)

    # Random expansion with probability 0.5; the border is filled with the
    # mean scaled by 255.
    if np.random.uniform(0, 1) > 0.5:
        fill = [m * 255 for m in self._mean]
        img, expand = timage.random_expand(img, fill=fill)
        bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
    else:
        bbox = label

    # Random cropping.
    img_h, img_w, _ = img.shape
    bbox, crop = experimental.bbox.random_crop_with_constraints(
        bbox, (img_w, img_h))
    crop_x, crop_y, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

    # Resize with a random interpolation.
    img_h, img_w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (img_w, img_h), (self._width, self._height))

    # Convert to a tensor and normalise.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)
    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build the training targets in the data worker; a leading axis of
    # size 1 is added to the boxes and ids.
    boxes = mx.nd.array(bbox[np.newaxis, :, :4])
    class_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, boxes, class_ids)
    return img, cls_targets[0], box_targets[0]
예제 #15
0
    def __call__(self, src, label):
        """Resize, optionally filter, and normalise a validation sample.

        A bilateral filter is applied when both ``self._bilateral_kernel_size``
        and ``self._sigma_vals`` are set; the image is converted to
        three-channel grayscale when ``self._grayscale`` is truthy.

        Returns ``(img, bbox)`` with ``bbox`` cast to the image dtype.
        """
        # Resize the image and its boxes to the network input size.
        src_h, src_w, _ = src.shape
        img = timage.imresize(src, self._width, self._height, interp=9)
        bbox = tbbox.resize(label,
                            in_size=(src_w, src_h),
                            out_size=(self._width, self._height))

        # Optional filtering runs on a NumPy copy, then goes back to NDArray.
        use_bilateral = (self._bilateral_kernel_size is not None
                         and self._sigma_vals is not None)
        if use_bilateral or self._grayscale:
            img = img.asnumpy()
            if use_bilateral:
                img = cv2.bilateralFilter(img, self._bilateral_kernel_size,
                                          self._sigma_vals, self._sigma_vals)
            if self._grayscale:
                # Weighted channel sum, replicated back to three channels.
                gray = np.dot(img[..., :3], [0.299, 0.587, 0.114])
                img = np.repeat(gray[:, :, None], 3, axis=2)
            img = nd.array(img)

        # Convert to a tensor and normalise.
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)
        return img, bbox.astype(img.dtype)
예제 #16
0
    def __call__(self, src, label):
        """Apply the training augmentation with a random quarter-turn rotation.

        Steps: colour jitter, random expansion (probability 0.5), constrained
        random crop, resize with a random interpolation, a clockwise rotation
        by 0, 90, 180 or 270 degrees, tensor conversion and normalisation.
        Box column 5 is advanced by the number of quarter turns, modulo 4.

        Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
        ``(img, cls_targets[0], ori_targets[0], box_targets[0])``.
        """
        # Random colour jittering.
        img = experimental.image.random_color_distort(src)

        # Random expansion with probability 0.5; the border is filled with
        # the mean scaled by 255.
        if np.random.uniform(0, 1) > 0.5:
            fill = [m * 255 for m in self._mean]
            img, expand = timage.random_expand(img, fill=fill)
            bbox = tbbox.translate(label,
                                   x_offset=expand[0],
                                   y_offset=expand[1])
        else:
            bbox = label

        # Random cropping.
        img_h, img_w, _ = img.shape
        bbox, crop = experimental.bbox.random_crop_with_constraints(
            bbox, (img_w, img_h))
        crop_x, crop_y, crop_w, crop_h = crop
        img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

        # Resize with a random interpolation.
        img_h, img_w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (img_w, img_h), (self._width, self._height))

        # Random rotation by a multiple of 90 degrees clockwise (0 leaves the
        # sample untouched). The box columns are rebuilt as
        # (xmin, ymin, xmax, ymax, column 4, column 5) in the rotated frame.
        img_h, img_w, _ = img.shape
        quarter_turns = np.random.randint(0, 4)
        if quarter_turns == 1:
            # 90 degrees clockwise: transpose, then mirror along the width.
            img = nd.transpose(img, [1, 0, 2])
            img = img[:, ::-1, :]
            bbox = np.array([
                img_h - bbox[:, 3], bbox[:, 0], img_h - bbox[:, 1], bbox[:, 2],
                bbox[:, 4], bbox[:, 5]
            ]).T
        elif quarter_turns == 2:
            # 180 degrees: mirror along both spatial axes.
            img = img[::-1, ::-1, :]
            bbox = np.array([
                img_w - bbox[:, 2], img_h - bbox[:, 3],
                img_w - bbox[:, 0], img_h - bbox[:, 1],
                bbox[:, 4], bbox[:, 5]
            ]).T
        elif quarter_turns == 3:
            # 270 degrees clockwise: transpose, then mirror along the height.
            img = nd.transpose(img, [1, 0, 2])
            img = img[::-1, :, :]
            bbox = np.array([
                bbox[:, 1], img_w - bbox[:, 2], bbox[:, 3], img_w - bbox[:, 0],
                bbox[:, 4], bbox[:, 5]
            ]).T
        if quarter_turns:
            # Advance the stored orientation by the applied quarter turns.
            bbox[:, 5] = (bbox[:, 5] + quarter_turns) % 4

        # Convert to a tensor and normalise.
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

        if self._anchors is None:
            return img, bbox.astype(img.dtype)

        # Build the training targets in the data worker. The generator runs
        # twice: once with the class ids (column 4) and once with the
        # orientation ids (column 5); the box targets of the second run are
        # the ones returned.
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        gt_ori = mx.nd.array(bbox[np.newaxis, :, 5:6])
        cls_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ids)
        ori_targets, box_targets, _ = self._target_generator(
            self._anchors, None, gt_bboxes, gt_ori)
        return img, cls_targets[0], ori_targets[0], box_targets[0]
예제 #17
0
	def network_inference(self):
		"""Detect the requested object in the current frame and publish the result.

		Reads ``self.color_img`` and ``self.depth_image``, runs ``self.net`` on
		the resized colour frame, filters the predictions and keeps the
		highest-scoring box of class ``self.pipeline_required_class``.

		If that box lies strictly inside the fixed grasping area,
		``detection_ready`` is published True and ``reposition_robot_flag``
		False. Otherwise a two-element offset computed from the depth at the
		area centre is published on ``reposition_coord`` and both flags are
		published True. When the class is not found, both flags are published
		False. An overlay of the depth image and the annotated frame is always
		published on ``img_pub``.

		Side effects: sets ``self.labels`` and ``self.bboxes`` when the class is
		found, ``self.receive_bb_status`` when the box is inside the area, and
		``self.horizontal_FOV`` / ``self.vertical_FOV`` when it is outside.
		"""
		color_img = self.color_img
		depth_image = self.depth_image

		depth_height_res, depth_width_res = depth_image.shape

		# Image pre-processing
		frame = mx.nd.array(cv2.cvtColor(color_img, cv2.COLOR_BGR2RGB)).astype('uint8')
		frame = timage.imresize(frame, self.width, self.height, 1)
		frame_tensor = mx.nd.image.to_tensor(frame)
		frame_tensor = mx.nd.image.normalize(frame_tensor, mean=self.mean, std=self.std)

		# Run frame through network
		class_IDs, scores, bounding_boxes = self.net(frame_tensor.expand_dims(axis=0).as_in_context(self.ctx))

		# Filter bounding boxes by their scores
		fbounding_boxes, fscores, fclass_IDs = self.filter_predictions(bounding_boxes, scores, class_IDs)

		# we need to resize the bounding box back to the original resolution (640, 480) (width, height)
		resized_bbox = tbbox.resize(fbounding_boxes, (self.width, self.height), (self.depth_img_width, self.depth_img_height))
		img = timage.imresize(frame, self.depth_img_width, self.depth_img_height, 1)

		# check if the bounding box is inside the 300x300 area of the GG-CNN grasping area
		GGCNN_area = [190, 0, 480, 300]
		GGCNN_area_center = [320, 150] # width, height

		img_2 = img.asnumpy()
		img = cv2.rectangle(img_2, (GGCNN_area[0], GGCNN_area[1]), (GGCNN_area[2], GGCNN_area[3]), (255, 0, 0), 1)

		bbox_list, fscores_list, fclass_IDs_list = [], [], [] # bounding boxes of the chosen class

		# If any object is found
		if fclass_IDs.size > 0:
			# If the request object is found
			if self.pipeline_required_class in fclass_IDs:
				print('found obj')
				# we need to find all ocurrences of the class identified to consider
				# situation where we have false positives as well
				chosen_class_index = [i for i, x in enumerate(fclass_IDs) if x == self.pipeline_required_class]
				for class_index in chosen_class_index:
					bbox_list.append(resized_bbox[class_index])
					fscores_list.append(fscores[class_index])
					fclass_IDs_list.append(fclass_IDs[class_index])

				# Keep only the first detection that reaches the highest score.
				max_score = max(fscores_list)
				largest_score_bb_index = [i for i, x in enumerate(fscores_list) if x == max_score]

				bbox_list = [bbox_list[largest_score_bb_index[0]]]
				fscores_list = [fscores_list[largest_score_bb_index[0]]]
				fclass_IDs_list = [fclass_IDs_list[largest_score_bb_index[0]]]

				bbox_list = self.resize_bounding_boxes(bbox_list)
				self.labels = fclass_IDs_list
				self.bboxes = bbox_list

				for bbox in bbox_list:
					if bbox[0] > GGCNN_area[0] and bbox[1] > GGCNN_area[1] and bbox[2] < GGCNN_area[2] and \
						bbox[3] < GGCNN_area[3]:
						print('obj inside ggcnn_area')

						self.receive_bb_status = True

						# Set the flag detection_ready
						self.detection_ready.publish(True)
						self.reposition_robot_flag.publish(False)
					else:
						print('obj outside ggcnn_area')

						bbox_center_point_x = (bbox[2] - bbox[0])/2 + bbox[0] # width
						bbox_center_point_y = (bbox[3] - bbox[1])/2 + bbox[1] # height

						dist_x = bbox_center_point_x - GGCNN_area_center[0] # width
						dist_y = GGCNN_area_center[1] - bbox_center_point_y # height

						# Direction of each offset as -1, 0 or +1. np.sign replaces
						# dist/abs(dist), which divided by zero when the box centre
						# was aligned with the area centre on that axis.
						# NOTE(review): assumes the bbox entries are NumPy/Python
						# scalars, as the np.tan calls below already require.
						dist_x_dir = np.sign(dist_x)
						dist_y_dir = np.sign(dist_y)

						# Depth value at the centre of the grasping area.
						ggcnn_center_area = depth_image[GGCNN_area_center[1], GGCNN_area_center[0]]

						self.horizontal_FOV = 52
						self.vertical_FOV = 60

						# Convert each pixel offset into an angle (the share of the
						# field of view it covers) and project it at the measured
						# depth; the sign restores the direction.
						# NOTE(review): the /1000 presumably converts mm to m - confirm.
						largura_2 = 2.0 * ggcnn_center_area * np.tan(self.horizontal_FOV * abs(dist_x) / depth_width_res / 2.0 / 180.0 * np.pi) / 1000 * dist_x_dir
						altura_2 = 2.0 * ggcnn_center_area * np.tan(self.vertical_FOV * abs(dist_y) / depth_height_res / 2.0 / 180.0 * np.pi) / 1000 * dist_y_dir

						reposition_points = Float32MultiArray()
						reposition_points.data = [largura_2, altura_2]
						self.reposition_coord.publish(reposition_points)

						self.detection_ready.publish(True)
						self.reposition_robot_flag.publish(True)
			else:
				print('The object ({}) was not found'.format(self.classes[self.pipeline_required_class]))
				self.detection_ready.publish(False)
				self.reposition_robot_flag.publish(False)
		else:
			print('No objects (including the requested one ({})) were found'.format(self.classes[self.pipeline_required_class]))
			self.detection_ready.publish(False)
			self.reposition_robot_flag.publish(False)

		bbox_list = np.array(bbox_list)
		fscores_list = np.array(fscores_list)
		fclass_IDs_list = np.array(fclass_IDs_list)

		# Draw the kept detection and blend the annotated frame over the depth image.
		img = gcv.utils.viz.cv_plot_bbox(img, bbox_list, fscores_list, fclass_IDs_list, class_names=self.net.classes)
		depth_image = cv2.cvtColor(depth_image, cv2.COLOR_GRAY2BGR)
		depth_image = depth_image.astype('uint8')
		img = img.astype('uint8')
		added_image = cv2.addWeighted(depth_image, 0.7, img, 0.8, 0)

		self.img_pub.publish(CvBridge().cv2_to_imgmsg(added_image, 'bgr8'))