def __call__(self, src, bbox):
    """Augment one training image/box pair and optionally build anchor targets.

    Steps, in order: upscale to 1.2x the configured size, colour jitter,
    constrained random crop, random flip, resize to the configured size,
    a second random flip, tensor conversion and normalisation.

    Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
    ``(img, cls_targets[0], box_targets[0])``.
    """
    scale = 1.2
    enlarged_w = int(self._width * scale)
    enlarged_h = int(self._height * scale)

    # Upscale with a randomly drawn interpolation code in 1..4.
    in_h, in_w, _ = src.shape
    interp = np.random.randint(1, 5)
    src = timage.imresize(src, enlarged_w, enlarged_h, interp=interp)
    bbox = tbbox.resize(bbox, (in_w, in_h), (enlarged_w, enlarged_h))

    # Colour jitter.
    img = experimental.image.random_color_distort(src)

    # Constrained random crop.
    cur_h, cur_w, _ = img.shape
    bbox, crop = random_crop_with_constraints(
        bbox, (cur_w, cur_h), self._height, self._width,
        min_scale=0.95, max_scale=1.05, max_trial=50)
    left, top, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, left, top, crop_w, crop_h)

    # First random horizontal flip (the original labelled this step
    # "resize with random interpolation", which it is not).
    cur_h, cur_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Resize to the configured size, again with a random interpolation code.
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(1, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    # Second, independent random horizontal flip.
    # NOTE(review): two flips in a row look redundant - confirm intent.
    cur_h, cur_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build targets here so CPU workers take that work off the GPU.
    boxes_nd = mx.nd.array(bbox[np.newaxis, :, :4])
    ids_nd = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, boxes_nd, ids_nd)
    return img, cls_targets[0], box_targets[0]
def test_gluon_cv(self):
    """Smoke-test ``imresize`` on a synthetic image."""
    # Fake 300x300 RGB image, laid out Height x Width x Channel as OpenCV expects.
    fake_image = mx.random.uniform(0, 255, (300, 300, 3)).astype('uint8')

    # Resize to 200x200. This call uses OpenCV; GluonCV is not of much use
    # if OpenCV is missing or fails.
    resized = imresize(fake_image, 200, 200)

    self.assertEqual((200, 200, 3), resized.shape)
def test_gluon_cv(self):
    """Check that ``imresize`` turns a 300x300x3 image into 200x200x3."""
    source_shape = (300, 300, 3)  # Height x Width x Channel, as OpenCV expects
    expected_shape = (200, 200, 3)

    # Random uint8 pixels stand in for a real RGB picture.
    picture = mx.random.uniform(0, 255, source_shape).astype('uint8')

    # The resize goes through OpenCV, so this doubles as an OpenCV check.
    picture = imresize(picture, 200, 200)

    self.assertEqual(expected_shape, picture.shape)
def __call__(self, src, label):
    """Apply the training transform to one image/label pair.

    Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
    ``(img, cls_targets[0], box_targets[0])``. All ground-truth class ids
    passed to the target generator are zero.
    """
    # Colour jitter.
    img = random_color_distort(src)

    # Constrained random crop (original note: keeps aspect ratio = 1).
    full_h, full_w, _ = img.shape
    bbox, crop = random_crop_with_constraints(label, (full_w, full_h))
    left, top, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, left, top, crop_w, crop_h)

    # Resize with a randomly drawn interpolation code in 0..4.
    cropped_h, cropped_w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = gimage.imresize(img, self._width, self._height, interp=interp)
    bbox = gbbox.resize(bbox, (cropped_w, cropped_h),
                        (self._width, self._height))

    # Random horizontal flip.
    out_h, out_w, _ = img.shape
    img, flips = gimage.random_flip(img, px=0.5)
    bbox = gbbox.flip(bbox, (out_w, out_h), flip_x=flips[0])

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox

    # Build targets here so CPU workers take that work off the GPU.
    gt_bboxes = mx.nd.array(bbox[:, :4]).expand_dims(0)
    zero_ids = mx.nd.zeros((1, gt_bboxes.shape[1], 1), dtype=gt_bboxes.dtype)
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, gt_bboxes, zero_ids)
    return img, cls_targets[0], box_targets[0]
def __call__(self, src, bbox):
    """Resize, optionally flip, and mean-subtract one image/box pair.

    The random flip is skipped when ``self._val`` is truthy. Returns
    ``(img, bbox)`` when ``self._anchors`` is None, otherwise
    ``(img, cls_targets[0], box_targets[0])``.
    """
    img = src

    # Resize with a randomly drawn interpolation code in 1..4.
    in_h, in_w, _ = img.shape
    interp = np.random.randint(1, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (in_w, in_h), (self._width, self._height))

    if not self._val:
        # Random horizontal flip, training only.
        out_h, out_w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (out_w, out_h), flip_x=flips[0])

    # Convert to tensor, then rescale each of the three channels by 256 and
    # subtract its mean.
    # NOTE(review): to_tensor presumably divides by 255, so 256 looks
    # slightly off - confirm before changing; existing models may rely on it.
    img = mx.nd.image.to_tensor(img)
    for channel in range(3):
        img[channel, :, :] = mx.nd.subtract(
            img[channel, :, :] * 256, self._mean[channel])

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build targets here so CPU workers take that work off the GPU.
    boxes_nd = mx.nd.array(bbox[np.newaxis, :, :4])
    ids_nd = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, boxes_nd, ids_nd)
    return img, cls_targets[0], box_targets[0]
def __call__(self, src):
    """Apply the validation transform to a single image (no labels)."""
    # The dimensions are not used below; the unpacking is kept because it
    # rejects inputs whose shape does not have exactly three entries.
    _height, _width, _ = src.shape

    resized = timage.imresize(src, self._width, self._height, interp=9)
    tensor = mx.nd.image.to_tensor(resized)  # converts from 0-255 to 0-1
    return mx.nd.image.normalize(tensor, mean=self._mean, std=self._std)
def __call__(self, src, label):
    """Apply the training transform, with optional bilateral filter/grayscale.

    Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
    ``(img, cls_targets[0], box_targets[0])``.
    """
    # Colour jitter.
    img = experimental.image.random_color_distort(src)

    # Random expansion with probability 0.5, padded with the scaled mean.
    if np.random.uniform(0, 1) > 0.5:
        pad_value = [m * 255 for m in self._mean]
        img, expand = timage.random_expand(img, fill=pad_value)
        bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
    else:
        bbox = label

    # Constrained random crop.
    cur_h, cur_w, _ = img.shape
    bbox, crop = experimental.bbox.random_crop_with_constraints(
        bbox, (cur_w, cur_h))
    left, top, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, left, top, crop_w, crop_h)

    # Resize with a randomly drawn interpolation code in 0..4.
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    # Random horizontal flip.
    cur_h, cur_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Optional OpenCV/NumPy stage: it runs on a NumPy copy and the result is
    # wrapped back into an NDArray afterwards.
    use_bilateral = ((self._bilateral_kernel_size is not None)
                     and (self._sigma_vals is not None))
    if use_bilateral or self._grayscale:
        pixels = img.asnumpy()
        if use_bilateral:
            pixels = cv2.bilateralFilter(pixels, self._bilateral_kernel_size,
                                         self._sigma_vals, self._sigma_vals)
        if self._grayscale:
            # Weighted channel sum, replicated back to three channels.
            gray = np.dot(pixels[..., :3], [0.299, 0.587, 0.114])
            pixels = np.repeat(gray[:, :, None], 3, axis=2)
        img = nd.array(pixels)

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build targets here so CPU workers take that work off the GPU.
    boxes_nd = mx.nd.array(bbox[np.newaxis, :, :4])
    ids_nd = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, boxes_nd, ids_nd)
    return img, cls_targets[0], box_targets[0]
def preprocess(raw_image_buf, size=480, mean=(0.485, 0.456, 0.406),
               std=(0.229, 0.224, 0.225)):
    """Decode an encoded image buffer and prepare it as a one-image batch.

    Returns ``(batch, orig_image)``: ``batch`` is the resized, normalised
    tensor with a new leading axis, and ``orig_image`` is the decoded image
    before any resizing.
    """
    orig_image = mx.img.imdecode(raw_image_buf)

    resized = timage.imresize(orig_image, size, size)
    tensor = mx.nd.image.to_tensor(resized)
    normalised = mx.nd.image.normalize(tensor, mean=mean, std=std)

    return normalised.expand_dims(0), orig_image
def __call__(self, src, label):
    """Apply the validation transform to one image/label pair."""
    in_h, in_w, _ = src.shape
    target_size = (self._width, self._height)

    # Resize the image and rescale the boxes to match.
    resized = timage.imresize(src, self._width, self._height, interp=9)
    bbox = tbbox.resize(label, in_size=(in_w, in_h), out_size=target_size)

    tensor = mx.nd.image.to_tensor(resized)
    tensor = mx.nd.image.normalize(tensor, mean=self._mean, std=self._std)
    return tensor, bbox.astype(tensor.dtype)
def __call__(self, src, label):
    """Apply the training transform and build face/head/body anchor targets.

    Returns ``(img, bbox)`` when ``self._anchors`` is None. Otherwise returns
    ``(img, face_cls, head_cls, body_cls, face_box, head_box, body_box)``,
    each target taken at index 0 of the generator output.
    """
    # One zero per ground-truth row, handed to the crop routine as labels.
    target = np.zeros(shape=(label.shape[0], ))

    # Pyramid anchor sampling; the labels it returns are not used afterwards.
    img, boxes, _sampled_labels = self.random_baiducrop(
        src, label[:, :4], target)
    bbox = boxes

    # Colour distortion on the cropped image.
    img = mx.nd.array(img)
    img = random_color_distort(img)

    # Resize with a randomly drawn interpolation code in 0..4.
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = gimage.imresize(img, self._width, self._height, interp=interp)
    bbox = gbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    # Random horizontal flip.
    cur_h, cur_w, _ = img.shape
    img, flips = gimage.random_flip(img, px=0.5)
    bbox = gbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox

    # Build targets here so CPU workers take that work off the GPU.
    face_anchors, head_anchors, body_anchors = self._anchors
    gt_bboxes = mx.nd.array(bbox[:, :4]).expand_dims(0)
    gt_ids = mx.nd.zeros((1, gt_bboxes.shape[1], 1), dtype=gt_bboxes.dtype)

    cls_out = []
    box_out = []
    for anchors in (face_anchors, head_anchors, body_anchors):
        cls_targets, box_targets, _ = self._target_generator(
            anchors, None, gt_bboxes, gt_ids)
        cls_out.append(cls_targets[0])
        box_out.append(box_targets[0])

    return (img,
            cls_out[0], cls_out[1], cls_out[2],
            box_out[0], box_out[1], box_out[2])
def __call__(self, src, label):
    """Apply the training transform and optionally build multi-part targets.

    Returns ``(img, bbox)`` when ``self._target_generator`` is None, otherwise
    ``(img, objectness, center_targets, scale_targets, weights,
    class_targets, gt_bboxes)``, each taken at index 0.
    """
    # Colour jitter.
    img = experimental.image.random_color_distort(src)

    # Random expansion with probability 0.5, padded with the scaled mean.
    if np.random.uniform(0, 1) > 0.5:
        img, expand = timage.random_expand(
            img, fill=[m * 255 for m in self._mean])
        bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
    else:
        bbox = label

    # Constrained random crop.
    cur_h, cur_w, _ = img.shape
    bbox, crop = experimental.bbox.random_crop_with_constraints(
        bbox, (cur_w, cur_h))
    left, top, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, left, top, crop_w, crop_h)

    # Resize with a randomly drawn interpolation code in 0..4.
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    # Random horizontal flip.
    cur_h, cur_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._target_generator is None:
        return img, bbox.astype(img.dtype)

    # Build targets here so CPU workers take that work off the GPU.
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])

    # With mixup enabled, the last label column is passed as the mix ratio.
    gt_mixratio = None
    if self._mixup:
        gt_mixratio = mx.nd.array(bbox[np.newaxis, :, -1:])

    (objectness, center_targets, scale_targets,
     weights, class_targets) = self._target_generator(
         self._fake_x, self._feat_maps, self._anchors, self._offsets,
         gt_bboxes, gt_ids, gt_mixratio)

    return (img, objectness[0], center_targets[0], scale_targets[0],
            weights[0], class_targets[0], gt_bboxes[0])
def __call__(self, src, label):
    """Apply the training transform and build binary anchor targets.

    Returns ``(img, bbox)`` when ``self._anchors`` is None. Otherwise returns
    ``(img, anchor_cls_targets[0], anchor_box_targets[0], bbox[:, :5])`` with
    every positive class target collapsed to 1.
    """
    # Colour jitter.
    img = experimental.image.random_color_distort(src)

    # Random expansion with probability 0.5, padded with the scaled mean.
    if np.random.uniform(0, 1) > 0.5:
        fill_value = [m * 255 for m in self._mean]
        img, expand = timage.random_expand(img, fill=fill_value)
        bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
    else:
        bbox = label

    # Constrained random crop.
    img_h, img_w, _ = img.shape
    bbox, crop = experimental.bbox.random_crop_with_constraints(
        bbox, (img_w, img_h))
    left, top, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, left, top, crop_w, crop_h)

    # Resize with a randomly drawn interpolation code in 0..4.
    img_h, img_w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (img_w, img_h), (self._width, self._height))

    # Random horizontal flip.
    img_h, img_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (img_w, img_h), flip_x=flips[0])

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build targets here so CPU workers take that work off the GPU.
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    anchor_cls_targets, anchor_box_targets, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)

    # Collapse every positive class id to 1. Per the original note:
    # positive anchor is 1, negative anchor is 0 and ignored is -1.
    positive_mask = anchor_cls_targets > 0
    anchor_cls_targets = mx.nd.where(
        positive_mask, mx.nd.ones_like(anchor_cls_targets), anchor_cls_targets)

    return (img, anchor_cls_targets[0], anchor_box_targets[0],
            bbox[:, :5].astype(img.dtype))
def __call__(self, src, label):
    """Apply the training transform to one image/label pair.

    Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
    ``(img, cls_targets[0], box_targets[0])``.
    """
    # Colour jitter.
    img = experimental.image.random_color_distort(src)

    # Random expansion with probability 0.5, padded with the scaled mean.
    expand_image = np.random.uniform(0, 1) > 0.5
    if expand_image:
        img, expand = timage.random_expand(
            img, fill=[m * 255 for m in self._mean])
        bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
    else:
        bbox = label

    # Constrained random crop.
    height, width, _ = img.shape
    bbox, crop = experimental.bbox.random_crop_with_constraints(
        bbox, (width, height))
    crop_x, crop_y, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

    # Resize with a randomly drawn interpolation code in 0..4.
    height, width, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (width, height), (self._width, self._height))

    # Random horizontal flip.
    height, width, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (width, height), flip_x=flips[0])

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Anchors were supplied, so compute the targets below. Per the original
    # note, before this step:
    #   - gt_bboxes holds box positions on one image in pixel coordinates,
    #     shaped (number of boxes in this image) x 4;
    #   - gt_ids holds the matching labels, shaped (number of boxes) x 1.
    # np.newaxis makes room for the batch axis.
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)
    return img, cls_targets[0], box_targets[0]
def __call__(self, src, bbox):
    """Crop, resize and mean-subtract one image/box pair.

    Colour jitter and the random flip are skipped when ``self._val`` is
    truthy; the random crop and the randomly interpolated resize always run.
    Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
    ``(img, cls_targets[0], box_targets[0])``.
    """
    training = not self._val

    if training:
        # Colour jitter.
        src = experimental.image.random_color_distort(src)
    img = src

    # Constrained random crop.
    cur_h, cur_w, _ = img.shape
    bbox, crop = random_crop_with_constraints(
        bbox, (cur_w, cur_h), self._height, self._width,
        min_scale=0.9, max_scale=1, max_trial=50)
    left, top, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, left, top, crop_w, crop_h)

    # Resize with a randomly drawn interpolation code in 1..4.
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(1, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    if training:
        # Random horizontal flip.
        cur_h, cur_w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Convert to tensor, then rescale each of the three channels by 256 and
    # subtract its mean.
    # NOTE(review): to_tensor presumably divides by 255, so 256 looks
    # slightly off - confirm before changing.
    img = mx.nd.image.to_tensor(img)
    for channel in range(3):
        img[channel, :, :] = mx.nd.subtract(
            img[channel, :, :] * 256, self._mean[channel])

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build targets here so CPU workers take that work off the GPU.
    boxes_nd = mx.nd.array(bbox[np.newaxis, :, :4])
    ids_nd = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, boxes_nd, ids_nd)
    return img, cls_targets[0], box_targets[0]
def __call__(self, src, label, idx=None):
    """Apply the validation transform to one frame or a stack of frames.

    Parameters
    ----------
    src : NDArray
        A single ``(h, w, c)`` frame or a ``(k, h, w, c)`` stack of frames.
    label : array or list of arrays
        Boxes for the sample. A list is treated as one box array per time
        step and is padded with -1 into a single array.
    idx : optional
        Passed through unchanged as a third return value when not None.

    Returns
    -------
    ``(img, bbox)`` or ``(img, bbox, idx)``.
    """
    # Promote a single frame to a one-frame stack so both cases share a path.
    single_frame = len(src.shape) == 3
    if single_frame:
        src = mx.nd.expand_dims(src, axis=0)

    # Resize every frame into a pre-allocated stack.
    k, h, w, c = src.shape
    resized = mx.nd.ones((k, self._height, self._width, c), ctx=src.context)
    for i in range(k):
        resized[i] = timage.imresize(src[i], self._width, self._height,
                                     interp=9)

    bbox = tbbox.resize(label, in_size=(w, h),
                        out_size=(self._width, self._height))

    # to_tensor also transforms from k,h,w,c to k,c,h,w.
    img = mx.nd.image.to_tensor(resized)
    for i in range(k):
        img[i] = mx.nd.image.normalize(img[i], mean=self._mean, std=self._std)

    if single_frame:
        # Drop only the frame axis added above. An axis-less squeeze would
        # also remove any other size-1 axis (e.g. a single channel).
        img = mx.nd.squeeze(img, axis=0)

    # Multiple temporal outputs: pad each step's boxes with -1 into one array.
    if isinstance(bbox, list):
        padded = mx.nd.ones((len(bbox), 100, 5)) * -1  # at most 100 boxes/step
        max_boxes = 0
        for t, step_boxes in enumerate(bbox):
            count = step_boxes.shape[0]
            max_boxes = max(max_boxes, count)
            padded[t, :count, :] = step_boxes.astype(padded.dtype)
        bbox = padded[:, :max_boxes, :]

    if idx is not None:
        return img, bbox.astype(img.dtype), idx
    return img, bbox.astype(img.dtype)
def __call__(self, src, label):
    """Apply the validation transform and optionally build anchor targets.

    Returns ``(img, bbox)`` when ``self._anchors`` is None, otherwise
    ``(img, cls_targets[0], box_targets[0])``.
    """
    # Resize with a fixed interpolation code (9) and rescale the boxes.
    in_h, in_w, _ = src.shape
    out_size = (self._width, self._height)
    img = timage.imresize(src, self._width, self._height, interp=9)
    bbox = tbbox.resize(label, (in_w, in_h), out_size)

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build targets here so CPU workers take that work off the GPU.
    boxes_nd = mx.nd.array(bbox[np.newaxis, :, :4])
    ids_nd = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, boxes_nd, ids_nd)
    return img, cls_targets[0], box_targets[0]
def transform(src, label, size=640):
    """Crop ``src`` to a region chosen by ``try_crop`` and resize it to ``size``.

    Crop attempts run in this order and stop at the first success:
    ``size``, the shorter image side, then the same two again with
    ``last_chance=True``.

    Returns
    -------
    ``(im, bbox)``: the cropped (and, if needed, resized) image and the
    cropped boxes. If no box survives the crop, a single all-zero row of
    width 7 is returned instead.

    Raises
    ------
    AssertionError
        If every crop attempt returns None.
    """
    short_side = min(src.shape[:2])
    attempts = (
        (size, {}),
        (short_side, {}),
        (size, {'last_chance': True}),
        (short_side, {'last_chance': True}),
    )

    crop = None
    for crop_size, extra in attempts:
        crop = try_crop(label, src.shape, crop_size, **extra)
        if crop is not None:
            break
    assert crop is not None, 'transform failure type: crop'

    bbox = bbox_crop(label, crop)
    if bbox.shape[0] == 0:
        bbox = np.zeros((1, 7), dtype=np.float32)

    im = mx.image.fixed_crop(src, *crop)
    if crop[2] != size:
        im = gimage.imresize(im, size, size)
        # float() forces true division, so integer operands cannot truncate
        # the scale factor under Python 2 semantics. Only crop[2] is used for
        # the scale - presumably the crop is square; TODO confirm.
        bbox[:, :4] = bbox[:, :4] * (float(size) / crop[2])
    return im, bbox
def new_trainloader_call(self, src, label):
    '''Alternative train-loader call with a changed augmentation pipeline.

    Runs colour jitter, random expansion, constrained random crop and a
    randomly interpolated resize; there is no flip step. Returns
    ``(img, bbox)`` when ``self._anchors`` is None, otherwise
    ``(img, cls_targets[0], box_targets[0])``.
    '''
    # Colour jitter.
    img = experimental.image.random_color_distort(src)

    # Random expansion with probability 0.5, padded with the scaled mean.
    if np.random.uniform(0, 1) > 0.5:
        mean_fill = [m * 255 for m in self._mean]
        img, expand = timage.random_expand(img, fill=mean_fill)
        bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
    else:
        bbox = label

    # Constrained random crop.
    rows, cols, _ = img.shape
    bbox, crop = experimental.bbox.random_crop_with_constraints(
        bbox, (cols, rows))
    left, top, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, left, top, crop_w, crop_h)

    # Resize with a randomly drawn interpolation code in 0..4.
    rows, cols, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (cols, rows), (self._width, self._height))

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build targets here so CPU workers take that work off the GPU.
    boxes_nd = mx.nd.array(bbox[np.newaxis, :, :4])
    ids_nd = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, boxes_nd, ids_nd)
    return img, cls_targets[0], box_targets[0]
def __call__(self, src, label):
    """Apply the validation transform, with optional bilateral filter/grayscale."""
    # Resize the image and rescale the boxes to match.
    in_h, in_w, _ = src.shape
    img = timage.imresize(src, self._width, self._height, interp=9)
    bbox = tbbox.resize(label, in_size=(in_w, in_h),
                        out_size=(self._width, self._height))

    # Optional OpenCV/NumPy stage: it runs on a NumPy copy and the result is
    # wrapped back into an NDArray afterwards.
    use_bilateral = ((self._bilateral_kernel_size is not None)
                     and (self._sigma_vals is not None))
    if use_bilateral or self._grayscale:
        pixels = img.asnumpy()
        if use_bilateral:
            pixels = cv2.bilateralFilter(pixels, self._bilateral_kernel_size,
                                         self._sigma_vals, self._sigma_vals)
        if self._grayscale:
            # Weighted channel sum, replicated back to three channels.
            gray = np.dot(pixels[..., :3], [0.299, 0.587, 0.114])
            pixels = np.repeat(gray[:, :, None], 3, axis=2)
        img = nd.array(pixels)

    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)
    return img, bbox.astype(img.dtype)
def __call__(self, src, label):
    """Apply the training transform to one frame or a stack of frames.

    Parameters
    ----------
    src : NDArray
        A single ``(h, w, c)`` frame or a ``(k, h, w, c)`` stack of frames.
    label : array or list of arrays
        Boxes for the sample. The target-generation path iterates over the
        transformed labels, one box array per time step.

    Returns
    -------
    ``(img, bbox)`` when ``self._target_generator`` is None. With exactly one
    time step, ``(img, objectness[0], center_targets[0], scale_targets[0],
    weights[0], class_targets[0], gt_bboxes[0])``. Otherwise the per-step
    targets concatenated along axis 0, plus the -1-padded ground-truth boxes.
    """
    img = src

    # Promote a single frame to a one-frame stack so both cases share a path.
    single_frame = len(img.shape) == 3
    if single_frame:
        img = mx.nd.expand_dims(img, axis=0)

    # Colour jitter.
    img = tvideo.random_color_distort(img)

    # Random expansion with probability 0.5, padded with the scaled mean.
    if np.random.uniform(0, 1) > 0.5:
        img, expand = tvideo.random_expand(
            img, fill=[m * 255 for m in self._mean])
        bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
    else:
        bbox = label

    # Constrained random crop: the same window is applied to every frame.
    k, h, w, c = img.shape
    bbox, crop = tbbox.random_crop_with_constraints(bbox, (w, h))
    x0, y0, w, h = crop
    img = img[:, y0:y0 + h, x0:x0 + w, :]

    # Resize every frame with one shared random interpolation code (0..4).
    k, h, w, c = img.shape
    interp = np.random.randint(0, 5)
    resized = mx.nd.ones((k, self._height, self._width, c), ctx=img.context)
    for i in range(k):
        resized[i] = timage.imresize(img[i], self._width, self._height,
                                     interp=interp)
    img = resized
    bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

    # Random horizontal flip with probability 0.5 (axis 2 is width in k,h,w,c).
    k, h, w, c = img.shape
    if np.random.uniform(0, 1) > 0.5:
        img = mx.nd.flip(img, axis=2)
        bbox = tbbox.flip(bbox, (w, h), flip_x=True)

    # to_tensor also transforms from k,h,w,c to k,c,h,w.
    img = mx.nd.image.to_tensor(img)
    for i in range(k):
        img[i] = mx.nd.image.normalize(img[i], mean=self._mean, std=self._std)

    if single_frame:
        # Drop only the frame axis added above. An axis-less squeeze would
        # also remove any other size-1 axis (e.g. a single channel).
        img = mx.nd.squeeze(img, axis=0)

    if self._target_generator is None:
        return img, bbox.astype(img.dtype)

    bboxs = bbox
    max_boxes = 0
    gt_bboxes_t = mx.nd.ones((len(bboxs), 100, 4)) * -1  # at most 100 boxes/step
    objectness_t = []
    center_targets_t = []
    scale_targets_t = []
    weights_t = []
    class_targets_t = []

    for ts, bbox in enumerate(bboxs):
        # Build targets here so CPU workers take that work off the GPU.
        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
        if bbox.shape[-1] == 6:
            # One class id per box.
            gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
        else:
            # Multiple classes per box, as a 1-hot-ish vector.
            gt_ids = mx.nd.array(
                bbox[np.newaxis, :, 4:4 + self._num_classes])

        if self._mixup:
            gt_mixratio = mx.nd.array(bbox[np.newaxis, :, -1:])
        else:
            gt_mixratio = None

        (objectness, center_targets, scale_targets,
         weights, class_targets) = self._target_generator(
             self._fake_x, self._feat_maps, self._anchors, self._offsets,
             gt_bboxes, gt_ids, gt_mixratio)

        if len(bboxs) == 1:
            # Single time step: return the un-stacked targets directly.
            return (img, objectness[0], center_targets[0], scale_targets[0],
                    weights[0], class_targets[0], gt_bboxes[0])

        objectness_t.append(objectness)
        center_targets_t.append(center_targets)
        scale_targets_t.append(scale_targets)
        weights_t.append(weights)
        class_targets_t.append(class_targets)

        max_boxes = max(max_boxes, gt_bboxes.shape[1])
        gt_bboxes_t[ts, :gt_bboxes.shape[1], :] = gt_bboxes[0]

    objectness_t = mx.nd.concat(*objectness_t, dim=0)
    center_targets_t = mx.nd.concat(*center_targets_t, dim=0)
    scale_targets_t = mx.nd.concat(*scale_targets_t, dim=0)
    weights_t = mx.nd.concat(*weights_t, dim=0)
    class_targets_t = mx.nd.concat(*class_targets_t, dim=0)

    return (img, objectness_t, center_targets_t, scale_targets_t, weights_t,
            class_targets_t, gt_bboxes_t[:, :max_boxes, :])
def network_inference(self):
    """Detect the requested object in the current frame and publish the outcome.

    Reads ``self.color_img`` and ``self.depth_image``, runs ``self.net`` on
    the resized colour frame, and keeps only the best-scoring detection of
    ``self.pipeline_required_class``. Then:

    * box inside the fixed grasping area: publishes ``detection_ready=True``
      and ``reposition_robot_flag=False``;
    * box outside it: publishes a two-element offset on
      ``reposition_coord`` together with ``detection_ready=True`` and
      ``reposition_robot_flag=True``;
    * nothing suitable found: publishes ``False`` on both flags.

    In every case an overlay of the depth image and the annotated colour
    frame is published on ``self.img_pub``.
    """
    color_img = self.color_img
    depth_image = self.depth_image
    depth_height_res, depth_width_res = depth_image.shape

    # Pre-process: BGR -> RGB, resize to the network input, normalise.
    frame = mx.nd.array(cv2.cvtColor(color_img, cv2.COLOR_BGR2RGB)).astype('uint8')
    frame = timage.imresize(frame, self.width, self.height, 1)
    frame_tensor = mx.nd.image.to_tensor(frame)
    frame_tensor = mx.nd.image.normalize(frame_tensor, mean=self.mean, std=self.std)

    # Run the frame through the network, then filter predictions by score.
    class_IDs, scores, bounding_boxes = self.net(
        frame_tensor.expand_dims(axis=0).as_in_context(self.ctx))
    fbounding_boxes, fscores, fclass_IDs = self.filter_predictions(
        bounding_boxes, scores, class_IDs)

    # Bring boxes and frame back to the depth-image resolution
    # (width, height); the original comment gives (640, 480).
    resized_bbox = tbbox.resize(fbounding_boxes, (self.width, self.height),
                                (self.depth_img_width, self.depth_img_height))
    img = timage.imresize(frame, self.depth_img_width, self.depth_img_height, 1)

    # Fixed GG-CNN grasping area and its centre (width, height).
    GGCNN_area = [190, 0, 480, 300]
    GGCNN_area_center = [320, 150]

    img_2 = img.asnumpy()
    img = cv2.rectangle(img_2, (GGCNN_area[0], GGCNN_area[1]),
                        (GGCNN_area[2], GGCNN_area[3]), (255, 0, 0), 1)

    # Boxes, scores and ids of the chosen class only.
    bbox_list, fscores_list, fclass_IDs_list = [], [], []

    if fclass_IDs.size > 0:
        if self.pipeline_required_class in fclass_IDs:
            print('found obj')
            # Collect every occurrence of the requested class so that false
            # positives are considered as well.
            chosen_class_index = [i for i, x in enumerate(fclass_IDs)
                                  if x == self.pipeline_required_class]
            for class_index in chosen_class_index:
                bbox_list.append(resized_bbox[class_index])
                fscores_list.append(fscores[class_index])
                fclass_IDs_list.append(fclass_IDs[class_index])

            # Keep only the (first) highest-scoring detection.
            max_score = max(fscores_list)
            largest_score_bb_index = [i for i, x in enumerate(fscores_list)
                                      if x == max_score]
            best = largest_score_bb_index[0]
            bbox_list = [bbox_list[best]]
            fscores_list = [fscores_list[best]]
            fclass_IDs_list = [fclass_IDs_list[best]]

            bbox_list = self.resize_bounding_boxes(bbox_list)
            self.labels = fclass_IDs_list
            self.bboxes = bbox_list

            for bbox in bbox_list:
                inside_area = (bbox[0] > GGCNN_area[0] and bbox[1] > GGCNN_area[1]
                               and bbox[2] < GGCNN_area[2] and bbox[3] < GGCNN_area[3])
                if inside_area:
                    print('obj inside ggcnn_area')
                    self.receive_bb_status = True
                    # Set the flag detection_ready.
                    self.detection_ready.publish(True)
                    self.reposition_robot_flag.publish(False)
                else:
                    print('obj outside ggcnn_area')
                    bbox_center_point_x = (bbox[2] - bbox[0]) / 2 + bbox[0]  # width
                    bbox_center_point_y = (bbox[3] - bbox[1]) / 2 + bbox[1]  # height

                    dist_x = bbox_center_point_x - GGCNN_area_center[0]  # width
                    dist_y = GGCNN_area_center[1] - bbox_center_point_y  # height

                    # np.sign gives the direction and, unlike
                    # dist / abs(dist), is defined (0) when the box centre is
                    # aligned with the area centre on that axis.
                    dist_x_dir = np.sign(dist_x)
                    dist_y_dir = np.sign(dist_y)

                    ggcnn_center_area = depth_image[GGCNN_area_center[1],
                                                    GGCNN_area_center[0]]

                    self.horizontal_FOV = 52
                    self.vertical_FOV = 60

                    # Offsets derived from the centre depth value and the
                    # field-of-view fraction covered by the pixel distance.
                    largura_2 = 2.0 * ggcnn_center_area * np.tan(
                        self.horizontal_FOV * abs(dist_x) / depth_width_res
                        / 2.0 / 180.0 * np.pi) / 1000 * dist_x_dir
                    altura_2 = 2.0 * ggcnn_center_area * np.tan(
                        self.vertical_FOV * abs(dist_y) / depth_height_res
                        / 2.0 / 180.0 * np.pi) / 1000 * dist_y_dir

                    reposition_points = Float32MultiArray()
                    reposition_points.data = [largura_2, altura_2]

                    self.reposition_coord.publish(reposition_points)
                    self.detection_ready.publish(True)
                    self.reposition_robot_flag.publish(True)
        else:
            print('The object ({}) was not found'.format(self.classes[self.pipeline_required_class]))
            self.detection_ready.publish(False)
            self.reposition_robot_flag.publish(False)
    else:
        print('No objects (including the requested one ({})) were found'.format(self.classes[self.pipeline_required_class]))
        self.detection_ready.publish(False)
        self.reposition_robot_flag.publish(False)

    bbox_list = np.array(bbox_list)
    fscores_list = np.array(fscores_list)
    fclass_IDs_list = np.array(fclass_IDs_list)

    # Draw the kept detection, blend with the depth image and publish.
    img = gcv.utils.viz.cv_plot_bbox(img, bbox_list, fscores_list, fclass_IDs_list,
                                     class_names=self.net.classes)
    depth_image = cv2.cvtColor(depth_image, cv2.COLOR_GRAY2BGR)
    depth_image = depth_image.astype('uint8')
    img = img.astype('uint8')
    added_image = cv2.addWeighted(depth_image, 0.7, img, 0.8, 0)
    self.img_pub.publish(CvBridge().cv2_to_imgmsg(added_image, 'bgr8'))
def __call__(self, im):
    """Resize ``im`` and return it as a normalised tensor.

    The two size arguments passed to ``imresize`` are
    ``int(self._size * self.scale)`` and ``self._size``, in that order.
    """
    first_dim = int(self._size * self.scale)
    second_dim = self._size

    resized = timage.imresize(mx.nd.array(im), first_dim, second_dim)
    tensor = mx.nd.image.to_tensor(resized)
    return mx.nd.image.normalize(tensor, mean=self._mean, std=self._std)
def __call__(self, src, label):
    """Apply the training transform with a random quarter-turn rotation.

    The horizontal flip is replaced by a rotation of 0, 90, 180 or 270
    degrees clockwise. Label column 5 is advanced by the number of quarter
    turns, modulo 4. Returns ``(img, bbox)`` when ``self._anchors`` is None,
    otherwise ``(img, cls_targets[0], ori_targets[0], box_targets[0])``.
    """
    # Colour jitter.
    img = experimental.image.random_color_distort(src)

    # Random expansion with probability 0.5, padded with the scaled mean.
    if np.random.uniform(0, 1) > 0.5:
        img, expand = timage.random_expand(
            img, fill=[m * 255 for m in self._mean])
        bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
    else:
        bbox = label

    # Constrained random crop.
    h, w, _ = img.shape
    bbox, crop = experimental.bbox.random_crop_with_constraints(bbox, (w, h))
    left, top, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, left, top, crop_w, crop_h)

    # Resize with a randomly drawn interpolation code in 0..4.
    h, w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

    # Random rotation: number of clockwise quarter turns in 0..3.
    h, w, _ = img.shape
    quarter_turns = np.random.randint(0, 4)
    if quarter_turns:
        x_min, y_min, x_max, y_max, cls_id, orient = (
            bbox[:, col] for col in range(6))
        if quarter_turns == 1:
            # 90 degrees clockwise: transpose, then mirror the columns.
            img = nd.transpose(img, [1, 0, 2])
            img = img[:, ::-1, :]
            bbox = np.array(
                [h - y_max, x_min, h - y_min, x_max, cls_id, orient]).T
            bbox[:, 5] = (bbox[:, 5] + 1) % 4
        elif quarter_turns == 2:
            # 180 degrees clockwise: mirror both rows and columns.
            img = img[::-1, ::-1, :]
            bbox = np.array(
                [w - x_max, h - y_max, w - x_min, h - y_min, cls_id, orient]).T
            bbox[:, 5] = (bbox[:, 5] + 2) % 4
        else:
            # 270 degrees clockwise: transpose, then mirror the rows.
            img = nd.transpose(img, [1, 0, 2])
            img = img[::-1, :, :]
            bbox = np.array(
                [y_min, w - x_max, y_max, w - x_min, cls_id, orient]).T
            bbox[:, 5] = (bbox[:, 5] + 3) % 4

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build targets here so CPU workers take that work off the GPU.
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    gt_ori = mx.nd.array(bbox[np.newaxis, :, 5:6])

    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)
    # Second pass with column 5 in place of the class ids; box_targets is
    # overwritten by this call's box output.
    ori_targets, box_targets, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ori)
    return img, cls_targets[0], ori_targets[0], box_targets[0]