def __call__(self, src, bbox):
    """Augment one training image with its boxes and optionally build targets.

    Pipeline, in order: resize to 1.2x the configured size, colour
    distortion, constrained random crop, random horizontal flip, resize to
    the configured size, a second random horizontal flip, then tensor
    conversion and mean/std normalisation.

    Returns ``(img, bbox)`` with ``bbox`` cast to ``img.dtype`` when
    ``self._anchors`` is None; otherwise ``(img, cls_targets[0],
    box_targets[0])`` as produced by ``self._target_generator``.
    """
    upscale = 1.2
    big_w = int(self._width * upscale)
    big_h = int(self._height * upscale)

    # Enlarge the input; the interpolation code is drawn from [1, 5).
    src_h, src_w, _ = src.shape
    interp = np.random.randint(1, 5)
    src = timage.imresize(src, big_w, big_h, interp=interp)
    bbox = tbbox.resize(bbox, (src_w, src_h), (big_w, big_h))

    # Colour jitter.
    img = experimental.image.random_color_distort(src)

    # Constrained random crop; the boxes are updated by the helper.
    cur_h, cur_w, _ = img.shape
    bbox, crop = random_crop_with_constraints(
        bbox, (cur_w, cur_h), self._height, self._width,
        min_scale=0.95, max_scale=1.05, max_trial=50)
    crop_x, crop_y, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

    # First random horizontal flip (boxes mirrored to match).
    cur_h, cur_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Bring image and boxes to the final configured size.
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(1, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    # Second random horizontal flip, drawn independently of the first.
    cur_h, cur_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build the training targets here so data-loader workers do this work.
    # A leading axis of size 1 is added; columns 0-3 and column 4 are split
    # into separate arrays for the target generator.
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_t, box_t, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)
    return img, cls_t[0], box_t[0]
def __call__(self, src, bbox):
    """Resize an image with its boxes, optionally flip, and subtract the mean.

    The horizontal flip is applied only when ``self._val`` is falsy.

    Returns ``(img, bbox)`` with ``bbox`` cast to ``img.dtype`` when
    ``self._anchors`` is None; otherwise ``(img, cls_targets[0],
    box_targets[0])`` as produced by ``self._target_generator``.
    """
    # Resize to the configured size; interpolation code drawn from [1, 5).
    src_h, src_w, _ = src.shape
    interp = np.random.randint(1, 5)
    img = timage.imresize(src, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (src_w, src_h), (self._width, self._height))

    if not self._val:
        # Random horizontal flip, with the boxes mirrored to match.
        cur_h, cur_w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Convert to a tensor, then rescale each of the first three channels by
    # 256 and subtract that channel's mean.
    # NOTE(review): to_tensor is documented as dividing by 255, so the 256
    # factor looks slightly off -- confirm it is intentional before changing.
    img = mx.nd.image.to_tensor(img)
    for channel in range(3):
        img[channel, :, :] = mx.nd.subtract(
            img[channel, :, :] * 256, self._mean[channel])

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build the training targets here so data-loader workers do this work.
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_t, box_t, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)
    return img, cls_t[0], box_t[0]
def __call__(self, src, label):
    """Apply the training augmentation to an image and its boxes.

    Steps: colour distortion, constrained random crop, resize to the
    configured size, random horizontal flip, tensor conversion and
    mean/std normalisation.

    Returns ``(img, bbox)`` when ``self._anchors`` is None; otherwise
    ``(img, cls_targets[0], box_targets[0])``, with every ground-truth box
    given class id 0.
    """
    # Colour jitter.
    img = random_color_distort(src)

    # Constrained random crop.
    # NOTE(review): the original comment said the crop keeps aspect ratio 1;
    # that depends on random_crop_with_constraints -- confirm.
    cur_h, cur_w, _ = img.shape
    bbox, crop = random_crop_with_constraints(label, (cur_w, cur_h))
    crop_x, crop_y, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

    # Resize to the configured size; interpolation code drawn from [0, 5).
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = gimage.imresize(img, self._width, self._height, interp=interp)
    bbox = gbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    # Random horizontal flip, with the boxes mirrored to match.
    cur_h, cur_w, _ = img.shape
    img, flips = gimage.random_flip(img, px=0.5)
    bbox = gbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox

    # Build the training targets here so data-loader workers do this work.
    # Class ids are all zero, shaped (1, num_boxes, 1) to match gt_bboxes.
    gt_bboxes = mx.nd.array(bbox[:, :4]).expand_dims(0)
    num_boxes = gt_bboxes.shape[1]
    gt_ids = mx.nd.zeros((1, num_boxes, 1), dtype=gt_bboxes.dtype)
    cls_t, box_t, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)
    return img, cls_t[0], box_t[0]
def __call__(self, src, label, segm):
    """Apply the training transform to an image, its boxes and its masks.

    Steps: resize the shorter side (capped by ``self._max_size``), random
    horizontal flip, decode ``segm`` with ``cocomask.decode`` and resize
    each mask to the image size, tensor conversion and mean/std
    normalisation.

    Returns ``(img, bbox, masks)`` when ``self._anchors`` is None;
    otherwise ``(img, bbox, masks, cls_target, box_target, box_mask)``.
    ``bbox`` is cast to ``img.dtype`` in both cases.
    """
    # Resize the shorter side; with random resize on, the target length is
    # drawn between the two entries of self._short.
    src_h, src_w, _ = src.shape
    if self._random_resize:
        short = randint(self._short[0], self._short[1])
    else:
        short = self._short
    img = timage.resize_short_within(src, short, self._max_size, interp=1)
    bbox = tbbox.resize(label, (src_w, src_h), (img.shape[1], img.shape[0]))

    # Random horizontal flip, with the boxes mirrored to match.
    cur_h, cur_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Decode the masks (last axis indexes instances), resize each one to the
    # current image size with nearest-neighbour, and stack to (n, h, w).
    # NOTE(review): with zero instances the stack call receives no arrays --
    # confirm callers never pass an empty segm.
    decoded = cocomask.decode(segm)
    mask_size = (img.shape[1], img.shape[0])
    resized = [
        mx.nd.array(cv2.resize(decoded[:, :, idx], mask_size,
                               interpolation=cv2.INTER_NEAREST))
        for idx in range(decoded.shape[-1])
    ]
    masks = mx.nd.stack(*resized, axis=0)
    if flips[0]:
        # The masks came from the unflipped annotation; mirror them here.
        masks = mx.nd.flip(masks, axis=2)

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype), masks

    # Build the RPN targets here so data-loader workers do this work.
    gt_bboxes = mx.nd.array(bbox[:, :4])
    data_shape = (1, 3, img.shape[1], img.shape[2])
    if self._multi_stage:
        # One feature symbol and one anchor array per stage: infer each
        # output shape, crop the anchors to it, flatten to rows of 4.
        oshapes = [
            feat_sym.infer_shape(data=data_shape)[1][0]
            for feat_sym in self._feat_sym
        ]
        per_stage = [
            stage_anchor[:, :, :oshape[2], :oshape[3], :].reshape((-1, 4))
            for stage_anchor, oshape in zip(self._anchors, oshapes)
        ]
        anchors = mx.nd.concat(*per_stage, dim=0)
    else:
        oshape = self._feat_sym.infer_shape(data=data_shape)[1][0]
        anchors = self._anchors[
            :, :, :oshape[2], :oshape[3], :].reshape((-1, 4))

    cls_target, box_target, box_mask = self._target_generator(
        gt_bboxes, anchors, img.shape[2], img.shape[1])
    return (img, bbox.astype(img.dtype), masks,
            cls_target, box_target, box_mask)
def __call__(self, src, label):
    """Apply the training augmentation, with optional filtering and grayscale.

    Steps: colour distortion, random expansion (taken when a uniform draw
    exceeds 0.5), constrained random crop, resize to the configured size,
    random horizontal flip, optional bilateral filter and/or grayscale
    conversion, tensor conversion and mean/std normalisation.

    Returns ``(img, bbox)`` with ``bbox`` cast to ``img.dtype`` when
    ``self._anchors`` is None; otherwise ``(img, cls_targets[0],
    box_targets[0])`` as produced by ``self._target_generator``.
    """
    # Colour jitter.
    img = experimental.image.random_color_distort(src)

    # Random expansion; the padding is filled with the mean scaled by 255.
    bbox = label
    if np.random.uniform(0, 1) > 0.5:
        fill = [m * 255 for m in self._mean]
        img, expand = timage.random_expand(img, fill=fill)
        bbox = tbbox.translate(
            label, x_offset=expand[0], y_offset=expand[1])

    # Constrained random crop; the boxes are updated by the helper.
    cur_h, cur_w, _ = img.shape
    bbox, crop = experimental.bbox.random_crop_with_constraints(
        bbox, (cur_w, cur_h))
    crop_x, crop_y, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

    # Resize to the configured size; interpolation code drawn from [0, 5).
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    # Random horizontal flip, with the boxes mirrored to match.
    cur_h, cur_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Optional OpenCV/NumPy post-processing; the image goes to NumPy and
    # back only when at least one of the two steps is enabled.
    use_bilateral = (self._bilateral_kernel_size is not None
                     and self._sigma_vals is not None)
    if use_bilateral or self._grayscale:
        pixels = img.asnumpy()
        if use_bilateral:
            pixels = cv2.bilateralFilter(
                pixels, self._bilateral_kernel_size,
                self._sigma_vals, self._sigma_vals)
        if self._grayscale:
            # Weighted sum of the first three channels, then replicated
            # back to three channels.
            gray = np.dot(pixels[..., :3], [0.299, 0.587, 0.114])
            pixels = np.repeat(gray[:, :, None], 3, axis=2)
        img = nd.array(pixels)

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build the training targets here so data-loader workers do this work.
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_t, box_t, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)
    return img, cls_t[0], box_t[0]
def __call__(self, src, label):
    """Apply transform to training image/label.

    Steps: crop via ``self.random_baiducrop``, colour distortion, resize to
    the configured size, random horizontal flip, tensor conversion and
    mean/std normalisation.

    Returns ``(img, bbox)`` when ``self._anchors`` is None; otherwise
    ``img`` followed by the three class targets and then the three box
    targets, each in face / head / body order.
    """
    # One zero per row of label, handed to the crop helper alongside the
    # first four label columns.
    zero_targets = np.zeros(shape=(label.shape[0], ))

    # Pyramid anchor sampling (name taken from the original comment); the
    # third value returned by the helper is not used afterwards.
    img, bbox, _ = self.random_baiducrop(src, label[:, :4], zero_targets)

    # Colour jitter, applied to an NDArray copy of the cropped image.
    img = mx.nd.array(img)
    img = random_color_distort(img)

    # Resize to the configured size; interpolation code drawn from [0, 5).
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = gimage.imresize(img, self._width, self._height, interp=interp)
    bbox = gbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    # Random horizontal flip, with the boxes mirrored to match.
    cur_h, cur_w, _ = img.shape
    img, flips = gimage.random_flip(img, px=0.5)
    bbox = gbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox

    # Build the training targets here so data-loader workers do this work.
    # The same boxes and all-zero class ids are matched against each of the
    # three anchor sets in turn.
    face_anchors, head_anchors, body_anchors = self._anchors
    gt_bboxes = mx.nd.array(bbox[:, :4]).expand_dims(0)
    num_boxes = gt_bboxes.shape[1]
    gt_ids = mx.nd.zeros((1, num_boxes, 1), dtype=gt_bboxes.dtype)

    face_cls, face_box, _ = self._target_generator(
        face_anchors, None, gt_bboxes, gt_ids)
    head_cls, head_box, _ = self._target_generator(
        head_anchors, None, gt_bboxes, gt_ids)
    body_cls, body_box, _ = self._target_generator(
        body_anchors, None, gt_bboxes, gt_ids)

    return (img,
            face_cls[0], head_cls[0], body_cls[0],
            face_box[0], head_box[0], body_box[0])
def __call__(self, src, label):
    """Apply the training augmentation and build multi-part detection targets.

    Steps: colour distortion, random expansion (taken when a uniform draw
    exceeds 0.5), constrained random crop, resize to the configured size,
    random horizontal flip, tensor conversion and mean/std normalisation.

    Returns ``(img, bbox)`` with ``bbox`` cast to ``img.dtype`` when
    ``self._target_generator`` is None; otherwise ``(img, objectness,
    center_targets, scale_targets, weights, class_targets, gt_bboxes)``,
    each target with its leading axis indexed away.
    """
    # Colour jitter.
    img = experimental.image.random_color_distort(src)

    # Random expansion; the padding is filled with the mean scaled by 255.
    bbox = label
    if np.random.uniform(0, 1) > 0.5:
        fill = [m * 255 for m in self._mean]
        img, expand = timage.random_expand(img, fill=fill)
        bbox = tbbox.translate(
            label, x_offset=expand[0], y_offset=expand[1])

    # Constrained random crop; the boxes are updated by the helper.
    cur_h, cur_w, _ = img.shape
    bbox, crop = experimental.bbox.random_crop_with_constraints(
        bbox, (cur_w, cur_h))
    crop_x, crop_y, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

    # Resize to the configured size; interpolation code drawn from [0, 5).
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    # Random horizontal flip, with the boxes mirrored to match.
    cur_h, cur_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._target_generator is None:
        return img, bbox.astype(img.dtype)

    # Build the training targets here so data-loader workers do this work.
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])

    # With mixup enabled, the last label column is passed along as the
    # per-box mix ratio; otherwise the generator receives None.
    if self._mixup:
        gt_mixratio = mx.nd.array(bbox[np.newaxis, :, -1:])
    else:
        gt_mixratio = None

    generated = self._target_generator(
        self._fake_x, self._feat_maps, self._anchors, self._offsets,
        gt_bboxes, gt_ids, gt_mixratio)
    objectness, center_targets, scale_targets, weights, class_targets = generated
    return (img, objectness[0], center_targets[0], scale_targets[0],
            weights[0], class_targets[0], gt_bboxes[0])
def __call__(self, src, label):
    """Apply the training augmentation and build binarized anchor targets.

    Steps: colour distortion, random expansion (taken when a uniform draw
    exceeds 0.5), constrained random crop, resize to the configured size,
    random horizontal flip, tensor conversion and mean/std normalisation.

    Returns ``(img, bbox)`` with ``bbox`` cast to ``img.dtype`` when
    ``self._anchors`` is None; otherwise ``(img, anchor_cls_targets[0],
    anchor_box_targets[0], bbox[:, :5])`` with the boxes cast to
    ``img.dtype``.
    """
    # Colour jitter.
    img = experimental.image.random_color_distort(src)

    # Random expansion; the padding is filled with the mean scaled by 255.
    bbox = label
    if np.random.uniform(0, 1) > 0.5:
        fill = [m * 255 for m in self._mean]
        img, expand = timage.random_expand(img, fill=fill)
        bbox = tbbox.translate(
            label, x_offset=expand[0], y_offset=expand[1])

    # Constrained random crop; the boxes are updated by the helper.
    cur_h, cur_w, _ = img.shape
    bbox, crop = experimental.bbox.random_crop_with_constraints(
        bbox, (cur_w, cur_h))
    crop_x, crop_y, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

    # Resize to the configured size; interpolation code drawn from [0, 5).
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    # Random horizontal flip, with the boxes mirrored to match.
    cur_h, cur_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build the training targets here so data-loader workers do this work.
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    anchor_cls_targets, anchor_box_targets, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)

    # Collapse every value greater than zero to 1 and leave the rest
    # untouched. Per the original note: positive anchors are 1, negative
    # anchors 0 and ignored anchors -1.
    is_positive = anchor_cls_targets > 0
    anchor_cls_targets = mx.nd.where(
        is_positive, mx.nd.ones_like(anchor_cls_targets), anchor_cls_targets)

    gt_out = bbox[:, :5].astype(img.dtype)
    return img, anchor_cls_targets[0], anchor_box_targets[0], gt_out
def __call__(self, src, bbox):
    """Crop, resize and mean-subtract an image with its boxes.

    Colour distortion and the horizontal flip are applied only when
    ``self._val`` is falsy; the constrained crop and the resize always run.

    Returns ``(img, bbox)`` with ``bbox`` cast to ``img.dtype`` when
    ``self._anchors`` is None; otherwise ``(img, cls_targets[0],
    box_targets[0])`` as produced by ``self._target_generator``.
    """
    training = not self._val

    # Colour jitter (training only).
    img = experimental.image.random_color_distort(src) if training else src

    # Constrained random crop; the boxes are updated by the helper.
    cur_h, cur_w, _ = img.shape
    bbox, crop = random_crop_with_constraints(
        bbox, (cur_w, cur_h), self._height, self._width,
        min_scale=0.9, max_scale=1, max_trial=50)
    crop_x, crop_y, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

    # Resize to the configured size; interpolation code drawn from [1, 5).
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(1, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    if training:
        # Random horizontal flip, with the boxes mirrored to match.
        cur_h, cur_w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Convert to a tensor, then rescale each of the first three channels by
    # 256 and subtract that channel's mean.
    # NOTE(review): to_tensor is documented as dividing by 255, so the 256
    # factor looks slightly off -- confirm it is intentional before changing.
    img = mx.nd.image.to_tensor(img)
    for channel in range(3):
        img[channel, :, :] = mx.nd.subtract(
            img[channel, :, :] * 256, self._mean[channel])

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Build the training targets here so data-loader workers do this work.
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_t, box_t, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)
    return img, cls_t[0], box_t[0]
def __call__(self, src, label):
    """Apply the training augmentation and optionally build anchor targets.

    Steps: colour distortion, random expansion (taken when a uniform draw
    exceeds 0.5), constrained random crop, resize to the configured size,
    random horizontal flip, tensor conversion and mean/std normalisation.

    Returns ``(img, bbox)`` with ``bbox`` cast to ``img.dtype`` when
    ``self._anchors`` is None; otherwise ``(img, cls_targets[0],
    box_targets[0])`` as produced by ``self._target_generator``.
    """
    # Colour jitter.
    img = experimental.image.random_color_distort(src)

    # Random expansion; the padding is filled with the mean scaled by 255.
    bbox = label
    if np.random.uniform(0, 1) > 0.5:
        fill = [m * 255 for m in self._mean]
        img, expand = timage.random_expand(img, fill=fill)
        bbox = tbbox.translate(
            label, x_offset=expand[0], y_offset=expand[1])

    # Constrained random crop; the boxes are updated by the helper.
    cur_h, cur_w, _ = img.shape
    bbox, crop = experimental.bbox.random_crop_with_constraints(
        bbox, (cur_w, cur_h))
    crop_x, crop_y, crop_w, crop_h = crop
    img = mx.image.fixed_crop(img, crop_x, crop_y, crop_w, crop_h)

    # Resize to the configured size; interpolation code drawn from [0, 5).
    cur_h, cur_w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (cur_w, cur_h), (self._width, self._height))

    # Random horizontal flip, with the boxes mirrored to match.
    cur_h, cur_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (cur_w, cur_h), flip_x=flips[0])

    # Tensor conversion and normalisation.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # Anchors were supplied, so compute the training targets here.
    # Per the original author's notes (translated):
    #   - gt_bboxes holds the box positions for one image, in actual pixel
    #     coordinates: (number of boxes in this image) x 4
    #   - gt_ids holds the matching labels: (number of boxes) x 1
    # The leading np.newaxis makes room for the batch dimension.
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_t, box_t, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)
    return img, cls_t[0], box_t[0]