def get_subwindow_tracking(z, pos_x, pos_y, model_sz, original_sz, avgChans, ctx=mx.cpu()):
    """Extract a square sub-window of ``z`` centred on ``(pos_x, pos_y)``.

    The window has side ``original_sz``.  Where it extends past the image the
    image is padded first, so the returned patch keeps the requested window
    size.  If ``model_sz`` differs from ``original_sz`` the patch is resized.

    Parameters
    ----------
    z : NDArray
        Source image; indexed as (row, column, channel) by this function.
    pos_x, pos_y : scalar
        Centre of the window along the column / row axis.
    model_sz : int
        Side length the returned ``im_patch`` should have.
    original_sz : int or None
        Side length of the window cut from ``z``; ``None`` means ``model_sz``.
    avgChans : NDArray or None
        Value subtracted before padding and added back afterwards, so the
        padded border ends up equal to it (assuming the pad constant is 0).
        With ``None`` the image is padded as-is.
    ctx : Context
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    tuple
        ``(im_patch, im_patch_original)``: the (possibly resized) patch and
        the patch at its original resolution.
    """
    if original_sz is None:
        original_sz = model_sz
    sz = original_sz
    im_sz = np.shape(z)
    cen = (sz - 1) / 2

    # Window bounds. The "1 - ..." and "... - 1" offsets used below suggest
    # these are 1-based inclusive coordinates -- TODO confirm with callers.
    context_xmin = np.floor(pos_x - cen)
    context_xmax = context_xmin + sz - 1
    context_ymin = np.floor(pos_y - cen)
    context_ymax = context_ymin + sz - 1

    # Amount by which the window sticks out of the image on each side.
    left_pad = nd.maximum(0, 1 - context_xmin)
    top_pad = nd.maximum(0, 1 - context_ymin)
    right_pad = nd.maximum(0, context_xmax - im_sz[1])
    bottom_pad = nd.maximum(0, context_ymax - im_sz[0])

    # Shift the window into the coordinate frame of the padded image.
    context_xmin = context_xmin + left_pad
    context_xmax = context_xmax + left_pad
    context_ymin = context_ymin + top_pad
    context_ymax = context_ymax + top_pad

    # One (before, after) pair per axis of the B C H W tensor padded below.
    paddings = [0, 0, 0, 0, int(top_pad), int(bottom_pad), int(left_pad), int(right_pad)]

    # Previously ``im_padded_`` was only bound when ``avgChans`` was given,
    # so ``avgChans=None`` raised NameError on the next statement.
    im_padded_ = z if avgChans is None else z - avgChans
    im_padded_ = nd.expand_dims(im_padded_, axis=0)  # B H W C
    im_padded_ = nd.transpose(im_padded_, axes=(0, 3, 1, 2))  # B C H W
    im_padded_ = nd.pad(im_padded_, pad_width=paddings, mode='constant')
    im_padded_ = nd.transpose(im_padded_, axes=(0, 2, 3, 1))  # B H W C
    if avgChans is not None:
        im_padded_ = im_padded_ + avgChans
    im_padded = im_padded_[0]

    im_patch_original = im_padded[int(context_ymin - 1):int(context_ymax),
                                  int(context_xmin - 1):int(context_xmax), :]

    if int(model_sz) != int(original_sz):
        sz_dst_w = np.round(im_patch_original.shape[1] / original_sz * model_sz)
        sz_dst_h = np.round(im_patch_original.shape[0] / original_sz * model_sz)
        im_patch = image.fixed_crop(im_patch_original,
                                    x0=0,
                                    y0=0,
                                    w=im_patch_original.shape[1],
                                    h=im_patch_original.shape[0],
                                    size=[int(sz_dst_w), int(sz_dst_h)],
                                    interp=1)
        # If the proportional resize missed the target, resize the original
        # patch straight to model_sz x model_sz instead.
        if im_patch.shape[0] != model_sz:
            im_patch = image.fixed_crop(im_patch_original,
                                        x0=0,
                                        y0=0,
                                        w=im_patch_original.shape[1],
                                        h=im_patch_original.shape[0],
                                        size=[int(model_sz), int(model_sz)],
                                        interp=1)
    else:
        im_patch = im_patch_original

    return im_patch, im_patch_original
def fixed_crop(src, x0, y0, w, h, size=None, interp=2):
    """Cut a fixed rectangle out of an image and optionally resize the result.

    The input image NDArray is expected to have 'HWC' dimension order.

    Parameters
    ----------
    src : NDArray
        Image to crop.
    x0 : int
        Left edge of the crop rectangle.
    y0 : int
        Top edge of the crop rectangle.
    w : int
        Width of the crop rectangle.
    h : int
        Height of the crop rectangle.
    size : tuple of (w, h)
        Optional; when given, the crop is resized to this size.
    interp : int, optional, default=2
        Interpolation method used for the resize. See resize for details.

    Returns
    -------
    NDArray
        The cropped (and possibly resized) image.
    """
    # Pure delegation: every argument is forwarded positionally, unchanged.
    cropped = img.fixed_crop(src, x0, y0, w, h, size, interp)
    return cropped
def fixed_crop(src, x0, y0, w, h, size=None, interp=2):
    """Crop a fixed region from ``src`` and, if requested, resize it.

    ``src`` must be an image NDArray laid out in 'HWC' order.

    Parameters
    ----------
    src : NDArray
        Source image.
    x0 : int
        Left boundary of the region to keep.
    y0 : int
        Top boundary of the region to keep.
    w : int
        Width of the region to keep.
    h : int
        Height of the region to keep.
    size : tuple of (w, h)
        Optional target size applied after cropping.
    interp : int, optional, default=2
        Interpolation method. See resize for details.

    Returns
    -------
    NDArray
        An `NDArray` holding the cropped image.
    """
    # Forward the arguments in their original positional order.
    crop_args = (src, x0, y0, w, h, size, interp)
    return img.fixed_crop(*crop_args)
def __call__(self, src, label):
    """Augment one training image/label pair and build its training targets.

    The steps run in a fixed order: colour distortion, optional expansion,
    constrained crop, resize, horizontal flip, tensor conversion and
    normalisation, then target generation.
    """
    # colour jitter
    jittered = random_color_distort(src)

    # expansion, taken when a uniform draw exceeds 0.5; the fill value is
    # self._mean scaled by 255
    if np.random.uniform(0, 1) > 0.5:
        mean_fill = [m * 255 for m in self._mean]
        jittered, expand = timage.random_expand(jittered, fill=mean_fill)
        boxes = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
    else:
        boxes = label

    # constrained random crop; the crop box unpacks as (x0, y0, w, h)
    height, width, _ = jittered.shape
    boxes, crop_box = random_crop_with_constraints(boxes, (width, height))
    crop_x, crop_y, crop_w, crop_h = crop_box
    cropped = mx_img.fixed_crop(jittered, crop_x, crop_y, crop_w, crop_h)

    # resize to (self._width, self._height) with a random interpolation id
    height, width, _ = cropped.shape
    resized = timage.imresize(cropped, self._width, self._height,
                              interp=np.random.randint(0, 5))
    boxes = tbbox.resize(boxes, (width, height), (self._width, self._height))

    # horizontal flip with px=0.5; boxes follow the image
    height, width, _ = resized.shape
    flipped, flips = timage.random_flip(resized, px=0.5)
    boxes = tbbox.flip(boxes, (width, height), flip_x=flips[0])

    # tensor conversion and normalisation
    tensor = nd.image.to_tensor(flipped)
    tensor = nd.image.normalize(tensor, mean=self._mean, std=self._std)

    # build the training targets here so cpu workers can take load off the gpu
    gt_bboxes = nd.array(boxes[np.newaxis, :, :4])
    gt_ids = nd.array(boxes[np.newaxis, :, 4:5])
    gt_mixratio = nd.array(boxes[np.newaxis, :, -1:]) if self._mixup else None
    targets = PrefetchTargetGenerator(
        self._num_classes, self._height, self._width, self._anchors,
        self._offsets, gt_bboxes, gt_ids, gt_mixratio)
    objectness, center_targets, scale_targets, weights, class_targets = targets

    return (tensor, center_targets[0], scale_targets[0], weights[0],
            objectness[0], class_targets[0], gt_bboxes[0])
def crop_resize_normalize(img, bbox_list, output_size):
    """Crop every box out of ``img``, resize it, normalise it and stack the results.

    Parameters
    ----------
    img : array-like
        Source image; ``img.shape[0]`` / ``img.shape[1]`` are used as the
        height / width limits when clamping boxes.
    bbox_list : iterable
        Boxes whose first four entries are read as ``x0, y0, x1, y1``.
    output_size : sequence
        ``output_size[1]`` and ``output_size[0]`` are passed, in that order,
        as the resize target of ``image.fixed_crop`` -- presumably this is
        ``(height, width)``; confirm against callers.

    Returns
    -------
    NDArray
        ``nd.stack`` of the transformed crops, one per box.

    Notes
    -----
    NOTE(review): an empty ``bbox_list`` reaches ``nd.stack`` with no inputs,
    and a box lying entirely outside the image yields a non-positive crop
    size; neither case is handled here.
    """
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # Loop-invariant work, done once instead of once per box.
    src = nd.array(img)
    img_h = int(img.shape[0])
    img_w = int(img.shape[1])
    resize_to = (output_size[1], output_size[0])

    output_list = []
    for bbox in bbox_list:
        # Clamp the box to the image bounds before cropping.
        x0 = max(int(bbox[0]), 0)
        y0 = max(int(bbox[1]), 0)
        x1 = min(int(bbox[2]), img_w)
        y1 = min(int(bbox[3]), img_h)
        w = x1 - x0
        h = y1 - y0
        res_img = image.fixed_crop(src, x0, y0, w, h, resize_to)
        output_list.append(transform_test(res_img))

    return nd.stack(*output_list)
def crop_resize_normalize(img, bbox_list, output_size):
    """Cut each bounding box from ``img``, resize, normalise and batch the crops.

    Parameters
    ----------
    img : array-like
        Source image; boxes are clamped to ``img.shape[1]`` horizontally and
        ``img.shape[0]`` vertically.
    bbox_list : iterable
        Boxes indexed as ``bbox[0..3] = x0, y0, x1, y1``.
    output_size : sequence
        Resize target; it is handed to ``image.fixed_crop`` as
        ``(output_size[1], output_size[0])`` -- presumably the input is
        ``(height, width)``; confirm against callers.

    Returns
    -------
    NDArray
        The transformed crops combined with ``nd.stack``.

    Notes
    -----
    NOTE(review): no handling exists for an empty ``bbox_list`` (``nd.stack``
    is then called without inputs) or for boxes that fall completely outside
    the image (non-positive width/height).
    """
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # The image conversion and the bounds do not depend on the box, so they
    # are computed once rather than inside the loop.
    src = nd.array(img)
    max_x = int(img.shape[1])
    max_y = int(img.shape[0])
    target_size = (output_size[1], output_size[0])

    output_list = []
    for bbox in bbox_list:
        x0 = max(int(bbox[0]), 0)
        y0 = max(int(bbox[1]), 0)
        x1 = min(int(bbox[2]), max_x)
        y1 = min(int(bbox[3]), max_y)
        res_img = image.fixed_crop(src, x0, y0, x1 - x0, y1 - y0, target_size)
        res_img = transform_test(res_img)
        output_list.append(res_img)

    output_array = nd.stack(*output_list)
    return output_array
def rand_crop(data, label, shape):
    """Randomly crop ``data`` to ``shape`` and cut the same rectangle from ``label``."""
    cropped_data, region = image.random_crop(data, shape)
    # Reuse the rectangle chosen for the data on the label.
    cropped_label = image.fixed_crop(label, *region)
    return cropped_data, cropped_label
def __voc_rand_crop(self, feature, label, height, width):
    """Take one random ``(width, height)`` crop and apply it to both images."""
    crop_size = (width, height)
    feature_patch, crop_rect = image.random_crop(feature, crop_size)
    # The rectangle returned for the feature is applied to the label as-is.
    label_patch = image.fixed_crop(label, *crop_rect)
    return feature_patch, label_patch
def voc_rand_crop(feature, label, height, width):
    """Crop the feature image at random and crop the label at the same place."""
    target = (width, height)
    feature, box = image.random_crop(feature, target)
    # `box` is the rectangle picked by random_crop; reuse it for the label.
    return feature, image.fixed_crop(label, *box)
def voc_rand_crop(feature, label, height, width):
    """Randomly crop a Pascal VOC2012 feature/label pair with a shared rectangle."""
    random_result = image.random_crop(feature, (width, height))
    cropped_feature, crop_rect = random_result
    cropped_label = image.fixed_crop(label, *crop_rect)
    return cropped_feature, cropped_label
'wd': 1e-3 })
print("start train...\n")
d2l.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs=1)  # 5
print("end train...\n")


# 9.10.6 - Predict the class of each pixel
def predict(img):
    """Return the per-pixel class indices predicted by ``net`` for ``img``.

    The image is normalised with the test dataset's ``normalize_image``,
    transposed with axes ``(2, 0, 1)``, given a leading batch axis and run
    through ``net`` on ``ctx[0]``.  The argmax over axis 1 is reshaped to
    ``(pred.shape[1], pred.shape[2])``, i.e. the batch axis is dropped.
    """
    X = test_iter._dataset.normalize_image(img)
    X = X.transpose((2, 0, 1)).expand_dims(axis=0)
    pred = nd.argmax(net(X.as_in_context(ctx[0])), axis=1)
    return pred.reshape((pred.shape[1], pred.shape[2]))


# Visualize the predicted classes by mapping them back to the label colors
# used in the dataset
def label2image(pred):
    """Map class indices in ``pred`` to colours via ``d2l.VOC_COLORMAP``."""
    colormap = nd.array(d2l.VOC_COLORMAP, ctx=ctx[0], dtype='uint8')
    # Indices are cast to int32 before being used to index the colormap.
    X = pred.astype('int32')
    return colormap[X, :]


test_images, test_labels = d2l.read_voc_images(is_train=False)
n, imgs = 4, []
for i in range(n):
    # Same fixed rectangle for the image and its label: (x0, y0, w, h).
    crop_rect = (0, 0, 480, 320)
    X = image.fixed_crop(test_images[i], *crop_rect)
    pred = label2image(predict(X))
    # imgs is filled in triples: input, prediction, ground-truth label.
    imgs += [X, pred, image.fixed_crop(test_labels[i], *crop_rect)]
# Regroup the triples so all inputs come first, then all predictions, then
# all labels, before showing them with arguments (3, n).
d2l.show_images(imgs[::3] + imgs[1::3] + imgs[2::3], 3, n)
def voc_rand_crop(img, label, height, width):
    """Randomly crop ``img`` to ``(width, height)`` and crop ``label`` identically."""
    wanted_size = (width, height)
    img_crop, crop_rect = image.random_crop(img, wanted_size)
    # Apply the rectangle chosen for the image to the label.
    label_crop = image.fixed_crop(label, *crop_rect)
    return img_crop, label_crop
ctx = mx.cpu(0) ############################################################################## # Prepare the image # ----------------- # # download the example image # url = 'https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/segmentation/voc_examples/1.jpg' filename = 'example.jpg' # gluoncv.utils.download(url, filename) ############################################################################## # load the image img = image.imread(filename) img = image.fixed_crop(src=img, x0=0, y0=0, w =img.shape[1], h=img.shape[0], size=(224, 224)) from matplotlib import pyplot as plt # plt.imshow(img.asnumpy()) # plt.show() ############################################################################## # normalize the image using dataset mean from gluoncv.data.transforms.presets.segmentation import test_transform img = test_transform(img, ctx) ############################################################################## # Load the pre-trained model and make prediction # ---------------------------------------------- # # get pre-trained model model = gluoncv.model_zoo.get_model('fcn_resnet101_voc', pretrained=True)
def RandomCrop(data, label, height, width):
    """Randomly crop ``data`` and cut the identical rectangle out of ``label``.

    Parameters
    ----------
    data : NDArray
        Image to crop at a random position.
    label : NDArray
        Label image cropped with the rectangle chosen for ``data``.
    height : int
        Height of the crop.
    width : int
        Width of the crop.

    Returns
    -------
    tuple
        ``(data, label)`` after cropping.
    """
    # random_crop takes its size as (width, height). The previous
    # (height, width) order transposed every non-square crop.
    data, rect = image.random_crop(data, (width, height))
    label = image.fixed_crop(label, *rect)
    return data, label
def voc_rand_crop(data, label, height, width):
    """Apply one random crop to a Pascal VOC2012 image and its label."""
    data_patch, region = image.random_crop(data, (width, height))
    # The label is cut with the rectangle returned for the data.
    label_patch = image.fixed_crop(label, *region)
    return data_patch, label_patch
def rand_crop(self, data, label, height, width):
    """Randomly crop ``data`` to ``(width, height)`` and crop ``label`` to match."""
    size = (width, height)
    data, crop_box = image.random_crop(data, size)
    # Reuse the sampled rectangle for the label.
    return data, image.fixed_crop(label, *crop_box)
def rand_crop(data, label, height, width):
    """Crop ``data`` at random and apply the same rectangle to ``label``."""
    crop_size = (width, height)
    cropped_data, crop_rect = random_crop(data, crop_size)
    # fixed_crop receives the rectangle returned by random_crop, unpacked.
    cropped_label = fixed_crop(label, *crop_rect)
    return cropped_data, cropped_label