def pre_processing(self, img_path):
    """Load an image and fit it to ``self.img_h`` x ``self.img_w``.

    The image is first scaled to the target height with its aspect ratio
    kept. If the scaled width is below the target width, zeros are appended
    on the right; otherwise the image is resized straight to the target
    size, which does not preserve the aspect ratio. No data augmentation is
    applied here.

    :param img_path: path of the image file
    :return: image with height ``self.img_h`` and width ``self.img_w``
    """
    img_h = self.img_h
    img_w = self.img_w

    # Read via a context manager so the file handle is always released.
    with open(img_path, 'rb') as img_file:
        raw_bytes = img_file.read()
    # Decode flag is 1 when three channels are configured, otherwise 0.
    img = image.imdecode(raw_bytes, 1 if self.img_channel == 3 else 0)

    h, w = img.shape[:2]
    ratio_h = float(img_h) / h
    new_w = int(w * ratio_h)
    if new_w < img_w:
        # Keep the aspect ratio and fill the missing columns with zeros.
        img = image.imresize(img, w=new_w, h=img_h)
        step = nd.zeros((img_h, img_w - new_w, self.img_channel),
                        dtype=img.dtype)
        img = nd.concat(img, step, dim=1)
    else:
        # Too wide after height scaling: squeeze to the target width.
        img = image.imresize(img, w=img_w, h=img_h)
    return img
def pre_processing(self, img_path):
    """Load an image, fit it to the working size and optionally augment it.

    The image is scaled to the working height with its aspect ratio kept.
    If the scaled width is below the working width, zeros (black pixels)
    are appended on the right; otherwise the image is resized straight to
    the working size.

    When ``self.phase == 'train'``, about half of the calls use a fixed
    40 x 340 (height x width) working size and finish with a random crop
    to ``(self.img_w, self.img_h)``.

    :param img_path: path of the image file
    :return: the processed image
    """
    # np.random.rand() is only drawn in the training phase (short-circuit).
    data_augment = self.phase == 'train' and np.random.rand() > 0.5
    if data_augment:
        img_h = 40
        img_w = 340
    else:
        img_h = self.img_h
        img_w = self.img_w

    # Read via a context manager so the file handle is always released.
    with open(img_path, 'rb') as img_file:
        raw_bytes = img_file.read()
    # Decode flag is 1 when three channels are configured, otherwise 0.
    img = image.imdecode(raw_bytes, 1 if self.img_channel == 3 else 0)

    h, w = img.shape[:2]
    ratio_h = float(img_h) / h
    new_w = int(w * ratio_h)
    if new_w < img_w:
        img = image.imresize(img, w=new_w, h=img_h)
        step = nd.zeros((img_h, img_w - new_w, self.img_channel),
                        dtype=img.dtype)
        img = nd.concat(img, step, dim=1)
    else:
        img = image.imresize(img, w=img_w, h=img_h)

    if data_augment:
        img, _ = image.random_crop(img, (self.img_w, self.img_h))
    return img
def cropImg(img, bbox, det=False):
    """Crop a square window around a box and return it as a normalised batch.

    The window side is ``int(sqrt((w + p) * (h + p)))`` with
    ``p = (w + h) / 2``; the window is centred on the box and clipped to
    the image bounds. The crop is resized to 255 x 255 when ``det`` is
    truthy and to 127 x 127 otherwise, scaled to [0, 1], normalised with
    a fixed per-channel mean/std, and returned with a leading batch axis
    in channel-first order.

    :param img: source image, indexed as ``img[rows, cols, channels]``
    :param bbox: sequence of at least four numbers
    :param det: selects the 255 x 255 output size instead of 127 x 127
    :return: normalised array of shape (1, C, size, size)
    """
    x, y = bbox[0], bbox[1]
    # NOTE(review): width is taken from bbox[3] and height from bbox[2];
    # confirm the bbox layout with the callers.
    w, h = bbox[3], bbox[2]

    context = (w + h) / 2
    side = int(np.sqrt((w + context) * (h + context)))
    half = side // 2

    center = np.array([x + (0.5 * w), y + (0.5 * h)]).astype('int32')
    rows, cols = img.shape[0], img.shape[1]
    top = np.clip(center[1] - half, 0, rows)
    bottom = np.clip(center[1] + half, 0, rows)
    left = np.clip(center[0] - half, 0, cols)
    right = np.clip(center[0] + half, 0, cols)
    patch = img[top:bottom, left:right, :]

    out_size = 255 if det else 127
    patch = image.imresize(patch, out_size, out_size)

    patch = patch.astype('float32') / 255
    normalised = mx.image.color_normalize(
        patch,
        mean=mx.nd.array([0.485, 0.456, 0.406]),
        std=mx.nd.array([0.229, 0.224, 0.225]))
    batched = normalised.expand_dims(0)
    return mx.ndarray.transpose(batched, (0, 3, 1, 2))
def _test_crop_resize_with_diff_type(dtype):
    """Check ``transforms.CropResize`` on inputs cast to ``dtype``.

    Covers plain crops (3-D and 4-D input), crops followed by a resize,
    and the argument combinations that must raise ``MXNetError``.
    """
    # Plain crop of a single image.
    single = nd.arange(60).reshape((5, 4, 3)).astype(dtype)
    cropped = transforms.CropResize(0, 0, 3, 2)(single).asnumpy()
    assert cropped.sum() == 180
    assert (cropped[0:2, 1, 1].flatten() == [4, 16]).all()

    # Plain crop of a 4-D batch.
    batch = nd.arange(180).reshape((2, 6, 5, 3)).astype(dtype)
    cropped_batch = transforms.CropResize(1, 2, 3, 4)(batch).asnumpy()
    assert cropped_batch.sum() == 7524
    expected_values = [37, 52, 67, 82, 127, 142, 157, 172]
    assert (cropped_batch[0:2, 0:4, 1, 1].flatten() == expected_values).all()

    # Crop followed by a resize, single image.
    single = nd.random.uniform(0, 255, (300, 200, 3)).astype(dtype)
    resized = transforms.CropResize(0, 0, 100, 50, (25, 25), 2)(single)
    reference = image.imresize(
        nd.slice(single, (0, 0, 0), (50, 100, 3)), 25, 25, 2)
    assert_almost_equal(resized.asnumpy(), reference.asnumpy())

    # Crop followed by a resize, 4-D batch: compare sample by sample.
    batch = nd.random.uniform(0, 255, (3, 300, 200, 3)).astype(dtype)
    resized_batch = transforms.CropResize(0, 0, 100, 50, (25, 25), 2)(batch)
    for idx in range(len(resized_batch)):
        reference = image.imresize(
            nd.slice(batch[idx], (0, 0, 0), (50, 100, 3)), 25, 25, 2)
        assert_almost_equal(reference.asnumpy(), resized_batch[idx].asnumpy())

    # Resize height and width must be greater than 0.
    bad_resize = transforms.CropResize(0, 0, 100, 50, (-25, 25), 2)
    assertRaises(MXNetError, bad_resize, single)

    # Crop height and width must be greater than 0.
    bad_crop = transforms.CropResize(0, 0, -100, -50)
    assertRaises(MXNetError, bad_crop, single)

    # The cropped area must not exceed the input.
    oversized = transforms.CropResize(150, 200, 200, 500)
    assertRaises(MXNetError, oversized, single)
    assertRaises(MXNetError, oversized, batch)
def __getitem__(self, item):
    """Return the resized ``(data, label)`` pair stored at ``item``.

    Both arrays are resized with ``self.resize[0]`` as width and
    ``self.resize[1]`` as height, and the data array is moved to
    channel-first order. Without a colormap the label gets the same
    transpose; with one it is converted through ``self.label_indices``.

    :param item: index into ``self.data`` / ``self.label``
    :return: tuple ``(data, label)``
    """
    width, height = self.resize[0], self.resize[1]
    data = image.imresize(self.data[item], width, height)
    # NOTE(review): the label uses imresize's default interpolation; if the
    # labels are colour-coded classes, confirm that blending is acceptable.
    label = image.imresize(self.label[item], width, height)
    data = data.transpose((2, 0, 1))

    # An identity test avoids ``!= None``, which is evaluated element-wise
    # (and is ambiguous in a boolean context) for array-like colormaps.
    if self.colormap is None:
        return data, label.transpose((2, 0, 1))
    return data, self.label_indices(label)
def transform_train(data, label):
    """Prepare one training sample for two networks that are fused later.

    The image is scaled to [0, 1] and resized to 224 x 224 and 299 x 299;
    each copy goes through its own augmenter list (random mirror plus
    mean/std normalisation) and is converted from H x W x C to C x H x W.

    :param data: source image
    :param label: class label
    :return: tuple ``(im1, im2, label)`` with the label as float32
    """
    def _make_augmenters(side):
        # Identical settings for both networks; only the data shape differs.
        return image.CreateAugmenter(
            data_shape=(3, side, side), resize=0,
            rand_crop=False, rand_resize=False, rand_mirror=True,
            mean=np.array([0.485, 0.456, 0.406]),
            std=np.array([0.229, 0.224, 0.225]),
            brightness=0, contrast=0, saturation=0, hue=0,
            pca_noise=0, rand_gray=0, inter_method=2)

    # Divide every pixel by 255 so that the values lie in [0, 1].
    scaled = data.astype('float32') / 255
    im1 = image.imresize(scaled, 224, 224)
    im2 = image.imresize(scaled, 299, 299)

    auglist1 = _make_augmenters(224)  # augmenters for the first network
    auglist2 = _make_augmenters(299)  # augmenters for the second network

    for aug in auglist1:
        im1 = aug(im1)
    for aug in auglist2:
        im2 = aug(im2)

    # Change the layout from "height x width x channel" to
    # "channel x height x width".
    im1 = nd.transpose(im1, (2, 0, 1))
    im2 = nd.transpose(im2, (2, 0, 1))

    label_value = nd.array([label]).asscalar().astype('float32')
    return (im1, im2, label_value)
def __resize(self, feature, label, height, width):
    """Resize an image/label pair to cover ``height`` x ``width``, then crop.

    A single scale factor is used for both arrays: the larger of the width
    and height ratios, so that the resized pair is at least as large as
    the requested size in both dimensions. The pair is then handed to
    ``self.__voc_rand_crop`` together with the requested size.

    :param feature: source image, ``shape[0]`` is height, ``shape[1]`` width
    :param label: label image resized with the same geometry
    :param height: requested output height
    :param width: requested output width
    :return: whatever ``self.__voc_rand_crop`` returns for the resized pair
    """
    img_h, img_w = feature.shape[0], feature.shape[1]  # rows = height, columns = width
    w, h = width, height
    scale = max(w * 1.0 / img_w, h * 1.0 / img_h)
    print('x_h = {},x_w = {},to_w = {},to_h = {}, scale = {}'.format(
        img_h, img_w, w, h, scale))

    # +1 keeps the resized size strictly above the truncated product.
    new_w = int(img_w * scale) + 1
    new_h = int(img_h * scale) + 1

    # Bilinear interpolation (interp=1) for the image itself.
    resized_image = image.imresize(src=feature, w=new_w, h=new_h, interp=1)
    # Nearest-neighbour (interp=0) for the label: bilinear interpolation
    # would blend neighbouring label values into values that do not belong
    # to any class.
    resized_label = image.imresize(src=label, w=new_w, h=new_h, interp=0)

    print('resized_image = {}'.format(resized_image.shape))
    print('resized_label = {}'.format(resized_label.shape))
    return self.__voc_rand_crop(resized_image, resized_label, height, width)
def resize(src, new_width, new_height, interp=2):
    """
    Resize an image to ``new_width`` x ``new_height``.

    The input image NDArray should have the dimension order 'HWC'.
    This is a thin wrapper that forwards its arguments to ``img.imresize``.

    :param src: NDArray
        Source image in NDArray format
    :param new_width: int
        Width in pixels of the resized image
    :param new_height: int
        Height in pixels of the resized image
    :param interp: int
        Interpolation method for all resizing operations.
        Possible values:
        0: Nearest Neighbors Interpolation.
        1: Bilinear interpolation.
        2: Area-based (resampling using pixel area relation). It may be a
        preferred method for image decimation, as it gives moire-free
        results. But when the image is zoomed, it is similar to the Nearest
        Neighbors method. (used by default).
        3: Bicubic interpolation over 4x4 pixel neighborhood.
        4: Lanczos interpolation over 8x8 pixel neighborhood.
        9: Cubic for enlarge, area for shrink, bilinear for others.
        10: Randomly select one of the interpolation methods mentioned above.
        Note:
        When shrinking an image, it will generally look best with AREA-based
        interpolation, whereas, when enlarging an image, it will generally
        look best with Bicubic (slow) or Bilinear (faster but still looks OK).
        More details can be found in the documentation of OpenCV, please
        refer to
        http://docs.opencv.org/master/da/d54/group__imgproc__transform.html.
    :return: NDArray
        An `NDArray` containing the resized image.
    """
    return img.imresize(src, new_width, new_height, interp)
def preprocess(img, image_shape):
    """Resize and normalise an image, returning a one-sample batch.

    :param img: image accepted by ``image.imresize``
    :param image_shape: arguments unpacked into ``image.imresize`` after
        the image (width first, then height)
    :return: array with a leading batch axis, channel-first
    """
    resized = image.imresize(img, *image_shape)
    # Scale to [0, 1], then standardise with the module-level statistics.
    normalised = (resized.astype('float32') / 255 - rgb_mean) / rgb_std
    channel_first = normalised.transpose((2, 0, 1))
    return channel_first.expand_dims(axis=0)
def transform_mnist(data, label):
    """Resize (optionally) and rescale one sample.

    When ``self.__resize`` is truthy the image is resized to a square of
    that side, e.g. (28, 28, 1) -> (224, 224, 1). The image is then moved
    from height x width x channel to channel x height x width and divided
    by 255.

    :param data: image in height x width x channel order
    :param label: label array
    :return: tuple ``(data, label)``, both float32
    """
    side = self.__resize
    if side:
        data = image.imresize(data, side, side)
    channel_first = nd.transpose(data.astype("float32"), (2, 0, 1))
    return channel_first / 255, label.astype("float32")
def transform_mnist(data, label):
    """Resize (optionally) and rescale one sample.

    When the enclosing ``resize`` value is truthy the image is resized to
    ``resize`` x ``resize``. The image is then moved from
    height x width x channel to channel x height x width and divided by 255.

    :param data: image in height x width x channel order
    :param label: label array
    :return: tuple ``(data, label)``, both float32
    """
    if resize:
        data = image.imresize(data, resize, resize)
    as_float = data.astype('float32')
    channel_first = nd.transpose(as_float, (2, 0, 1))
    return channel_first / 255, label.astype('float32')
def process_image(fname, data_shape):
    """Load an image file and build a mean-subtracted network input.

    :param fname: path of the image file
    :param data_shape: side length of the square the image is resized to
    :return: tuple ``(batch, im)`` where ``batch`` is channel-first with a
        leading batch axis and ``im`` is the decoded original image
    """
    with open(fname, 'rb') as f:
        raw_bytes = f.read()
    im = image.imdecode(raw_bytes)

    resized = image.imresize(im, data_shape, data_shape)
    centred = resized.astype('float32') - readData.rgb_mean
    batch = centred.transpose((2, 0, 1)).expand_dims(axis=0)
    return batch, im
def read_img(self, img_path):
    """Read an image, record its original shape and resize it.

    The file location is ``self.path % img_path``. The shape of the image
    as read is stored in ``self.img_size`` before resizing to the
    module-level ``img_width`` x ``img_height``. ``self.transform`` is
    applied when it is not ``None``.

    :param img_path: value substituted into the ``self.path`` pattern
    :return: the resized (and possibly transformed) image
    """
    full_path = self.path % img_path
    raw = image.imread(full_path)
    self.img_size = raw.shape
    resized = image.imresize(raw, img_width, img_height)
    if self.transform is None:
        return resized
    return self.transform(resized)
def transform_test(data, label):
    """Prepare one evaluation sample for two networks.

    The image is scaled to [0, 1] and resized to 224 x 224 and 299 x 299;
    each copy goes through an augmenter list configured only with the
    data shape and mean/std, and is converted from H x W x C to C x H x W.

    :param data: source image
    :param label: class label
    :return: tuple ``(im1, im2, label)`` with the label as float32
    """
    def _make_augmenters(side):
        return image.CreateAugmenter(
            data_shape=(3, side, side),
            mean=np.array([0.485, 0.456, 0.406]),
            std=np.array([0.229, 0.224, 0.225]))

    scaled = data.astype('float32') / 255
    im1 = image.imresize(scaled, 224, 224)
    im2 = image.imresize(scaled, 299, 299)

    auglist1 = _make_augmenters(224)
    auglist2 = _make_augmenters(299)

    for aug in auglist1:
        im1 = aug(im1)
    for aug in auglist2:
        im2 = aug(im2)

    # Change the layout from "height x width x channel" to
    # "channel x height x width".
    im1 = nd.transpose(im1, (2, 0, 1))
    im2 = nd.transpose(im2, (2, 0, 1))

    label_value = nd.array([label]).asscalar().astype('float32')
    return (im1, im2, label_value)
def resize(x, wi, hi):
    """Resize every image of a 4-D batch with interpolation mode 0.

    The batch is unpacked as ``(n, c, h, w)``; each sample is resized in
    channel-last order and the result is returned channel-first again.

    :param x: 4-D batch
    :param wi: target width
    :param hi: target height
    :return: batch of shape ``(n, c, hi, wi)`` created with ``nd.zeros``
    """
    n, c, _, _ = x.shape
    channel_last = x.transpose((0, 2, 3, 1))
    out = nd.zeros(shape=(n, hi, wi, c))
    for idx in range(n):
        out[idx] = imresize(channel_last[idx], wi, hi, 0)
    return out.transpose((0, 3, 1, 2))
def process_image(fname):
    """Load an image file and build a standardised network input.

    The image is resized to ``data_shape`` x ``data_shape``, the RGB mean
    is subtracted and the result is divided by the RGB std (all three
    names come from the enclosing scope).

    :param fname: path of the image file
    :return: tuple ``(batch, im)`` where ``batch`` is channel-first with a
        leading batch axis and ``im`` is the decoded original image
    """
    with open(fname, 'rb') as f:
        raw_bytes = f.read()
    im = image.imdecode(raw_bytes)

    resized = image.imresize(im, data_shape, data_shape)
    standardised = (resized.astype('float32') - rgb_mean) / rgb_std
    batch = standardised.transpose((2, 0, 1)).expand_dims(axis=0)
    return batch, im
def load_and_crop_image(root, files, crop_size):
    """Read images, resize them to cover the crop size and random-crop them.

    Each image is resized with its aspect ratio kept so that it is at
    least as large as the crop (up to integer truncation), then cropped
    with ``image.random_crop``.

    :param root: directory containing the files
    :param files: iterable of file names relative to ``root``
    :param crop_size: two-element size; the resize logic reads it as
        ``(height, width)``
    :return: list of cropped images, one per file
    """
    imgs = []
    for file_name in files:
        img = image.imread(os.path.join(root, file_name))
        h, w = img.shape[:2]
        ratio = h / w
        if crop_size[0] / crop_size[1] < ratio:
            # Image is relatively taller than the crop: match the width.
            w = crop_size[1]
            h = w * ratio
        else:
            # Image is relatively wider than the crop: match the height.
            h = crop_size[0]
            w = h / ratio
        # imresize returns a new array; keep it, otherwise the crop below
        # would run on the image at its original size.
        img = image.imresize(img, int(w), int(h))
        # NOTE(review): crop_size is treated as (height, width) above but is
        # passed unchanged to random_crop; confirm the order it expects for
        # non-square crops.
        img, _ = image.random_crop(img, crop_size)
        imgs.append(img)
    return imgs
def cropImg(self, img, bbox, Det=False):
    """Crop a square window around a box given as comma-separated text.

    ``bbox`` is parsed into integer (x, y) pairs; their extent gives the
    box width, height and centre. The window side is
    ``int(sqrt((w + p) * (h + p)))`` with ``p = (w + h) / 2``, doubled
    when ``Det`` is truthy, and the window is clipped to the image bounds.

    :param img: source image, indexed as ``img[rows, cols, channels]``
    :param bbox: string of comma-separated coordinates, optionally ending
        with a newline
    :param Det: when truthy, return a 255 x 255 crop plus the box expressed
        relative to that crop; otherwise return a 127 x 127 crop only
    :return: ``(img, bboxInDet)`` when ``Det`` is truthy, else ``img``
    """
    values = [int(float(token)) for token in bbox.strip('\n').split(',')]
    corners = np.array(values).reshape(-1, 2)
    xy_max = np.max(corners, axis=0)
    xy_min = np.min(corners, axis=0)
    w = xy_max[0] - xy_min[0]
    h = xy_max[1] - xy_min[1]

    p = (w + h) / 2
    side = int(np.sqrt((w + p) * (h + p)))
    if Det:
        side = side * 2
    half = side // 2

    center = (xy_max / 2 + xy_min / 2).astype('int32')
    rows, cols = img.shape[0], img.shape[1]
    luy = np.clip(center[1] - half, 0, rows)
    rdy = np.clip(center[1] + half, 0, rows)
    lux = np.clip(center[0] - half, 0, cols)
    rdx = np.clip(center[0] + half, 0, cols)
    patch = img[luy:rdy, lux:rdx, :]

    if not Det:
        return image.imresize(patch, 127, 127)

    # Scale factors from the clipped crop to the 255 x 255 output.
    scale_w = 255 / patch.shape[1]
    scale_h = 255 / patch.shape[0]
    patch = image.imresize(patch, 255, 255)
    # Box centre and size in crop coordinates, divided by 255.
    bboxInDet = mx.ndarray.array([
        ((center[0] - lux) * scale_w) / 255,
        ((center[1] - luy) * scale_h) / 255,
        (w * scale_w) / 255,
        (h * scale_h) / 255,
    ])
    return patch, bboxInDet
def read_img(self, img_path):
    """Read an image, record its original shape, resize and transform it.

    The file location is ``self.path + img_path``. The shape of the image
    as read is stored in ``self.img_size`` before resizing to the
    module-level ``img_width`` x ``img_height``. Every callable in
    ``self.transform`` is then applied in order.

    :param img_path: path suffix appended to ``self.path``
    :return: the resized and transformed image
    """
    picture = image.imread(self.path + img_path)
    self.img_size = picture.shape
    picture = image.imresize(picture, img_width, img_height)
    for trans in self.transform:
        picture = trans(picture)
    return picture
def resize_img(data, resize=None):
    """Optionally resize a batch, then convert it to scaled channel-first.

    When ``resize`` is truthy every sample is copied to the CPU context,
    resized to ``resize`` x ``resize`` and stored in a new batch created
    with ``nd.zeros``.

    :param data: batch in batch x height x width x channel order
    :param resize: side length of the square output, or a falsy value to
        skip resizing
    :return: float32 batch in batch x channel x height x width order,
        divided by 255
    """
    if resize:
        count, channels = data.shape[0], data.shape[3]
        resized = nd.zeros((count, resize, resize, channels))
        for idx in range(count):
            sample = data[idx].as_in_context(mx.cpu())
            resized[idx] = image.imresize(sample, resize, resize)
        data = resized
    channel_first = nd.transpose(data.astype('float32'), (0, 3, 1, 2))
    return channel_first / 255
def process_image(fname):
    """Load an image file and build a mean-subtracted network input.

    The image is resized to ``data_shape`` x ``data_shape`` and the RGB
    mean is subtracted (both names come from the enclosing scope).

    :param fname: path of the image file
    :return: tuple ``(batch, im)`` where ``batch`` is
        batch x channel x height x width and ``im`` is the decoded
        original image
    """
    with open(fname, 'rb') as f:
        raw_bytes = f.read()
    im = image.imdecode(raw_bytes)

    resized = image.imresize(im, data_shape, data_shape)
    centred = resized.astype('float32') - rgb_mean
    channel_first = centred.transpose((2, 0, 1))
    return channel_first.expand_dims(axis=0), im
def transform(data, label):
    """Optionally resize a batch, then convert it to scaled channel-first.

    When the enclosing ``resize`` value is truthy every sample is resized
    to ``resize`` x ``resize`` into a new batch created with ``nd.zeros``.

    :param data: batch in batch x height x width x channel order
    :param label: label array
    :return: tuple ``(data, label)``; data is float32, channel-first and
        divided by 255, label is float32
    """
    if resize:
        batch_size = data.shape[0]
        resized = nd.zeros((batch_size, resize, resize, data.shape[3]))
        for idx in range(batch_size):
            resized[idx] = image.imresize(data[idx], resize, resize)
        data = resized
    channel_first = nd.transpose(data.astype('float32'), (0, 3, 1, 2))
    return channel_first / 255, label.astype('float32')
def __getitem__(self, idx):
    """Return the image and encoded label stored at ``idx``.

    ``self.items[idx]`` holds the image path at position 0 and the label
    text at position 1. The image is read with ``self._flag`` and resized
    to 128 x 64 (width x height); the label is encoded with ``str2vec``.

    :param idx: index into ``self.items``
    :return: ``self._transform(img, label)`` when a transform is set,
        otherwise the tuple ``(img, label)``
    """
    entry = self.items[idx]
    img = image.imread(entry[0], self._flag)
    # Resize to sizes that are multiples of powers of two for easier
    # processing.
    img = image.imresize(img, w=128, h=64)
    label = str2vec(entry[1])
    if self._transform is None:
        return img, label
    return self._transform(img, label)
def process_image(fname):
    """Decode an image file and prepare it as a one-sample batch.

    Uses ``data_shape`` (square side length) and ``rgb_mean`` from the
    enclosing scope.

    :param fname: path of the image file
    :return: tuple ``(batch, im)``: the mean-subtracted float32 input in
        batch x channel x height x width order, and the decoded original
    """
    with open(fname, 'rb') as f:
        encoded = f.read()
    im = image.imdecode(encoded)

    # Resize to data_shape, then subtract the RGB mean.
    data = image.imresize(im, data_shape, data_shape).astype('float32')
    data = data - rgb_mean
    # Convert to batch x channel x height x width.
    batch = data.transpose((2, 0, 1)).expand_dims(axis=0)
    return batch, im
def pre_processing(self, img):
    """Fit an image to ``self.img_h`` x ``self.img_w``.

    The image is scaled to the target height first. If the resulting width
    is below the target width it is padded with black (zero) pixels on the
    right; otherwise it is forcibly resized to the target width.

    :param img: the image to process
    :return: image with height ``self.img_h`` and width ``self.img_w``
    """
    target_h, target_w = self.img_h, self.img_w
    src_h, src_w = img.shape[:2]
    scaled_w = int(src_w * (float(target_h) / src_h))

    if scaled_w >= target_w:
        return image.imresize(img, w=target_w, h=target_h)

    resized = image.imresize(img, w=scaled_w, h=target_h)
    padding = nd.zeros((target_h, target_w - scaled_w, self.img_channel),
                       dtype=resized.dtype)
    return nd.concat(resized, padding, dim=1)
def augument(data_path, label, image_name, save_path, size=224, training=True):
    """Write augmented (training) or centre-cropped copies of one image.

    Files whose lower-cased extension is not in ``IMG_EXTS`` are skipped,
    as are files that cannot be read or decoded; both cases print a
    message and return ``None``.

    In training mode the image's short side is resized to 384 and seven
    files ``<name>_0`` ... ``<name>_6`` are written: a centre crop, three
    random crops, a randomly flipped copy of the last random crop, a
    randomly flipped copy of the centre crop, and a direct resize to
    ``size`` x ``size``. Otherwise the short side is resized to ``size``
    and a single centre crop is written under the original file name.

    :param data_path: directory containing the source image
    :param label: sub-directory of ``save_path`` to write into, or ``None``
        to write directly into ``save_path``
    :param image_name: file name of the source image
    :param save_path: root output directory
    :param size: side length of the square outputs
    :param training: selects the seven-file augmentation described above
    :return: ``None``
    """
    image_path = os.path.join(data_path, image_name)
    (name, extension) = splitfilename(image_name)
    extension = extension.lower()
    if extension not in IMG_EXTS:
        print('filered image: %s' % image_name)
        return

    try:
        # Context manager so the file handle is released even on failure.
        with open(image_path, 'rb') as image_file:
            img = image.imdecode(image_file.read()).astype('float32')
    except Exception as ex:
        # Best effort: report the unreadable file and move on.
        print("error: ", ex)
        return

    label_path = save_path if label is None else os.path.join(save_path, label)
    mkdir(label_path)

    def _save(suffix, picture):
        # NOTE(review): ``cv`` is presumably OpenCV, which writes BGR, while
        # the image is decoded with imdecode's default flags; confirm the
        # channel order of the written files.
        new_name = "%s%s%s" % (name, suffix, extension)
        cv.imwrite(os.path.join(label_path, new_name), picture.asnumpy())

    if training:
        aug1 = image.HorizontalFlipAug(0.5)
        aug2 = image.HorizontalFlipAug(.5)
        img = image.resize_short(img, size=384, interp=2)

        center_crop, _ = image.center_crop(img, size=(size, size))
        _save("_0", center_crop)

        for suffix in ("_1", "_2", "_3"):
            random_crop, _ = image.random_crop(img, size=(size, size))
            _save(suffix, random_crop)

        # The flipped copies reuse the last random crop and the centre crop.
        _save("_4", aug1(random_crop).clip(0, 255))
        _save("_5", aug2(center_crop).clip(0, 255))
        _save("_6", image.imresize(img, w=size, h=size, interp=2))
    else:
        img = image.resize_short(img, size=size)
        img, _ = image.center_crop(img, size=(size, size))
        _save("", img)
def transform_mnist(data, label):
    """Transform a batch of examples.

    When the enclosing ``resize`` value is truthy every sample is resized
    to ``resize`` x ``resize``. The batch is then changed from
    batch x height x width x channel to batch x channel x height x width
    and divided by 255.

    :param data: batch of images
    :param label: label array
    :return: tuple ``(data, label)``, both float32
    """
    if resize:
        num_samples = data.shape[0]
        num_channels = data.shape[3]
        resized = nd.zeros((num_samples, resize, resize, num_channels))
        for k in range(num_samples):
            resized[k] = image.imresize(data[k], resize, resize)
        data = resized
    features = nd.transpose(data.astype('float32'), (0, 3, 1, 2)) / 255
    return features, label.astype('float32')
def transform_mnist(data, label):
    """Transform a batch of examples into scaled channel-first float32.

    Resizing to ``resize`` x ``resize`` happens only when the enclosing
    ``resize`` value is truthy.

    :param data: batch in batch x height x width x channel order
    :param label: label array
    :return: tuple ``(data, label)``; data is
        batch x channel x height x width divided by 255, label is float32
    """
    if resize:
        total = data.shape[0]
        buffer = nd.zeros((total, resize, resize, data.shape[3]))
        for pos in range(total):
            buffer[pos] = image.imresize(data[pos], resize, resize)
        data = buffer
    as_float = data.astype('float32')
    # batch x height x width x channel -> batch x channel x height x width
    scaled = nd.transpose(as_float, (0, 3, 1, 2)) / 255
    return scaled, label.astype('float32')
def adjust_dpi(img, per):
    """Scale an image by a factor, display it and return it.

    Both sides are multiplied by ``per`` and truncated to integers; the
    image is resized with interpolation mode 1, its shape is printed and
    it is shown in a matplotlib window with a colour bar.

    :param img: three-dimensional image (height, width, channels)
    :param per: scale factor applied to width and height
    :return: the resized image
    """
    height, width, _ = img.shape
    scaled_w = int(width * per)
    scaled_h = int(height * per)
    img = image.imresize(img, w=scaled_w, h=scaled_h, interp=1)
    print(img.shape)

    plt.ioff()
    plt.imshow(img.asnumpy())
    plt.colorbar()
    plt.show()
    return img
def transform_mnist(data, label):
    """Transform a batch of examples.

    When the enclosing ``resize`` value is truthy every sample is resized
    to ``resize`` x ``resize``. The batch axes are then reordered from
    (batch, height, width, channel) to (batch, channel, height, width)
    and the values are divided by 255.

    :param data: batch of images
    :param label: label array
    :return: tuple ``(data, label)``, both float32
    """
    if resize:  # change the spatial size
        sample_count = data.shape[0]  # number of samples
        channel_count = data.shape[3]  # number of channels
        reshaped = nd.zeros((sample_count, resize, resize, channel_count))
        for row in range(sample_count):
            reshaped[row] = image.imresize(data[row], resize, resize)
        data = reshaped
    # Axis 0 stays the batch; the channel axis (3) moves in front of
    # height (1) and width (2).
    reordered = nd.transpose(data.astype('float32'), (0, 3, 1, 2))
    return reordered / 255, label.astype('float32')
def transform(data, label, resize=None):
    """Transform a batch of examples without changing the axis order.

    :param data: batch of images; indexed with four axes when ``resize``
        is truthy
    :param label: label array
    :param resize: side length of the square each sample is resized to,
        or a falsy value to skip resizing
    :return: tuple ``(data, label)``; data is float32 divided by 255,
        label is float32
    """
    if resize:
        count = data.shape[0]
        resized = nd.zeros((count, resize, resize, data.shape[3]))
        for idx in range(count):
            resized[idx] = image.imresize(data[idx], resize, resize)
        data = resized
    scaled = data.astype('float32') / 255
    return scaled, label.astype('float32')
def transform_predict(im, size):
    """Prepare an image for prediction.

    The image is scaled to [0, 1], resized to ``size`` x ``size`` with
    interpolation mode 1, moved to channel-first order, normalised with a
    fixed per-channel mean/std and finally passed through ``two_crop``.

    :param im: source image in height x width x channel order
    :param size: side length of the square the image is resized to
    :return: the result of ``two_crop`` on the normalised image
    """
    scaled = im.astype('float32') / 255
    resized = image.imresize(scaled, size, size, interp=1)
    channel_first = nd.transpose(resized, (2, 0, 1))
    normalised = mx.nd.image.normalize(
        channel_first,
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225))
    return two_crop(normalised)
def transform2D(data, label, resize=None):
    """Transform examples into scaled channel-first float32 arrays.

    When ``resize`` is truthy, ``data`` must be a 4-D batch; every sample
    is resized to ``resize`` x ``resize``. The channel axis is then moved
    in front of the spatial axes and the values are divided by 255.

    :param data: a batch in batch x height x width x channel order, or a
        single image in height x width x channel order (only when
        ``resize`` is falsy)
    :param label: label array
    :param resize: side length of the square each sample is resized to,
        or a falsy value to skip resizing
    :return: tuple ``(data, label)``, both float32
    """
    if resize:
        n = data.shape[0]
        new_data = nd.zeros((n, resize, resize, data.shape[3]))
        for i in range(n):
            new_data[i] = image.imresize(data[i], resize, resize)
        data = new_data

    # Pick the permutation that matches the rank: the 3-axis permutation
    # alone cannot be applied to the 4-D batch built on the resize path.
    if len(data.shape) == 4:
        # batch x height x width x channel -> batch x channel x height x width
        axes = (0, 3, 1, 2)
    else:
        # height x width x channel -> channel x height x width
        axes = (2, 0, 1)
    return nd.transpose(data.astype('float32'), axes) / 255, label.astype('float32')
def _transform_mnist(data, label):
    """Resize (optionally) and rescale a batch of examples.

    When the enclosing ``resize`` value is truthy every sample is resized
    to ``resize`` x ``resize``. The batch is changed from
    batch x height x width x channel to batch x channel x height x width
    and divided by 255.

    :param data: batch of images
    :param label: label array
    :return: tuple ``(data, label)``, both float32
    """
    if resize:
        batch_len = data.shape[0]
        target_shape = (batch_len, resize, resize, data.shape[3])
        resized_batch = nd.zeros(target_shape)
        for index in range(batch_len):
            resized_batch[index] = image.imresize(data[index], resize, resize)
        data = resized_batch
    float_batch = data.astype('float32')
    return nd.transpose(float_batch, (0, 3, 1, 2)) / 255, label.astype('float32')
def forward(self, x):
    """Resize ``x`` according to the configured size.

    ``self._size`` is either a ``(width, height)`` pair or a single
    number. With a single number and ``self._keep`` falsy, the output is a
    square of that side. With ``self._keep`` truthy, the shorter side of
    ``x`` is set to that number and the other side is scaled
    proportionally (truncated to an integer).

    :param x: image with shape (height, width, channels)
    :return: the image resized with ``self._interpolation``
    """
    size = self._size
    if not isinstance(size, numeric_types):
        wsize, hsize = size
    elif not self._keep:
        wsize = hsize = size
    else:
        h, w, _ = x.shape
        if h > w:
            # Width is the shorter side.
            wsize = size
            hsize = int(h * wsize / w)
        else:
            hsize = size
            wsize = int(w * hsize / h)
    return image.imresize(x, wsize, hsize, self._interpolation)
def resize(src, new_width, new_height, interp=2):
    """Resize an image to ``new_width`` x ``new_height``.

    The input image NDArray should have the dimension order 'HWC'.
    This is a thin wrapper that forwards its arguments to ``img.imresize``.

    Parameters
    ----------
    src : NDArray
        Source image in NDArray format
    new_width : int
        Width in pixels of the resized image
    new_height : int
        Height in pixels of the resized image
    interp : int
        Interpolation method for all resizing operations.

        Possible values:
        0: Nearest Neighbors Interpolation.
        1: Bilinear interpolation.
        2: Area-based (resampling using pixel area relation). It may be a
        preferred method for image decimation, as it gives moire-free
        results. But when the image is zoomed, it is similar to the Nearest
        Neighbors method. (used by default).
        3: Bicubic interpolation over 4x4 pixel neighborhood.
        4: Lanczos interpolation over 8x8 pixel neighborhood.
        9: Cubic for enlarge, area for shrink, bilinear for others.
        10: Randomly select one of the interpolation methods mentioned above.

        Note:
        When shrinking an image, it will generally look best with AREA-based
        interpolation, whereas, when enlarging an image, it will generally
        look best with Bicubic (slow) or Bilinear (faster but still looks OK).
        More details can be found in the documentation of OpenCV, please
        refer to
        http://docs.opencv.org/master/da/d54/group__imgproc__transform.html.

    Returns
    -------
    NDArray
        An `NDArray` containing the resized image.
    """
    return img.imresize(src, new_width, new_height, interp)
def preprocess(img, image_shape):
    """Turn an image into a normalised one-sample, channel-first batch.

    :param img: image accepted by ``image.imresize``
    :param image_shape: arguments unpacked into ``image.imresize`` after
        the image (width first, then height)
    :return: float32 array with a leading batch axis
    """
    img = image.imresize(img, *image_shape)
    unit_range = img.astype('float32') / 255
    standardised = (unit_range - rgb_mean) / rgb_std
    return standardised.transpose((2, 0, 1)).expand_dims(axis=0)