def augument(data_path, label, image_name, save_path, size=224, training=True):
    """Decode one image and write resized/cropped copies of it to disk.

    The image ``data_path/image_name`` is decoded and the results are written
    into ``save_path/label`` (or directly into ``save_path`` when ``label`` is
    None).

    In training mode the image is first resized so its shorter edge is 384 and
    seven files ``<name>_<k><ext>`` are written:

    * ``0``       - center crop of ``size`` x ``size``
    * ``1``-``3`` - three independent random crops of ``size`` x ``size``
    * ``4``       - the third random crop passed through a p=0.5 horizontal flip
    * ``5``       - the center crop passed through a p=0.5 horizontal flip
    * ``6``       - the whole (resized) image resized to ``size`` x ``size``

    Otherwise a single file ``<name><ext>`` is written: shorter edge resized
    to ``size``, then center-cropped to ``size`` x ``size``.

    :param data_path: directory containing the source image
    :param label: sub-directory name under ``save_path``, or None
    :param image_name: file name of the source image
    :param save_path: root output directory
    :param size: edge length of the square outputs
    :param training: write the seven augmented variants when true, otherwise
        a single center crop
    :return: None. Files with an extension outside ``IMG_EXTS`` and images
        that fail to load are reported with ``print`` and skipped.
    """
    image_path = os.path.join(data_path, image_name)
    (name, extension) = splitfilename(image_name)
    extension = extension.lower()
    if extension not in IMG_EXTS:
        print('filered image: %s' % image_name)
        return

    try:
        # Read through a context manager so the file handle is closed right
        # away instead of being left for the garbage collector.
        with open(image_path, 'rb') as f:
            raw = f.read()
        img = image.imdecode(raw).astype('float32')
    except Exception as ex:
        # Best-effort batch helper: report the problem and skip this image.
        print("error: ", ex)
        return

    if label is not None:
        label_path = os.path.join(save_path, label)
    else:
        label_path = save_path
    mkdir(label_path)

    # NOTE(review): image.imdecode yields RGB by default while cv.imwrite
    # (assuming cv is OpenCV) expects BGR, so the saved files may have red and
    # blue swapped - confirm whether that is intended.
    def _save(file_name, nd_img):
        # Write one NDArray image into the label directory.
        cv.imwrite(os.path.join(label_path, file_name), nd_img.asnumpy())

    def _variant_name(suffix):
        # Build "<name>_<suffix><ext>" for an augmented copy.
        return "%s_%s%s" % (name, suffix, extension)

    if training:
        flip = image.HorizontalFlipAug(0.5)
        img = image.resize_short(img, size=384, interp=2)

        center_crop, _ = image.center_crop(img, size=(size, size))
        _save(_variant_name("0"), center_crop)

        for suffix in ("1", "2", "3"):
            random_crop, _ = image.random_crop(img, size=(size, size))
            _save(_variant_name(suffix), random_crop)

        # The flips reuse the last random crop and the center crop.
        _save(_variant_name("4"), flip(random_crop).clip(0, 255))
        _save(_variant_name("5"), flip(center_crop).clip(0, 255))

        img_resize = image.imresize(img, w=size, h=size, interp=2)
        _save(_variant_name("6"), img_resize)
    else:
        img = image.resize_short(img, size=size)
        img, _ = image.center_crop(img, size=(size, size))
        _save("%s%s" % (name, extension), img)
def main():
    """Classify a sample dog photo with a pretrained ResNet-50 V2.

    Downloads the ImageNet synset label file and a test image, shows the
    resized and center-cropped image with matplotlib, then prints the five
    most probable classes with their softmax probabilities.

    Relies on a ``transform`` callable defined elsewhere to turn the cropped
    image into the network input.
    """
    net = models.resnet50_v2(pretrained=True)

    url = 'http://data.mxnet.io/models/imagenet/synset.txt'
    fname = download(url)
    with open(fname, 'r') as f:
        # Each line is "<id> <description...>"; keep only the description.
        text_labels = [' '.join(line.split()[1:]) for line in f]

    # Adjacent string literals are joined at compile time. Unlike a backslash
    # continuation inside the literal, no indentation can leak into the URL.
    url2 = ('https://upload.wikimedia.org/wikipedia/commons/thumb/b/b5/'
            'Golden_Retriever_medium-to-light-coat.jpg/'
            '365px-Golden_Retriever_medium-to-light-coat.jpg')
    fname2 = download(url2)

    x = image.imread(fname2)
    x = image.resize_short(x, 256)
    x, _ = image.center_crop(x, (224, 224))
    plt.imshow(x.asnumpy())
    plt.show()

    prob = net(transform(x)).softmax()
    idx = prob.topk(k=5)[0]
    for i in idx:
        i = int(i.asscalar())
        print('With prob = %.5f, it contains %s' % (prob[0, i].asscalar(), text_labels[i]))
def predict_with_models_from_gluon_model_zoo_example():
    """Run a pretrained Gluon model-zoo ResNet-50 V2 on a sample dog photo.

    Downloads the ImageNet class names and a test image, displays the
    preprocessed image, and prints the top-5 predicted classes with their
    softmax probabilities.
    """

    def transform(data):
        # HWC image -> normalized float32 batch of one in NCHW layout.
        batch = data.transpose((2, 0, 1)).expand_dims(axis=0)
        mean = nd.array([0.485, 0.456, 0.406]).reshape((1, 3, 1, 1))
        std = nd.array([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))
        scaled = batch.astype('float32') / 255
        return (scaled - mean) / std

    # The model zoo ships several pretrained networks; this loads the
    # ResNet-50 V2 weights.
    net = models.resnet50_v2(pretrained=True)

    # Class names: each line of the synset file is "<id> <description...>".
    labels_path = download('http://data.mxnet.io/models/imagenet/synset.txt')
    with open(labels_path, 'r') as label_file:
        text_labels = [' '.join(line.split()[1:]) for line in label_file]

    # Test image: a golden retriever photo from Wikipedia.
    image_path = download('https://upload.wikimedia.org/wikipedia/commons/thumb/b/b5/Golden_Retriever_medium-to-light-coat.jpg/365px-Golden_Retriever_medium-to-light-coat.jpg')

    # Preprocess with the MXNet image module: shorter edge to 256, then a
    # 224 x 224 center crop.
    resized = image.resize_short(image.imread(image_path), 256)
    x, _ = image.center_crop(resized, (224, 224))
    plt.imshow(x.asnumpy())
    plt.show()

    prob = net(transform(x)).softmax()
    top5 = prob.topk(k=5)[0]
    for entry in top5:
        class_id = int(entry.asscalar())
        print('With prob = %.5f, it contains %s' % (prob[0, class_id].asscalar(), text_labels[class_id]))
def transform_val(data, label):
    """Preprocess one validation sample.

    The image is scaled to [0, 1] floats, resized so its shorter edge is 256,
    center-cropped to 224 x 224, moved to channel-first layout and normalized
    with the given per-channel mean/std.

    :param data: image array in HWC layout
    :param label: class label of the image
    :return: tuple ``(image, label)`` with the label converted to a scalar
    """
    scaled = data.astype('float32') / 255
    resized = image.resize_short(scaled, 256)
    cropped, _ = image.center_crop(resized, (224, 224))
    chw = nd.transpose(cropped, (2, 0, 1))
    normalized = mx.nd.image.normalize(
        chw,
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    )
    scalar_label = nd.array([label]).asscalar()
    return (normalized, scalar_label)
def transform_val(data, label):
    """Preprocess one validation sample without mean/std normalization.

    :param data: image array in HWC layout
    :param label: class label of the image
    :return: tuple ``(image, label)``; the image is a channel-first float32
        crop of 224 x 224 and the label is converted to a scalar
    """
    scaled = data.astype('float32') / 255
    # Resize so that the shorter edge becomes 256.
    resized = image.resize_short(scaled, 256)
    # Center-crop to 224 x 224.
    cropped, _ = image.center_crop(resized, (224, 224))
    chw = nd.transpose(cropped, (2, 0, 1))
    # Normalization is disabled in this variant:
    # im = mx.nd.image.normalize(im, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    scalar_label = nd.array([label]).asscalar()
    # Return the image together with its label.
    return (chw, scalar_label)
def __getitem__(self, idx):
    """Return the sample at ``idx``, zero-padded from 360 to 384 rows.

    ``self.paths[idx]`` supplies a ``(data, label)`` pair. The data image is
    read, passed through ``self._transform``, viewed as (1, 3, 360, 480),
    padded with 12 zero rows on both sides of the third axis and returned as
    (3, 384, 480).

    When ``self._annot`` is truthy the label image is read with ``flag=0``,
    padded the same way, center-cropped with size ``(472, 376)`` and returned
    channel-first alongside the data.

    :param idx: index into ``self.paths``
    :return: ``(data, annotation)`` when ``self._annot`` is truthy, otherwise
        only the data array
    """
    data, label = self.paths[idx]
    img_d = self._imread(data)

    # Pads only the third axis (12 before, 12 after): 360 -> 384.
    row_padding = (0, 0, 0, 0, 12, 12, 0, 0)

    if self._annot:
        img_an = (
            self._imread(label, flag=0)
            .astype('float32')
            .transpose(axes=(2, 0, 1))
            .reshape(1, 1, 360, 480)
            .pad(mode="constant", constant_value=0.0, pad_width=row_padding)
            .reshape(1, 384, 480)
            .transpose(axes=(1, 2, 0))
        )
        # center_crop works on channel-last data, hence the transposes
        # around it.
        # NOTE(review): size is presumably (width, height) - confirm.
        img_an, _ = image.center_crop(img_an, (472, 376))
        img_an = img_an.transpose(axes=(2, 0, 1))

    imag_d = (
        self._transform(img_d)
        .astype('float32')
        .reshape(1, 3, 360, 480)
        .pad(mode="constant", constant_value=0.0, pad_width=row_padding)
        .reshape(3, 384, 480)
    )

    return (imag_d, img_an) if self._annot else imag_d
def self_designed_transform(x, args):
    """Apply the configured crop / to_tensor / normalize pipeline to ``x``.

    :param x: input image
    :param args: options object; reads ``CenterCrop``, ``CenterCropSize``,
        ``sumbol_or_ndarray``, ``Normalize``, ``mean`` and ``std``
    :return: the image after ``to_tensor`` and, if enabled, ``normalize``
    """

    def _image_ops():
        # Image operator namespace of the backend selected by the options.
        return two_type_factory[args.sumbol_or_ndarray].image

    # The flags are compared by identity: only the literal True enables a step.
    if args.CenterCrop is True:
        x, _ = center_crop(x, args.CenterCropSize)

    tensor = _image_ops().to_tensor(x)
    if args.Normalize is not True:
        return tensor
    return _image_ops().normalize(tensor, mean=args.mean, std=args.std)
def transform_image(img_path):
    """Load an image file and turn it into a normalized network input.

    The image is resized so its shorter edge is 256, center-cropped to
    224 x 224, moved to channel-first layout with a leading batch axis,
    scaled to [0, 1] and normalized with the per-channel mean/std below.

    :param img_path: path of the image file to read
    :return: float32 array shaped (1, 3, 224, 224)
    """
    resized = image.resize_short(image.imread(img_path), 256)
    cropped, _ = image.center_crop(resized, (224, 224))
    batch = cropped.transpose((2, 0, 1)).expand_dims(axis=0)

    mean = nd.array([0.485, 0.456, 0.406]).reshape((1, 3, 1, 1))
    std = nd.array([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))

    scaled = batch.astype("float32") / 255
    return (scaled - mean) / std
def load_image(img_path, long_side_length):
    """Load an image file as a normalized (1, 3, 448, 448) float32 batch.

    :param img_path: path of the image file to read
    :param long_side_length: target length handed to ``image.resize_short``.
        Despite the name, it is the shorter edge that is resized to this
        value.
    :return: array shaped (1, 3, 448, 448), scaled to [0, 1] and normalized
        with the per-channel mean/std below
    """
    x = image.imread(img_path)
    x = image.resize_short(x, long_side_length)
    x, _ = image.center_crop(x, (448, 448))
    x = x.astype('float32') / 255
    x = image.color_normalize(x,
                              mean=nd.array([0.485, 0.456, 0.406]),
                              std=nd.array([0.229, 0.224, 0.225]))
    # The crop is channel-last (448, 448, 3). A plain reshape to
    # (1, 3, 448, 448) would only reinterpret the buffer and scramble the
    # pixels, so move the channel axis first and then add the batch axis.
    x = x.transpose((2, 0, 1)).expand_dims(axis=0)
    return x
def classify(image_file_path):
    """Show an image and print its top-5 predicted classes as a table.

    Uses ``net``, ``transform`` and ``text_labels`` from the enclosing scope.

    :param image_file_path: path of the image file to classify
    """
    resized = image.resize_short(image.imread(image_file_path), 256)
    cropped, _ = image.center_crop(resized, (224, 224))
    plt.imshow(cropped.asnumpy())
    plt.show()

    prob = net(transform(cropped)).softmax()
    top5 = prob.topk(k=5)[0]

    print(' prob | name')
    print(' ------------------')
    for entry in top5:
        class_id = int(entry.asscalar())
        print(' %.3f | %s' % (prob[0, class_id].asscalar(), text_labels[class_id]))
def transform_filter(data, label):
    """Pre-process one image/label pair, applying ``filter_image`` first.

    The image is passed through ``filter_image`` with ``filter_level``,
    scaled to [0, 1], resized so its shorter edge is ``RESIZE_SIZE``,
    center-cropped to ``INPUT_SIZE`` x ``INPUT_SIZE``, moved to channel-first
    layout and normalized with the per-channel mean/std below.

    :param data: image data, mx.ndarray
    :param label: image label, mx.ndarray
    :return: tuple ``(image, label)`` with the label converted to a scalar
    """
    # Compress the image to a certain bit depth.
    filtered = filter_image(data, filter_level)
    scaled = filtered.astype("float32") / 255
    resized = image.resize_short(scaled, RESIZE_SIZE)
    cropped, _ = image.center_crop(resized, (INPUT_SIZE, INPUT_SIZE))
    chw = nd.transpose(cropped, (2, 0, 1))
    normalized = mx.nd.image.normalize(
        chw,
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    )
    scalar_label = nd.array([label]).asscalar()
    return (normalized, scalar_label)
from mxnet.gluon.utils import download
from mxnet import image

# Script: classify a sample dog photo with a pretrained ResNet-50 V2.
net = models.resnet50_v2(pretrained=True)

# Class names: each line of the synset file is "<id> <description...>".
url = 'http://data.mxnet.io/models/imagenet/synset.txt'
fname = download(url)
with open(fname, 'r') as f:
    # Slice with [1:] to keep every description word. Indexing with [1]
    # yields a single string, and ' '.join() over a string inserts a space
    # between each of its characters.
    text_labels = [' '.join(line.split()[1:]) for line in f]

url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/b/b5/Golden_Retriever_medium-to-light-coat.jpg/365px-Golden_Retriever_medium-to-light-coat.jpg'
fname = download(url)
x = image.imread(fname)

# Shorter edge to 256, then a 224 x 224 center crop.
x = image.resize_short(x, 256)
x, _ = image.center_crop(x, (224, 224))
plt.imshow(x.asnumpy())
plt.show()


def transform(data):
    """Convert an HWC image into a normalized float32 NCHW batch of one."""
    data = data.transpose((2, 0, 1)).expand_dims(axis=0)
    rgb_mean = nd.array([0.485, 0.456, 0.406]).reshape((1, 3, 1, 1))
    rgb_std = nd.array([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))
    return (data.astype('float32') / 255 - rgb_mean) / rgb_std


prob = net(transform(x)).softmax()
idx = prob.topk(k=5)[0]
for i in idx:
    # NOTE(review): the class index is computed but never used here; the
    # reporting step appears to be missing - confirm.
    i = int(i.asscalar())
def load_vgg16_image(img_path):
    """Read an image and return its 224 x 224 center crop.

    The image is first resized so that its shorter edge is 256.

    :param img_path: path of the image file to read
    :return: the cropped image
    """
    resized = image.resize_short(image.imread(img_path), 256)
    cropped, _ = image.center_crop(resized, (224, 224))
    return cropped
def load_vgg16_image(img_path, image_width=224, image_height=224):
    """Read an image and return a center crop of the requested size.

    The image is first resized so that its shorter edge is 256.

    :param img_path: path of the image file to read
    :param image_width: first element of the crop size
    :param image_height: second element of the crop size
    :return: the cropped image
    """
    crop_size = (image_width, image_height)
    resized = image.resize_short(image.imread(img_path), 256)
    cropped, _ = image.center_crop(resized, crop_size)
    return cropped