def display_anchors(fmap_w, fmap_h, s):
    """Generate anchors for a feature map and draw them on the global image.

    A zero feature map whose last two axes are ``fmap_h`` and ``fmap_w`` is
    fed to ``contrib.nd.MultiBoxPrior`` with anchor sizes ``s`` and aspect
    ratios 1, 2 and 0.5. The anchors are printed, multiplied by
    ``(w, h, w, h)`` and drawn over ``img``.

    Relies on the module-level names ``img``, ``w`` and ``h``.
    """
    # The sizes of the first two axes do not affect the generated anchors.
    feature_shape = (1, 10, fmap_h, fmap_w)
    fmap = nd.zeros(feature_shape)
    anchors = contrib.nd.MultiBoxPrior(fmap, sizes=s, ratios=[1, 2, 0.5])
    print('anchor', anchors)
    axes = d2l.plt.imshow(img.asnumpy()).axes
    bbox_scale = nd.array((w, h, w, h))
    d2l.show_bboxes(axes, anchors[0] * bbox_scale)
def display(img, output, threshold):
    """Show ``img`` and draw the rows of ``output`` that reach ``threshold``.

    For every row, entry 1 is read as the score and entries 2..5 as a box
    that is multiplied by ``(w, h, w, h)`` of the image before drawing.
    Rows whose score is below ``threshold`` are skipped. Each drawn box is
    labelled with the score formatted as ``'%.2f'``.
    """
    fig = d2l.plt.imshow(img.asnumpy())
    # The image size does not change between rows, so read it once.
    height, width = img.shape[0:2]
    for detection in output:
        score = detection[1].asscalar()
        if score < threshold:
            continue
        # Build the scale on the same context as the row it multiplies.
        scale = nd.array((width, height, width, height),
                         ctx=detection.context)
        scaled_box = [detection[2:6] * scale]
        d2l.show_bboxes(fig.axes, scaled_box, '%.2f' % score, 'w')
def pikaqiu():
    """Load one Pikachu training batch and plot its first ten images.

    Prints the shapes of the batch data and labels, shows the first ten
    images in a 2 x 5 grid, and draws on each image the box taken from
    entries 1..4 of its first label row, multiplied by ``edge_size``.
    """
    batch_size, edge_size = 32, 256
    train_iter, _ = load_data_pikachu(batch_size, edge_size)
    batch = train_iter.next()
    data, labels = batch.data[0], batch.label[0]
    print(data.shape, labels.shape)

    # Move the channel axis last and divide by 255 before plotting.
    imgs = data[0:10].transpose((0, 2, 3, 1)) / 255
    axes = d2l.show_images(imgs, 2, 5).flatten()
    for ax, label in zip(axes, labels[0:10]):
        box = label[0][1:5] * edge_size
        d2l.show_bboxes(ax, [box], colors=['w'])
def load_data_pikachu(batch_size, edge_size):
    """Return ``(train_iter, val_iter)`` detection iterators for Pikachu.

    Both iterators read RecordIO files from ``data_dir`` and yield batches
    of ``batch_size`` images with data shape ``(3, edge_size, edge_size)``.
    The training iterator is shuffled and uses random cropping; the
    validation iterator is not shuffled.

    NOTE(review): the ``def`` line was not part of the reviewed text. The
    signature is reconstructed from the call
    ``load_data_pikachu(batch_size, edge_size)`` below, and ``data_dir`` is
    assumed to be provided by the missing part of the function or by the
    module - confirm before merging.
    """
    # Fixed: the class is ``ImageDetIter`` (was misspelled ``ImageDeIter``).
    train_iter = image.ImageDetIter(
        path_imgrec=os.path.join(data_dir, 'train.rec'),
        path_imgidx=os.path.join(data_dir, 'train.idx'),
        batch_size=batch_size,
        data_shape=(3, edge_size, edge_size),
        shuffle=True,
        rand_crop=1,
        min_object_covered=0.95,
        max_attempts=200)
    # Fixed: the keyword is ``shuffle`` (was misspelled ``suhffle``).
    val_iter = image.ImageDetIter(
        path_imgrec=os.path.join(data_dir, 'val.rec'),
        batch_size=batch_size,
        data_shape=(3, edge_size, edge_size),
        shuffle=False)
    return train_iter, val_iter


# In[ ]:

batch_size, edge_size = 32, 256
train_iter, _ = load_data_pikachu(batch_size, edge_size)
batch = train_iter.next()
# Notebook-style display of the two shapes; a no-op when run as a script.
batch.data[0].shape, batch.label[0].shape


# In[ ]:

# Plot 10 images together with the Pikachu bounding box of each one.
imgs = (batch.data[0][0:10].transpose((0, 2, 3, 1))) / 255
axes = d2l.show_images(imgs, 2, 5).flatten()
for ax, label in zip(axes, batch.label[0][0:10]):
    d2l.show_bboxes(ax, [label[0][1:5] * edge_size], colors=['w'])
# NOTE(review): the statements down to the progress ``print`` are the tail of
# a training loop whose enclosing headers lie outside the reviewed text.
# Re-nest them under those headers when merging.
l = ssdLoss(class_preds, bbox_preds, class_labels, bbox_labels, bbox_mask)
l.backward()
trainer.step(batch_size)
# Running count of correctly predicted classes and of compared labels.
class_acc += (class_preds.argmax(axis=-1) == class_labels).sum().asscalar()
n += class_labels.size
# Running sum of masked absolute box errors and of compared offsets.
bbox_acc += ((bbox_preds - bbox_labels) * bbox_mask).abs().sum().asscalar()
m += bbox_labels.size
print('epoch %d,class acc is %.2e,bbox err is %.2e,time is %.2f' %
      (epoch + 1, class_acc / n, bbox_acc / m, time.time() - start))

# ---- Inference on a single image ----
# Raw string: the Windows path contains backslashes that are not valid escape
# sequences (\d, \i, \p). The runtime value is unchanged.
img = image.imread(r'D:\d2l-zh20200904\img\pikachu.jpg')
# Resize to 256 x 256, convert to float32, move channels first and add a
# leading batch axis before feeding the network.
feature = image.imresize(img, 256, 256).astype('float32').transpose(
    (2, 0, 1)).expand_dims(axis=0)
anchors, class_preds, bbox_preds = net(feature)
class_probs = class_preds.softmax(axis=-1).transpose((0, 2, 1))
output = contrib.nd.MultiBoxDetection(class_probs, bbox_preds, anchors)
# Keep only the rows whose first entry is not -1.
idx = [i for i, row in enumerate(output[0]) if row[0].asscalar() != -1]

fig = plot.imshow(img.asnumpy())
nms_outputs = output[0, idx]
h, w = img.shape[0:2]
# The scale is the same for every row, so build it once.
bbox_scale = nd.array((w, h, w, h))
for nms_output in nms_outputs:
    score = nms_output[1].asscalar()
    if score < 0.3:
        continue
    d2l.show_bboxes(fig.axes, [nms_output[2:] * bbox_scale],
                    '%.2f' % score)
plot.show()
exit()
from mxnet import image, gluon, nd, autograd, contrib
import d2lzh as d2l

# Raw string: the Windows path contains backslashes that are not valid escape
# sequences (\d, \i, \c). The runtime value is unchanged.
img = image.imread(r'D:\d2l-zh-0925\img\catdog.jpg').asnumpy()
h, w = img.shape[0:2]

# Generate anchors for every pixel of an input with the image's height/width.
img_random = nd.random.uniform(shape=(1, 3, h, w))
anchors = contrib.ndarray.MultiBoxPrior(
    img_random, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])
# View the anchors as (row, column, anchor index, 4 coordinates).
boxes = anchors.reshape(shape=(h, w, 5, 4))
box_scale = nd.array((w, h, w, h))
fig = d2l.plt.imshow(img)
d2l.show_bboxes(fig.axes, boxes[123, 123, :, :] * box_scale)

# Each ground-truth row is [class, xmin, ymin, xmax, ymax].
# NOTE(review): column meaning follows the MultiBoxTarget label layout -
# confirm against the operator documentation.
ground_truth = nd.array([[0, 0.1, 0.08, 0.52, 0.92],
                         [1, 0.55, 0.2, 0.9, 0.88]])
# Size the class-prediction placeholder from the anchors instead of the
# hard-coded 2042040, which only matched one particular image size.
num_anchors = anchors.shape[1]
labels = contrib.nd.MultiBoxTarget(
    anchors, ground_truth.expand_dims(axis=0),
    nd.zeros((1, 3, num_anchors)))

# ---- Non-maximum suppression on four hand-written anchors ----
anchors = nd.array([[0.1, 0.08, 0.52, 0.92],
                    [0.08, 0.2, 0.56, 0.95],
                    [0.15, 0.3, 0.62, 0.91],
                    [0.55, 0.2, 0.9, 0.88]])
offset_preds = nd.array([0] * anchors.size)
cls_probs = nd.array([[0] * 4,               # predicted probability: background
                      [0.9, 0.8, 0.7, 0.1],  # predicted probability: dog
                      [0.1, 0.2, 0.3, 0.9]])  # predicted probability: cat
output = contrib.ndarray.MultiBoxDetection(
    cls_probs.expand_dims(axis=0),
    offset_preds.expand_dims(axis=0),
    anchors.expand_dims(axis=0),
    nms_threshold=0.5)
# Notebook-style display of the result; a no-op when run as a script.
output
def display_anchors(fmap_w, fmap_h, s):
    """Generate anchors for a feature map and draw them on the global image.

    A zero feature map of height ``fmap_h`` and width ``fmap_w`` is fed to
    ``contrib.nd.MultiBoxPrior`` with anchor sizes ``s`` and aspect ratios
    1, 2 and 0.5. The anchors are multiplied by ``(w, h, w, h)`` and drawn
    over ``img``.

    Relies on the module-level names ``img``, ``w`` and ``h``.
    """
    # Height comes before width in the feature-map shape. The previous
    # (fmap_w, fmap_h) order transposed the anchor grid whenever the two
    # values differed.
    fmap = nd.zeros((1, 10, fmap_h, fmap_w))  # 1 sample, 10 channels
    anchors = contrib.nd.MultiBoxPrior(fmap, sizes=s, ratios=[1, 2, 0.5])
    bbox_scale = nd.array((w, h, w, h))
    d2l.show_bboxes(
        d2l.plt.imshow(img.asnumpy()).axes, anchors[0] * bbox_scale)
import d2lzh as d2l
from mxnet import image
from matplotlib import pyplot as plt

batch_size = 32
img_size = 512

# Raw strings: the Windows paths contain backslashes that previously mixed
# invalid escapes (\d, \p) with hand-escaped ones (\\t, \\v). The runtime
# values are unchanged.
train_iter = image.ImageDetIter(
    path_imgrec=r'D:\d2l-zh20200904\data\pikachu\train.rec',
    path_imgidx=r'D:\d2l-zh20200904\data\pikachu\train.idx',
    batch_size=batch_size,
    data_shape=(3, img_size, img_size),
    shuffle=True,
    rand_crop=1,
    min_object_covered=0.95,
    max_attempts=200)
test_iter = image.ImageDetIter(
    path_imgrec=r'D:\d2l-zh20200904\data\pikachu\val.rec',
    batch_size=batch_size,
    data_shape=(3, img_size, img_size),
    shuffle=False)

# Show one full training batch in a 4 x 8 grid. Channels are moved last and
# pixel values divided by 255 before plotting.
batch = train_iter.next()
imgs = batch.data[0].transpose((0, 2, 3, 1)) / 255
axes = d2l.show_images(imgs, 4, 8).flatten()

# Draw the box stored in entries 1..4 of each image's first label row,
# multiplied by the image edge length.
labels = batch.label[0]
for ax, label in zip(axes, labels):
    d2l.show_bboxes(ax, [label[0][1:5] * img_size])
plt.show()
import d2lzh as d2l
from mxnet import contrib, gluon, image, nd
import numpy as np

np.set_printoptions(precision=2)

# Load the picture and report its height and width.
img = image.imread('../img/catdog.jpg').asnumpy()
h, w = img.shape[:2]
print(h, w)

# Generate anchors for every pixel of a random input of the same size.
anchor_sizes = [0.75, 0.5, 0.25]
anchor_ratios = [1, 2, 0.5]
X = nd.random.uniform(shape=(1, 3, h, w))
Y = contrib.nd.MultiBoxPrior(X, sizes=anchor_sizes, ratios=anchor_ratios)
# View the anchors as (row, column, anchor index, 4 coordinates).
boxes = Y.reshape((h, w, 5, 4))

# Draw the five anchors at pixel (250, 250), scaled by (w, h, w, h).
d2l.set_figsize()
bbox_scale = nd.array((w, h, w, h))
fig = d2l.plt.imshow(img)
anchor_labels = [
    's=0.75, r=1',
    's=0.5, r=1',
    's=0.25, r=1',
    's=0.75, r=2',
    's=0.75, r=0.5',
]
centre_boxes = boxes[250, 250, :, :] * bbox_scale
d2l.show_bboxes(fig.axes, centre_boxes, anchor_labels)
d2l.plt.show()