import os
import random
import time

import cv2
import matplotlib.pyplot as plt
import mxnet as mx
import numpy as np
from mxnet import autograd, gluon, metric, nd
from mxnet import ndarray as F
from mxnet.contrib.ndarray import MultiBoxDetection, MultiBoxTarget

# `build_ssd`, `cfg`, `evaluate_MAP`, `info`, and `plot` are project-local
# helpers (model definition, config, mAP evaluation, and plotting) assumed
# to be importable from the repository's own modules.


def evaluate_acc(net, data_iter, ctx):
    data_iter.reset()
    box_metric = metric.MAE()
    outs, labels = None, None
    for i, batch in enumerate(data_iter):
        data = batch.data[0].as_in_context(ctx)
        label = batch.label[0].as_in_context(ctx)
        anchors, box_preds, cls_preds = net(data)
        # MultiBoxTarget matches the generated anchors to the ground-truth
        # boxes and extracts each anchor's regression offsets and class label.
        # The regression target is the difference between the matched
        # ground-truth box and the anchor, normalized by the anchor's
        # [x, y, w, h]; the network output g is the prediction, and the loss
        # is computed as smoothL1(label - g).
        # Hard negative mining keeps a positive:negative ratio of 1:3.
        box_offset, box_mask, cls_labels = MultiBoxTarget(
            anchors, label, cls_preds.transpose((0, 2, 1)),
            negative_mining_ratio=3.0)
        box_metric.update([box_offset], [box_preds * box_mask])
        cls_probs = nd.SoftmaxActivation(cls_preds.transpose((0, 2, 1)),
                                         mode='channel')
        # Filter the predicted boxes with non-maximum suppression (NMS).
        out = MultiBoxDetection(cls_probs, box_preds, anchors,
                                force_suppress=True, clip=False,
                                nms_threshold=0.45)
        if outs is None:
            outs = out
            labels = label
        else:
            outs = nd.concat(outs, out, dim=0)
            labels = nd.concat(labels, label, dim=0)
    AP = evaluate_MAP(outs, labels)
    return AP, box_metric
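# Illustrative sketch of the offset encoding described in the comments above.
# This helper is an assumption added for exposition: it shows the standard
# SSD box parameterization; the library's MultiBoxTarget additionally scales
# the targets by variances, so treat this as a reference, not the exact
# implementation.
def encode_offsets(anchor, gt):
    """anchor, gt: (x_center, y_center, width, height), normalized."""
    ax, ay, aw, ah = anchor
    gx, gy, gw, gh = gt
    tx = (gx - ax) / aw        # center shift, in units of anchor size
    ty = (gy - ay) / ah
    tw = np.log(gw / aw)       # log-scale size ratios
    th = np.log(gh / ah)
    return tx, ty, tw, th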
def detect_image(img_path):
    if not os.path.exists(img_path):
        print('can not find image: ', img_path)
        return
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (cfg.img_size, cfg.img_size))
    origin_img = img.copy()
    # Normalize to the training distribution and lay out as NCHW.
    img = (img / 255. - cfg.mean) / cfg.std
    img = np.transpose(img, (2, 0, 1))
    img = img[np.newaxis, :]
    img = F.array(img)
    print('input image shape: ', img.shape)
    ctx = mx.gpu(0)
    net = build_ssd("test", 300, ctx)
    net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
    net.collect_params().reset_ctx(ctx)
    params = 'model/ssd.params'
    net.load_params(params, ctx=ctx)
    anchors, cls_preds, box_preds = net(img.as_in_context(ctx))
    print('anchors', anchors)
    print('class predictions', cls_preds)
    print('box delta predictions', box_preds)
    # Convert class scores to probabilities with a channel-wise softmax.
    cls_probs = F.SoftmaxActivation(F.transpose(cls_preds, (0, 2, 1)),
                                    mode='channel')
    # Apply the predicted offsets to the anchors, then run NMS.
    output = MultiBoxDetection(cls_probs, box_preds, anchors,
                               force_suppress=True, clip=True,
                               nms_threshold=0.01)
    output = output.asnumpy()
    voc_class_name = [
        'person', 'bird', 'cat', 'cow', 'dog', 'horse', 'sheep',
        'aeroplane', 'bicycle', 'boat', 'bus', 'car', 'motorbike', 'train',
        'bottle', 'chair', 'diningtable', 'pottedplant', 'sofa', 'tvmonitor'
    ]
    pens = dict()
    plt.imshow(origin_img)
    thresh = 0.3
    for det in output[0]:
        cid = int(det[0])
        if cid < 0:
            continue
        score = det[1]
        if score < thresh:
            continue
        # One random color per class.
        if cid not in pens:
            pens[cid] = (random.random(), random.random(), random.random())
        # Scale normalized coordinates back to pixel coordinates.
        scales = [origin_img.shape[1], origin_img.shape[0]] * 2
        xmin, ymin, xmax, ymax = [
            int(p * s) for p, s in zip(det[2:6].tolist(), scales)
        ]
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                             fill=False, edgecolor=pens[cid], linewidth=3)
        plt.gca().add_patch(rect)
        text = voc_class_name[cid]
        plt.gca().text(xmin, ymin - 2,
                       '{:s} {:.3f}'.format(text, score),
                       bbox=dict(facecolor=pens[cid], alpha=0.5),
                       fontsize=12, color='white')
    plt.axis('off')
    # plt.savefig('result.png', dpi=100)
    plt.show()
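# Example usage (the path is a placeholder -- substitute any local image).
# Boxes scoring below the 0.3 `thresh` inside detect_image are skipped:
#
#     detect_image('demo/street.jpg')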
def predict(x):
    """Return all predicted boxes; each row is
    [class_id, confidence, xmin, ymin, xmax, ymax]."""
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(cls_preds.transpose((0, 2, 1)),
                                     mode='channel')
    return MultiBoxDetection(cls_probs, box_preds, anchors,
                             force_suppress=True, clip=False)
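# Sketch of consuming predict()'s output (an added example; `score_thresh`
# is a hypothetical parameter). It assumes `x` is a preprocessed
# (1, 3, H, W) batch as built in detect_image above. Rows with class_id -1
# were suppressed by NMS or are background; coordinates are normalized.
def parse_detections(x, score_thresh=0.5):
    out = predict(x)
    results = []
    for row in out[0].asnumpy():
        class_id, score = int(row[0]), float(row[1])
        if class_id < 0 or score < score_thresh:
            continue
        results.append((class_id, score, row[2:6].tolist()))
    return results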
def mytrain(net, train_data, valid_data, ctx, start_epoch, end_epoch,
            cls_loss, box_loss, trainer=None):
    if trainer is None:
        # trainer = gluon.Trainer(net.collect_params(), 'sgd',
        #                         {'learning_rate': 0.01, 'momentum': 0.9, 'wd': 5e-1})
        trainer = gluon.Trainer(net.collect_params(), 'sgd', {
            'learning_rate': 0.1,
            'wd': 1e-3
        })
    box_metric = metric.MAE()
    for e in range(start_epoch, end_epoch):
        train_data.reset()
        valid_data.reset()
        box_metric.reset()
        tic = time.time()
        _loss = [0, 0]
        # Step-decay learning-rate schedule.
        if e in (100, 120, 150, 180, 200):
            trainer.set_learning_rate(trainer.learning_rate * 0.2)
        outs, labels = None, None
        for i, batch in enumerate(train_data):
            data = batch.data[0].as_in_context(ctx)
            label = batch.label[0].as_in_context(ctx)
            with autograd.record():
                anchors, box_preds, cls_preds = net(data)
                # negative_mining_ratio=3.0 adds three negatives per positive
                # to the generated mask for the loss computation.
                box_offset, box_mask, cls_labels = MultiBoxTarget(
                    anchors, label, cls_preds.transpose(axes=(0, 2, 1)),
                    negative_mining_ratio=3.0)  # , overlap_threshold=0.75)
                loss1 = cls_loss(cls_preds, cls_labels)
                loss2 = box_loss(box_preds, box_offset, box_mask)
                loss = loss1 + loss2
            loss.backward()
            trainer.step(data.shape[0])
            _loss[0] += nd.mean(loss1).asscalar()
            _loss[1] += nd.mean(loss2).asscalar()
            cls_probs = nd.SoftmaxActivation(cls_preds.transpose((0, 2, 1)),
                                             mode='channel')
            out = MultiBoxDetection(cls_probs, box_preds, anchors,
                                    force_suppress=True, clip=False,
                                    nms_threshold=0.45)
            if outs is None:
                outs = out
                labels = label
            else:
                outs = nd.concat(outs, out, dim=0)
                labels = nd.concat(labels, label, dim=0)
            box_metric.update([box_offset], [box_preds * box_mask])
        train_AP = evaluate_MAP(outs, labels)
        valid_AP, val_box_metric = evaluate_acc(net, valid_data, ctx)
        info["train_ap"].append(train_AP)
        info["valid_ap"].append(valid_AP)
        info["loss"].append(_loss)
        if (e + 1) % 10 == 0:
            print("epoch: %d time: %.2f loss: %.4f, %.4f lr: %.5f" %
                  (e, time.time() - tic, _loss[0], _loss[1],
                   trainer.learning_rate))
            print("train mae: %.4f AP: %.4f" % (box_metric.get()[1], train_AP))
            print("valid mae: %.4f AP: %.4f" %
                  (val_box_metric.get()[1], valid_AP))
    # Plot the training history collected in `info`.
    info["loss"] = np.array(info["loss"])
    info["cls_loss"] = info["loss"][:, 0]
    info["box_loss"] = info["loss"][:, 1]
    plt.figure(figsize=(12, 4))
    plt.subplot(121)
    plot("train_ap")
    plot("valid_ap")
    plt.legend(loc="upper right")
    plt.subplot(122)
    plot("cls_loss")
    plot("box_loss")
    plt.legend(loc="upper right")
    plt.savefig('loss_curve.png')
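# Minimal sketch of the two losses mytrain expects -- an assumption, since
# their definitions are not shown here. Softmax cross-entropy over the class
# scores and a smooth-L1 (Huber) penalty on the masked offsets are the
# standard SSD choices; gluon losses accept the mask as `sample_weight`,
# which matches the box_loss(box_preds, box_offset, box_mask) call above.
cls_loss = gluon.loss.SoftmaxCrossEntropyLoss()
box_loss = gluon.loss.HuberLoss()

# Hypothetical call (iterators and epoch count are placeholders):
# mytrain(net, train_iter, valid_iter, mx.gpu(0), 0, 240, cls_loss, box_loss)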