コード例 #1
0
ファイル: train.py プロジェクト: wk738126046/SSD_Demo
def evaluate_acc(net, data_iter, ctx):
    """Run ``net`` over every batch of ``data_iter`` and score the detections.

    Args:
        net: Callable returning ``(anchors, box_preds, cls_preds)`` for a
            batch of input data.
        data_iter: Batch iterator exposing ``data[0]`` and ``label[0]`` on
            each batch; it is reset before iterating.
        ctx: Context the batch data and labels are copied to.

    Returns:
        A tuple ``(AP, box_metric)``: the result of ``evaluate_MAP`` on the
        detections and labels concatenated over all batches, and the
        ``metric.MAE`` instance updated with each batch's box offsets.
    """
    data_iter.reset()
    mae = metric.MAE()
    all_dets = None
    all_labels = None
    for batch in data_iter:
        x = batch.data[0].as_in_context(ctx)
        gt = batch.label[0].as_in_context(ctx)
        anchors, box_preds, cls_preds = net(x)
        # Both MultiBoxTarget and the softmax below take the class
        # predictions with axes 1 and 2 swapped.
        cls_preds_t = cls_preds.transpose((0, 2, 1))

        # MultiBoxTarget matches the generated anchors to the ground-truth
        # boxes and extracts each anchor's offsets and class label.
        # The box error is the difference between the predicted box and
        # the anchor, normalised by the anchor's [x, y, w, h], and is then
        # fed to a smooth-L1 term.
        # Positive to negative sample ratio is 1:3.
        box_offset, box_mask, cls_labels = MultiBoxTarget(
            anchors, gt, cls_preds_t, negative_mining_ratio=3.0)
        mae.update([box_offset], [box_preds * box_mask])

        cls_probs = nd.SoftmaxActivation(cls_preds_t, mode='channel')
        # Filter the predicted boxes with non-maximum suppression.
        dets = MultiBoxDetection(cls_probs,
                                 box_preds,
                                 anchors,
                                 force_suppress=True,
                                 clip=False,
                                 nms_threshold=0.45)

        if all_dets is not None:
            all_dets = nd.concat(all_dets, dets, dim=0)
            all_labels = nd.concat(all_labels, gt, dim=0)
        else:
            all_dets = dets
            all_labels = gt

    AP = evaluate_MAP(all_dets, all_labels)
    return AP, mae
コード例 #2
0
ファイル: demo.py プロジェクト: ljtnine/gluon_SSD-1
def detect_image(img_path):
    """Run the SSD network on one image file and display its detections.

    The image is loaded with OpenCV, converted to RGB, resized to
    ``cfg.img_size`` x ``cfg.img_size``, normalised with ``cfg.mean`` and
    ``cfg.std``, and passed through a network built by ``build_ssd`` with
    weights loaded from ``model/ssd.params``. Detections scoring at least
    0.3 are drawn on the resized image with matplotlib.

    Args:
        img_path: Path of the image file to run detection on.

    Returns:
        None. If the file does not exist or cannot be decoded, a message
        is printed and the function returns without running the network.
    """
    if not os.path.exists(img_path):
        print('can not find image: ', img_path)
        # Nothing to process; continuing would crash in cv2.cvtColor.
        return
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable/undecodable file by returning None.
        print('can not read image: ', img_path)
        return
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (cfg.img_size, cfg.img_size))
    # Keep the un-normalised pixels for display.
    origin_img = img.copy()
    img = (img / 255. - cfg.mean) / cfg.std
    # HWC -> CHW, then add a leading batch axis.
    img = np.transpose(img, (2, 0, 1))
    img = img[np.newaxis, :]
    img = F.array(img)

    print('input image shape: ', img.shape)

    ctx = mx.gpu(0)
    net = build_ssd("test", 300, ctx)
    net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
    net.collect_params().reset_ctx(ctx)
    params = 'model/ssd.params'
    net.load_params(params, ctx=ctx)

    anchors, cls_preds, box_preds = net(img.as_in_context(ctx))
    print('anchors', anchors)
    print('class predictions', cls_preds)
    print('box delta predictions', box_preds)
    # convert predictions to probabilities using softmax
    cls_probs = F.SoftmaxActivation(F.transpose(cls_preds, (0, 2, 1)),
                                    mode='channel')

    # apply shifts to anchors boxes, non-maximum-suppression, etc...
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors],
                               force_suppress=True,
                               clip=True,
                               nms_threshold=0.01)
    output = output.asnumpy()

    # One random RGB colour per class id, assigned on first use.
    pens = dict()

    plt.imshow(origin_img)

    thresh = 0.3
    # Loop invariants: class-name table and the (w, h, w, h) factors that
    # scale the detection coordinates to pixels of the displayed image.
    voc_class_name = [
        'person', 'bird', 'cat', 'cow', 'dog', 'horse', 'sheep',
        'aeroplane', 'bicycle', 'boat', 'bus', 'car', 'motorbike', 'train',
        'bottle', 'chair', 'diningtable', 'pottedplant', 'sofa',
        'tvmonitor'
    ]
    scales = [origin_img.shape[1], origin_img.shape[0]] * 2
    # Each row is read as [class_id, score, xmin, ymin, xmax, ymax].
    for det in output[0]:
        cid = int(det[0])
        if cid < 0:
            continue
        score = det[1]
        if score < thresh:
            continue
        if cid not in pens:
            pens[cid] = (random.random(), random.random(), random.random())
        xmin, ymin, xmax, ymax = [
            int(p * s) for p, s in zip(det[2:6].tolist(), scales)
        ]
        rect = plt.Rectangle((xmin, ymin),
                             xmax - xmin,
                             ymax - ymin,
                             fill=False,
                             edgecolor=pens[cid],
                             linewidth=3)
        plt.gca().add_patch(rect)
        text = voc_class_name[cid]
        plt.gca().text(xmin,
                       ymin - 2,
                       '{:s} {:.3f}'.format(text, score),
                       bbox=dict(facecolor=pens[cid], alpha=0.5),
                       fontsize=12,
                       color='white')
    plt.axis('off')
    plt.show()
コード例 #3
0
def predict(x):
    """Return all detection boxes the network produces for ``x``.

    Each box is given as [class_id, confidence, xmin, ymin, xmax, ymax].
    """
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    # Pass the axes as one tuple: the NDArray method forwards its positional
    # arguments to the transpose operator, whose only axis parameter is
    # ``axes``.
    cls_probs = nd.SoftmaxActivation(cls_preds.transpose((0, 2, 1)), mode='channel')
    return MultiBoxDetection(cls_probs, box_preds, anchors, force_suppress=True, clip=False)
コード例 #4
0
ファイル: train.py プロジェクト: wk738126046/SSD_Demo
def mytrain(net,
            train_data,
            valid_data,
            ctx,
            start_epoch,
            end_epoch,
            cls_loss,
            box_loss,
            trainer=None):
    """Train ``net`` for epochs ``start_epoch`` .. ``end_epoch - 1``.

    Per epoch, the function runs one pass over ``train_data``, evaluates on
    ``valid_data`` through ``evaluate_acc``, and appends the train AP,
    validation AP and accumulated losses to the module-level ``info`` dict.
    After the last epoch it plots the AP and loss curves and saves them to
    ``loss_curve.png``.

    Args:
        net: Network returning ``(anchors, box_preds, cls_preds)``.
        train_data: Training batch iterator; reset at the start of every
            epoch.
        valid_data: Validation batch iterator; reset at the start of every
            epoch.
        ctx: Context the batches are copied to.
        start_epoch: First epoch index (inclusive).
        end_epoch: Last epoch index (exclusive).
        cls_loss: Callable ``(cls_preds, cls_labels) -> loss``.
        box_loss: Callable ``(box_preds, box_offset, box_mask) -> loss``.
        trainer: Optional trainer; when ``None`` an SGD ``gluon.Trainer``
            with learning rate 0.1 and weight decay 1e-3 is created.

    Returns:
        None. Results are recorded in ``info`` and in the saved figure.
    """
    if trainer is None:
        trainer = gluon.Trainer(net.collect_params(), 'sgd', {
            'learning_rate': 0.1,
            'wd': 1e-3
        })
    box_metric = metric.MAE()

    # The end of this function rebinds info["loss"] to an ndarray. Turn it
    # back into a list so a later call (e.g. resuming with a higher
    # start_epoch) can keep appending to it.
    if isinstance(info["loss"], np.ndarray):
        info["loss"] = info["loss"].tolist()

    # Epochs at which the learning rate is multiplied by 0.2.
    lr_decay_epochs = (100, 120, 150, 180, 200)

    for e in range(start_epoch, end_epoch):
        train_data.reset()
        valid_data.reset()
        box_metric.reset()
        tic = time.time()
        # [classification loss, box loss], summed over the epoch's batches.
        _loss = [0, 0]
        if e in lr_decay_epochs:
            trainer.set_learning_rate(trainer.learning_rate * 0.2)

        outs, labels = None, None
        for batch in train_data:
            data = batch.data[0].as_in_context(ctx)
            label = batch.label[0].as_in_context(ctx)
            with autograd.record():
                anchors, box_preds, cls_preds = net(data)
                # negative_mining_ratio: adds negative examples to the
                # generated mask, at 3x ratio, so they take part in the loss.
                box_offset, box_mask, cls_labels = MultiBoxTarget(
                    anchors,
                    label,
                    cls_preds.transpose(axes=(0, 2, 1)),
                    negative_mining_ratio=3.0)

                loss1 = cls_loss(cls_preds, cls_labels)
                loss2 = box_loss(box_preds, box_offset, box_mask)
                loss = loss1 + loss2
            loss.backward()
            trainer.step(data.shape[0])
            _loss[0] += nd.mean(loss1).asscalar()
            _loss[1] += nd.mean(loss2).asscalar()

            # Decode this batch's detections so the training AP can be
            # computed over the whole epoch.
            cls_probs = nd.SoftmaxActivation(cls_preds.transpose((0, 2, 1)),
                                             mode='channel')
            out = MultiBoxDetection(cls_probs,
                                    box_preds,
                                    anchors,
                                    force_suppress=True,
                                    clip=False,
                                    nms_threshold=0.45)
            if outs is None:
                outs = out
                labels = label
            else:
                outs = nd.concat(outs, out, dim=0)
                labels = nd.concat(labels, label, dim=0)

            box_metric.update([box_offset], [box_preds * box_mask])

        train_AP = evaluate_MAP(outs, labels)
        valid_AP, val_box_metric = evaluate_acc(net, valid_data, ctx)
        info["train_ap"].append(train_AP)
        info["valid_ap"].append(valid_AP)
        info["loss"].append(_loss)

        # Report progress every tenth epoch.
        if (e + 1) % 10 == 0:
            print("epoch: %d time: %.2f loss: %.4f, %.4f lr: %.5f" %
                  (e, time.time() - tic, _loss[0], _loss[1],
                   trainer.learning_rate))
            print("train mae: %.4f AP: %.4f" % (box_metric.get()[1], train_AP))
            print("valid mae: %.4f AP: %.4f" %
                  (val_box_metric.get()[1], valid_AP))

    # Split the recorded losses into their two columns and plot the curves.
    info["loss"] = np.array(info["loss"])
    info["cls_loss"] = info["loss"][:, 0]
    info["box_loss"] = info["loss"][:, 1]

    plt.figure(figsize=(12, 4))
    plt.subplot(121)
    plot("train_ap")
    plot("valid_ap")
    plt.legend(loc="upper right")
    plt.subplot(122)
    plot("cls_loss")
    plot("box_loss")
    plt.legend(loc="upper right")
    plt.savefig('loss_curve.png')