Example #1
0
    def test_train(self):
        """Smoke-test one forward/backward training step of FaceBox.

        Builds a FaceBox net, encodes four hand-written ground-truth boxes
        into loc/cls targets with FaceBoxCoder, then runs a single Adam
        optimization step on a random 1024x1024 input.
        """
        num_classes = 2
        net = facebox.FaceBox(num_classes=num_classes)
        facebox_box_coder = facebox.FaceBoxCoder(net)

        C, H, W = (3, 1024, 1024)
        x = Variable(torch.randn(1, C, H, W))
        boxes = torch.from_numpy(
            np.array([(0, 0, 100, 100), (25, 25, 125, 125),
                      (200, 200, 250, 250), (0, 0, 300, 300)],
                     dtype=np.float32))
        boxes /= torch.Tensor([W, H, W, H]).expand_as(boxes)  # norm to [0-1]
        # np.int64 replaces the deprecated np.long alias (removed in NumPy 1.24).
        labels = torch.from_numpy(np.array([1, 1, 1, 1], dtype=np.int64))
        loc_targets, cls_targets = facebox_box_coder.encode(boxes, labels)
        # Add a leading batch dimension so targets match the (N, ...) preds.
        loc_targets = loc_targets[None, :]
        cls_targets = cls_targets[None, :]

        optimizer = optim.Adam(net.parameters(), lr=1e-5, weight_decay=5e-4)
        criterion = facebox.FaceBoxLoss(num_classes=num_classes)

        for epoch in range(1):
            loc_preds, cls_preds = net(x)
            optimizer.zero_grad()

            loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
            loss.backward()
            optimizer.step()
Example #2
0
    def test_encode(self):
        """Encode a single normalized ground-truth box into loc/conf targets."""
        C, H, W = (3, 300, 300)
        net = facebox.FaceBox(num_classes=2)
        facebox_data_coder = facebox.FaceBoxCoder(net)

        boxes = torch.from_numpy(
            np.array([(0.4531, 0.1200, 0.6465, 0.4567)], dtype=np.float32))
        labels = torch.from_numpy(np.array([1], dtype=np.int32))
        loc_targets, conf_targets = facebox_data_coder.encode(boxes, labels)
Example #3
0
File: demo.py  Project: zymITsky/facedet
def demo():
    """Run FaceBox face detection on live webcam frames.

    Loads trained weights from weight/facebox.pt, grabs frames from the
    default camera, resizes them to the 1024x1024 network input, decodes
    the predictions back to image coordinates and draws them. Press ESC
    to quit.
    """
    num_classes = 2
    net = facebox.FaceBox(num_classes=num_classes)
    facebox_box_coder = facebox.FaceBoxCoder(net)

    net.load_state_dict(
        torch.load('weight/facebox.pt',
                   map_location=lambda storage, loc: storage))
    net.eval()

    cap = cv2.VideoCapture(0)

    while True:
        retval, images_np = cap.read()
        if not retval:  # camera read failed; stop instead of crashing below
            break
        images = cv2.resize(images_np, (1024, 1024))
        images = torch.from_numpy(images.transpose((2, 0, 1)))  # HWC -> CHW
        images = images.float().div(255)  # scale pixels to [0, 1]
        images = Variable(torch.unsqueeze(images, 0), volatile=True)

        loc_preds, conf_preds = net(images)

        # Strip the batch dimension (batch size is 1).
        loc = loc_preds[0, :, :]
        conf = conf_preds[0, :, :]

        boxes, labels, probs = facebox_box_coder.decode(
            loc,
            F.softmax(conf).data)
        print('probs:{}'.format(probs))

        img_h, img_w, img_c = images_np.shape
        print('images_np.shape:{}'.format(images_np.shape))
        for box_id, box in enumerate(boxes):
            prob = probs[box_id]
            # cv2 drawing functions require integer pixel coordinates; the
            # decoded boxes are normalized floats, so scale then truncate.
            box_x1 = int(box[0] * img_w)
            box_y1 = int(box[1] * img_h)
            box_x2 = int(box[2] * img_w)
            box_y2 = int(box[3] * img_h)
            print('({},{})->({},{})'.format(box_x1, box_y1, box_x2, box_y2))
            cv2.rectangle(images_np, (box_x1, box_y1), (box_x2, box_y2),
                          (255, 0, 0))
            cv2.putText(images_np, str(prob), (box_x1, box_y1),
                        cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 0.4, (0, 255, 0))

        cv2.imshow('images_np', images_np)
        key = cv2.waitKey(1)
        if key == 27:  # ESC
            break

    cap.release()
Example #4
0
    def test_decode(self):
        """Decode saved raw network outputs and visualize the boxes.

        Loads one batch of raw loc/conf predictions from disk, decodes
        them with FaceBoxCoder, and draws the resulting boxes on an image.
        """
        C, H, W = (3, 1024, 1024)
        net = facebox.FaceBox(num_classes=2)
        facebox_data_coder = facebox.FaceBoxCoder(net)
        facebox_default_boxes = facebox_data_coder.default_boxes
        print('facebox_default_boxes.size():{}'.format(
            facebox_default_boxes.size()))
        # locs/confs hold exactly one batch of raw predictions.
        locs = torch.load('../data/loc.pt')
        confs = torch.load('../data/conf.pt')

        loc = locs[0, :, :]
        conf = confs[0, :, :]

        print('loc.size():{}'.format(loc.size()))
        print('conf.size():{}'.format(conf.size()))

        boxes, labels, probs = facebox_data_coder.decode(
            loc,
            F.softmax(conf).data)
        print('boxes:{}'.format(boxes))
        print('labels:{}'.format(labels))
        print('probs:{}'.format(probs))

        import cv2
        img = cv2.imread('../obama.jpg')
        img_h, img_w, img_c = img.shape

        for box in boxes:
            # cv2.rectangle requires integer pixel coordinates.
            box_x1 = int(box[0] * img_w)
            box_y1 = int(box[1] * img_h)
            box_x2 = int(box[2] * img_w)
            box_y2 = int(box[3] * img_h)
            cv2.rectangle(img, (box_x1, box_y1), (box_x2, box_y2), (255, 0, 0))

        cv2.imshow('img', img)
        cv2.waitKey()
Example #5
0
def test():
    """Visual sanity check: run an eval-mode FaceBox over one fixed image.

    The WIDER FACE dataloader only drives the loop; the actual network
    input comes from obama.jpg so the detections can be eyeballed.
    """
    num_classes = 2
    net = facebox.FaceBox(num_classes=num_classes)
    facebox_box_coder = facebox.FaceBoxCoder(net)

    root = os.path.expanduser('~/Data/WIDER')
    train_dataset = wider_face_loader.WiderFaceLoader(
        root=root, split='train', boxcoder=facebox_box_coder)
    train_dataloader = data.DataLoader(train_dataset,
                                       batch_size=1,
                                       shuffle=True)
    net.load_state_dict(
        torch.load('weight/facebox.pt',
                   map_location=lambda storage, loc: storage))
    net.eval()

    for epoch in range(1):

        for train_id, (images, loc_targets,
                       conf_targets) in enumerate(train_dataloader):
            # The dataloader sample is ignored; a fixed image is used
            # instead so the output can be inspected visually.
            images_np = cv2.imread('obama.jpg')
            images = cv2.resize(images_np, (1024, 1024))
            images = torch.from_numpy(images.transpose((2, 0, 1)))  # HWC -> CHW
            images = images.float().div(255)  # scale pixels to [0, 1]
            images = Variable(torch.unsqueeze(images, 0), volatile=True)

            loc_preds, conf_preds = net(images)

            # Strip the batch dimension (batch size is 1).
            loc = loc_preds[0, :, :]
            conf = conf_preds[0, :, :]

            print(images_np.shape)

            boxes, labels, probs = facebox_box_coder.decode(
                loc,
                F.softmax(conf).data)
            print('boxes:{}'.format(boxes))
            print('labels:{}'.format(labels))
            print('probs:{}'.format(probs))

            img_h, img_w, img_c = images_np.shape
            for box in boxes:
                # cv2.rectangle requires integer pixel coordinates.
                box_x1 = int(box[0] * img_w)
                box_y1 = int(box[1] * img_h)
                box_x2 = int(box[2] * img_w)
                box_y2 = int(box[3] * img_h)
                print('({},{})->({},{})'.format(box_x1, box_y1, box_x2,
                                                box_y2))
                cv2.rectangle(images_np, (box_x1, box_y1), (box_x2, box_y2),
                              (255, 0, 0))

            cv2.imshow('images_np', images_np)
            cv2.waitKey()

            print('loc_preds.size():{}'.format(loc_preds.size()))
            print('conf_preds.size():{}'.format(conf_preds.size()))
            break
Example #6
0
def train():
    """Train FaceBox on WIDER FACE with live loss plots via visdom.

    Resumes from weight/facebox.pt when present and saves the model's
    state dict there after every epoch.
    """
    vis = visdom.Visdom()

    # Number of consecutive finite-loss batches averaged into one point of
    # the 'loss_epoch' plot. (An earlier comment said 50; the code uses 30.)
    avg_window = 30

    num_classes = 2
    net = facebox.FaceBox(num_classes=num_classes)
    if os.path.exists('weight/facebox.pt'):
        net.load_state_dict(
            torch.load('weight/facebox.pt',
                       map_location=lambda storage, loc: storage))
    facebox_box_coder = facebox.FaceBoxCoder(net)

    root = os.path.expanduser('~/Data/WIDER')
    train_dataset = wider_face_loader.WiderFaceLoader(
        root=root, boxcoder=facebox_box_coder)
    train_dataloader = data.DataLoader(train_dataset,
                                       batch_size=1,
                                       shuffle=True)

    optimizer = optim.Adam(net.parameters(), lr=1e-5, weight_decay=1e-4)
    criterion = facebox.FaceBoxLoss(num_classes=num_classes)

    for epoch in range(100):

        loss_epoch = 0
        loss_avg_epoch = 0
        data_count = 0

        for train_id, (images, loc_targets,
                       conf_targets) in enumerate(train_dataloader):
            images = Variable(images)
            loc_preds, conf_preds = net(images)
            optimizer.zero_grad()
            loss = criterion(loc_preds, loc_targets, conf_preds, conf_targets)

            loss_numpy = loss.data.numpy()
            loss_numpy = np.expand_dims(loss_numpy, axis=0)

            # Accumulate only finite losses; an inf loss resets the window.
            if not np.isinf(loss_numpy.sum()):
                loss_epoch += loss_numpy
                data_count += 1
            else:
                data_count = 0
                loss_epoch = 0

            loss.backward()
            optimizer.step()

            if not np.isinf(loss_numpy.sum()):
                # Append to the per-batch loss curve; when the window does
                # not exist yet, vis.line returns something other than the
                # window name and we create it with a plain plot call.
                win = 'loss'
                win_res = vis.line(X=np.ones(1) * train_id,
                                   Y=loss_numpy,
                                   win=win,
                                   update='append')
                if win_res != win:
                    vis.line(X=np.ones(1) * train_id, Y=loss_numpy, win=win)

            # Plot one averaged loss point per avg_window batches.
            if data_count == avg_window:
                loss_avg_epoch = loss_epoch / (avg_window * 1.0)
                loss_avg_epoch = np.expand_dims(loss_avg_epoch, axis=0)
                print('loss_avg_epoch:', loss_avg_epoch)

                win = 'loss_epoch'
                # NOTE(review): train_id / avg_window is true (float)
                # division in Python 3; integer division may have been
                # intended for the x-coordinate — confirm.
                x_coord = epoch * avg_window + train_id / avg_window
                win_res = vis.line(X=np.ones(1) * x_coord,
                                   Y=loss_avg_epoch,
                                   win=win,
                                   update='append')
                if win_res != win:
                    vis.line(X=np.ones(1) * x_coord,
                             Y=loss_avg_epoch,
                             win=win)

                data_count = 0
                loss_epoch = 0

        # Close the per-batch loss window so the next epoch starts clean.
        win = 'loss'
        vis.close(win)

        if not os.path.exists('weight/'):
            os.mkdir('weight')
        print('saving model ...')
        torch.save(net.state_dict(), 'weight/facebox.pt')