Example #1
def im2batch(im_bgr, means_bgr, w_h_net, use_cuda):
    '''Convert a BGR image into a normalized RGB CHW batch tensor.

    Equivalent to:
        im_bgr_resized = resize_and_fill(im_bgr, means_bgr, w_h_net)
        im_bgr_norm_resized = im_bgr_resized - means_bgr
    '''
    im_bgr_norm_resized = base_transform(im_bgr, w_h_net[0], means_bgr)
    im_rgb_norm_resized = cv2.cvtColor(im_bgr_norm_resized, cv2.COLOR_BGR2RGB)

    ts_rgb_norm = torch.from_numpy(im_rgb_norm_resized.transpose(2, 0,
                                                                 1)).float()

    batch_rgb = ts_rgb_norm.unsqueeze(0)
    if use_cuda:
        batch_rgb = batch_rgb.cuda()
    # volatile=True marks the input as inference-only (pre-0.4 PyTorch autograd API)
    return Variable(batch_rgb, volatile=True)
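Note: most snippets below call base_transform(image, size, mean) directly, while the training loops pass a pre-built transform to the dataset. A minimal sketch of the function form, assuming the usual ssd.pytorch preprocessing (resize to the square network input, subtract per-channel means, keep BGR order):

import cv2
import numpy as np

def base_transform(image, size, mean):
    # resize to the square network input and subtract the channel means;
    # returns a float32 HWC array, still in OpenCV's BGR channel order
    x = cv2.resize(image, (size, size)).astype(np.float32)
    x -= np.array(mean, dtype=np.float32)
    return x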
Example #2
def train():
    net.train()
    train_loss = 0
    dataset = VOCDetection(VOCroot, 'train',
                           base_transform(ssd_dim, rgb_means),
                           AnnotationTransform())

    for epoch in range(args.epochs):
        train_loss = 0  # reset the per-epoch loss accumulator
        # load train data & create batch iterator
        batch_iterator = iter(
            data.DataLoader(dataset,
                            batch_size,
                            shuffle=True,
                            collate_fn=detection_collate))
        adjust_learning_rate(optimizer, epoch)

        for iteration in range(len(dataset) // batch_size):
            images, targets = next(batch_iterator)
            if args.cuda:
                images = images.cuda()
                targets = [anno.cuda() for anno in targets]

            images = Variable(images)
            targets = [Variable(t) for t in targets]
            #forward
            t0 = time.time()
            out = net(images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            print('Timer: ', t1 - t0)
            if args.log_iters:
                print(repr(iteration) + ": Current loss: ", loss.data[0])
            train_loss += loss.data[0]
        train_loss /= (len(dataset) / batch_size)
        torch.save(net.state_dict(), 'ssd_models/' + repr(epoch) + '.pth')
        print('Avg loss for epoch ' + repr(epoch) + ': ' + repr(train_loss))
    torch.save(net, args.save_folder + args.version + '.pth')
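detection_collate is not defined in this snippet; DataLoader needs it because each image carries a variable number of box annotations. A sketch of the conventional ssd.pytorch-style collate, assuming each sample is an (image_tensor, annotation_array) pair:

import torch

def detection_collate(batch):
    # stack images into one (N, C, H, W) tensor, but keep the
    # variable-length annotation tensors in a plain Python list
    imgs, targets = [], []
    for sample in batch:
        imgs.append(sample[0])
        targets.append(torch.FloatTensor(sample[1]))
    return torch.stack(imgs, 0), targets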
Example #3
def train():
    net.train()
    train_loss = 0
    print('Loading Dataset...')
    dataset = VOCDetection(VOCroot, 'train',
                           base_transform(ssd_dim, rgb_means),
                           AnnotationTransform())
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on', dataset.name)
    step_index = 0
    for iteration in range(max_iter):
        # step the learning rate on schedule; checked every iteration rather
        # than only at epoch boundaries, so scheduled steps are not skipped
        if iteration in stepvalues:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                collate_fn=detection_collate))

        # load train data
        images, targets = next(batch_iterator)
        images = Variable(images.cuda())
        targets = [Variable(anno.cuda()) for anno in targets]
        #forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss = criterion(out, targets)
        loss.backward()
        optimizer.step()
        t1 = time.time()
        train_loss += loss.data[0]
        if iteration % 10 == 0:
            print('Timer: ', t1 - t0)
            print('Loss: %f' % (loss.data[0]), end=' ')
        if iteration % 5000 == 0:
            torch.save(net.state_dict(),
                       'weights/ssd_iter_new' + repr(iteration) + '.pth')
    torch.save(net, args.save_folder + args.version + '.pth')
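adjust_learning_rate is also not shown. A typical step-decay version matching the (optimizer, gamma, step_index) call sites above; the base lr global is an assumption, and Example #2's two-argument variant would decay by epoch instead:

def adjust_learning_rate(optimizer, gamma, step_index):
    # multiply the base learning rate by gamma once per schedule step
    new_lr = lr * (gamma ** step_index)
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr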
Example #4
    def detect(self, img_cv2_list, batch_size=1, conf_thres=0.25, top_k=200):
        boxes_all = []
        for bs_idx in range(int(np.ceil(len(img_cv2_list) / batch_size))):
            xs = []
            imgs = img_cv2_list[batch_size * bs_idx:batch_size * (bs_idx + 1)]
            for img_cv2 in imgs:
                img_transformed = base_transform(img_cv2, 300,
                                                 (128.0, 128.0, 128.0))
                img_transformed = img_transformed[:, :, (2, 1, 0)]  # BGR -> RGB
                img_tensor = torch.from_numpy(img_transformed).permute(2, 0, 1)

                xs.append(img_tensor)

            images_tensor = torch.stack(xs, 0)
            images = Variable(images_tensor.cuda())

            t1 = time.time()
            detections = self.net(images).data
            t2 = time.time()

            for img_idx in range(len(imgs)):
                boxes = []
                img_cv2 = imgs[img_idx]
                rgb_image = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)

                scale = torch.Tensor(
                    [rgb_image.shape[1::-1], rgb_image.shape[1::-1]])

                dets = detections[img_idx].view(1, self.num_cls, top_k, 5)
                for i in range(dets.size(1)):
                    j = 0
                    # bound j so the scan cannot run past top_k when every
                    # slot clears the confidence threshold
                    while j < top_k and dets[0, i, j, 0] >= conf_thres:
                        score = dets[0, i, j, 0]
                        label_name = self.class_names[i - 1]
                        # each detection row holds [cls_conf, x1, y1, x2, y2]
                        pt = (dets[0, i, j, 1:] * scale).cpu().numpy()
                        j += 1
                        boxes.append(
                            (label_name, score, pt[0], pt[1], pt[2], pt[3]))

                boxes_all.append(boxes)
            print("batch forward time: ", t2 - t1, "fusion time: ",
                  time.time() - t2, "total time: ",
                  time.time() - t1)
        return boxes_all
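A usage sketch for this detect method; the SSDDetector wrapper name and its constructor are assumptions, only the method signature above is given:

import cv2

detector = SSDDetector(model_path='weights/ssd300.pth')  # hypothetical wrapper
imgs = [cv2.imread(p) for p in ('a.jpg', 'b.jpg', 'c.jpg')]
# detect() returns one list of (label, score, x1, y1, x2, y2) tuples per image
for boxes in detector.detect(imgs, batch_size=2, conf_thres=0.4):
    for label_name, score, x1, y1, x2, y2 in boxes:
        print('%s %.2f box=(%.0f, %.0f, %.0f, %.0f)' %
              (label_name, score, x1, y1, x2, y2))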
Example #5
def test_net(save_folder, net, dataset, transform, top_k, detector, priors):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(dataset)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(len(labelmap) + 1)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    all_time = 0.
    output_dir = get_output_dir(
        pkl_dir,
        args.iteration + '_' + args.dataset_name + '_' + args.set_file_name)
    det_file = os.path.join(output_dir, 'detections.pkl')
    output_dir = get_output_dir(output_dir, 'multi_test')
    ######################### Multiscale PriorBox #####################
    priorboxes = {}
    for v1 in multi_scale[str(ssd_dim)]:
        if not multi_cfg[str(v1)]:
            raise ValueError('no multi-scale config for size ' + str(v1))
        priorbox = PriorBox(multi_cfg[str(v1)])
        img_size = multi_cfg[str(v1)]['min_dim']
        with torch.no_grad():
            priorboxes[str(img_size)] = priorbox.forward().to(device)
    ########################## Detection ##############################
    for i in range(num_images):
        _t['im_detect'].tic()
        image = dataset.pull_image(i)
        h, w, _ = image.shape
        detections_multi = {}
        for v in multi_scale[str(ssd_dim)]:
            priors = priorboxes[str(v)]
            ssd_dim_temp = int(v)
            for loop in range(2):
                if (loop == 0):
                    im_trans = base_transform(image, ssd_dim_temp,
                                              dataset_mean)
                else:
                    # second pass: run on the horizontally flipped image
                    im_f = cv2.flip(image.copy(), 1)
                    im_trans = base_transform(im_f, ssd_dim_temp, dataset_mean)
                im_trans = im_trans[:, :, (2, 1, 0)]
                with torch.no_grad():
                    x = torch.from_numpy(im_trans).unsqueeze(0).permute(
                        0, 3, 1, 2).to(device)
                    if 'RefineDet' in args.backbone and args.refine:
                        arm_loc, _, loc, conf = net(x)
                    else:
                        loc, conf = net(x)
                        arm_loc = None
                    detections = detector.forward(loc,
                                                  conf,
                                                  priors,
                                                  arm_loc_data=arm_loc)
                    detections_multi[str(ssd_dim) + '_' + str(v) + '_' +
                                     str(loop)] = detections.clone()
        detect_time = _t['im_detect'].toc(average=False)
        if i >= 10:  # skip the first 10 images when measuring FPS (warm-up)
            all_time += detect_time
    ###################################################################
        for j in range(1, detections.size(1)):
            cls_dets = np.array([])
            for k, d in detections_multi.items():
                dets = d[0, j, :]
                if dets.sum() == 0:
                    continue
                mask = dets[:, 0].gt(0.).expand(dets.size(-1),
                                                dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
                boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
                if (k[-1] == '1'):
                    # undo the horizontal flip for the mirrored pass
                    boxes[:, 0] = 1 - boxes[:, 0]
                    boxes[:, 2] = 1 - boxes[:, 2]
                    temp_swap = boxes[:, 0].clone()
                    boxes[:, 0] = boxes[:, 2]
                    boxes[:, 2] = temp_swap
                boxes[:, 0] *= w
                boxes[:, 2] *= w
                boxes[:, 1] *= h
                boxes[:, 3] *= h
                # per-scale size gates: small test scales keep only boxes whose
                # longer side clears a floor; large test scales keep only boxes
                # whose shorter side stays under a ceiling
                boxes_np = boxes.cpu().numpy()
                box_ws = boxes_np[:, 2] - boxes_np[:, 0] + 1
                box_hs = boxes_np[:, 3] - boxes_np[:, 1] + 1
                key = k[:-2]  # strip the '_0'/'_1' flip suffix
                long_side_floor = {'320_192': 32, '512_320': 32,
                                   '320_320': 0, '512_512': 0}
                short_side_ceil = {'320_384': 160, '512_640': 160,
                                   '320_448': 128, '320_512': 96,
                                   '320_576': 64, '320_706': 32,
                                   '512_1216': 32}
                if key in long_side_floor:
                    index_temp = np.where(
                        np.maximum(box_ws, box_hs) > long_side_floor[key])[0]
                elif key in short_side_ceil:
                    index_temp = np.where(
                        np.minimum(box_ws, box_hs) < short_side_ceil[key])[0]
                else:
                    index_temp = np.arange(boxes_np.shape[0])
                if not index_temp.size:
                    continue
                boxes = boxes[index_temp, :]
                scores = dets[index_temp, 0].cpu().numpy()
                cls_dets_temp = np.hstack((boxes.cpu().numpy(), scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                if (cls_dets.size == 0):
                    cls_dets = cls_dets_temp.copy()
                else:
                    cls_dets = np.concatenate((cls_dets, cls_dets_temp),
                                              axis=0)
            if (cls_dets.size != 0):
                cls_dets = bbox_vote(cls_dets)
                if (len(cls_dets) != 0):
                    all_boxes[j][i] = cls_dets
        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
                                                    detect_time))
    FPS = (num_images - 10) / all_time
    print('FPS:', FPS)
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset, FPS=FPS)
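bbox_vote is not defined here. A common NumPy formulation of box voting over the concatenated multi-scale detections, as a sketch (the 0.5 IoU threshold is an assumption): every box overlapping the current top-scoring box is merged into it by a score-weighted coordinate average, keeping the best score:

import numpy as np

def bbox_vote(dets, iou_thresh=0.5):
    # dets: (N, 5) array of (x1, y1, x2, y2, score) pooled across scales
    dets = dets[dets[:, 4].argsort()[::-1]]  # sort by score, descending
    voted = []
    while dets.size:
        # IoU of the current top-scoring box against all remaining boxes
        xx1 = np.maximum(dets[0, 0], dets[:, 0])
        yy1 = np.maximum(dets[0, 1], dets[:, 1])
        xx2 = np.minimum(dets[0, 2], dets[:, 2])
        yy2 = np.minimum(dets[0, 3], dets[:, 3])
        inter = np.maximum(0., xx2 - xx1 + 1) * np.maximum(0., yy2 - yy1 + 1)
        area = (dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1)
        iou = inter / (area[0] + area - inter)
        merge = dets[iou >= iou_thresh]
        # score-weighted average of the overlapping coordinates
        weights = merge[:, 4:5]
        coords = (merge[:, :4] * weights).sum(axis=0) / weights.sum()
        voted.append(np.hstack((coords, merge[:, 4].max())))
        dets = dets[iou < iou_thresh]
    return np.array(voted, dtype=np.float32)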
Example #6
def main():
    mean = (104, 117, 123)
    print('loading model!')
    if deform:
        from model.dualrefinedet_vggbn import build_net
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=1024,
                        def_groups=deform,
                        multihead=multihead,
                        bn=bn)
    else:
        from model.refinedet_vgg import build_net
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        use_refine=refine,
                        c7_channel=1024,
                        bn=bn)
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model!', trained_model)
    net = net.to(device)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    for i, line in enumerate(open(img_set, 'r')):
        if 'COCO' in dataset:
            image_name = line[:-1]
            image_id = int(image_name.split('_')[-1])
        elif 'VOC' in dataset:
            image_name = line[:-1]
            image_id = -1
        else:
            image_name, image_id = line.split(' ')
            image_id = image_id[:-1]
        print(i, image_name, image_id)
        image_path = os.path.join(img_root, image_name + '.jpg')
        image = cv2.imread(image_path, 1)
        h, w, _ = image.shape
        image_draw = cv2.resize(image.copy(), (640, 480))
        im_trans = base_transform(image, ssd_dim, mean)
        ######################## Detection ########################
        with torch.no_grad():
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(0, 3, 1,
                                                                2).to(device)
            if 'RefineDet' in backbone and refine:
                arm_loc, _, loc, conf = net(x)
            else:
                loc, conf = net(x)
                arm_loc = None
            detections = detector.forward(loc,
                                          conf,
                                          priors,
                                          arm_loc_data=arm_loc)
        ############################################################
        out = list()
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            if dets.sum() == 0:
                continue
            mask = dets[:, 0].gt(0.).expand(dets.size(-1), dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
            boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            boxes_np = boxes.cpu().numpy()

            for b, s in zip(boxes_np, scores):
                if save_dir:
                    out.append(
                        [int(b[0]),
                         int(b[1]),
                         int(b[2]),
                         int(b[3]), j - 1, s])
                    if 'COCO' in dataset:
                        det_list.append({
                            'image_id':
                            image_id,
                            'category_id':
                            labelmap[j],
                            'bbox': [
                                float('{:.1f}'.format(b[0])),
                                float('{:.1f}'.format(b[1])),
                                float('{:.1f}'.format(b[2] - b[0] + 1)),
                                float('{:.1f}'.format(b[3] - b[1] + 1))
                            ],
                            'score':
                            float('{:.2f}'.format(s))
                        })
                    else:
                        results_file.write(
                            str(image_id) + ' ' + str(j) + ' ' + str(s) + ' ' +
                            str(np.around(b[0], 2)) + ' ' +
                            str(np.around(b[1], 2)) + ' ' +
                            str(np.around(b[2], 2)) + ' ' +
                            str(np.around(b[3], 2)) + '\n')
                if display:
                    cv2.rectangle(image_draw,
                                  (int(b[0] / w * 640), int(b[1] / h * 480)),
                                  (int(b[2] / w * 640), int(b[3] / h * 480)),
                                  (0, 255, 0),
                                  thickness=1)

                    cls = class_name[j] if 'COCO' in dataset else str(
                        labelmap[j - 1])
                    put_str = cls + ':' + str(np.around(s, decimals=2))
                    cv2.putText(
                        image_draw,
                        put_str,
                        (int(b[0] / w * 640), int(b[1] / h * 480) - 10),
                        cv2.FONT_HERSHEY_DUPLEX,
                        0.5,
                        color=(0, 255, 0),
                        thickness=1)
        if display:
            cv2.imshow('frame', image_draw)
            ch = cv2.waitKey(0)
            if ch == 115:  # 's' key: save outputs
                if save_dir:
                    print('save: ', line)
                    torch.save(
                        out, os.path.join(save_dir, '%s.pkl' % str(line[:-1])))
                    cv2.imwrite(
                        os.path.join(save_dir, '%s.jpg' % str(line[:-1])),
                        image)
                    cv2.imwrite(
                        os.path.join(save_dir, '%s_box.jpg' % str(line[:-1])),
                        image_draw)

    cv2.destroyAllWindows()
    if save_dir:
        if 'COCO' in dataset:
            json.dump(det_list, results_file)
        results_file.close()
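The per-class loop above follows a fixed tensor layout: detections[0, j, k] holds [score, x1, y1, x2, y2] with coordinates normalized to [0, 1]. A small helper capturing just that parsing step (the name and threshold are assumptions, and it ignores the optional identity column handled above):

def parse_detections(detections, w, h, thresh=0.4):
    # detections: (1, num_classes, top_k, 5) tensor of [score, x1, y1, x2, y2]
    results = []
    for j in range(1, detections.size(1)):  # index 0 is the background class
        for k in range(detections.size(2)):
            score = float(detections[0, j, k, 0])
            if score < thresh:
                continue
            box = detections[0, j, k, 1:]
            results.append((j - 1, score,
                            float(box[0]) * w, float(box[1]) * h,
                            float(box[2]) * w, float(box[3]) * h))
    return results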
Example #7
                score = detections[0, i, j, 0]
                label_name = labelmap[i - 1]
                pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
                coords = (pt[0], pt[1], pt[2], pt[3])
                pred_num += 1
                with open(filename, mode='a') as f:
                    f.write(str(pred_num) + ' label: ' + label_name +
                            ' score: ' + str(score) + ' ' +
                            ' || '.join(str(c) for c in coords) + '\n')
                j += 1


if __name__ == '__main__':
    # load net
    net = build_ssd('test', 300, 21)  # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data
    valset = VOCDetection(VOCroot, 'val', None, AnnotationTransform())
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    test_net(args.save_folder,
             net,
             args.cuda,
             valset,
             base_transform(net.size, (104, 117, 123)),
             args.top_k,
             thresh=args.confidence_threshold)
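The fragment at the top of this example starts mid-loop. A sketch of the assumed enclosing scan, packaged as a function, with j bounded so it cannot run past the tensor's top_k dimension (the 0.6 default threshold is an assumption):

def write_detections(detections, scale, labelmap, filename, thresh=0.6):
    # scan every class row of the (1, num_classes, top_k, 5) tensor and
    # append one line per qualifying detection to a text file
    pred_num = 0
    for i in range(detections.size(1)):
        j = 0
        while j < detections.size(2) and detections[0, i, j, 0] >= thresh:
            score = detections[0, i, j, 0]
            label_name = labelmap[i - 1]
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
            coords = (pt[0], pt[1], pt[2], pt[3])
            pred_num += 1
            with open(filename, mode='a') as f:
                f.write(str(pred_num) + ' label: ' + label_name +
                        ' score: ' + str(score) + ' ' +
                        ' || '.join(str(c) for c in coords) + '\n')
            j += 1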
Example #8
def main(net):
    logging.info("==> do detect on every image with model reference.")

    img_list_file = "/home/hyer/datasets/OCR/ssd_k1_test.txt"
    with open(img_list_file, "r") as f:
        data = f.readlines()

    img_list = []
    for li in data:
        img_list.append(li.strip())

    batch_size = 4
    colors = plt.cm.hsv(np.linspace(0, 1, num_classes)).tolist()

    for bs_idx in range(int(np.ceil(len(img_list) / batch_size))):
        xs = []
        img_paths = img_list[batch_size * bs_idx:batch_size * (bs_idx + 1)]
        for img_path in img_paths:
            img_cv2 = cv2.imread(img_path)

            img_transformed = base_transform(img_cv2, 300, dataset_mean)
            img_transformed = img_transformed[:, :, (2, 1, 0)]  # BGR -> RGB
            img_tensor = torch.from_numpy(img_transformed).permute(2, 0, 1)

            xs.append(img_tensor)

        images_tensor = torch.stack(xs, 0)
        images = Variable(images_tensor.cuda())

        t1 = time.time()
        detections = net(images).data
        t2 = time.time()
        boxes_batch = []

        for bs_idx in range(len(img_paths)):  # guard against a partial final batch
            boxes = []

            img_cv2 = cv2.imread(img_paths[bs_idx])
            rgb_image = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)
            plt.imshow(rgb_image)  # show the image in matplotlib
            currentAxis = plt.gca()

            print('image (w, h):', rgb_image.shape[1::-1])

            scale = torch.Tensor(
                [rgb_image.shape[1::-1], rgb_image.shape[1::-1]])

            draw = False
            dets = detections[bs_idx].view(1, num_classes, top_k, 5)
            for i in range(dets.size(1)):
                j = 0
                # bound j so the scan cannot run past top_k
                while j < top_k and dets[0, i, j, 0] >= conf_thres:
                    score = dets[0, i, j, 0]
                    label_name = class_names[i - 1]
                    display_txt = '%s: %.2f' % (label_name, score)
                    print(display_txt)

                    # each detection row holds [cls_conf, x1, y1, x2, y2]
                    pt = (dets[0, i, j, 1:] * scale).cpu().numpy()
                    coords = (pt[0],
                              pt[1]), pt[2] - pt[0] + 1, pt[3] - pt[1] + 1
                    color = colors[i]
                    if draw:
                        currentAxis.add_patch(
                            plt.Rectangle(*coords,
                                          fill=False,
                                          edgecolor=color,
                                          linewidth=2))
                        currentAxis.text(pt[0],
                                         pt[1],
                                         display_txt,
                                         bbox={
                                             'facecolor': color,
                                             'alpha': 0.5
                                         })
                    j += 1
                    boxes.append(
                        (label_name, score, pt[0], pt[1], pt[2], pt[3]))

            boxes_batch.append(boxes)
            if draw:
                plt.savefig("./test.jpg")  # save before show(); show() clears the figure
                plt.show()
        print("batch forward time: ", t2 - t1, "fusion time: ",
              time.time() - t2, "total time: ",
              time.time() - t1)
Example #9
def main():
    mean = (104, 117, 123)
    trained_model = model_dir
    print('loading model!')
    net = build_ssd('test',
                    ssd_dim,
                    num_classes,
                    tssd=tssd,
                    top_k=top_k,
                    thresh=confidence_threshold,
                    nms_thresh=nms_threshold,
                    attention=attention,
                    prior=prior,
                    tub=tub,
                    tub_thresh=tub_thresh,
                    tub_generate_score=tub_generate_score)
    net.load_state_dict(torch.load(trained_model))
    net.eval()

    print('Finished loading model!', model_dir)

    net = net.cuda()
    cudnn.benchmark = True

    frame_num = 0
    cap = cv2.VideoCapture(video_name)
    w, h = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print(w, h)
    if save_dir:
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        size = (640, 480)
        record = cv2.VideoWriter(
            os.path.join(save_dir,
                         video_name.split('/')[-1].split('.')[0] + '_OTA.avi'),
            fourcc, cap.get(cv2.CAP_PROP_FPS), size)

    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)

    att_criterion = AttentionLoss((h, w))
    state = [None] * 6 if tssd in ['lstm', 'tblstm', 'outlstm'] else None
    while (cap.isOpened()):
        ret, frame = cap.read()
        if not ret:
            break
        frame_draw = frame.copy()
        frame_num += 1
        im_trans = base_transform(frame, ssd_dim, mean)
        x = Variable(torch.from_numpy(im_trans).unsqueeze(0).permute(
            0, 3, 1, 2),
                     volatile=True)
        x = x.cuda()
        if tssd == 'ssd':
            detections, att_map = net(x)
            detections = detections.data
        else:
            detections, state, att_map = net(x, state)
            detections = detections.data
        out = list()
        for j in range(1, detections.size(1)):
            for k in range(detections.size(2)):
                dets = detections[0, j, k, :]
                if dets.dim() == 0:
                    continue
                boxes = dets[1:-1] if dets.size(0) == 6 else dets[1:]
                identity = dets[-1] if dets.size(0) == 6 else -1
                x_min = int(boxes[0] * w)
                x_max = int(boxes[2] * w)
                y_min = int(boxes[1] * h)
                y_max = int(boxes[3] * h)

                score = dets[0]
                if score > confidence_threshold:
                    out.append(
                        [x_min, y_min, x_max, y_max, j - 1, score, identity])

        if attention:
            _, up_attmap = att_criterion(
                att_map)  # scale, batch, tensor(1,h,w)
            att_target = up_attmap[0][0].cpu().data.numpy().transpose(1, 2, 0)
        for obj in out:
            x_min, y_min, x_max, y_max, cls, score, identity = obj
            # per-identity colors; unknown ids fall back to (255, 0, 0)
            palette = {0: (0, 0, 255), 1: (0, 200, 0), 2: (255, 128, 0),
                       3: (255, 0, 255), 4: (0, 128, 255), 5: (255, 128, 128)}
            color = palette.get(int(identity), (255, 0, 0))
            cv2.rectangle(frame_draw, (x_min, y_min), (x_max, y_max),
                          color,
                          thickness=2)
            cv2.fillConvexPoly(
                frame_draw,
                np.array([[x_min - 1, y_min], [x_min - 1, y_min - 50],
                          [x_max + 1, y_min - 50], [x_max + 1, y_min]],
                         np.int32), color)
            if dataset_name == 'VID2017':
                put_str = str(
                    int(identity)) + ':' + VID_CLASSES_name[cls] + ':' + str(
                        np.around(score, decimals=2))
            else:
                put_str = str(int(identity))

            cv2.putText(frame_draw,
                        put_str, (x_min + 10, y_min - 10),
                        cv2.FONT_HERSHEY_DUPLEX,
                        1,
                        color=(255, 255, 255),
                        thickness=1)
            print(
                str(frame_num) + ':' + str(np.around(score, decimals=2)) +
                ',' + VID_CLASSES_name[cls])
        if not out:
            print(str(frame_num))
        cv2.imshow('frame', cv2.resize(frame_draw, (640, 360)))
        if save_dir:
            frame_write = cv2.resize(frame_draw, size)
            record.write(frame_write)
        ch = cv2.waitKey(1)
        if ch == 32:
            while 1:
                in_ch = cv2.waitKey(10)
                if in_ch == 115:  # 's'
                    if save_dir:
                        print('save: ', frame_num)
                        torch.save(
                            out,
                            os.path.join(save_dir,
                                         tssd + '_%s.pkl' % str(frame_num)))
                        cv2.imwrite(
                            os.path.join(save_dir, '%s.jpg' % str(frame_num)),
                            frame)
                elif in_ch == 32:
                    break

    cap.release()
    if save_dir:
        record.release()
    cv2.destroyAllWindows()
Example #10
        tp_cumsum = torch.cumsum(torch.Tensor(tp[cl]), 0)
        fp_cumsum = torch.cumsum(torch.Tensor(fp[cl]), 0)
        gt_cumsum = torch.cumsum(torch.Tensor(gts[cl]), 0)
        rec_cumsum = tp_cumsum.float() / gt_cumsum[-1]
        prec_cumsum = tp_cumsum / (tp_cumsum + fp_cumsum).clamp(min=1e-6)
        ap[cl] = voc_ap(rec_cumsum, prec_cumsum)
        recall[cl] = rec_cumsum[-1]
        precision[cl] = prec_cumsum[-1]
        print('class %d rec %.4f prec %.4f AP %.4f tp %.4f fp %.4f gt %.4f' %
              (cl, recall[cl], precision[cl], ap[cl], sum(tp[cl]),
               sum(fp[cl]), sum(gts[cl])))
    # mAP = mean of APs for all classes
    mAP = sum(ap.values()) / len(ap)
    print('mAP', mAP)
    return mAP


if __name__ == '__main__':
    # load net
    net = build_ssd('test', 300, 21)    # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data
    dataset = VOCDetection(VOCroot, 'test', None, AnnotationTransform())
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    eval_net(net, args.cuda, dataset, base_transform(
        net.size, (104, 117, 123)), args.top_k)
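voc_ap is not shown. The standard VOC-style AP takes the running maximum of precision from the right (the precision envelope) and integrates it over the recall steps; a sketch of the continuous-interpolation variant rather than the older 11-point one, expecting NumPy-convertible inputs:

import numpy as np

def voc_ap(rec, prec):
    rec = np.concatenate(([0.], np.asarray(rec, dtype=np.float64), [1.]))
    prec = np.concatenate(([0.], np.asarray(prec, dtype=np.float64), [0.]))
    # precision envelope: make precision monotonically non-increasing
    for i in range(prec.size - 2, -1, -1):
        prec[i] = max(prec[i], prec[i + 1])
    # sum precision over the points where recall changes
    idx = np.where(rec[1:] != rec[:-1])[0]
    return float(np.sum((rec[idx + 1] - rec[idx]) * prec[idx + 1]))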
Example #11
def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0
    print('Loading Dataset...')
    dataset = VOCDetection(VOCroot, 'train',
                           base_transform(ssd_dim, rgb_means),
                           AnnotationTransform())
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on', dataset.name)
    step_index = 0
    if args.visdom:
        # initialize visdom loss plot
        lot = viz.line(X=torch.zeros((1, )).cpu(),
                       Y=torch.zeros((1, 3)).cpu(),
                       opts=dict(xlabel='Iteration',
                                 ylabel='Loss',
                                 title='Current SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
        epoch_lot = viz.line(X=torch.zeros((1, )).cpu(),
                             Y=torch.zeros((1, 3)).cpu(),
                             opts=dict(
                                 xlabel='Epoch',
                                 ylabel='Loss',
                                 title='Epoch SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
    for iteration in range(max_iter):
        # step the learning rate on schedule; checked every iteration rather
        # than only at epoch boundaries, so scheduled steps are not skipped
        if iteration in stepvalues:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                collate_fn=detection_collate))
            if args.visdom:
                viz.line(
                    X=torch.ones((1, 3)).cpu() * epoch,
                    Y=torch.Tensor([loc_loss, conf_loss, loc_loss + conf_loss
                                    ]).unsqueeze(0).cpu() / epoch_size,
                    win=epoch_lot,
                    update='append')
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        # load train data
        images, targets = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        if iteration % 10 == 0:
            print('Timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' %
                  (loss.data[0]),
                  end=' ')
        if args.visdom:
            viz.line(X=torch.ones((1, 3)).cpu() * iteration,
                     Y=torch.Tensor([
                         loss_l.data[0], loss_c.data[0],
                         loss_l.data[0] + loss_c.data[0]
                     ]).unsqueeze(0).cpu(),
                     win=lot,
                     update='append')
            # hacky fencepost solution for 0th epoch plot
            if iteration == 0:
                viz.line(X=torch.zeros((1, 3)).cpu(),
                         Y=torch.Tensor(
                             [loc_loss, conf_loss,
                              loc_loss + conf_loss]).unsqueeze(0).cpu(),
                         win=epoch_lot,
                         update=True)
        if iteration % 5000 == 0:
            torch.save(net.state_dict(),
                       'weights/ssd_iter_new' + repr(iteration) + '.pth')
    torch.save(net, args.save_folder + args.version + '.pth')
Example #12
def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    cum_loc_loss = 0  # cumulative
    cum_conf_loss = 0
    epoch = 0
    print('Loading Dataset...')
    dataset = VOCDetection(VOCroot, 'train',
                           base_transform(ssd_dim, rgb_means),
                           AnnotationTransform())
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on', dataset.name)
    step_index = 0
    if args.visdom:
        # initialize visdom loss plot
        lot = viz.line(X=torch.zeros((1, )),
                       Y=torch.zeros((1, 6)),
                       opts=dict(xlabel='Epoch',
                                 ylabel='Loss',
                                 title='Real-time SSD Training Loss',
                                 legend=[
                                     'Cur Loc Loss', 'Cur Conf Loss',
                                     'Cur Loss', 'Cum Loc Loss',
                                     'Cum Conf Loss', 'Cum Loss'
                                 ]))
    for iteration in range(max_iter):
        # step the learning rate on schedule; checked every iteration rather
        # than only at epoch boundaries, so scheduled steps are not skipped
        if iteration in stepvalues:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                collate_fn=detection_collate))
            cum_loc_loss += loc_loss
            cum_conf_loss += conf_loss
            epoch += 1
            if args.visdom:
                loss_list = [
                    loc_loss, conf_loss, loc_loss + conf_loss, cum_loc_loss,
                    cum_conf_loss, cum_loc_loss + cum_conf_loss
                ]
                viz.line(X=torch.ones((1, 6)) * epoch,
                         Y=torch.tensor(loss_list).unsqueeze(0),
                         win=lot,
                         update='append')
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0

        # load train data
        images, targets = next(batch_iterator)
        images = Variable(images.cuda())
        targets = [Variable(anno.cuda()) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        if iteration % 10 == 0:
            print('Timer: ', t1 - t0)
            print('Loss: %f' % (loss.data[0]), end=' ')
        if iteration % 5000 == 0:
            torch.save(net.state_dict(),
                       'weights/ssd_iter_new' + repr(iteration) + '.pth')
    torch.save(net, args.save_folder + args.version + '.pth')
Example #13
def main():
    mean = (104, 117, 123)
    if 'FPN' in backbone:
        from model.refinedet_vgg import build_net
        static_net = build_net('test',
                               size=ssd_dim,
                               num_classes=num_classes,
                               c7_channel=c7_channel,
                               bn=bn)
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=c7_channel,
                        bn=bn)
    else:
        from model.ssd4scale_vgg import build_net
        static_net = build_net('test',
                               size=ssd_dim,
                               num_classes=num_classes,
                               c7_channel=c7_channel,
                               bn=bn)
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=c7_channel,
                        bn=bn,
                        deform=deform)

    print('loading model!')
    static_net.load_state_dict(torch.load(static_dir))
    static_net.eval()
    static_net = static_net.to(device)
    net.load_state_dict(torch.load(trn_dir))
    net.eval()
    net = net.to(device)
    print('Finished loading model!', static_dir, trn_dir)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)

    frame_num = 0
    cap = cv2.VideoCapture(video_name)
    w, h = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    size = (640, 480)
    if save_dir:
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        record = cv2.VideoWriter(
            os.path.join(save_dir,
                         video_name.split('/')[-1].split('.')[0] + '.avi'),
            fourcc, cap.get(cv2.CAP_PROP_FPS), size)
    offset_list = list()
    ref_loc = list()
    while (cap.isOpened()):
        ret, frame = cap.read()
        if not ret:
            break
        h, w, _ = frame.shape
        frame_draw = frame.copy()
        im_trans = base_transform(frame, ssd_dim, mean)
        with torch.no_grad():
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(0, 3, 1,
                                                                2).to(device)
            if frame_num % interval == 0:
                static_out = static_net(x, ret_loc=deform)
                priors_static = center_size(
                    decode(static_out[0][0], priors, [0.1, 0.2]))
                if deform:
                    ref_loc = static_out[2]
                    offset_list = list()
            out = net(x,
                      ref_loc=ref_loc,
                      offset_list=offset_list,
                      ret_off=(False, True)[deform and not offset_list])
            detections = detector.forward(out[0],
                                          out[1],
                                          priors_static,
                                          scale=torch.cuda.FloatTensor(
                                              [w, h, w, h]))
            if len(detections) == 3:
                offset_list = out[2]
                ref_loc = list()
        out = list()
        for j in range(1, detections.size(1)):
            if detections[0, j, :, :].sum() == 0:
                continue
            for k in range(detections.size(2)):
                dets = detections[0, j, k, :]
                if dets.sum() == 0:
                    continue
                boxes = dets[1:-1] if dets.size(0) == 6 else dets[1:]
                identity = dets[-1] if dets.size(0) == 6 else -1
                x_min = int(boxes[0] * w)
                x_max = int(boxes[2] * w)
                y_min = int(boxes[1] * h)
                y_max = int(boxes[3] * h)

                score = dets[0]
                if score > confidence_threshold:
                    put_str = VID_CLASSES_name[j - 1] + ':' + str(
                        np.around(score,
                                  decimals=2)).split('(')[-1].split(',')[0][:4]
                    color = (255, 0, 0)
                    cv2.rectangle(frame_draw, (x_min, y_min), (x_max, y_max),
                                  color,
                                  thickness=2)
                    cv2.putText(frame_draw,
                                put_str, (x_min + 10, y_min - 10),
                                cv2.FONT_HERSHEY_DUPLEX,
                                0.8,
                                color=color,
                                thickness=1)
        print(str(frame_num))
        frame_num += 1
        frame_show = cv2.resize(frame_draw, size)
        cv2.imshow('frame', frame_show)
        if save_dir:
            record.write(frame_show)
        ch = cv2.waitKey(1)
        if ch == 32:
            while 1:
                in_ch = cv2.waitKey(10)
                if in_ch == 115:  # 's'
                    if save_dir:
                        print('save: ', frame_num)
                        torch.save(
                            out,
                            os.path.join(save_dir, '_%s.pkl' % str(frame_num)))
                        cv2.imwrite(
                            os.path.join(save_dir, '%s.jpg' % str(frame_num)),
                            frame)
                elif in_ch == 32:
                    break

    cap.release()
    if save_dir:
        record.release()
    cv2.destroyAllWindows()
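This last example relies on decode and center_size to turn the static network's ARM regression into refreshed priors. For reference, sketches matching the usual ssd.pytorch box_utils conventions (with the [0.1, 0.2] variances passed above):

import torch

def decode(loc, priors, variances):
    # undo the SSD offset encoding: priors are (cx, cy, w, h)
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    boxes[:, :2] -= boxes[:, 2:] / 2   # (cx, cy) -> (xmin, ymin)
    boxes[:, 2:] += boxes[:, :2]       # (w, h)  -> (xmax, ymax)
    return boxes

def center_size(boxes):
    # (xmin, ymin, xmax, ymax) -> (cx, cy, w, h)
    return torch.cat(((boxes[:, 2:] + boxes[:, :2]) / 2,
                      boxes[:, 2:] - boxes[:, :2]), 1)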