Ejemplo n.º 1
0
    def forward(self, input):
        """Turn RPN outputs into a fixed-size, zero-padded batch of RoIs.

        Steps performed: build the shifted anchors for every feature-map
        cell, apply the predicted deltas, clip to the image, rank by
        foreground score, keep the top ``RPN_PRE_NMS_TOP_N``, run NMS and
        keep the top ``RPN_POST_NMS_TOP_N``.

        NOTE: the "remove boxes smaller than a threshold" step of the
        classic algorithm is not performed here.

        Args:
            input: indexable with four items, read positionally:
                input[0] -- class probabilities; channels from
                    ``self._num_anchors`` onward are used as the foreground
                    scores.
                input[1] -- predicted box deltas, reshaped below to
                    (batch, -1, 4).
                input[2] -- im_info, handed to ``clip_boxes``.
                input[3] -- key into ``cfg`` selecting the RPN_* settings.

        Returns:
            Tensor of shape (batch_size, post_nms_topN, 5). Column 0 holds
            the batch index (for every row, including padding); columns
            1:5 hold the kept proposals, remaining rows stay zero.
        """
        # The first set of _num_anchors channels are bg probs,
        # the second set are the fg probs.
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

        batch_size = bbox_deltas.size(0)

        # One (x, y, x, y) shift per feature-map cell, in input-image pixels.
        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        # Broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4), then share the same
        # anchor set across the batch.
        self._anchors = self._anchors.type_as(scores)
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors.
        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores.
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations, then clip
        # the predicted boxes to the image.
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
        proposals = clip_boxes(proposals, im_info, batch_size)

        # Rank every image's proposals by score, highest first.
        _, order = torch.sort(scores, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # Take the top pre_nms_topN candidates of this image. The bound is
            # checked against the per-image count (slicing past the end would
            # be harmless, so results are unchanged).
            order_single = order[i]
            if 0 < pre_nms_topN < order_single.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals[i][order_single, :]
            scores_single = scores[i][order_single].view(-1, 1)

            # Apply NMS on (x1, y1, x2, y2, score) rows, then keep the top
            # post_nms_topN survivors.
            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh,
                             force_cpu=not cfg.USE_GPU_NMS)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]

            # Rows past num_proposal stay zero (padding).
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
Ejemplo n.º 2
0
def loop():
    """Evaluate a run of consecutive FPN checkpoints on the validation imdb.

    Parses the command line, configures the dataset and network, then
    repeats up to 200 times: load ``fpn_{session}_{epoch}_{checkpoint}.pth``,
    run detection over every image, dump the detections to
    ``detections.pkl``, evaluate them, append one summary line to
    ``result.txt`` and increment ``args.checkepoch``. The loop ends with an
    exception from ``torch.load`` once a checkpoint file is missing.

    Side effects: writes ``images/result<epoch>_<i>.png`` for each image,
    ``detections.pkl`` in the imdb output directory, and ``result.txt`` in
    the working directory; mutates ``cfg`` and ``args``.
    """
    args = parse_args()

    print('Called with args:')
    print(args)

    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_test"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']

    args.cfg_file = "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    # Initialize the network here.
    if args.net == 'vgg16':
        fpn = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fpn = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fpn = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fpn = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic)
    else:
        # Drops into the debugger; `fpn` is undefined if execution continues.
        print("network is not defined")
        pdb.set_trace()
    fpn.create_architecture()
    print('load model successfully!')

    # Reusable input holders; they are resized and refilled for every image.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    if args.cuda:
        cfg.CUDA = True
        fpn.cuda()

    start = time.time()
    max_per_image = 100
    vis = True  # args.vis
    thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)

    # One pass per checkpoint epoch; args.checkepoch is bumped at the end.
    for _ in range(200):
        dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                                 imdb.num_classes, training=False, normalize=False)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                                 shuffle=False, num_workers=0,
                                                 pin_memory=True)

        data_iter = iter(dataloader)

        det_file = os.path.join(output_dir, 'detections.pkl')
        input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
        if not os.path.exists(input_dir):
            raise Exception('There is no input directory for loading network from ' + input_dir)
        load_name = os.path.join(input_dir,
                                 'fpn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

        print("load checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        fpn.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

        fpn.eval()
        empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
        for i in range(num_images):
            # next() works on both Python 2 and 3 iterators.
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])

            det_tic = time.time()
            rois, cls_prob, bbox_pred, \
            _, _, _, _, _ = fpn(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data  # 1*300*10
            boxes = rois.data[:, :, 1:5]  # 1*300*4

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data  # 1*300*40
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Undo the precomputed mean/std normalisation. type_as
                    # keeps the constants on the same device as the deltas,
                    # so this also works without CUDA.
                    stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).type_as(box_deltas)
                    means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).type_as(box_deltas)
                    box_deltas = box_deltas.view(-1, 4) * stds + means
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = boxes

            # Divide by the third im_info entry of the first image; a plain
            # float avoids any device mismatch.
            pred_boxes /= float(data[1][0][2])

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()
            det_toc = time.time()
            detect_time = det_toc - det_tic
            misc_tic = time.time()
            if vis:
                im = cv2.imread(imdb.image_path_at(i))
                im2show = np.copy(im)
            # Class 0 is skipped.
            for j in range(1, imdb.num_classes):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                    cls_dets = cls_dets[order]
                    # `~args.cuda` was a bitwise NOT on a bool (always truthy);
                    # the intent is a logical negation.
                    keep = nms(cls_dets, cfg.TEST.NMS, not args.cuda)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    if vis:
                        im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3)
                    all_boxes[j][i] = cls_dets.cpu().numpy()
                else:
                    all_boxes[j][i] = empty_array

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                image_scores = np.hstack([all_boxes[j][i][:, -1]
                                          for j in range(1, imdb.num_classes)])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic

            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                             .format(i + 1, num_images, detect_time, nms_time))
            sys.stdout.flush()

            if vis:
                cv2.imwrite('images/result%d_%d.png' %(args.checkepoch,i), im2show)

            # Drop per-image tensors before the next forward pass.
            del data
            del pred_boxes
            del scores
            torch.cuda.empty_cache()

        with open(det_file, 'wb') as f:
            cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

        print('Evaluating detections')
        aps, clss = imdb.evaluate_detections(all_boxes, output_dir)

        # One line per checkpoint: ['cls:ap','cls:ap',...] ls:<session>_<epoch>
        lp = ",".join("'" + str(b) + ":" + str(aps[cc]) + "'"
                      for cc, b in enumerate(clss))
        sp = "[" + lp + "] ls:" + str(args.checksession) + "_" + str(args.checkepoch)
        with open("result.txt", 'a+') as f:
            f.write(sp + "\n")

        end = time.time()
        print("test time: %0.4fs" % (end - start))

        args.checkepoch = args.checkepoch + 1

        del data_iter
        del dataset
        del dataloader

        torch.cuda.empty_cache()
        gc.collect()
Ejemplo n.º 3
0
def test_net(tdcnn_demo, dataloader, args):
    """Run the detector over ``dataloader`` and print per-class detections.

    Args:
        tdcnn_demo: model; put in eval mode and called as
            ``tdcnn_demo(video_data, gt_twins)``, returning
            ``(rois, cls_prob, twin_pred)``.
        dataloader: iterable yielding
            ``(video_data, gt_twins, num_gt, video_info)`` batches.
        args: namespace providing ``vis``, ``num_videos`` and
            ``num_classes``.

    Detections are collected in a local ``all_twins[class][video]`` table of
    (start, end, score) rows and printed; nothing is returned.
    """
    start = time.time()
    # TODO: Add restriction for max_per_video
    max_per_video = 0

    thresh = 0.05 if args.vis else 0.005

    all_twins = [[[] for _ in range(args.num_videos)]
                 for _ in range(args.num_classes)]

    tdcnn_demo.eval()
    empty_array = np.transpose(np.array([[], [], []]), (1, 0))

    # Running count of videos seen in earlier batches. Using this instead of
    # `i * batch_size` keeps indices right when the final batch is smaller.
    video_offset = 0

    data_tic = time.time()
    for video_data, gt_twins, num_gt, video_info in dataloader:
        video_data = video_data.cuda()
        gt_twins = gt_twins.cuda()
        batch_size = video_data.shape[0]
        data_toc = time.time()
        data_time = data_toc - data_tic

        det_tic = time.time()
        rois, cls_prob, twin_pred = tdcnn_demo(video_data, gt_twins)

        scores_all = cls_prob.data
        twins = rois.data[:, :, 1:3]

        if cfg.TEST.TWIN_REG:
            # Apply bounding-twin regression deltas
            twin_deltas = twin_pred.data
            if cfg.TRAIN.TWIN_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                twin_deltas = twin_deltas.view(-1, 2) * torch.FloatTensor(cfg.TRAIN.TWIN_NORMALIZE_STDS).type_as(twin_deltas) \
                       + torch.FloatTensor(cfg.TRAIN.TWIN_NORMALIZE_MEANS).type_as(twin_deltas)
                twin_deltas = twin_deltas.view(batch_size, -1,
                                               2 * args.num_classes)

            pred_twins_all = twin_transform_inv(twins, twin_deltas, batch_size)
            pred_twins_all = clip_twins(pred_twins_all, cfg.TRAIN.LENGTH[0],
                                        batch_size)
        else:
            # Simply repeat the twins, once for each class. The scores are
            # indexed as [batch, roi, class] below, so the class count is
            # axis 2; staying in torch keeps the later tensor indexing valid.
            pred_twins_all = twins.repeat(1, 1, scores_all.shape[2])

        det_toc = time.time()
        detect_time = det_toc - det_tic

        for b in range(batch_size):
            video_idx = video_offset + b
            misc_tic = time.time()
            print(video_info[b])
            scores = scores_all[b]
            pred_twins = pred_twins_all[b]

            # skip j = 0, because it's the background class
            for j in range(1, args.num_classes):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_twins = pred_twins[inds][:, j * 2:(j + 1) * 2]

                    cls_dets = torch.cat((cls_twins, cls_scores.unsqueeze(1)),
                                         1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_dets, cfg.TEST.NMS)
                    if len(keep) > 0:
                        cls_dets = cls_dets[keep.view(-1).long()]
                        print("activity: ", j)
                        print(cls_dets.cpu().numpy())

                    all_twins[j][video_idx] = cls_dets.cpu().numpy()
                else:
                    all_twins[j][video_idx] = empty_array

            # Limit to max_per_video detections *over all classes*
            if max_per_video > 0:
                video_scores = np.hstack([
                    all_twins[j][video_idx][:, -1]
                    for j in range(1, args.num_classes)
                ])
                if len(video_scores) > max_per_video:
                    video_thresh = np.sort(video_scores)[-max_per_video]
                    for j in range(1, args.num_classes):
                        keep = np.where(
                            all_twins[j][video_idx][:, -1] >= video_thresh)[0]
                        all_twins[j][video_idx] = \
                            all_twins[j][video_idx][keep, :]

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic
            print ('im_detect: {:d}/{:d} {:.3f}s {:.3f}s {:.3f}s' \
              .format(video_idx + 1, args.num_videos, data_time/batch_size, detect_time/batch_size, nms_time))

        video_offset += batch_size
        data_tic = time.time()
    end = time.time()
    print("test time: %0.4fs" % (end - start))
Ejemplo n.º 4
0
        for j in xrange(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets,
                           cfg.TEST.NMS,
                           force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, pascal_classes[j],
                                             cls_dets.cpu().numpy(), 0.5)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        if webcam_num == -1:
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                             .format(num_images + 1, len(imglist), detect_time, nms_time))
            sys.stdout.flush()

        if vis and webcam_num == -1:
def interest(im2show, data, fpn, all_position, i, all_boxes, r_w, r_h, rat_w,
             rat_h):
    """Re-run detection on a crop around each region and merge the results.

    For every entry of ``all_position`` a WIDTH x HIGHT window centred on
    the region (scaled by ``rat_w`` / ``rat_h``) is cut out of ``data[0]``,
    pushed through ``fpn``, and detections scoring above 0.6 are mapped back
    to full-image coordinates and appended to ``all_boxes[class][i]``.

    Args:
        im2show: unused here.
        data: batch whose first item is the image tensor, sliced as
            [:, :, y-range, x-range].
        fpn: detection network, called as
            ``fpn(im_data, im_info, gt_boxes, num_boxes)``.
        all_position: mapping whose values are (x1, y1, x2, y2) regions.
        i: image index into ``all_boxes[class]``.
        all_boxes: per-class, per-image detection table; updated in place.
        r_w, r_h: unused here.
        rat_w, rat_h: scale factors between region and tensor coordinates.

    Returns:
        None. Relies on module-level ``args``, ``cfg``, ``imdb``, ``WIDTH``,
        ``HIGHT`` and the shared ``im_data`` / ``im_info`` / ``gt_boxes`` /
        ``num_boxes`` holders.
    """
    half_w = int(WIDTH / 2)
    half_h = int(HIGHT / 2)

    for value in all_position.values():
        # Centre of the region, in tensor coordinates.
        x = int(((value[2] - value[0]) / 2 + value[0]) * rat_w)
        y = int(((value[3] - value[1]) / 2 + value[1]) * rat_h)
        data_tem = data[0][:, :, y - half_h:y + half_h,
                           x - half_w:x + half_w]
        # Read the crop size from the tensor shape; indexing into an empty
        # crop would raise before the guard below could run.
        h = data_tem.size(2)
        w = data_tem.size(3)
        print("INER", w, h)
        if w <= 0 or h <= 0:
            # NOTE(review): this abandons the remaining regions as well;
            # confirm whether `continue` was intended.
            return None

        data_tem1 = torch.from_numpy(np.array([[h, w, w / h]])).float()
        data_tem2 = torch.from_numpy(np.array([[1, 1, 1, 1, 1]])).float()
        data_tem3 = torch.from_numpy(np.array([1])).long()
        if args.cuda:
            data_tem1 = data_tem1.cuda()
            data_tem2 = data_tem2.cuda()
            data_tem3 = data_tem3.cuda()

        im_data.data.resize_(data_tem.size()).copy_(data_tem)
        im_info.data.resize_(data_tem1.size()).copy_(data_tem1)
        gt_boxes.data.resize_(data_tem2.size()).copy_(data_tem2)
        num_boxes.data.resize_(data_tem3.size()).copy_(data_tem3)
        rois, cls_prob, bbox_pred, \
        _, _, _, _, _ = fpn(im_data, im_info, gt_boxes, num_boxes)
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]  # skip the first value; the rest is the box

        if cfg.TEST.BBOX_REG:
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # type_as keeps the constants on the deltas' device, so the
                # CPU path no longer calls .cuda().
                stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).type_as(box_deltas)
                means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).type_as(box_deltas)
                box_deltas = box_deltas.view(-1, 4) * stds + means
                if args.class_agnostic:
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            pred_boxes = boxes
        pred_boxes /= data_tem1[0][2]
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        # Offset of the crop's top-left corner inside the full tensor.
        x_off = x - half_w
        y_off = y - half_h

        for j in range(1, imdb.num_classes):  # every class except index 0
            inds = torch.nonzero(scores[:, j] > 0.6).view(-1)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)  # descending scores
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                # Map crop coordinates back into the full image.
                cls_boxes[:, 0] = (cls_boxes[:, 0] + x_off) / rat_w
                cls_boxes[:, 1] = (cls_boxes[:, 1] + y_off) / rat_h
                cls_boxes[:, 2] = (cls_boxes[:, 2] + x_off) / rat_w
                cls_boxes[:, 3] = (cls_boxes[:, 3] + y_off) / rat_h

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                # Third argument is assumed to be nms's force_cpu flag: use
                # the CPU path only when CUDA is off. Confirm against nms.
                keep = nms(cls_dets, cfg.TEST.NMS, not args.cuda)
                cls_dets = cls_dets[keep.view(-1).long()]

                # len() works for both the initial [] and NumPy arrays;
                # comparing an array to [] does not.
                if len(all_boxes[j][i]) == 0:
                    all_boxes[j][i] = cls_dets.cpu().numpy()
                else:
                    all_boxes[j][i] = np.vstack(
                        (all_boxes[j][i], cls_dets.cpu().numpy()))
    def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds,
                          im_info):
        """Decode, clip, rank and NMS-filter the RPN boxes of every image.

        Args:
            rpn_cls_probs: FloatTensor, shape (N, 2*num_anchors, H, W);
                channels from ``self.num_anchors`` onward are used as the
                foreground probabilities.
            anchors: only ``anchors[0]`` is used; it is passed to
                ``self.bbox_coder.decode_batch``.
            rpn_bbox_preds: FloatTensor, shape (N, num_anchors*4, H, W).
            im_info: forwarded to ``box_ops.clip_boxes``.

        Returns:
            proposals_batch: shape (N, post_nms_topN, 4); rows past the
                number of kept proposals are zero.
            proposals_order: shape (N, post_nms_topN); for each kept
                proposal, its index in the image's flattened score list,
                padded with -1.
        """
        # TODO create a new Function
        anchor_boxes = anchors[0]

        # Detach so nothing here takes part in the backward pass.
        cls_probs = rpn_cls_probs.detach()
        deltas = rpn_bbox_preds.detach()

        num_images = deltas.shape[0]
        # Channel-last layout, then one row of 4 deltas per anchor.
        deltas = deltas.permute(0, 2, 3, 1).contiguous()
        deltas = deltas.view(num_images, -1, 4)

        # Apply deltas to the anchors, then clip the boxes.
        decoded = self.bbox_coder.decode_batch(deltas, anchor_boxes)
        decoded = box_ops.clip_boxes(decoded, im_info)

        # Foreground probabilities, flattened in the same order as the boxes.
        fg_scores = cls_probs[:, self.num_anchors:, :, :]
        fg_scores = fg_scores.permute(0, 2, 3, 1).contiguous()
        fg_scores = fg_scores.view(num_images, -1)

        _, ranking = torch.sort(fg_scores, dim=1, descending=True)

        proposals_batch = torch.zeros(
            num_images, self.post_nms_topN, 4).type_as(deltas)
        proposals_order = torch.zeros(
            num_images, self.post_nms_topN).fill_(-1).type_as(ranking)

        for img in range(num_images):
            # Pre-NMS cut: best-scoring candidates only.
            rank = ranking[img]
            if self.pre_nms_topN > 0:
                rank = rank[:self.pre_nms_topN]
            boxes = decoded[img][rank]
            scores = fg_scores[img][rank]

            # NMS over (x1, y1, x2, y2, score) rows.
            dets = torch.cat((boxes, scores.unsqueeze(1)), 1)
            kept = nms(dets, self.nms_thresh).long().view(-1)

            # Post-NMS cut.
            if self.post_nms_topN > 0:
                kept = kept[:self.post_nms_topN]

            # Fill the leading rows; the rest keep their padding values.
            count = kept.numel()
            proposals_batch[img, :count, :] = boxes[kept, :]
            proposals_order[img, :count] = rank[kept]
        return proposals_batch, proposals_order
Ejemplo n.º 7
0
    def forward(self, input):

        #input=(rpn_cls_prob.data, rpn_bbox_pred.data,im_info, cfg_key)
        #rpn_cls_prob=(b,2*9,w,h)  rpn_bbox_pred=(b,4*9,w,h)  im_info=(b,3)=[[w,h,3],[..]](这里wh是原图的尺寸)   cfg_key=‘train’or‘test’

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        '''

        :param input:
        :return:
        '''
        '''
        这里的rois(proposal)产生的过程:
        1:先生成9*w*h个anchor的坐标--->(b,w*h*9,4)
        2:根据预测的9*w*h个anchor的回归值对所有的anchor进行位置调整(超出边界的框进行修剪)
        3:然后针对batch中的每一张图片进行:
            1:按照前景分数取出9*w*h中前12000个框的分数以及他们的位置box(test:6000)
            2:对这12000(train)个box进行nms,取出nms之后剩下的框里面的前2000个box的位置和分数(按照分数取)
            3:将每张图片的2000个保留的box合并到一起(合到一个batch里面)
        4:返回该batch保留的box(b,2000,5)
        
        '''

        scores = input[
            0][:,
               self._num_anchors:, :, :]  #shape=(b,9,w,h)取出预测的所有的anchor的前景概率
        bbox_deltas = input[1]  #=(b,4*9,w,h)
        im_info = input[2]  #=(b,3)
        cfg_key = input[3]  #=train

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N  #train:12000   test:6000
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  #train:2000   test:300
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH  #train:0.7        test:0.7
        min_size = cfg[cfg_key].RPN_MIN_SIZE  #train:8 rpn的最小尺寸 test:16

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)  #h,w
        shift_x = np.arange(
            0, feat_width) * self._feat_stride  #[0,16,16*2,16*3,...16*h]
        shift_y = np.arange(
            0, feat_height) * self._feat_stride  #[0,16,16*2,16*3,...16*w]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack(
                (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                 shift_y.ravel()
                 )).transpose())  #shifts.shape=(w*h,4)坐标是相对于原图的  4:[x,y,x,y]
        shifts = shifts.contiguous().type_as(
            scores).float()  #shifts.shape=(w*h,4)坐标是相对于原图的

        A = self._num_anchors  #9
        K = shifts.size(0)  #w*h

        self._anchors = self._anchors.type_as(
            scores)  #shape=(9,4)每个位置9个anchor的尺寸
        # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)
        #anchors.shape=(b,w*h*9,4)
        #到这里就产生了默认的anchor

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3,
                                          1).contiguous()  #shape=(b,w,h,4*9)
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)  #shape=(b,w*h*9,4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()  #(b,w,h,9)
        scores = scores.view(batch_size, -1)  #shape=(b,w*h*9)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
        #proposal.shape=(b,w*h*9,4) 根据预测出来的偏移调整acnhor的位置, 4:[x1,y1,x2,y2]两个角点的坐标

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # proposals = clip_boxes_batch(proposals, im_info, batch_size)

        # assign the score to 0 if it's non keep.
        # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

        # trim keep index to make it euqal over batch
        # keep_idx = torch.cat(tuple(keep_idx), 0)

        # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
        # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)

        # _, order = torch.sort(scores_keep, 1, True)

        scores_keep = scores  #shape=(b,w*h*9)
        proposals_keep = proposals  #shape=(b,w*h*9,4) 根据预测出来的偏移调整的acnhor的位置, 4:[x1,y1,x2,y2]两个角点的坐标
        _, order = torch.sort(scores_keep, 1, True)
        #order.shape=[b,w*h*9] w*h*9个数[2,1,0,3,,,]表示分数从高到低排序,各个框的idx(分数第2>第1》第0个》第3个框。。。。)

        output = scores.new(batch_size, post_nms_topN,
                            5).zero_()  #shape=(b,2000,5) 全零
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[
                i]  #shape=(w*h*9,4) 根据预测出来的偏移调整acnhor的位置, 4:[x1,y1,x2,y2]两个角点的坐标(取出每张图片上的回归之后的anchor)
            scores_single = scores_keep[i]  #shape=(w*h*9)每张图片上预测的anchor的前景概率

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[
                i]  #order_single.shape=(w*h*9) w*h*9个数[2,1,0,3,,,]表示分数从高到低排序,各个框的idx(分数第2>第1》第0个》第3个框。。。。)
            #到此位置取出了一张图片上的所有的调整之后的anchor,以及按分数排序的索引

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:
                                            pre_nms_topN]  #order_single.shape=(12000)只取出高分前12000个框的索引

            proposals_single = proposals_single[
                order_single, :]  ##shape=(12000,4)根据索引取出高分的12000个框的坐标
            scores_single = scores_single[order_single].view(
                -1, 1)  ##shape=(12000,1)根据索引取出高分的12000个框的分数

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(
                torch.cat((proposals_single, scores_single), 1),
                nms_thresh,
                force_cpu=not cfg.USE_GPU_NMS)  #shape=(k,1),每个值代表要保留的bbox的索引
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:
                                        post_nms_topN]  #只取nms之后保留的 前2000个  shape=(2000) ,里面的值代表要保留的box的索引
            proposals_single = proposals_single[keep_idx_i, :]  #shape=(2000,4)
            scores_single = scores_single[keep_idx_i, :]  #shape=(2000)

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)  #=2000,得到的proposal的个数
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single
            #output.shape=(b,2000,5)  5:[当前box在那个图片上(0-batchsize), x1,y1,x2,y2]

        return output
    def forward(self, input):
        """Turn FPN RPN outputs into a fixed-size batch of region proposals.

        Steps:
          1. build the anchors for all pyramid levels,
          2. apply the predicted box deltas to the anchors,
          3. clip the resulting boxes to the image,
          4. per image: rank by foreground score, keep the best
             ``RPN_PRE_NMS_TOP_N``, run NMS, keep the best
             ``RPN_POST_NMS_TOP_N``.

        NOTE: no minimum-size filtering is applied here, so
        ``RPN_MIN_SIZE`` is not read.

        Args:
            input: indexable with five entries:
                ``input[0]`` class probabilities; ``[:, :, 1]`` is used as
                    the per-anchor foreground score,
                ``input[1]`` predicted box deltas (dim 0 is the batch size),
                ``input[2]`` ``im_info``, forwarded to ``clip_boxes``,
                ``input[3]`` key into ``cfg`` selecting the RPN settings,
                ``input[4]`` feature-map shapes for the anchor generator.

        Returns:
            Tensor of shape ``(batch_size, RPN_POST_NMS_TOP_N, 5)``.
            Column 0 is the index of the image within the batch, columns
            1-4 are the box; rows past the number of boxes that survive
            NMS keep zero coordinates.
        """
        scores = input[0][:, :, 1]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]
        feat_shapes = input[4]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

        batch_size = bbox_deltas.size(0)

        # One shared anchor set, broadcast (without copying) over the batch.
        anchors = torch.from_numpy(
            generate_anchors_all_pyramids(
                self._fpn_scales, self._anchor_ratios, feat_shapes,
                self._fpn_feature_strides,
                self._fpn_anchor_stride)).type_as(scores)
        num_anchors = anchors.size(0)
        anchors = anchors.view(1, num_anchors,
                               4).expand(batch_size, num_anchors, 4)

        # Convert anchors into proposals, then clip them to the image.
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
        proposals = clip_boxes(proposals, im_info, batch_size)

        # Per-image ranking, highest score first.
        _, order = torch.sort(scores, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            order_single = order[i]
            # Slicing past the end is a no-op, so no explicit length check
            # is needed when fewer than pre_nms_topN anchors exist.
            if pre_nms_topN > 0:
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals[i][order_single, :]
            scores_single = scores[i][order_single].view(-1, 1)

            # NMS consumes rows of [box..., score], already sorted by score.
            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh, cfg.CUDA)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]

            # Write the survivors; the remaining rows stay zero-padded.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
Ejemplo n.º 9
0
    def detect_img(self, img, gpus=0):
        """Run the detector on one image and collect per-class detections.

        :param img: numpy array indexed as ``[row, col, channel]``; the
            channel axis is reversed before preprocessing (presumably a
            BGR <-> RGB swap -- confirm against ``_get_image_blob``).
        :param gpus: unused; kept so existing callers keep working.
        :return: dict built by merging the results of ``self.fetch_dets``
            for every class index >= 1 that has at least one score above
            0.05.
        """
        im = img[:, :, ::-1]

        blobs, im_scales = self._get_image_blob(im)
        im_info_np = np.array(
            [[blobs.shape[1], blobs.shape[2], im_scales[0]]],
            dtype=np.float32)

        # NHWC blob -> contiguous NCHW float tensor on the GPU.
        im_data = torch.from_numpy(blobs).permute(
            0, 3, 1, 2).float().contiguous().cuda()
        im_info = torch.from_numpy(im_info_np).cuda()
        # The network signature wants ground truth; feed empty placeholders.
        gt_boxes = torch.zeros(1, 1, 5).cuda()
        num_boxes = torch.zeros(1).long().cuda()

        rois, cls_prob, bbox_pred = self.fasterRCNN(
            im_data, im_info, gt_boxes, num_boxes)[:3]

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        num_classes = len(self.pascal_classes)

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Undo the mean/std normalisation applied to the targets.
                stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda()
                means = torch.FloatTensor(
                    cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                box_deltas = box_deltas.view(-1, 4) * stds + means
                deltas_per_roi = 4 if self.class_agnostic else 4 * num_classes
                box_deltas = box_deltas.view(1, -1, deltas_per_roi)

            pred_boxes = self.bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = self.clip_boxes(pred_boxes, im_info, 1)
        elif self.class_agnostic:
            # One box per ROI is all the class-agnostic path below reads.
            # Clone because the in-place rescale must not touch ``rois``.
            pred_boxes = boxes.clone()
        else:
            # Repeat each ROI box once per class along the last axis.
            # (np.tile cannot take a CUDA tensor, and the previous reps used
            # the ROI count instead of the class count.)
            pred_boxes = boxes.repeat(1, 1, scores.size(-1))

        # Map boxes back to the coordinates of the un-resized image.
        pred_boxes /= im_scales[0]

        # Drop only the batch axis; a bare squeeze() would also collapse the
        # ROI axis when there is a single ROI.
        scores = scores.squeeze(0)
        pred_boxes = pred_boxes.squeeze(0)

        all_res = {}
        thresh = 0.05
        for j in range(1, num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            if inds.numel() == 0:
                continue

            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if self.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            # Rows of [box (4 values), score], sorted by score for NMS.
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]

            res = self.fetch_dets(self.pascal_classes[j],
                                  cls_dets.cpu().numpy(), 0.5)
            all_res.update(res)

        return all_res
Ejemplo n.º 10
0
    def predict(cls, im_in):
        """Run the detector on a single image and return its detections.

        Args:
            im_in: image array with exactly three dimensions (checked via
                ``im_in.shape``). NOTE(review): this was documented as a
                PIL image, but PIL images expose no ``.shape``; presumably
                a numpy array is expected -- confirm against callers.

        Returns:
            dict with two keys:
              ``'pred'``: maps each class name that has a score above 0.05
                  to a list of rows ``[4 box values..., score]`` kept by
                  NMS,
              ``'metrics'``: the loss / label outputs of the model call,
                  passed through unchanged.
        """

        assert len(im_in.shape) == 3, "RGB images only"

        if cls.model is None:
            cls.model = cls.get_model()
        thresh = 0.05

        with torch.no_grad():

            blobs, im_scales = _get_image_blob(im_in)
            assert len(im_scales) == 1, "Only single-image batch implemented"

            # NHWC blob -> NCHW tensor on the GPU.
            im_data = torch.from_numpy(blobs).permute(0, 3, 1, 2).cuda()
            im_info_np = np.array(
                [[blobs.shape[1], blobs.shape[2], im_scales[0]]],
                dtype=np.float32)
            im_info = torch.from_numpy(im_info_np).cuda()

            # The network signature wants ground truth; feed placeholders.
            gt_boxes = torch.zeros(1, 1, 5).cuda()
            num_boxes = torch.zeros(1).cuda()
            (rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box,
             RCNN_loss_cls, RCNN_loss_bbox, rois_label) = cls.model(
                 im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]
            class_names = cls.model.classes

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Undo the mean/std normalisation of the targets.
                    stds = torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda()
                    means = torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(-1, 4) * stds + means
                    box_deltas = box_deltas.view(1, -1, 4 * len(class_names))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info, 1)
            else:
                # Repeat each ROI box once per class along the last axis.
                # (np.tile cannot take a CUDA tensor, and the previous reps
                # used the ROI count instead of the class count.)
                pred_boxes = boxes.repeat(1, 1, scores.size(-1))

            # Map boxes back to the coordinates of the un-resized image.
            pred_boxes /= im_scales[0]

            # Drop only the batch axis; a bare squeeze() would also collapse
            # the ROI axis when there is a single ROI.
            scores = scores.squeeze(0)
            pred_boxes = pred_boxes.squeeze(0)

            result = dict()
            for j in range(1, len(class_names)):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                if inds.numel() == 0:
                    continue

                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                # Rows of [box (4 values), score], sorted by score for NMS.
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets,
                           cfg.TEST.NMS,
                           force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                result[class_names[j]] = cls_dets.cpu().numpy().tolist()

            return {
                'pred': result,
                'metrics': {
                    'rpn_loss_cls': rpn_loss_cls,
                    'rpn_loss_box': rpn_loss_box,
                    'RCNN_loss_cls': RCNN_loss_cls,
                    'RCNN_loss_bbox': RCNN_loss_bbox,
                    'rois_label': rois_label
                }
            }
Ejemplo n.º 11
0
            for j in xrange(1, imdb.num_classes):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_dets, cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    if vis:
                        im2show = vis_detections(im2show, imdb.classes[j],
                                                 cls_dets.cpu().numpy(), 0.3)
                    all_boxes[j][i] = cls_dets.cpu().numpy()
                else:
                    all_boxes[j][i] = empty_array

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                image_scores = np.hstack([
                    all_boxes[j][i][:, -1]
                    for j in xrange(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
    def forward(self, input):
        """Convert RPN outputs into a fixed-size batch of region proposals.

        Algorithm:
          1. at every feature-map cell, place the ``A`` base anchors shifted
             by the cell position times the feature stride,
          2. apply the predicted box deltas to the anchors,
          3. clip the resulting boxes to the image,
          4. per image: rank by foreground score, keep the best
             ``RPN_PRE_NMS_TOP_N``, run NMS, keep the best
             ``RPN_POST_NMS_TOP_N``.

        NOTE: no minimum-size filtering is applied here, so
        ``RPN_MIN_SIZE`` is not read.

        Args:
            input: tuple ``(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key)``.
                Of the class-probability channels only those from index
                ``self._num_anchors`` onward are used (the foreground
                scores, per the original comments; the first
                ``_num_anchors`` channels are background).

        Returns:
            Tensor of shape ``(batch_size, RPN_POST_NMS_TOP_N, 5)``.
            Column 0 is the index of the image within the batch, columns
            1-4 are the box; rows past the number of boxes that survive
            NMS keep zero coordinates.
        """
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

        batch_size = bbox_deltas.size(0)

        # Offsets of every feature-map cell, in input-image pixels.
        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        # One row [x, y, x, y] per cell -> shape (H * W, 4).
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        # (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4): every base anchor
        # at every cell. The result is shared across the batch via expand().
        self._anchors = self._anchors.type_as(scores)
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Move channels last and flatten so deltas and scores are laid out
        # in the same (cell, anchor) order as ``anchors``.
        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals, then clip them to the image.
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
        proposals = clip_boxes(proposals, im_info, batch_size)

        # Per-image ranking, highest score first.
        _, order = torch.sort(scores, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            order_single = order[i]
            # Slicing past the end is a no-op, so no explicit length check
            # is needed when fewer than pre_nms_topN anchors exist.
            if pre_nms_topN > 0:
                order_single = order_single[:pre_nms_topN]

            # After this gather, row r is the r-th best proposal; the
            # indices returned by NMS refer to these re-numbered rows.
            proposals_single = proposals[i][order_single, :]
            scores_single = scores[i][order_single].view(-1, 1)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh,
                             force_cpu=not cfg.USE_GPU_NMS)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]

            # Write the survivors; the remaining rows stay zero-padded.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
Ejemplo n.º 13
0
        #     else:
        #         all_boxes[j][i] = empty_array

        for j in range(1, class_num):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            inds_threshold = torch.nonzero(scores[:, j] > 0.5).view(-1)
            print(
                'inds > 0.05 num = {},inds_threshold > 0.5 ={}      {}'.format(
                    inds.numel(), inds_threshold.numel(), class_name[j]))
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = pred_boxes[inds, :]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, 0.2, force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, class_name[j],
                                             cls_dets.cpu().numpy(),
                                             args.thresh_class)
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        prefix_img = img_filelist[8][0].split("/")[5]
        result_path = 'result/{}_{}.jpg'.format(prefix_img, str(epoch))
        cv2.imwrite(result_path, im2show)
Ejemplo n.º 14
0
    def forward(self, input):
        """Convert temporal RPN outputs into a fixed-size batch of proposals.

        "Twins" are 1-D segments described by two values (presumably start
        and end along the temporal axis -- confirm against
        ``twin_transform_inv``).

        Algorithm:
          1. place the ``A`` base anchors at every temporal position,
             shifted by position times the feature stride,
          2. apply the predicted twin deltas to the anchors,
          3. clip the result to ``[0, length * feat_stride]`` via
             ``clip_twins``,
          4. zero the score of every proposal flagged by
             ``_filter_twins_reverse`` for ``RPN_MIN_SIZE``,
          5. per item: rank by score, keep the best ``RPN_PRE_NMS_TOP_N``,
             run NMS, keep the best ``RPN_POST_NMS_TOP_N``.

        Args:
            input: tuple ``(rpn_cls_prob, rpn_twin_pred, cfg_key)``, both
                tensors 5-D. Only the probability channels from index
                ``self._num_anchors`` onward are used (the foreground
                scores, per the original comments).

        Returns:
            ``output`` of shape ``(batch_size, RPN_POST_NMS_TOP_N, 3)``:
            column 0 is the index of the item within the batch, columns 1-2
            are the proposal; rows past the number of survivors keep zero
            coordinates. When ``self._out_scores`` is truthy the return
            value is ``(output, output_score)``, where ``output_score`` has
            shape ``(batch_size, RPN_POST_NMS_TOP_N, 2)`` holding the batch
            index and the proposal score.
        """
        scores = input[0][:, self._num_anchors:, :, :, :]
        twin_deltas = input[1]
        cfg_key = input[2]
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # 1. Generate proposals from twin deltas and shifted anchors.
        # Only the temporal extent of the score map is needed.
        length = scores.shape[-3]

        if DEBUG:
            print('score map size: {}'.format(scores.shape))

        batch_size = twin_deltas.size(0)

        # Offset of every temporal position, in input units.
        shifts = np.arange(0, length) * self._feat_stride
        shifts = torch.from_numpy(shifts.astype(float))
        shifts = shifts.contiguous().type_as(scores)

        # (1, A, 2) + (K, 1, 1) broadcasts to (K, A, 2): every base anchor
        # at every position. Shared across the batch through expand().
        A = self._num_anchors
        K = shifts.shape[0]
        self._anchors = self._anchors.type_as(scores)
        anchors = self._anchors.view(1, A, 2) + shifts.view(K, 1, 1)
        anchors = anchors.view(1, K * A, 2).expand(batch_size, K * A, 2)

        # Move channels last and flatten so deltas and scores are laid out
        # in the same (l, h, w, a) order as the anchors.
        twin_deltas = twin_deltas.permute(0, 2, 3, 4, 1).contiguous()
        twin_deltas = twin_deltas.view(batch_size, -1, 2)
        scores = scores.permute(0, 2, 3, 4, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via twin transformations.
        proposals = twin_transform_inv(anchors, twin_deltas, batch_size)

        # 2. Clip predicted twins to the extent of the input.
        proposals = clip_twins(proposals, length * self._feat_stride,
                               batch_size)

        # 3. Suppress too-short twins by zeroing their score instead of
        # removing them, so every batch item keeps the same row count.
        no_keep = self._filter_twins_reverse(proposals, min_size)
        scores[no_keep] = 0

        # Per-item ranking, highest score first.
        _, order = torch.sort(scores, 1, True)

        output = scores.new(batch_size, post_nms_topN, 3).zero_()
        if self._out_scores:
            output_score = scores.new(batch_size, post_nms_topN, 2).zero_()

        for i in range(batch_size):
            # 4./5. Take the top pre_nms_topN by score. Slicing past the
            # end is a no-op, so no explicit length check is needed.
            order_single = order[i]
            if pre_nms_topN > 0:
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals[i][order_single, :]
            scores_single = scores[i][order_single].view(-1, 1)

            # 6. NMS over rows of [twin (2 values), score]; the returned
            # indices refer to the score-sorted rows built above.
            keep_idx_i = nms(
                torch.cat((proposals_single, scores_single), 1),
                nms_thresh,
                force_cpu=not cfg.USE_GPU_NMS)
            keep_idx_i = keep_idx_i.long().view(-1)

            # 7. Keep at most post_nms_topN survivors.
            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # 8. Write the survivors; the remaining rows stay zero-padded.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single
            if self._out_scores:
                output_score[i, :, 0] = i
                # scores_single is (n, 1) but the target slice is (n,);
                # flatten so the assignment shapes agree.
                output_score[i, :num_proposal, 1] = scores_single.view(-1)

        if self._out_scores:
            return output, output_score
        else:
            return output
Ejemplo n.º 15
0
    def forward(self, input):
        """Build a zero-padded batch of region proposals from RPN outputs.

        Steps, as implemented here:
          1. lay the base anchors (self._anchors) over every feature-map cell,
          2. apply the predicted deltas via bbox_transform_inv,
          3. clip the boxes via clip_boxes,
          4. per image: order by score, keep at most pre_nms_topN, run nms,
             keep at most post_nms_topN, and copy the survivors into the
             output tensor.

        Args:
            input: indexable with four entries --
                input[0]: 4-D class-probability tensor; only the channels from
                    self._num_anchors onward are used as scores (the original
                    notes describe it as rpn_cls_prob, [batch_size, 18, H, W]).
                input[1]: 4-D box-delta tensor, batch first (noted as
                    rpn_bbox_pred, [batch_size, 36, H, W]).
                input[2]: im_info, forwarded untouched to clip_boxes (noted as
                    [h, w, ratio]).
                input[3]: key selecting which section of cfg to read.

        Returns:
            Tensor of shape (batch_size, post_nms_topN, 5) created with
            scores.new(...). Column 0 of every row holds the image's index in
            the batch; columns 1: of the first num_proposal rows hold the kept
            boxes; every other entry stays zero.
        """
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        stage_cfg = cfg[cfg_key]
        pre_nms_topN = stage_cfg.RPN_PRE_NMS_TOP_N
        post_nms_topN = stage_cfg.RPN_POST_NMS_TOP_N
        nms_thresh = stage_cfg.RPN_NMS_THRESH
        # Read for parity with the config schema only: this method applies no
        # minimum-size filtering to the proposals.
        min_size = stage_cfg.RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)
        feat_height = scores.size(2)
        feat_width = scores.size(3)

        # Pixel offset of each feature-map cell: 0, stride, 2 * stride, ...
        col_offsets = np.arange(0, feat_width) * self._feat_stride
        row_offsets = np.arange(0, feat_height) * self._feat_stride
        # Both grids are [H, W]; x varies along columns, y along rows.
        grid_x, grid_y = np.meshgrid(col_offsets, row_offsets)
        flat_x = grid_x.ravel()
        flat_y = grid_y.ravel()
        # One (x, y, x, y) row per cell, row-major over the grid -> [H * W, 4].
        shifts = torch.from_numpy(np.vstack((flat_x, flat_y, flat_x, flat_y)).T)
        shifts = shifts.contiguous().type_as(scores).float()

        num_anchors = self._num_anchors
        num_cells = shifts.size(0)
        num_boxes = num_cells * num_anchors

        # Keep the cached base anchors on the same tensor type as the scores.
        self._anchors = self._anchors.type_as(scores)

        # Broadcast [1, A, 4] + [K, 1, 4] -> [K, A, 4], then flatten and share
        # the same anchor set across the batch without copying.
        anchors = (self._anchors.view(1, num_anchors, 4)
                   + shifts.view(num_cells, 1, 4))
        anchors = anchors.view(1, num_boxes, 4).expand(batch_size, num_boxes, 4)

        # Channels-last so that flattening enumerates cell-major, anchor-minor,
        # matching the anchor layout built above.
        bbox_deltas = (bbox_deltas.permute(0, 2, 3, 1).contiguous()
                       .view(batch_size, -1, 4))
        scores = scores.permute(0, 2, 3, 1).contiguous().view(batch_size, -1)

        # 1. convert anchors into proposals, 2. clip them to the image
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
        proposals = clip_boxes(proposals, im_info, batch_size)

        # Indices that sort each image's scores along dim 1, descending.
        order = torch.sort(scores, 1, True)[1]

        # The limit is compared against the element count of the whole score
        # tensor (all images), exactly as before; slicing past the end of a
        # single image's ranking is harmless.
        limit_pre_nms = 0 < pre_nms_topN < scores.numel()

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            ranking = order[i]
            if limit_pre_nms:
                ranking = ranking[:pre_nms_topN]

            # Highest-scoring candidates first: boxes [n, 4], scores [n, 1].
            ranked_boxes = proposals[i][ranking, :]
            ranked_scores = scores[i][ranking].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
            keep = nms(torch.cat((ranked_boxes, ranked_scores), 1),
                       nms_thresh,
                       force_cpu=not cfg.USE_GPU_NMS)
            keep = keep.long().view(-1)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            kept_boxes = ranked_boxes[keep, :]

            # Column 0 tags every row (padding included) with the index of
            # the image inside the batch, so proposals coming from different
            # feature maps can be told apart later; rows past the kept
            # proposals keep their zero boxes.
            num_proposal = kept_boxes.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = kept_boxes

        return output