def Predict(self, im_in, area):
        # initialize the tensor holders here; they are resize_()d to the real blob shapes below.
        im_data = torch.FloatTensor(1)
        im_info = torch.FloatTensor(1)
        num_boxes = torch.LongTensor(1)
        gt_boxes = torch.FloatTensor(1)

        # ship to cuda
        if self.cuda > 0:
            im_data = im_data.cuda()
            im_info = im_info.cuda()
            num_boxes = num_boxes.cuda()
            gt_boxes = gt_boxes.cuda()

        # wrap in Variables (kept for pre-0.4 PyTorch compatibility)
        with torch.no_grad():
            im_data = Variable(im_data)
            im_info = Variable(im_info)
            num_boxes = Variable(num_boxes)
            gt_boxes = Variable(gt_boxes)

        if self.cuda > 0:
            cfg.CUDA = True

        if self.cuda > 0:
            self.fasterRCNN.cuda()

        self.fasterRCNN.eval()

        #im_in = cv2.imread(im_file)
        if len(im_in.shape) == 2:
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr (negative-stride view), then back to a contiguous RGB copy
        im_in = im_in[:, :, ::-1]
        im = cv2.cvtColor(im_in, cv2.COLOR_BGR2RGB)

        blobs, im_scales = self._get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
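        # NHWC -> NCHW: PyTorch expects channels-first image tensors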
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.data.resize_(1, 1, 5).zero_()
        num_boxes.data.resize_(1).zero_()


        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = self.fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
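        # rois has shape (batch, N, 5); column 0 is the batch index, columns 1:5 the box coords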
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Undo the precomputed target normalization (deltas were stored as (t - mean) / std)
                if self.class_agnostic:
                    if self.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if self.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1,
                                                 4 * len(self.pascal_classes))
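                    # (class-specific regression: one 4-vector of deltas per class)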

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
            if self.cuda > 0:
                pred_boxes = pred_boxes.cuda()

        pred_boxes /= im_scales[0]
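        # boxes are now back in original-image coordinates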

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        ItemAndBoxes_all = []
        im2show = np.copy(im)
        for j in xrange(1, len(self.pascal_classes)):
            inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
            # if there are detections for this class
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
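                # legacy repo-style NMS: takes the (N, 5) [box|score] dets plus an IoU threshold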
                keep = nms(cls_dets,
                           cfg.TEST.NMS,
                           force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                im2show, ItemAndBoxes = vis_detections(im2show,
                                                       self.pascal_classes[j],
                                                       cls_dets.cpu().numpy(),
                                                       self.visThresh)
                ItemAndBoxes_all.append(ItemAndBoxes)

        ItemAndBoxes_all = sorted(ItemAndBoxes_all,
                                  key=lambda x: x[2],
                                  reverse=True)
        ItemAndBoxes_all = ItemAndBoxes_all[0:3]
        ItemAndBoxes_all = sorted(ItemAndBoxes_all, key=lambda x: x[1][0])

        if self.vis == 1:
            cv2.namedWindow("result", 0)
            cv2.resizeWindow("result", 1080, 720)
            cv2.imshow('result', im2show)
            cv2.waitKey(0)
            result_path = os.path.join(self.image_dir, str(area) + ".jpg")
            cv2.imwrite(result_path, im2show)

        return {
            "Left": ItemAndBoxes_all[0][0],
            "Mid": ItemAndBoxes_all[1][0],
            "Right": ItemAndBoxes_all[2][0]
        }
    def detect(self, im_in):
        if len(im_in.shape) == 2:       # if gray == 1 ch
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # im = im_in[:,:,::-1]

        blobs, im_scales = self._get_image_blob(im_in)  # Image in as BGR order
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        self.im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        self.im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        self.gt_boxes.resize_(1, 1, 5).zero_()
        self.num_boxes.resize_(1).zero_()

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = self.fasterRCNN(self.im_data, self.im_info, self.gt_boxes, self.num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if self.cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if self.cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Undo the precomputed target normalization (deltas were stored as (t - mean) / std)
                if self.args.class_agnostic:
                    if self.args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if self.args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4 * len(self.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, self.im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        ret_bbox_score_class = []    # bbox(4), score(1), class_name(1)
        for j in range(1, len(self.classes)):
            if self.classes[j] in self.display_classes.keys():
                inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
                # if there are detections for this class
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if self.args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                    cls_dets = cls_dets[order]
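                    # torchvision-style NMS: nms(boxes, scores, iou_thr) returns indices to keep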
                    keep = nms(cls_boxes[order, :], cls_scores[order], self.cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]

                    for k in range(cls_dets.shape[0]):
                        # tensor to numpy
                        ret_bbox_score_class.append([tuple(int(np.round(x.cpu())) for x in cls_dets[k, :4]), cls_dets[k, 4].item(), self.classes[j]])

        return ret_bbox_score_class
Example #3
def test(dataset="kaggle_pna",
         test_ds="test",
         arch="couplenet",
         net="res152",
         load_dir="save",
         output_dir="output",
         cuda=True,
         large_scale=False,
         class_agnostic=False,
         checksession=1,
         checkepoch=1,
         checkpoint=10021,
         batch_size=1,
         vis=False,
         anchor_scales=4,
         min_conf=.5,
         **kwargs):
    print("Test Arguments: {}".format(locals()))

    # Import network definition
    if arch == 'rcnn':
        from model.faster_rcnn.vgg16 import vgg16
        from model.faster_rcnn.resnet import resnet
    elif arch == 'rfcn':
        from model.rfcn.resnet_atrous import resnet
    elif arch == 'couplenet':
        from model.couplenet.resnet_atrous import resnet

    from roi_data_layer.pnaRoiBatchLoader import roibatchLoader
    from roi_data_layer.pna_roidb import combined_roidb

    image_read_func = lambda path: pydicom.dcmread(path).pixel_array

    print('Called with kwargs:')
    print(kwargs)

    # Warning to use cuda if available
    if torch.cuda.is_available() and not cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # Anchor settings: ANCHOR_SCALES: [8, 16, 32] or [4, 8, 16, 32]
    if anchor_scales == 3:
        scales = [8, 16, 32]
    elif anchor_scales == 4:
        scales = [4, 8, 16, 32]
    else:
        scales = [8, 16, 32]

    # Dataset related settings: MAX_NUM_GT_BOXES: 20, 30, 50
    np.random.seed(cfg.RNG_SEED)

    if test_ds == "val":
        imdbval_name = "pna_2018_val"
    elif test_ds == "test":
        imdbval_name = "pna_2018_test"
    set_cfgs = ['ANCHOR_SCALES', str(scales), 'ANCHOR_RATIOS', '[0.5,1,2]']

    cfg_file = "cfgs/{}_ls.yml".format(
        net) if large_scale else "cfgs/{}.yml".format(net)

    import model
    model_repo_path = os.path.dirname(
        os.path.dirname(os.path.dirname(model.__file__)))

    if cfg_file is not None:
        cfg_from_file(os.path.join(model_repo_path, cfg_file))
    if set_cfgs is not None:
        cfg_from_list(set_cfgs)

    test_kwargs = kwargs.pop("TEST", None)
    resnet_kwargs = kwargs.pop("RESNET", None)
    mobilenet_kwargs = kwargs.pop("MOBILENET", None)

    if test_kwargs is not None:
        for key, value in test_kwargs.items():
            cfg["TEST"][key] = value

    if resnet_kwargs is not None:
        for key, value in resnet_kwargs.items():
            cfg["RESNET"][key] = value

    if mobilenet_kwargs is not None:
        for key, value in mobilenet_kwargs.items():
            cfg["MOBILENET"][key] = value

    if kwargs is not None:
        for key, value in kwargs.items():
            cfg[key] = value

    print('Using config:')
    cfg.MODEL_DIR = os.path.abspath(cfg.MODEL_DIR)
    cfg.SUBMISSION_DIR = os.path.abspath(cfg.SUBMISSION_DIR)
    cfg.TEST_DATA_CLEAN_PATH = os.path.abspath(cfg.TEST_DATA_CLEAN_PATH)
    pprint.pprint(cfg)
    # create output directory
    # output_dir = os.path.join(output_dir, arch, net, dataset)
    output_dir = cfg.SUBMISSION_DIR
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cfg.TRAIN.USE_FLIPPED = False

    imdb, roidb, ratio_list, ratio_index = combined_roidb(imdbval_name, False)
    imdb.competition_mode(on=True)
    imdb.sub_mode = True
    print('{:d} roidb entries'.format(len(roidb)))

    # Trained network weights path
    # input_dir = load_dir + "/" + arch + "/" + net + "/" + dataset
    input_dir = cfg.MODEL_DIR
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir, '{}_{}_{}_{}.pth'.format(arch, checksession, checkepoch,
                                            checkpoint))

    # Initialize the network:
    if net == 'vgg16':
        # model = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic)
        print("vgg16 pretrained weights are not downloaded; this network is not used")
    elif net == 'res18':
        model = resnet(imdb.classes,
                       18,
                       pretrained=False,
                       class_agnostic=class_agnostic)
    elif net == 'res34':
        model = resnet(imdb.classes,
                       34,
                       pretrained=False,
                       class_agnostic=class_agnostic)
    elif net == 'res50':
        model = resnet(imdb.classes,
                       50,
                       pretrained=False,
                       class_agnostic=class_agnostic)
    elif net == 'res101':
        model = resnet(imdb.classes,
                       101,
                       pretrained=True,
                       class_agnostic=class_agnostic)
    elif net == 'res152':
        model = resnet(imdb.classes,
                       152,
                       pretrained=True,
                       class_agnostic=class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    # Create network architecture
    model.create_architecture()

    # Load pre-trained network weights
    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    model.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('loaded model successfully!')

    # Initialize the tensor holders (1-element placeholders, resize_()d per image below)
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # Copy tensors in CUDA memory
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # Make variables (volatile=True is the legacy pre-0.4 way to disable autograd; newer code uses torch.no_grad())
    im_data = Variable(im_data, volatile=True)
    im_info = Variable(im_info, volatile=True)
    num_boxes = Variable(num_boxes, volatile=True)
    gt_boxes = Variable(gt_boxes, volatile=True)

    # Set cuda usage
    if cuda:
        cfg.CUDA = True

    # Copy network to CUDA memory
    if cuda:
        model.cuda()

    # Start test or evaluation
    start = time.time()
    max_per_image = 100

    # Visualize output bounding boxes
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = arch + '_' + net
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
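    # all_boxes[cls][img] holds an (N, 5) array of [x1, y1, x2, y2, score]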

    img_dataset = roibatchLoader(roidb,
                                 ratio_list,
                                 ratio_index,
                                 batch_size,
                                 imdb.num_classes,
                                 training=False,
                                 normalize=False)
    dataloader = torch.utils.data.DataLoader(
        img_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,  # args.num_workers
        pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    # Turn on model evaluation mode, i.e. train=False
    model.eval()

    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    img_dataset.resize_batch()
    for i in range(num_images):

        data = next(data_iter)
        im_data.data.resize_(data[0].size()).copy_(data[0])
        im_info.data.resize_(data[1].size()).copy_(data[1])
        gt_boxes.data.resize_(data[2].size()).copy_(data[2])
        num_boxes.data.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = model(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:

            # Apply bounding-box regression deltas

            box_deltas = bbox_pred.data

            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Undo the precomputed target normalization (deltas were stored as (t - mean) / std)
                if class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(batch_size, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(batch_size, -1,
                                                 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2]
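        # (data[1] is im_info = [height, width, im_scale]; dividing maps boxes back to the original image)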

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        if vis:
            im = image_read_func(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there are detections for this class
            # (tensor.numel() returns the number of elements in the tensor)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if class_agnostic:  # Find any object
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
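                # this NMS variant takes the (N, 5) [box|score] tensor and an IoU threshold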
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)

    print('Kaggle submission file')

    if dataset == 'kaggle_pna':
        cipher = {'rcnn': 'alpha', 'rfcn': 'beta', 'couplenet': 'gamma'}
        created = datetime.now().strftime("%Y%m%d%H%M")
        sub_file = cipher[arch] + '_' + created + '_cls-{}_submission.txt'
        print('Submit file that ends with "_cls-3_submission.txt" file.')
        submission_file = os.path.join(output_dir, sub_file)
        imdb.write_kaggle_submission_file(all_boxes,
                                          submission_file,
                                          min_conf=min_conf)

    end = time.time()
    print("Deleting irrelevant files...")
    delete_irrelevant_files(cfg.SUBMISSION_DIR)
    print("test time: %0.4fs" % (end - start))
Example #4
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
            RCNN_loss_cls, RCNN_loss_bbox, rois_label \
            = self.FRCN(im_data, im_info, gt_boxes, num_boxes)

        # get global and local region from Faster R-CNN

        base_feat = self.FRCN.RCNN_base(im_data)

        #print(rois.data.cpu().numpy())
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        box_deltas = self.FRCN._bbox_pred.data

        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if self.class_agnostic:
                if self.use_cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda(
                        ) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if self.use_cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda(
                        ) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        # get global region
        thresh = 0.01

        region_g = np.ndarray((0, 5))
        region_l = np.ndarray((0, 5))
        for j in range(1, 4):
            inds = torch.nonzero(scores[:, j] >= thresh).view(-1)
            inds_l = torch.nonzero(scores[:, j + 3] >= thresh).view(-1)
            #print(inds)
            if inds.numel() > 0 and inds_l.numel() > 0:
                cls_scores = scores[:, j][inds]
                cls_scores_l = scores[:, j + 3][inds_l]
                #print(cls_scores)
                #print(cls_scores_l)
                _, order = torch.sort(cls_scores, 0, True)
                _, order_l = torch.sort(cls_scores_l, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[inds]
                    cls_boxes_l = pred_boxes[inds_l]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    cls_boxes_l = pred_boxes[inds_l][:,
                                                     (j + 3) * 4:(j + 4) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets_l = torch.cat(
                    (cls_boxes_l, cls_scores_l.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                cls_dets_l = cls_dets_l[order_l]

                region_g = np.vstack((region_g, cls_dets))
                region_l = np.vstack((region_l, cls_dets_l))
                """
                keep = nms(cls_dets, 0.9, force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                keep = nms(cls_dets_l, 0.9, force_cpu=not cfg.USE_GPU_NMS)
                cls_dets_l = cls_dets_l[keep.view(-1).long()]

                cls_dets = cls_dets[order]
                cls_dets_l = cls_dets_l[order_l]

                sort_ind = np.argsort(cls_dets[...,-1])
                high_ind = sort_ind[-self.minibatch//2:]
                low_ind = sort_ind[:self.minibatch//2]
                region_g = np.vstack((region_g, cls_dets[high_ind]))
                region_g = np.vstack((region_g, cls_dets[low_ind]))

                sort_ind = np.argsort(cls_dets_l[..., -1])
                high_ind = sort_ind[-self.minibatch//2:]
                low_ind = sort_ind[:self.minibatch//2]
                region_l = np.vstack((region_l, cls_dets_l[high_ind]))
                region_l = np.vstack((region_l, cls_dets_l[low_ind]))
                """
                #region_g = np.vstack((region_g, cls_dets[np.argmax(cls_dets[..., -1])]))
                #region_l = np.vstack((region_l, cls_dets_l[np.argmax(cls_dets_l[..., -1])]))

        if not self.training:
            self.minibatch = 1

        if self.training:
            keep = nms(torch.tensor(region_g).cuda(),
                       0.9,
                       force_cpu=not cfg.USE_GPU_NMS)
            if type(keep) is not list:
                keep = keep.view(-1).long()
            region_g = region_g[keep]
            sort_ind = np.argsort(region_g[..., -1])
            high_ind_g = sort_ind[-self.minibatch // 2:]
            low_ind_g = sort_ind[:self.minibatch // 2]

            keep = nms(torch.tensor(region_l).cuda(),
                       0.9,
                       force_cpu=not cfg.USE_GPU_NMS)
            if type(keep) is not list:
                keep = keep.view(-1).long()
            region_l = region_l[keep]
            sort_ind = np.argsort(region_l[..., -1])
            high_ind_l = sort_ind[-self.minibatch // 2:]
            low_ind_l = sort_ind[:self.minibatch // 2]

            high_num = min(len(high_ind_g), len(high_ind_l))
            high_ind_g = high_ind_g[:high_num]
            high_ind_l = high_ind_l[:high_num]

            low_num = min(len(low_ind_g), len(low_ind_l))
            low_ind_g = low_ind_g[:low_num]
            low_ind_l = low_ind_l[:low_num]

            proposal_g = np.vstack((region_g[high_ind_g], region_g[low_ind_g]))
            proposal_l = np.vstack((region_l[high_ind_l], region_l[low_ind_l]))

            #self.proposal_g.data.resize_(proposal_g.size()).copy_(proposal_g)
            #self.proposal_l.data.resize_(proposal_l.size()).copy_(proposal_l)

            gt_boxes = gt_boxes.cpu().numpy()[0, :2]

            gt_g = gt_boxes[np.where(gt_boxes[..., -1] < 4)[0]]
            gt_l = gt_boxes[np.where(gt_boxes[..., -1] >= 4)[0]]

            # compute paired ground truth (IoU match between proposals and GT)
            def compute_iou(ps, gt, th=0.5):
                iou_x1 = np.maximum(ps[..., 0], gt[0])
                iou_y1 = np.maximum(ps[..., 1], gt[1])
                iou_x2 = np.minimum(ps[..., 2], gt[2])
                iou_y2 = np.minimum(ps[..., 3], gt[3])
                iou_w = np.maximum(iou_x2 - iou_x1, 0)
                iou_h = np.maximum(iou_y2 - iou_y1, 0)
                iou_area = iou_w * iou_h
                gt_area = (gt[2] - gt[0]) * (gt[3] - gt[1])
                p_area = (ps[..., 2] - ps[..., 0]) * (ps[..., 3] - ps[..., 1])
                overlap = iou_area / (gt_area + p_area - iou_area)
                count = np.zeros((ps.shape[0]), dtype=int)
                count[overlap >= self.gt_iou] += 1
                return count
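            # count[k] is 1 iff proposal k overlaps the ground truth with IoU >= self.gt_iou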

            cou = compute_iou(proposal_g, gt_g[0]) + compute_iou(
                proposal_l, gt_l[0])

            ## 2019.2.13
            #glcc_gt = np.zeros((proposal_g.shape[0]), dtype=int)
            #glcc_gt[cou==2] = gt_g[0,-1]
            glcc_gt = np.array([gt_g[0, -1]], dtype=int)
            glcc_gt = torch.tensor(glcc_gt, dtype=torch.long).cuda()
            self.glcc_gt.data.resize_(glcc_gt.size()).copy_(glcc_gt)

        else:
            # test phase
            proposal_g = region_g[np.argmax(region_g[..., -1])][None, ...]
            proposal_l = region_l[np.argmax(region_l[..., -1])][None, ...]
            #self.proposal_g.data.resize_(proposal_g.size()).copy_(proposal_g.size())
            #self.proposal_l.data.resize_(proposal_l.size()).copy_(proposal_l.size())

        # if true, then show detection global and local region
        if False:
            gt_boxes = gt_boxes.astype(int)
            im = im_data.cpu().numpy()[0]
            im = np.transpose(im, (1, 2, 0))[..., ::-1]
            im -= im.min()
            im /= im.max()
            plt.imshow(im.astype(float))
            ax = plt.axes()
            ax.add_patch(
                plt.Rectangle((region_g[0, 0], region_g[0, 1]),
                              region_g[0, 2] - region_g[0, 0],
                              region_g[0, 3] - region_g[0, 1],
                              fill=False,
                              edgecolor='red',
                              linewidth=1))

            ax.add_patch(
                plt.Rectangle((region_l[0, 0], region_l[0, 1]),
                              region_l[0, 2] - region_l[0, 0],
                              region_l[0, 3] - region_l[0, 1],
                              fill=False,
                              edgecolor='yellow',
                              linewidth=1))

            ax.add_patch(
                plt.Rectangle((gt_boxes[0, 0], gt_boxes[0, 1]),
                              gt_boxes[0, 2] - gt_boxes[0, 0],
                              gt_boxes[0, 3] - gt_boxes[0, 1],
                              fill=False,
                              edgecolor='green',
                              linewidth=1))
            ax.add_patch(
                plt.Rectangle((gt_boxes[1, 0], gt_boxes[1, 1]),
                              gt_boxes[1, 2] - gt_boxes[1, 0],
                              gt_boxes[1, 3] - gt_boxes[1, 1],
                              fill=False,
                              edgecolor='white',
                              linewidth=1))
            plt.show()

        rois_g = np.zeros((1, proposal_g.shape[0], 5), dtype=np.float32)
        rois_g[0, :, 1:5] = proposal_g[:, :4]
        #rois_g /= 16.
        rois_l = np.zeros((1, proposal_l.shape[0], 5), dtype=np.float32)
        rois_l[0, :, 1:5] = proposal_l[:, :4]
        #rois_l /= 16.
        rois_g = torch.tensor(rois_g, dtype=torch.float).cuda()
        rois_l = torch.tensor(rois_l, dtype=torch.float).cuda()
        self.rois_g.data.resize_(rois_g.size()).copy_(rois_g)
        self.rois_l.data.resize_(rois_l.size()).copy_(rois_l)
        # global region
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(self.rois_g.view(-1, 5),
                                       base_feat.size()[2:],
                                       self.FRCN.grid_size)
            grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]],
                                  3).contiguous()
            pooled_feat_g = self.FRCN.RCNN_roi_crop(base_feat,
                                                    Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_g = F.max_pool2d(pooled_feat_g, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_g = self.FRCN.RCNN_roi_align(base_feat,
                                                     self.rois_g.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_g = self.FRCN.RCNN_roi_pool(base_feat,
                                                    self.rois_g.view(-1, 5))

        # local region
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(self.rois_l.view(-1, 5),
                                       base_feat.size()[2:],
                                       self.FRCN.grid_size)
            grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]],
                                  3).contiguous()
            pooled_feat_l = self.FRCN.RCNN_roi_crop(base_feat,
                                                    Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_l = F.max_pool2d(pooled_feat_l, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_l = self.FRCN.RCNN_roi_align(base_feat,
                                                     self.rois_l.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_l = self.FRCN.RCNN_roi_pool(base_feat,
                                                    self.rois_l.view(-1, 5))

        #print(pooled_feat_g.cpu().detach().numpy().shape)
        x = torch.cat((pooled_feat_g, pooled_feat_l), dim=1)
        #print(x.cpu().detach().numpy().shape)
        x = self.glcc_conv1(x)
        x = F.relu(x)
        x = x.view(-1, self.roipool * self.roipool * 512)
        x = self.glcc_fc1(x)
        x = F.relu(x)
        x = nn.Dropout()(x)
        x = self.glcc_fc2(x)
        x = F.relu(x)
        x = nn.Dropout()(x)
        glcc_out = self.glcc_fc_out(x)

        if self.training:
            glcc_loss = F.cross_entropy(glcc_out, self.glcc_gt)
        else:
            glcc_loss = 0.

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, glcc_out, glcc_loss
Example #5
def test_net(fasterRCNN, image, img_blob, img_scales, items, labels, i):
    im_data, im_info, num_boxes, gt_boxes = items
    im_info_np = np.array(
        [[img_blob.shape[1], img_blob.shape[2], img_scales[0]]],
        dtype=np.float32)
    im_data_pt = torch.from_numpy(img_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if config.TEST_BBOX_REG:
        box_deltas = bbox_pred.data
        if config.TRAIN_BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if config.cuda:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_MEANS).cuda()
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_STDS) \
                             + torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_MEANS)

            box_deltas = box_deltas.view(1, -1, 4 * len(labels))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    pred_boxes /= img_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    copy_img = np.copy(image[:, :, ::-1])
    bubbles = []
    for j in range(1, len(labels)):
        inds = torch.nonzero(scores[:, j] > config.THRESH).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], config.TEST_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]

            copy_img, vis_img, bubbles, boxes = sbd_utils.divideBubbleFromImage(
                copy_img,
                image[:, :, ::-1],
                labels[j],
                cls_dets.cpu().numpy(),
                config.CLASS_THRESH,
                bg=config.BACKGROUND)

    copy_img, vis_img, cuts = sbd_utils.divideCutFromImage(
        copy_img, image[:, :, ::-1], i, bg=config.BACKGROUND)
    alpha_image = sbd_utils.addImageToAlphaChannel(copy_img,
                                                   copy_img,
                                                   FLAG='conversion')
    vis_img, texts = text.detection(vis_img, bubbles, boxes)
    return alpha_image, vis_img, cuts, bubbles, texts
Example #6
def test_epoch(fasterRCNN, val_load, epoch):
    fasterRCNN.eval()
    tps = list()
    fps = list()
    fns = list()
    for idx, blob in enumerate(val_load):
        print('\r{}/{}'.format(idx, len(val_load)), end='')

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(*blob())

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Undo the precomputed target normalization (deltas were stored as (t - mean) / std)
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    #box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
                    box_deltas = box_deltas.view(1, -1, 4 * 2)

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, blob.im_sizes.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        inds = torch.nonzero(scores[:, 1] > cfg.obj_score_thres).view(-1)  #0.5
        if inds.numel() > 0:
            cls_scores = scores[:, 1][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds, 4:]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], 0.1)
            cls_dets = cls_dets[keep.view(-1).long()]
            cls_dets = cls_dets.cpu()
            cls_box_dets = cls_dets[:, :-1]
            cls_box_scores = cls_dets[:, -1].numpy()
        else:
            print('  nothing was detected.')
            cls_dets = None
            cls_box_dets = None
        gts_box = blob.gt_boxes.squeeze()[:blob.num_boxes.item(), :-1].cpu()
        tp, fp, fn = confusion_matrix(cls_box_dets, gts_box)
        tps.append(tp)
        fps.append(fp)
        fns.append(fn)
        preds_path = os.path.join(args.save_dir, args.dataset, args.net,
                                  'preds_boxes', 'epoch_{}'.format(-1))

        if not os.path.exists(preds_path):
            os.makedirs(preds_path)
        if cls_dets is not None:
            with open(
                    os.path.join(preds_path,
                                 '{}.txt'.format(blob.img_names[0])),
                    'w') as f:
                cls_dets = cls_dets.numpy() * blob.im_sizes[0, 2].item()
                cls_dets[:, -1] /= blob.im_sizes[0, 2].item()
                for i, e in enumerate(cls_dets):
                    #f.write(str(cls_box_scores[i])+' '+ ' '.join(map(str, e))+'\n')
                    f.write(' '.join(map(str, e)) + '\n')

    P = sum(tps) / (sum(tps) + sum(fps) + 1e-6)
    R = sum(tps) / (sum(tps) + sum(fns) + 1e-6)
    printf('epoch: ', epoch)
    printf("precision:", P)
    printf('recall:', R)
    F1 = (2 * P * R) / (P + R + 1e-6)
    printf("F1:", F1)
    print("F1:", F1)
    printf('\n\n\n')
    return F1
Example #7
def run_model(support_im_paths, query_path, cnt_shot, output_path_folder):
    # support
    # support_root_dir = 'datasets/supports'
    # class_dir = 'horse'
    # n_shot = 2
    # im_paths = list(Path(os.path.join(support_root_dir, class_dir)).glob('*.jpg'))
    CWD = os.getcwd()

    print(support_im_paths)
    n_shot = len(support_im_paths)
    random.seed(0)
    im_path_list = random.sample(support_im_paths, k=n_shot)
    im_list = []
    #fig = plt.figure(num=None, figsize=(8, 8), dpi=50, facecolor='w', edgecolor='k')
    for i, im_path in enumerate(im_path_list):
        im = Image.open(im_path)
        im_list.append(np.asarray(im))
    support_data = support_im_preprocess(im_list, cfg, 320, n_shot)

    # query
    im = np.asarray(Image.open(query_path))
    im2show = im.copy()
    query_data, im_info, gt_boxes, num_boxes = query_im_preprocess(im, cfg)

    # prepare data
    data = [query_data, im_info, gt_boxes, num_boxes, support_data]
    im_data, im_info, num_boxes, gt_boxes, support_ims = prepare_variable()
    with torch.no_grad():
        im_data.resize_(data[0].size()).copy_(data[0])
        im_info.resize_(data[1].size()).copy_(data[1])
        gt_boxes.resize_(data[2].size()).copy_(data[2])
        num_boxes.resize_(data[3].size()).copy_(data[3])
        support_ims.resize_(data[4].size()).copy_(data[4])

    # model
    cfg_from_list(
        ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'])
    model_dir = os.path.join(CWD, 'models')
    load_path = os.path.join(model_dir,
                             'faster_rcnn_{}_{}_{}.pth'.format(1, 11, 34467))

    model = get_model('multi', load_path, n_shot)

    start_time = time.time()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = model(im_data, im_info, gt_boxes, num_boxes, support_ims, gt_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    box_deltas = bbox_pred.data

    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Undo the precomputed target normalization (deltas were stored as (t - mean) / std)
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(1, -1, 4)

    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    # re-scale boxes to the origin img scale
    pred_boxes /= data[1][0][2].item()

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    thresh = 0.05
    inds = torch.nonzero(scores[:, 1] > thresh).view(-1)
    cls_scores = scores[:, 1][inds]
    _, order = torch.sort(cls_scores, 0, True)
    cls_boxes = pred_boxes[inds, :]
    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
    cls_dets = cls_dets[order]
    keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
    cls_dets = cls_dets[keep.view(-1).long()]

    for i in range(cls_dets.shape[0]):
        w = cls_dets[i, 2] - cls_dets[i, 0]
        h = cls_dets[i, 3] - cls_dets[i, 1]
        if w > 0.5 * im2show.shape[1] or h > 0.5 * im2show.shape[0]:
            cls_dets[i, 4] = 0

    end_time = time.time()

    im2show = vis_detections(im2show, ' ', cls_dets.cpu().numpy(), 0.5)

    output_path = os.path.join(output_path_folder,
                               'result' + str(cnt_shot) + '.jpg')
    cv2.imwrite(output_path, im2show[:, :, ::-1])
    print(cls_dets)
    print(end_time - start_time)
Example #8
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)


        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, :, 1]  # batch_size x num_rois (fg probs)
        bbox_deltas = input[1]      # batch_size x num_rois x 4
        im_info = input[2]
        cfg_key = input[3]
        feat_shapes = input[4]        

        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size      = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        anchors = torch.from_numpy(generate_anchors_all_pyramids(self._fpn_scales, self._anchor_ratios, 
                feat_shapes, self._fpn_feature_strides, self._fpn_anchor_stride)).type_as(scores)
        num_anchors = anchors.size(0)

        anchors = anchors.view(1, num_anchors, 4).expand(batch_size, num_anchors, 4)
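        # expand broadcasts the same anchors to every image in the batch without copying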

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # keep_idx = self._filter_boxes(proposals, min_size).squeeze().long().nonzero().squeeze()
                
        scores_keep = scores
        proposals_keep = proposals

        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1,1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i,:,0] = i
            output[i,:num_proposal,1:] = proposals_single

        return output
Example #9
def test_net(model=None, image=None, params=None, bg=None, cls=None):
    blob, scale, label = params
    with torch.no_grad():  # pre-processing data for passing net
        im_data = Variable(torch.FloatTensor(1))
        im_info = Variable(torch.FloatTensor(1))
        num_boxes = Variable(torch.LongTensor(1))
        gt_boxes = Variable(torch.FloatTensor(1))

    im_info_np = np.array([[blob.shape[1], blob.shape[2], scale[0]]], dtype=np.float32)
    im_data_pt = torch.from_numpy(blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():  # resize
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = model(im_data, im_info, gt_boxes, num_boxes)  # predict

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if opt.TEST_BBOX_REG:
        box_deltas = bbox_pred.data
        if opt.TRAIN_BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if opt.cuda:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_MEANS).cuda()
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_STDS) \
                             + torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_MEANS)

            box_deltas = box_deltas.view(1, -1, 4 * len(label))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    pred_boxes /= scale[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    image = np.copy(image[:, :, ::-1])
    demo = image.copy()
    bubbles = []
    dets_bubbles = []

    for j in range(1, len(label)):
        inds = torch.nonzero(scores[:, j] > opt.THRESH).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], opt.TEST_NMS)
            cls_dets = cls_dets[keep.view(-1).long()].cpu().numpy()

            #  post-processing : get contours of speech bubble
            demo, image, bubbles, dets_bubbles = bubble_utils.get_cnt_bubble(image, image.copy(), label[j], cls_dets,
                                                                             cls, bg=bg)
    return demo, image, bubbles, dets_bubbles
Example #10
            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]
            if cfg.TEST.BBOX_REG:
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(batch_size, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(
                            batch_size, -1, 4 * len(imagenet_vid_classes))
                pred_boxes = bbox_transform_inv(boxes, box_deltas, batch_size)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, batch_size)
            else:
                # Simply repeat the boxes, once for each class
                raise NotImplementedError

            # Assume scales are same for frames in the same video
            im_scale = im_info.data[0][-1]
            pred_boxes /= im_scale

            #pred_boxes = pred_boxes.squeeze()
            #scores = scores.squeeze()
            vid_pred_boxes.append(pred_boxes)
            vid_scores.append(scores)
            curr_frame_t0 = frames['frame_number'].squeeze()[0]
            print("Processed frame : t={} / {}"\
                    .format(curr_frame_t0, video_dataset._n_frames-1))
Example #11
    def detect(self, bbx):
        with torch.no_grad():
            vis = False
            thresh = 0.05

            im_data = torch.FloatTensor(1).to(self.device)
            im_info = torch.FloatTensor(1).to(self.device)
            num_boxes = torch.LongTensor(1).to(self.device)
            gt_boxes = torch.FloatTensor(1).to(self.device)

            # total_tic = time.time()

            x, y, w, h = [int(p) for p in bbx]
            x = max(x, 0)
            y = max(y, 0)
            im = self.img[y:(y + h), x:(x + w)]
            # print ' (x=%d, y=%d), %d * %d, (%d, %d) - cropsize: %d * %d' % (x, y, w, h, x+w, y+h, im.shape[1], im.shape[0])
            w, h = im.shape[1], im.shape[0]
            refine_bbx = [0, 0, w, h]
            if w * h == 0:
                print('What? %d * %d' % (w, h))
                # raw_input('Continue?')
                return False

            blobs, im_scales = _get_image_blob(im)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_info_np = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

            im_data_pt = torch.from_numpy(im_blob)
            im_data_pt = im_data_pt.permute(0, 3, 1, 2)
            im_info_pt = torch.from_numpy(im_info_np)

            im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.data.resize_(1, 1, 5).zero_()
            num_boxes.data.resize_(1).zero_()

            # pdb.set_trace()
            # det_tic = time.time()

            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = self.fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Undo the precomputed target normalization (deltas were stored as (t - mean) / std)
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).to(self.device) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).to(self.device)

                    box_deltas = box_deltas.view(1, -1,
                                                 4 * len(self.pascal_classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                # np.tile needs a CPU array; move boxes off the GPU first
                pred_boxes = torch.from_numpy(
                    np.tile(boxes.cpu().numpy(), (1, scores.shape[1]))).to(self.device)

            pred_boxes /= im_scales[0]

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()

            # det_toc = time.time()
            # detect_time = det_toc - det_tic
            # misc_tic = time.time()

            if vis:
                im2show = np.copy(im)

            j = 15  # class index 15 is 'person' in the standard PASCAL VOC ordering
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            step = 0
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets,
                           cfg.TEST.NMS,
                           force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                dets = cls_dets.cpu().numpy()
                # cf (confidence threshold) and iou (IoU threshold) are assumed to be
                # module-level constants; they are not defined anywhere in this snippet.
                for i in range(dets.shape[0]):
                    if dets[i, -1] > cf:
                        x1, y1, w1, h1 = dets[i][:4]
                        det = [x1, y1, w1 - x1, h1 - y1]
                        ratio = self.a_train_set.IOU(det, refine_bbx)
                        if ratio[0] > iou:  # IOU between prediction and detection should not be limited
                            step += 1

                if vis:
                    print(cls_dets)
                    dets = cls_dets.cpu().numpy()
                    # for i in range(dets.shape[0]):
                    #     bbox = tuple(int(np.round(x)) for x in dets[i, :4])
                    #     score = dets[i, -1]
                    #     if score > thresh:
                    #         crop = im[bbox[1]:bbox[3], bbox[0]:bbox[2]]
                    #         cv2.imwrite('in_place/%02d.jpg'%step, crop)
                    #         step += 1

                    im2show = vis_detections(im2show, self.pascal_classes[j],
                                             dets)

            # misc_toc = time.time()
            # nms_time = misc_toc - misc_tic

            if vis:
                cv2.imshow('test', im2show)
                cv2.waitKey(0)
                # result_path = os.path.join('results', imglist[num_images][:-4] + "_det.jpg")
                # cv2.imwrite(result_path, im2show)

            if step:
                return True
            return False
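
The nms call above uses the legacy force_cpu signature from older faster-rcnn.pytorch releases, while other snippets in this collection already use the three-argument (boxes, scores, iou_threshold) form. A minimal sketch of the modern equivalent with torchvision, assuming detections shaped (N, 5) as (x1, y1, x2, y2, score):

import torch
from torchvision.ops import nms

def nms_dets(cls_dets, iou_threshold):
    # cls_dets: (N, 5) tensor of (x1, y1, x2, y2, score).
    # torchvision.ops.nms returns indices of the kept boxes, sorted by score.
    keep = nms(cls_dets[:, :4], cls_dets[:, 4], iou_threshold)
    return cls_dets[keep]
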
Example #12
def predict1():
    data = {"success": False}
    im_info1 = {}
    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable (volatile was removed from PyTorch; inference runs under
    # the torch.no_grad() block further down)
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda > 0:
        cfg.CUDA = True

    if args.cuda > 0:
        fasterRCNN.cuda()

    fasterRCNN.eval()

    start = time.time()
    max_per_image = 100
    thresh = 0.05
    vis = True

    file_dir = os.path.join(basedir, 'upload/')
    print('file_dir', file_dir)
    webcam_num = args.webcam_num
    # Set up webcam or get image directories
    if webcam_num >= 0:
        cap = cv2.VideoCapture(webcam_num)
        num_images = 0
    else:
        imglist = os.listdir(file_dir)
        num_images = len(imglist)

    print('Loaded Photo: {} images.'.format(num_images))

    while num_images >= 0:
        total_tic = time.time()
        if webcam_num == -1:
            num_images -= 1

        # Get image from the webcam
        if webcam_num >= 0:
            if not cap.isOpened():
                raise RuntimeError("Webcam could not open. Please check connection.")
            ret, frame = cap.read()
            im_in = np.array(frame)
        # Load the demo image
        else:
            im_file = os.path.join(file_dir, imglist[num_images])
            print("im_fileeeeeee",im_file)
            # im = cv2.imread(im_file)
            im_in = np.array(imread(im_file))
        if len(im_in.shape) == 2:
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr
        im = im_in[:, :, ::-1]

        blobs, im_scales = _get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        with torch.no_grad():
            im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.resize_(1, 1, 5).zero_()
            num_boxes.resize_(1).zero_()

        # pdb.set_trace()
        det_tic = time.time()

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)



        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    if args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        jindex = []
        info = {}
        info['predictions'] = list()
        filename = os.path.split(im_file)
        print("filename", filename[1])
        info['filename'] = filename[1]
        image1 = Image.open(im_file)
        print('image1.size', image1.size)
        info['width'] = image1.size[0]
        info['height'] = image1.size[1]
        if vis:
            im2show = np.copy(im)
        for j in range(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                print('j', j)
                jindex.append(j)
                if vis:
                    im2show = vis_detections(im2show, j, cls_dets.cpu().numpy(), 0.5)
                    pred = vis_results(j, cls_dets.cpu().numpy(), 0.5)
                    print('pred', pred)
                    if pred != []:
                        info['predictions'].append(pred)
                    # print("cls_dets.cpu().numpy()",cls_dets.cpu().numpy())

        # print('cls_dets',cls_dets)
        # box_re = cls_dets.cpu().numpy()
        # print('box_re',box_re)
        # # Loop over the results and add them to the list of returned predictions
        # info = {}
        # filename = os.path.split(im_file)
        # print("filename",filename[1])
        # info['filename'] = filename[1]
        # image1 = Image.open(im_file);
        # print('image1.size', image1.size);
        # info['width'] = image1.size[0]
        # info['height'] = image1.size[1]
        # info['predictions'] = list()
        # j = 0
        # for box in box_re:
        #     r = {"BoxList": [str(i) for i in np.rint(box[:4]).astype(int)]}
        #     r["BoxList"].append(jindex[j])
        #     j=j+1
        #     info['predictions'].append(r)
        #     # Indicate that the request was a success.
        # s = {}
        data["success"] = True
        # s = {im_file: info}
        im_info1[filename[1]]=info
        data['im_info'] = im_info1
        print(data)
        new_data = process(data)
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        if webcam_num == -1:
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                             .format(num_images + 1, len(imglist), detect_time, nms_time))
            sys.stdout.flush()

        if vis and webcam_num == -1:
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)
            result_path = os.path.join(file_dir, imglist[num_images][:-4] + "_det.jpg")
            # cv2.imwrite(result_path, im2show)
        else:
            im2showRGB = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
            cv2.imshow("frame", im2showRGB)
            total_toc = time.time()
            total_time = total_toc - total_tic
            frame_rate = 1 / total_time
            print('Frame rate:', frame_rate)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    if webcam_num >= 0:
        cap.release()
        cv2.destroyAllWindows()
    return flask.jsonify(new_data)
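
predict1 returns flask.jsonify(new_data), so it is evidently meant to back a Flask endpoint. A minimal sketch of the route wiring under that assumption; the app object, route path, and port below are hypothetical, and the module-level state predict1 relies on (args, fasterRCNN, pascal_classes, basedir) must already exist in the serving module:

import flask

app = flask.Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict_route():
    # predict1() reads images from the upload folder (or the webcam) via
    # module-level state, runs detection, and returns a JSON response.
    return predict1()

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
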
Example #13
    def detect(self, dataset, foldername, filename, ch, vis, bbox_log):
        image_num = os.path.splitext(filename)[0]
        output_folder = 'output/' + dataset + "_ch" + str(ch)
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)  # makedirs: the parent 'output/' may not exist yet

        total_tic = time.time()

        # im = cv2.imread(im_file)
        im_file = foldername + "/" + filename

        im_in = np.array(imread(im_file))

        if len(im_in.shape) == 2:
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr
        im = im_in[:, :, ::-1]

        blobs, im_scales = _get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)


        with torch.no_grad():
            self.im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            self.im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            self.gt_boxes.resize_(1, 1, 5).zero_()
            self.num_boxes.resize_(1).zero_()

        # pdb.set_trace()
        det_tic = time.time()

        rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, rois_label = self.fasterRCNN(
            self.im_data, self.im_info, self.gt_boxes, self.num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if self.myargs.class_agnostic:
                    if self.myargs.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if self.myargs.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, self.im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im2show = np.copy(im)
        for j in range(1, len(self.pascal_classes)):
            inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if self.myargs.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                if bbox_log:
                    bbox_list = cls_dets.cpu().numpy()
                    for bb in bbox_list:
                        start_x = int(bb[0])
                        start_y = int(bb[1])
                        end_x = int(bb[2])
                        end_y = int(bb[3])
                        confidence = bb[4]
                        if confidence > 0.5:
                            # fo is assumed to be a log-file handle opened elsewhere;
                            # it is not created anywhere in this snippet.
                            fo.write(
                                str(ch) + "," + image_num + "," + str(start_x) + "," + str(start_y) + "," +
                                str(end_x) + "," + str(end_y) + "," + str(confidence) + "\n"
                            )

                if vis:
                    im2show = vis_detections(im2show, self.pascal_classes[j], cls_dets.cpu().numpy(), 0.5)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        # sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
        #                       .format(num_images + 1, len(imglist), detect_time, nms_time))
        # sys.stdout.flush()
        if vis:
            result_path = os.path.join(output_folder, str(image_num) + ".jpg")
            cv2.imwrite(result_path, im2show)
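
Every example here calls a _get_image_blob helper without showing it. In the py-faster-rcnn family of codebases this helper conventionally subtracts the configured pixel means and rescales the image so its short side matches a target scale, capped by a maximum size. A sketch under that assumption (target_size and max_size stand in for cfg.TEST.SCALES[0] and cfg.TEST.MAX_SIZE):

import cv2
import numpy as np

def _get_image_blob_sketch(im, pixel_means, target_size=600, max_size=1000):
    # im: BGR image as a (H, W, 3) ndarray
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= pixel_means  # mean subtraction, as in cfg.PIXEL_MEANS

    im_size_min = np.min(im_orig.shape[0:2])
    im_size_max = np.max(im_orig.shape[0:2])

    # scale so the short side hits target_size, but never exceed max_size
    im_scale = float(target_size) / float(im_size_min)
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)

    im_resized = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)
    # single-image "blob" with an explicit batch dimension, as the callers expect
    blob = im_resized[np.newaxis, :, :, :]
    return blob, np.array([im_scale], dtype=np.float32)
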
Example #14
    def forward(self, im_data, im_info, gt_boxes, gt_boxes_sens, num_boxes):
        batch_size = im_data[0].size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        gt_boxes_sens = gt_boxes_sens.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat_c = self.RCNN_base_c(im_data[0])
        base_feat_t = self.RCNN_base_t(im_data[1])
        base_feat_fused = 0.5 * (base_feat_c + base_feat_t)
        base_feat_fused = self.RCNN_base_fused(base_feat_fused)
        conv5_c = self.RCNN_base_f1(base_feat_c)
        conv5_t = self.RCNN_base_f2(base_feat_t)

        # feed fused base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat_fused, im_info, gt_boxes, num_boxes)

        # if it is training phase, then use ground truth bboxes for refining
        if self.training:
            # 50% jitter probability
            if np.random.rand(1)[0] > 0.5:
                jitter = (torch.randn(1, 256, 4) / 20).cuda()
            else:
                jitter = torch.zeros(1, 256, 4).cuda()
            # feed jitter to obtain rois_align_target
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, gt_boxes_sens, num_boxes, jitter, im_info)
            rois, rois_jittered, rois_label, rois_target, rois_align_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_align_target = Variable(rois_align_target.view(-1, rois_align_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_jittered = copy.deepcopy(rois)
            rois_label = None
            rois_target = None
            rois_align_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0


        # Region Feature Alignment module
        ctx_rois = bbox_contextual_batch(rois)
        clip_boxes(ctx_rois[:,:,1:], im_info, batch_size)
        ctx_rois = Variable(ctx_rois)
        ctx_rois_jittered = bbox_contextual_batch(rois_jittered)
        clip_boxes(ctx_rois_jittered[:,:,1:], im_info, batch_size)
        ctx_rois_jittered = Variable(ctx_rois_jittered)

        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(ctx_rois.view(-1, 5), conv5_c.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat_c = self.RCNN_roi_crop(conv5_c, Variable(grid_yx).detach())
            grid_xy = _affine_grid_gen(ctx_rois_jittered.view(-1, 5), conv5_t.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat_t = self.RCNN_roi_crop(conv5_t, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_c = F.max_pool2d(pooled_feat_c, 2, 2)
                pooled_feat_t = F.max_pool2d(pooled_feat_t, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_c = self.RCNN_roi_align(conv5_c, ctx_rois.view(-1, 5))    
            pooled_feat_t = self.RCNN_roi_align(conv5_t, ctx_rois_jittered.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_c = self.RCNN_roi_pool(conv5_c, ctx_rois.view(-1,5))
            pooled_feat_t = self.RCNN_roi_pool(conv5_t, ctx_rois_jittered.view(-1,5))
        
        pooled_feat_res = pooled_feat_t - pooled_feat_c

        # feed pooled features to top model
        pooled_feat_res = self._head_to_tail_align(pooled_feat_res)
        bbox_align_pred = self.RCNN_bbox_align_pred(pooled_feat_res)

        RCNN_loss_bbox_align = 0
        
        # Apply bounding-box regression deltas
        box_deltas = bbox_align_pred.data
        box_deltas_zeros = torch.zeros(box_deltas.shape).cuda()
        box_deltas = torch.cat((box_deltas, box_deltas_zeros), 1)


        # Optionally normalize targets by a precomputed mean and stdev
        # The roi alignment process is class_agnostic
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(batch_size, -1, 4)

        rois_sens = rois_jittered.new(rois_jittered.size()).zero_()
        rois_sens[:,:,1:5] = bbox_transform_inv(rois_jittered[:,:,1:5], box_deltas, batch_size)

        clip_boxes(rois_sens[:,:,1:5], im_info, batch_size)
        


        rois = Variable(rois)
        rois_sens = Variable(rois_sens)

        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5), conv5_c.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat_c = self.RCNN_roi_crop(conv5_c, Variable(grid_yx).detach())
            grid_xy = _affine_grid_gen(rois_sens.view(-1, 5), conv5_t.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat_t = self.RCNN_roi_crop(conv5_t, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_c = F.max_pool2d(pooled_feat_c, 2, 2)
                pooled_feat_t = F.max_pool2d(pooled_feat_t, 2, 2)

        elif cfg.POOLING_MODE == 'align':
            pooled_feat_c = self.RCNN_roi_align(conv5_c, rois.view(-1, 5))
            pooled_feat_t = self.RCNN_roi_align(conv5_t, rois_sens.view(-1, 5))

        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_c = self.RCNN_roi_pool(conv5_c, rois.view(-1, 5))
            pooled_feat_t = self.RCNN_roi_pool(conv5_t, rois_sens.view(-1, 5))
                                                        
        cls_score_ref = self.confidence_ref(self.RCNN_top_ref(pooled_feat_c.view(pooled_feat_c.size(0), -1)))
        cls_score_sens = self.confidence_sens(self.RCNN_top_sens(pooled_feat_t.view(pooled_feat_t.size(0), -1)))
        cls_prob_ref = F.softmax(cls_score_ref, 1)
        cls_prob_sens = F.softmax(cls_score_sens, 1)

        confidence_ref = torch.abs(cls_prob_ref[:,1]-cls_prob_ref[:,0])
        confidence_sens = torch.abs(cls_prob_sens[:,1]-cls_prob_sens[:,0])
        confidence_ref = confidence_ref.unsqueeze(1).unsqueeze(2).unsqueeze(3)
        confidence_sens = confidence_sens.unsqueeze(1).unsqueeze(2).unsqueeze(3)

        pooled_feat_c = confidence_ref * pooled_feat_c
        pooled_feat_t = confidence_sens * pooled_feat_t
        pooled_feat = pooled_feat_c + pooled_feat_t


        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_cls_ref = 0
        RCNN_loss_cls_sens = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            RCNN_loss_cls_ref = F.cross_entropy(cls_score_ref, rois_label)
            RCNN_loss_cls_sens = F.cross_entropy(cls_score_sens, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
            RCNN_loss_bbox_align = _smooth_l1_loss(bbox_align_pred, rois_align_target[:,:2], rois_inside_ws[:,:2], rois_outside_ws[:,:2])


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, rois_sens, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_cls_ref, RCNN_loss_cls_sens, RCNN_loss_bbox, RCNN_loss_bbox_align, rois_label
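
The fusion step above weights each branch's pooled features by how decisive its two-class score is: |p1 - p0| is near 0 for an unsure branch and near 1 for a confident one. A minimal self-contained sketch of that weighting on dummy tensors (the shapes here are illustrative, not the model's real ones):

import torch
import torch.nn.functional as F

n_rois = 4
# dummy two-class logits per RoI for the reference (c) and sensed (t) branches
cls_score_ref = torch.randn(n_rois, 2)
cls_score_sens = torch.randn(n_rois, 2)

# decisiveness of each branch: |p1 - p0| lies in [0, 1]
prob_ref = F.softmax(cls_score_ref, dim=1)
prob_sens = F.softmax(cls_score_sens, dim=1)
conf_ref = (prob_ref[:, 1] - prob_ref[:, 0]).abs()
conf_sens = (prob_sens[:, 1] - prob_sens[:, 0]).abs()

# dummy pooled features; fuse with confidence weights broadcast over (C, H, W)
feat_ref = torch.randn(n_rois, 8, 7, 7)
feat_sens = torch.randn(n_rois, 8, 7, 7)
fused = (conf_ref[:, None, None, None] * feat_ref
         + conf_sens[:, None, None, None] * feat_sens)
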
Example #15
            if args.class_agnostic:  # only detect objects, without assigning a specific class
                # multiply by the std and add the mean to de-normalize the bbox deltas
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                           + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                box_deltas = box_deltas.view(1, -1, 4)
            else:  # detect objects and also assign a class label
                # multiply by the std and add the mean to de-normalize the bbox deltas
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                           + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                # the factor of 4 is because each bbox has 4 regression coefficients;
                # this yields 4 coefficients per class for every bbox, i.e. a matrix
                # with one row per bbox and a width of 4 * the number of classes
                box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

          pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)  # apply the deltas to the RoIs
          pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)  # clip boxes outside the image using the size info in im_info
      else:  # case where no bbox regressor was trained
          # Simply repeat the boxes, once for each class
          # tile() repeats array elements along a dimension; here the boxes are repeated once per class
          pred_boxes = np.tile(boxes, (1, scores.shape[1]))

      pred_boxes /= data[1][0][2]

      # drop the singleton dimensions
      scores = scores.squeeze()
      pred_boxes = pred_boxes.squeeze()
      det_toc = time.time()
      detect_time = det_toc - det_tic
      misc_tic = time.time()
      if vis:
          im = cv2.imread(imdb.image_path_at(i))  # read the image with OpenCV
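
The de-normalization described in the comments above is a simple elementwise affine map. A worked sketch, assuming the stock py-faster-rcnn defaults of BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) and zero means (check cfg.TRAIN.* in the actual repo):

import numpy as np

# assumed stock config values
BBOX_NORMALIZE_MEANS = np.array([0.0, 0.0, 0.0, 0.0])
BBOX_NORMALIZE_STDS = np.array([0.1, 0.1, 0.2, 0.2])

raw = np.array([0.5, -1.0, 0.3, 0.2])  # network output in normalized units
deltas = raw * BBOX_NORMALIZE_STDS + BBOX_NORMALIZE_MEANS
print(deltas)  # -> [ 0.05 -0.1   0.06  0.04], the actual (dx, dy, dw, dh)
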
Example #16
    def __call__(self, ori_img):
        thresh = 0.5
        allbox = []

        assert isinstance(ori_img, np.ndarray), "input must be a numpy array!"
        if len(ori_img.shape) == 2:
            ori_img = ori_img[:, :, np.newaxis]
            ori_img = np.concatenate((ori_img, ori_img, ori_img), axis=2)

        blobs, im_scales = _get_image_blob(ori_img)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)
        # initialize the tensor holder here.
        im_data = torch.FloatTensor(1)
        im_info = torch.FloatTensor(1)
        num_boxes = torch.LongTensor(1)
        gt_boxes = torch.FloatTensor(1)

        # ship to cuda
        if self.device == "cuda":
            im_data = im_data.cuda()
            im_info = im_info.cuda()
            num_boxes = num_boxes.cuda()
            gt_boxes = gt_boxes.cuda()

        # make variable (volatile was removed from PyTorch; inference runs under
        # the torch.no_grad() block below)
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

        with torch.no_grad():
            im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.resize_(1, 1, 5).zero_()
            num_boxes.resize_(1).zero_()

        # infer
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = self.net(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if self.device == "cuda":
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_class))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        im2show = np.copy(ori_img)
        for j in range(1, len(self.pascal_class)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                im2show = vis_detections(im2show, self.pascal_class[j],
                                         cls_dets.cpu().numpy(), 0.5)
        # note: cls_dets is only bound if at least one class produced detections;
        # with no detections at all this return raises NameError
        return im2show, pred_boxes, scores, cls_dets.cpu().numpy()
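
All of these snippets funnel the de-normalized deltas into bbox_transform_inv. A minimal single-box sketch of the standard R-CNN inverse transform it is expected to implement (center/size parameterization with log-space width and height, using the legacy +1 pixel convention these repos follow):

import numpy as np

def bbox_transform_inv_single(box, delta):
    # box: (x1, y1, x2, y2); delta: (dx, dy, dw, dh)
    w = box[2] - box[0] + 1.0
    h = box[3] - box[1] + 1.0
    cx = box[0] + 0.5 * w
    cy = box[1] + 0.5 * h
    # shift the center by a fraction of the box size; rescale w/h in log space
    pred_cx = delta[0] * w + cx
    pred_cy = delta[1] * h + cy
    pred_w = np.exp(delta[2]) * w
    pred_h = np.exp(delta[3]) * h
    return np.array([pred_cx - 0.5 * pred_w, pred_cy - 0.5 * pred_h,
                     pred_cx + 0.5 * pred_w, pred_cy + 0.5 * pred_h])
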
Example #17
def eval_result(args, logger, epoch, output_dir):
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    args.batch_size = 1
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)

    imdb.competition_mode(on=True)

    load_name = os.path.join(output_dir,
                             'thundernet_epoch_{}.pth'.format(epoch))

    layer = int(args.net.split("_")[1])
    _RCNN = snet(imdb.classes,
                 layer,
                 pretrained_path=None,
                 class_agnostic=args.class_agnostic)

    _RCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage
                                )  # Load all tensors onto the CPU
    _RCNN.load_state_dict(checkpoint['model'])

    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # hm = torch.FloatTensor(1)
    # reg_mask = torch.LongTensor(1)
    # wh = torch.FloatTensor(1)
    # offset = torch.FloatTensor(1)
    # ind = torch.LongTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        # hm = hm.cuda()
        # reg_mask = reg_mask.cuda()
        # wh = wh.cuda()
        # offset = offset.cuda()
        # ind = ind.cuda()

    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)
        # hm = Variable(hm)
        # reg_mask = Variable(reg_mask)
        # wh = Variable(wh)
        # offset = Variable(offset)
        # ind = Variable(ind)

    if args.cuda:
        cfg.CUDA = True

    if args.cuda:
        _RCNN.cuda()

    start = time.time()
    max_per_image = 100

    vis = True

    thresh = 0.5  # same threshold whether or not visualization is enabled

    save_name = args.net
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(args.dataset, save_name)
    # dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
    #                          imdb.num_classes, training=False, normalize=False)
    # dataset = roibatchLoader(roidb, imdb.num_classes, training=False)
    dataset = Detection(roidb,
                        num_classes=imdb.num_classes,
                        transform=BaseTransform(cfg.TEST.SIZE,
                                                cfg.PIXEL_MEANS),
                        training=False)

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    _RCNN.eval()

    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    for i in range(num_images):

        data = next(data_iter)

        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])
            # hm.resize_(data[4].size()).copy_(data[4])
            # reg_mask.resize_(data[5].size()).copy_(data[5])
            # wh.resize_(data[6].size()).copy_(data[6])
            # offset.resize_(data[7].size()).copy_(data[7])
            # ind.resize_(data[8].size()).copy_(data[8])

        det_tic = time.time()
        with torch.no_grad():
            time_measure, \
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = _RCNN(im_data, im_info, gt_boxes, num_boxes,
                               # hm,reg_mask,wh,offset,ind
                               )

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1,
                                                 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # pred_boxes /= data[1][0][2].item()
        pred_boxes[:, :, 0::2] /= data[1][0][2].item()
        pred_boxes[:, :, 1::2] /= data[1][0][3].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                #keep = gpu_nms(cls_dets.cpu().numpy(), cfg.TEST.NMS)
                #keep = torch.from_numpy(np.array(keep))

                cls_dets_np = cls_dets.cpu().numpy()
                # in the reference soft-NMS implementation, method=0 falls back to
                # plain hard NMS (1 = linear decay, 2 = Gaussian decay)
                keep = cpu_soft_nms(cls_dets_np,
                                    sigma=0.7,
                                    Nt=0.5,
                                    threshold=0.4,
                                    method=0)
                cls_dets_np = cls_dets_np[keep]

                #cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    vis_detections(im2show, imdb.classes[j],
                                   color_list[j - 1].tolist(), cls_dets_np,
                                   0.6)
                all_boxes[j][i] = cls_dets_np
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write(
            'im_detect: {:d}/{:d} Detect: {:.3f}s (RPN: {:.3f}s, Pre-RoI: {:.3f}s, RoI: {:.3f}s, Subnet: {:.3f}s) NMS: {:.3f}s\n' \
            .format(i + 1, num_images, detect_time, time_measure[0], time_measure[1], time_measure[2],
                    time_measure[3], nms_time))
        sys.stdout.flush()

        if vis and i % 200 == 0 and args.use_tfboard:
            im2show = im2show[:, :, ::-1]
            logger.add_image('pred_image_{}'.format(i),
                             trans.ToTensor()(Image.fromarray(
                                 im2show.astype('uint8'))),
                             global_step=i)

            # cv2.imwrite('result.png', im2show)
            # pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    ap_50 = imdb.evaluate_detections(all_boxes, output_dir)
    logger.add_scalar("map_50", ap_50, global_step=epoch)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
def test_model_while_training(fasterRCNN, args):

    # args = parse_args()
    # args = set_dataset_args(args, test=True)
    # np.random.seed(cfg.RNG_SEED)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.TRAIN.USE_FLIPPED = False

    # args.imdbval_name = 'clipart_test'

    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name_target, False)

    # breakpoint()

    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    # if args.cuda:
    #   fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100

    thresh = 0.0

    save_name = args.load_name.split('/')[-1]
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                          imdb.num_classes, training=False, normalize = False, path_return=True)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):

        data = next(data_iter)
        im_data.data.resize_(data[0].size()).copy_(data[0])
        #print(data[0].size())
        im_info.data.resize_(data[1].size()).copy_(data[1])
        gt_boxes.data.resize_(data[2].size()).copy_(data[2])
        num_boxes.data.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label, _, _ = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        # d_pred = d_pred.data
        path = data[4]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                # assumes the repo's own (dets, thresh) NMS wrapper, not torchvision's
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        # misc_toc = time.time()

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s \r' \
            .format(i + 1, num_images, detect_time))
        sys.stdout.flush()

    imdb.evaluate_detections(all_boxes, output_dir)
Example #19
def test(args, model=None):
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # Load dataset
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
        args.imdbval_name, False)
    imdb_vu.competition_mode(on=True)
    dataset_vu = roibatchLoader(roidb_vu,
                                ratio_list_vu,
                                ratio_index_vu,
                                query_vu,
                                1,
                                imdb_vu._classes,
                                training=False)

    # initialize the network here.
    if not model:
        if args.net == 'vgg16':
            fasterRCNN = vgg16(imdb_vu.classes,
                               pretrained=False,
                               class_agnostic=args.class_agnostic)
        elif args.net == 'res101':
            fasterRCNN = resnet(imdb_vu.classes,
                                101,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res50':
            fasterRCNN = resnet(imdb_vu.classes,
                                50,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res152':
            fasterRCNN = resnet(imdb_vu.classes,
                                152,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        else:
            print("network is not defined")
        fasterRCNN.create_architecture()

        # Load checkpoint
        print("load checkpoint %s" % (args.weights))
        checkpoint = torch.load(args.weights)
        fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

        print('load model successfully!')
    else:
        # evaluate constructed model
        fasterRCNN = model

    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    query = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    catgory = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()
        im_data = im_data.cuda()
        query = query.cuda()
        im_info = im_info.cuda()
        catgory = catgory.cuda()
        gt_boxes = gt_boxes.cuda()

    # record time
    start = time.time()

    # visualization
    vis = args.vis if hasattr(args, 'vis') else None
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0
    max_per_image = 100

    fasterRCNN.eval()
    dataset_vu.query_position = 0
    test_scales = cfg.TEST.SCALES
    multiscale_iterators = []
    for i_scale, test_scale in enumerate(test_scales):
        cur_dataloader_vu = torch.utils.data.DataLoader(dataset_vu,
                                                        batch_size=1,
                                                        shuffle=False,
                                                        num_workers=0,
                                                        pin_memory=True)
        cur_data_iter_vu = iter(cur_dataloader_vu)
        multiscale_iterators.append(cur_data_iter_vu)

    # total number of test images; each image can contain multiple classes to detect
    num_images_vu = len(imdb_vu.image_index)
    num_detect = len(ratio_index_vu[0])

    all_boxes = [[[] for _ in range(num_images_vu)]
                 for _ in range(imdb_vu.num_classes)]

    _t = {'im_detect': time.time(), 'misc': time.time()}

    for i, index in enumerate(ratio_index_vu[0]):
        det_tic = time.time()
        multiscale_boxes = []
        multiscale_scores = []
        for i_scale, (data_iter_vu, test_scale) in enumerate(
                zip(multiscale_iterators, test_scales)):
            # need to rewrite cfg.TRAIN.SCALES - very hacky!
            BACKUP_TRAIN_SCALES = cfg.TRAIN.SCALES
            cfg.TRAIN.SCALES = [test_scale]
            data = next(data_iter_vu)
            cfg.TRAIN.SCALES = BACKUP_TRAIN_SCALES

            with torch.no_grad():
                im_data.resize_(data[0].size()).copy_(data[0])
                query.resize_(data[1].size()).copy_(data[1])
                im_info.resize_(data[2].size()).copy_(data[2])
                gt_boxes.resize_(data[3].size()).copy_(data[3])
                catgory.data.resize_(data[4].size()).copy_(data[4])

                # Run Testing
                if not hasattr(args, "class_image_augmentation"
                               ) or not args.class_image_augmentation:
                    queries = [query]
                elif args.class_image_augmentation.lower() == "rotation90":
                    queries = [query]
                    for _ in range(3):
                        queries.append(queries[-1].rot90(1, [2, 3]))
                else:
                    raise RuntimeError(
                        "Unknown class_image_augmentation: {}".format(
                            args.class_image_augmentation))

                for q in queries:
                    rois, cls_prob, bbox_pred, \
                    rpn_loss_cls, rpn_loss_box, \
                    RCNN_loss_cls, _, RCNN_loss_bbox, \
                    rois_label, weight = fasterRCNN(im_data, q, im_info, gt_boxes, catgory)

                    scores = cls_prob.data
                    boxes = rois.data[:, :, 1:5]

                    # Apply bounding-box regression
                    if cfg.TEST.BBOX_REG:
                        # Apply bounding-box regression deltas
                        box_deltas = bbox_pred.data
                        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                            # Optionally normalize targets by a precomputed mean and stdev
                            if args.class_agnostic:
                                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                                box_deltas = box_deltas.view(1, -1, 4)
                            else:
                                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                                box_deltas = box_deltas.view(
                                    1, -1, 4 * len(imdb_vu.classes))

                        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
                    else:
                        # Simply repeat the boxes, once for each class
                        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

                    # Resize to original ratio
                    pred_boxes /= data[2][0][2].item()

                    # Remove batch_size dimension
                    scores = scores.squeeze()
                    pred_boxes = pred_boxes.squeeze()

                    multiscale_scores.append(scores)
                    multiscale_boxes.append(pred_boxes)

        scores = torch.cat(multiscale_scores, dim=0)
        pred_boxes = torch.cat(multiscale_boxes, dim=0)

        # Record time
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        # Post processing
        inds = torch.nonzero(scores > thresh).view(-1)
        if inds.numel() > 0:
            # remove useless indices
            cls_scores = scores[inds]
            cls_boxes = pred_boxes[inds, :]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)

            # rearrange order
            _, order = torch.sort(cls_scores, 0, True)
            cls_dets = cls_dets[order]

            # NMS
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            all_boxes[catgory][index] = cls_dets.cpu().numpy()

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                try:
                    image_scores = all_boxes[catgory][index][:, -1]
                    if len(image_scores) > max_per_image:
                        image_thresh = np.sort(image_scores)[-max_per_image]

                        keep = np.where(
                            all_boxes[catgory][index][:, -1] >= image_thresh)[0]
                        all_boxes[catgory][index] = all_boxes[catgory][index][keep, :]
                except:
                    pass

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic

            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                .format(i + 1, num_detect, detect_time, nms_time))
            sys.stdout.flush()

            # save test image
            if vis and i % 1 == 0:
                im2show = cv2.imread(
                    dataset_vu._roidb[dataset_vu.ratio_index[i]]['image'])
                im2show = vis_detections(im2show, 'shot',
                                         cls_dets.cpu().numpy(), 0.3)

                o_query = data[1][0].permute(1, 2,
                                             0).contiguous().cpu().numpy()
                o_query *= [0.229, 0.224, 0.225]
                o_query += [0.485, 0.456, 0.406]
                o_query *= 255
                o_query = o_query[:, :, ::-1]

                (h, w, c) = im2show.shape
                o_query = cv2.resize(o_query, (h, h),
                                     interpolation=cv2.INTER_LINEAR)
                im2show = np.concatenate((im2show, o_query), axis=1)

                vis_path = "./test_img"
                if not os.path.isdir(vis_path):
                    os.makedirs(vis_path)
                cv2.imwrite(os.path.join(vis_path, "%d_d.png" % (i)), im2show)

    print('Evaluating detections')
    mAP = imdb_vu.evaluate_detections(all_boxes, None)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return mAP
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if args.class_agnostic:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= data[1][0][2]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()
    if vis:
        im = cv2.imread(imdb.image_path_at(i))
        im2show = np.copy(im)
    for j in xrange(1, imdb.num_classes):
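The de-normalization of `box_deltas` above is repeated in almost every example in this listing; a minimal standalone sketch of just that step (the helper name is hypothetical, and the std/mean values are the usual py-faster-rcnn defaults, stated here as an assumption):

import torch

# Common py-faster-rcnn defaults, assumed:
BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)
BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)

def denormalize_deltas(box_deltas, num_classes, class_agnostic, device="cpu"):
    """Undo the training-time target normalization on the raw bbox_pred
    tensor and reshape it for bbox_transform_inv."""
    stds = torch.tensor(BBOX_NORMALIZE_STDS, device=device)
    means = torch.tensor(BBOX_NORMALIZE_MEANS, device=device)
    deltas = box_deltas.view(-1, 4) * stds + means
    if class_agnostic:
        return deltas.view(1, -1, 4)
    return deltas.view(1, -1, 4 * num_classes)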
Example #21
def eval_test(fasterRCNN, args, cfg, imdb, dataloader, output_dir):
    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    start = time.time()
    max_per_image = 100

    vis = args.vis

    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = "faster_rcnn_10"
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    data_iter = iter(dataloader)

    _t = {"im_detect": time.time(), "misc": time.time()}
    det_file = os.path.join(output_dir, "detections.pkl")

    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

            # im_data.data.resize_(data[0].size()).copy_(data[0])
            # im_info.data.resize_(data[1].size()).copy_(data[1])
            # gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            # num_boxes.data.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, RCNN_loss_cls, RCNN_loss_bbox, rois_label = fasterRCNN(
            im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = (box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() +
                                  torch.FloatTensor(
                                      cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda())
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = (box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() +
                                  torch.FloatTensor(
                                      cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda())
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write("im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r".format(
            i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite("result.png", im2show)
            pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)
    with open(det_file, "wb") as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print("Evaluating detections")
    imdb.evaluate_detections(all_boxes, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
    if "coco" in args.dataset:
        return imdb.coco_eval
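Most of these evaluation loops dump a `detections.pkl`; a short self-contained sketch of reading one back (layout as written above: `all_boxes[class_idx][image_idx]` is an N x 5 array of `[x1, y1, x2, y2, score]`):

import pickle

with open("detections.pkl", "rb") as f:
    all_boxes = pickle.load(f)

# all_boxes[j][i]: detections of class j in image i, one row per box
for j, per_class in enumerate(all_boxes):
    total = sum(len(dets) for dets in per_class)
    print("class %d: %d detections" % (j, total))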
Example #22
def rcnn_im_detect(net, im, boxes, feat_list=()):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        feat_list: a list of feature names to return (supported: conv1-conv5,
            fc, and logit)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
        attr_scores (ndarray): R x M array of attribute class scores, or None
        rel_scores (ndarray): relation class scores, or None
        feats (list): network blobs for each name requested in feat_list
    """
    feat_dict = {
        "conv1": "conv1",
        "conv2": "res2c",
        "conv3": "res3b3",
        "conv4": "res4b22",
        "conv5": "res5c",
        "fc": "pool5_flat",
        "logit": "cls_score"
    }

    blobs, im_scales = _get_blobs(im, boxes)

    # Skip redundant computation for duplicated ROIs.
    if cfg.DEDUP_BOXES > 0:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['rois'].reshape(*(blobs['rois'].shape))
    if 'im_info' in net.blobs:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    if 'im_info' in net.blobs:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)

    blobs_out = net.forward(**forward_kwargs)

    feats = []
    if len(feat_list) > 0:
        for f in feat_list:
            feats.append(net.blobs[feat_dict[f]])

    # use softmax estimated probabilities
    scores = blobs_out['cls_prob']

    if cfg.TEST.COMMON.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if 'attr_prob' in net.blobs:
        attr_scores = blobs_out['attr_prob']
    else:
        attr_scores = None

    if 'rel_prob' in net.blobs:
        rel_scores = blobs_out['rel_prob']
    else:
        rel_scores = None

    return scores, pred_boxes, attr_scores, rel_scores, feats
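The `cfg.DEDUP_BOXES` branch above avoids running duplicated ROIs through the network by hashing their quantized coordinates; a self-contained sketch of that trick (the 1/16 quantization step is assumed from py-faster-rcnn, one conv5 cell):

import numpy as np

DEDUP_BOXES = 1.0 / 16  # quantization step, assumed from py-faster-rcnn

rois = np.array([[0, 10, 10, 50, 50],
                 [0, 10, 12, 50, 50],    # lands in the same cells -> duplicate
                 [0, 80, 80, 120, 160]], dtype=np.float32)

v = np.array([1, 1e3, 1e6, 1e9, 1e12])
hashes = np.round(rois * DEDUP_BOXES).dot(v)
_, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)

unique_rois = rois[index, :]    # only these go through the forward pass
# after the forward pass: scores = scores[inv_index, :] maps results back
print("%d rois -> %d unique" % (len(rois), len(unique_rois)))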
Example #23
def eval_frcnn(frcnn_extra, device, fasterRCNN, is_break=False):
    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(frcnn_extra.output_dir, 'detections.pkl')
    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    data_iter_test = iter(frcnn_extra.dataloader_test)
    for i in range(frcnn_extra.num_images_test):
        data_test = next(data_iter_test)
        im_data = data_test[0].to(device)
        im_info = data_test[1].to(device)
        gt_boxes = data_test[2].to(device)
        num_boxes = data_test[3].to(device)
        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if frcnn_extra.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(frcnn_extra.imdb_test.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data_test[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        for j in range(1, frcnn_extra.imdb_test.num_classes):
            inds = torch.nonzero(scores[:, j] > frcnn_extra.thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if frcnn_extra.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                frcnn_extra.all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                frcnn_extra.all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if frcnn_extra.max_per_image > 0:
            image_scores = np.hstack([frcnn_extra.all_boxes[j][i][:, -1]
                                      for j in range(1, frcnn_extra.imdb_test.num_classes)])
            if len(image_scores) > frcnn_extra.max_per_image:
                image_thresh = np.sort(image_scores)[-frcnn_extra.max_per_image]
                for j in range(1, frcnn_extra.imdb_test.num_classes):
                    keep = np.where(frcnn_extra.all_boxes[j][i][:, -1] >= image_thresh)[0]
                    frcnn_extra.all_boxes[j][i] = frcnn_extra.all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
        if is_break:
            break
    ap = frcnn_extra.imdb_test.evaluate_detections(frcnn_extra.all_boxes, frcnn_extra.output_dir)
    return ap
Example #24
def get_detections_from_im(fasterRCNN,
                           classes,
                           im_file,
                           args,
                           conf_thresh=0.2):
    """obtain the image_info for each image,
    im_file: the path of the image

    return: dict of {'image_id', 'image_h', 'image_w', 'num_boxes', 'boxes', 'features'}
    boxes: the coordinate of each box
    """
    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if args.cuda > 0:
        cfg.CUDA = True

    if args.cuda > 0:
        fasterRCNN.cuda()

    fasterRCNN.eval()

    # load the image
    # im = cv2.imread(im_file)
    im_in = np.array(imread(im_file))
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    # rgb -> bgr
    im = im_in[:, :, ::-1]

    vis = True

    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()
    # pdb.set_trace()
    det_tic = time.time()

    # region features [num_boxes x 2048] are also returned when pool_feat=True
    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label, pooled_feat = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, pool_feat=True)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if args.class_agnostic:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()

    max_conf = torch.zeros((pred_boxes.shape[0]))
    if args.cuda > 0:
        max_conf = max_conf.cuda()

    if vis:
        im2show = np.copy(im)
    for j in xrange(1, len(classes)):
        inds = torch.nonzero(scores[:, j] > conf_thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            index = inds[order[keep]]
            max_conf[index] = torch.where(scores[index, j] > max_conf[index],
                                          scores[index, j], max_conf[index])
            if vis:
                im2show = vis_detections(im2show, classes[j],
                                         cls_dets.cpu().numpy(), 0.5)

    if args.cuda > 0:
        keep_boxes = torch.where(max_conf >= conf_thresh, max_conf,
                                 torch.tensor(0.0).cuda())
    else:
        keep_boxes = torch.where(max_conf >= conf_thresh, max_conf,
                                 torch.tensor(0.0))
    keep_boxes = torch.squeeze(torch.nonzero(keep_boxes), dim=-1)
    if len(keep_boxes) < MIN_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
    elif len(keep_boxes) > MAX_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]

    objects = torch.argmax(scores[keep_boxes][:, 1:], dim=1)
    box_dets = np.zeros((len(keep_boxes), 4))
    boxes = pred_boxes[keep_boxes]
    name_list = []
    box_caption_feature = np.zeros((len(keep_boxes), 300))
    box_caption_mask = np.ones(len(keep_boxes))
    for i in range(len(keep_boxes)):
        kind = objects[i] + 1
        bbox = boxes[i, kind * 4:(kind + 1) * 4]
        tmp_dets = np.array(bbox.cpu())
        if (tmp_dets[2] - tmp_dets[0]) * (tmp_dets[3] - tmp_dets[1]) <= 10:
            box_caption_mask[i] = 0
        class_name = classes[1:][objects[i]]
        box_dets[i] = tmp_dets
        name_list.append(class_name)
        doc = nlp1(class_name)
        token_vector = nlp2(doc[0].text).vector
        box_caption_feature[i, :] = token_vector

    return {
        'image_h': np.size(im, 0),
        'image_w': np.size(im, 1),
        'num_boxes': len(keep_boxes),
        #'boxes': box_dets, # region shape 4 * 36, 4 is the xy positions
        #'features': (pooled_feat[keep_boxes].cpu()).detach().numpy(),
        'text': name_list,
        #'text_feature': box_caption_feature,
        # 'text_mask': box_caption_mask
    }
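The keep-box selection in `get_detections_from_im` clamps the number of retained regions between `MIN_BOXES` and `MAX_BOXES`; an isolated sketch of that logic (the 10/100 limits are the adaptive bottom-up-attention defaults, assumed here):

import torch

MIN_BOXES, MAX_BOXES = 10, 100  # assumed defaults
conf_thresh = 0.2

max_conf = torch.rand(50)       # stand-in for the per-box max class confidence
keep = torch.nonzero(max_conf >= conf_thresh).view(-1)
if keep.numel() < MIN_BOXES:
    keep = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
elif keep.numel() > MAX_BOXES:
    keep = torch.argsort(max_conf, descending=True)[:MAX_BOXES]
print(keep.numel(), "boxes kept")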
Example #25
def inference(input_np, fasterRCNN, own_data_classes, cfg_file='cfgs/vgg16.yml', cuda=True, cfg_list=None):
  cfg_from_file(cfg_file)
  if not (cfg_list is None):
    cfg_from_list(cfg_list)
  cfg.USE_GPU_NMS = cuda
  np.random.seed(cfg.RNG_SEED)

  # initialize the tensor holder here.
  im_data = torch.FloatTensor(1)
  im_info = torch.FloatTensor(1)
  num_boxes = torch.LongTensor(1)
  gt_boxes = torch.FloatTensor(1)

  # ship to cuda
  if cuda:
    im_data = im_data.cuda()
    im_info = im_info.cuda()
    num_boxes = num_boxes.cuda()
    gt_boxes = gt_boxes.cuda()

  # make variable
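  # (Variable(..., volatile=True) is the pre-0.4 PyTorch idiom; torch.no_grad() replaces it)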
  im_data = Variable(im_data, volatile=True)
  im_info = Variable(im_info, volatile=True)
  num_boxes = Variable(num_boxes, volatile=True)
  gt_boxes = Variable(gt_boxes, volatile=True)

  if cuda:
    cfg.CUDA = True
  else:
    cfg.CUDA = False

  fasterRCNN.eval()

  # Load the demo image
  im_in = input_np
  # im_in = np.array(imread(im_file))
  if len(im_in.shape) == 2:
    im_in = im_in[:,:,np.newaxis]
    im_in = np.concatenate((im_in,im_in,im_in), axis=2)
  # rgb -> bgr
  im = im_in[:,:,::-1]

  blobs, im_scales = _get_image_blob(im, cfg)
  assert len(im_scales) == 1, "Only single-image batch implemented"
  im_blob = blobs
  im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

  im_data_pt = torch.from_numpy(im_blob)
  im_data_pt = im_data_pt.permute(0, 3, 1, 2)
  im_info_pt = torch.from_numpy(im_info_np)

  im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
  im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
  gt_boxes.data.resize_(1, 1, 5).zero_()
  num_boxes.data.resize_(1).zero_()

  rois, cls_prob, bbox_pred, \
  rpn_loss_cls, rpn_loss_box, \
  RCNN_loss_cls, RCNN_loss_bbox, \
  rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

  scores = cls_prob.data
  boxes = rois.data[:, :, 1:5]

  if cfg.TEST.BBOX_REG:
      # Apply bounding-box regression deltas
      box_deltas = bbox_pred.data
      if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
          if cuda:
              box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
          else:
              box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
          box_deltas = box_deltas.view(1, -1, 4 * len(own_data_classes))

      pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
      pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
  else:
      # Simply repeat the boxes, once for each class
      pred_boxes = np.tile(boxes, (1, scores.shape[1]))

  pred_boxes /= im_scales[0]

  scores = scores.squeeze()
  pred_boxes = pred_boxes.squeeze()

  boxes_output = np.empty(shape=[0, 4], dtype=np.uint16)
  gt_classes_output = []
  ishards_output = np.empty(shape=[0], dtype=np.int32)

  thresh = 0.05
  # loop over each foreground class
  for j in xrange(1, len(own_data_classes)):
      inds = torch.nonzero(scores[:,j]>thresh).view(-1)
      # if there is det
      if inds.numel() > 0:
        cls_scores = scores[:,j][inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
        
        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
        cls_dets = cls_dets[keep.view(-1).long()]
        for i_box in range(cls_dets.shape[0]):
          if cls_dets[i_box,4]>thresh and filter_bndbox(cls_dets[i_box,:4], ratio=0.2):
            boxes_output = np.append(boxes_output, np.expand_dims(cls_dets[i_box,:4], axis=0), axis=0).astype(np.uint16)
            # ishard is 0 as default.
            ishards_output = np.append(ishards_output, [0], axis=0)
            gt_classes_output.append(own_data_classes[j])

  objs_info = {'boxes': boxes_output,
               'classes_name': gt_classes_output,
               'gt_ishard': ishards_output}

  return objs_info
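`filter_bndbox` is not shown anywhere in this listing; a guessed sketch consistent with the call site `filter_bndbox(cls_dets[i_box,:4], ratio=0.2)`, offered purely as an assumption:

def filter_bndbox(box, ratio=0.2):
    """Hypothetical reconstruction: keep a box only if it is non-degenerate
    and its short side is at least `ratio` times its long side."""
    x1, y1, x2, y2 = (float(v) for v in box[:4])
    w, h = x2 - x1, y2 - y1
    if w <= 0 or h <= 0:
        return False
    return min(w, h) / max(w, h) >= ratio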
Example #26
        box_delta_right = box_delta_right.view(1, -1, 4 * len(kitti_classes))
        dim_orien = dim_orien.view(1, -1, 5 * len(kitti_classes))
        kpts_delta = kpts_delta.view(1, -1, 1)
        left_delta = left_delta.view(1, -1, 1)
        right_delta = right_delta.view(1, -1, 1)
        max_prob = max_prob.view(1, -1, 1)

        pred_boxes_left = bbox_transform_inv(boxes_left, box_delta_left, 1)
        pred_boxes_right = bbox_transform_inv(boxes_right, box_delta_right, 1)
        pred_kpts, kpts_type = kpts_transform_inv(boxes_left, kpts_delta,
                                                  cfg.KPTS_GRID)
        pred_left = border_transform_inv(boxes_left, left_delta, cfg.KPTS_GRID)
        pred_right = border_transform_inv(boxes_left, right_delta,
                                          cfg.KPTS_GRID)

        pred_boxes_left = clip_boxes(pred_boxes_left, im_info.data, 1)
        pred_boxes_right = clip_boxes(pred_boxes_right, im_info.data, 1)

        pred_boxes_left /= im_info[0, 2].data
        pred_boxes_right /= im_info[0, 2].data
        pred_kpts /= im_info[0, 2].data
        pred_left /= im_info[0, 2].data
        pred_right /= im_info[0, 2].data

        scores = scores.squeeze()
        pred_boxes_left = pred_boxes_left.squeeze()
        pred_boxes_right = pred_boxes_right.squeeze()

        pred_kpts = torch.cat(
            (pred_kpts, kpts_type, max_prob, pred_left, pred_right), 2)
        pred_kpts = pred_kpts.squeeze()
Example #27
def main(finput, foutput, fmodel, fclass):
    """ Predict images from `fileinput` using `model` and saves predictions in `output`. """
    if not foutput:
        foutput = join(dirname(finput), 'predictions.csv')
    fout = open(foutput, 'w')
    fout.write('Frame;xmin;ymin;xmax;ymax;id_class;score\n')
    check_files([finput, fmodel, fclass])
    pascal_classes = load_classes(fclass)
    dic_classes = load_classes(fclass, dic=True, inverse=True)

    load_name = fmodel
    # initialize the network here.
    #if args.net == 'vgg16':
    #fasterRCNN = vgg16(pascal_classes, pretrained=False, class_agnostic=args.class_agnostic)
    fasterRCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=False)
    fasterRCNN.create_architecture()

    logger.info("Load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    logger.info('loaded model successfully')

    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    im_data = im_data.cuda()
    im_info = im_info.cuda()
    num_boxes = num_boxes.cuda()
    gt_boxes = gt_boxes.cuda()

    # make variable
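    # (Variable(..., volatile=True) is the pre-0.4 PyTorch idiom; torch.no_grad() replaces it)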
    im_data = Variable(im_data, volatile=True)
    im_info = Variable(im_info, volatile=True)
    num_boxes = Variable(num_boxes, volatile=True)
    gt_boxes = Variable(gt_boxes, volatile=True)

    fasterRCNN.cuda()
    fasterRCNN.eval()

    max_per_image = 100
    thresh = 0.05

    imglist = load_image_paths(finput)
    num_images = len(imglist)
    logger.info('Loaded Photo: {} images.'.format(num_images))

    pb = pbar.ProgressBar(num_images)
    for im_file in imglist:
        im_in = np.array(imread(im_file))
        # rgb -> bgr
        im = im_in[:,:,::-1]

        im_blob, im_scales = _get_image_blob(im)
        im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.data.resize_(1, 1, 5).zero_()
        num_boxes.data.resize_(1).zero_()

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        # Optionally normalize targets by a precomputed mean and stdev
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        im2show = np.copy(im)
        coordinates = []
        for j in xrange(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:,j]>thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:,j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                #im2show = vis_detections(im2show, pascal_classes[j], cls_dets.cpu().numpy(), 0.5)
                class_name = pascal_classes[j]
                write_detections(fout, basename(im_file)[:-4], dic_classes[class_name], cls_dets.cpu().numpy(), 0.5)
            
        #result_path = os.path.join('/home/roger/', basename(im_file)[:-4] + "_det.jpg")
        #logger.info('Saved file: {}'.format(result_path))
        #cv2.imwrite(result_path, im2show)
        pb.update()
    fout.close()
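`write_detections` is imported from elsewhere in that project; a plausible sketch matching the header written above (`Frame;xmin;ymin;xmax;ymax;id_class;score`) and the call site, again an assumption rather than the project's actual code:

def write_detections(fout, frame, id_class, dets, thresh):
    """Append one CSV line per detection whose score exceeds thresh.
    dets rows are [x1, y1, x2, y2, score]."""
    for x1, y1, x2, y2, score in dets:
        if score > thresh:
            fout.write("%s;%.1f;%.1f;%.1f;%.1f;%s;%.4f\n"
                       % (frame, x1, y1, x2, y2, id_class, score))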
Example #28
def evaluator(model, args, evl_rec=False):

    fasterRCNN = model
    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    cfg.TRAIN.USE_FLIPPED = False

    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    if args.cuda:
        fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100

    vis = False

    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)

    # These models are pytorch pretrained with RGB channel
    rgb = args.net in ('res18', 'res34', 'inception')

    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
               imdb.num_classes, training=False, normalize = False, rgb=rgb)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)
    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    if evl_rec:
        true_positive, ground_truth = 0.0, 0.0
        recall = AverageMeter()

    for i in range(num_images):

        data = next(data_iter)
        im_data.data.resize_(data[0].size()).copy_(data[0])
        im_info.data.resize_(data[1].size()).copy_(data[1])
        gt_boxes.data.resize_(data[2].size()).copy_(data[2])
        num_boxes.data.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                          + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                          + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        if evl_rec:
            # evaluate RPN recall only
            boxes_per_img = boxes.squeeze().cpu().numpy() / data[1][0][2].item()
            #pdb.set_trace()
            #TP, GT = evaluate_final_recall(pred_boxes.squeeze().cpu().numpy(), i, imdb, thr=0.5)
            TP, GT = evaluate_recall(boxes_per_img, i, imdb, thr=0.5)
            recall.update(TP, GT)

            sys.stdout.write('TP/GT: {}/{} | Recall: {:.3f} \r'.format(
                TP, GT, recall.avg))
            sys.stdout.flush()
            continue

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
          .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            #cv2.imshow('test', im2show)
            #cv2.waitKey(0)

    if evl_rec:
        print('\r\nThe average rpn recall is: {:.4f}'.format(recall.avg))
        return recall.avg

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    mAP = imdb.evaluate_detections(all_boxes, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return mAP
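`evaluator` tracks RPN recall with an `AverageMeter`; its definition is not in this listing, but the usage `recall.update(TP, GT)` / `recall.avg` implies a running-ratio meter. A minimal sketch under that assumption:

class AverageMeter(object):
    """Running ratio: update(tp, gt) accumulates totals, avg = sum(tp)/sum(gt)."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0.0

    def update(self, val, n=1):
        self.sum += val
        self.count += n

    @property
    def avg(self):
        return self.sum / self.count if self.count else 0.0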
Example #29
def stomata_count(fasterRCNN, image, cuda, pascal_classes):
    if cuda:
        cfg.USE_GPU_NMS = True
    im_in = image
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)

    blobs, im_scales = _get_image_blob(im_in)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    # initialize the tensor holder here.
    im_data = torch.FloatTensor()
    im_info = torch.FloatTensor()
    num_boxes = torch.LongTensor()
    gt_boxes = torch.FloatTensor()

    # ship to cuda
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    class_agnostic = False
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if class_agnostic:
                if cuda:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if cuda:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    num_stomata = 0
    label_stomata = np.copy(image)
    for j in xrange(1, len(pascal_classes)):
        inds = torch.nonzero(scores[:, j] > 0.5).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            dets = cls_dets.cpu().numpy()
            label_stomata, num_stomata = vis_detections(label_stomata, pascal_classes[j], dets, 0.9)
    return num_stomata, label_stomata
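A hedged usage sketch for `stomata_count`, assuming a trained `fasterRCNN` has been built and loaded as in the other examples, and that the class list is background plus one stoma class:

import cv2

image = cv2.imread("leaf_micrograph.png")
classes = ("__background__", "stomata")  # assumed class list
count, labeled = stomata_count(fasterRCNN, image, cuda=True,
                               pascal_classes=classes)
print("stomata detected:", count)
cv2.imwrite("labeled.png", labeled)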
Example #30
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                   + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                   + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                   + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1,
                                                 4 * len(pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            tiled = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
            pred_boxes = tiled.cuda() if args.cuda > 0 else tiled

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im2show = np.copy(im)
        for j in xrange(1, len(pascal_classes)):
Example #31
def eval_one_dataloader(save_dir_test_out, dataloader_t, fasterRCNN, device, imdb, target_num=0,
                        class_agnostic=False, thresh=0.0, max_per_image=100, return_ap_class=False):

    save_name = save_dir_test_out + '_test_in_'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    data_iter = iter(dataloader_t)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    #fasterRCNN.training = False
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):

        data = next(data_iter)

        im_data = data[0].to(device)
        im_info = data[1].to(device)
        gt_boxes = data[2].to(device)
        num_boxes = data[3].to(device)
        with torch.no_grad():
            if isinstance(fasterRCNN, frcnn_htcn) or isinstance(fasterRCNN, frcnn_htcn_m):
                det_tic = time.time()
                rois , cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label, _, _, _, _ = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, target_num=target_num)
            elif isinstance(fasterRCNN, frcnn_saito):
                det_tic = time.time()
                rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label, _, _ = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            else:
                det_tic = time.time()
                rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        # d_pred = d_pred.data
        # path = data[4]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()  # [1, 300, 2] -> [300, 2]
        pred_boxes = pred_boxes.squeeze()  # [1, 300, 8] -> [300, 8]
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh, as_tuple=False).view(-1)  # [300]
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]  # [300]
                _, order = torch.sort(cls_scores, 0, True)
                if class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]  # [300, 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)  # [300, 5]
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                # keep = nms(cls_dets, cfg.TEST.NMS)
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)  # [N, 1]
                cls_dets = cls_dets[keep.view(-1).long()]  # [N, 5]

                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(1, imdb.num_classes)])  # [M,]
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        # sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
        #                  .format(i + 1, num_images, detect_time, nms_time))
        # sys.stdout.flush()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    mAP, ap_per_class = imdb.evaluate_detections(all_boxes, output_dir)
    del scores
    del boxes
    del all_boxes
    del pred_boxes
    del rois
    del cls_prob
    del bbox_pred
    del rpn_loss_cls
    del rpn_loss_box
    del RCNN_loss_cls
    del RCNN_loss_bbox
    del rois_label


    if return_ap_class:
        return mAP, ap_per_class
    return mAP
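A hedged call sketch for `eval_one_dataloader`, assuming `imdb` and `dataloader_t` were built with `combined_roidb`/`roibatchLoader` as in the `evaluator` example above:

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mAP = eval_one_dataloader("out/exp1", dataloader_t, fasterRCNN.to(device),
                          device, imdb, class_agnostic=False,
                          thresh=0.0, max_per_image=100)
print("mAP: %.4f" % mAP)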