def im_detect(net, image):
    """Detect object classes in an image given object proposals.
    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """

    im_data, im_scales = net.get_image_blob(image)
    im_info = np.array([[im_data.shape[1], im_data.shape[2], im_scales[0]]],
                       dtype=np.float32)

    t0 = time.time()
    cls_prob, bbox_pred, rois = net(im_data, im_info)
    runtime = time.time() - t0

    scores = cls_prob.data.cpu().numpy()
    boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data.cpu().numpy()
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, image.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes, runtime
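
For orientation, a minimal usage sketch for this im_detect. The FasterRCNN wrapper class, checkpoint path, and demo image below are assumptions for illustration, not part of the snippet above.

import cv2
import torch

net = FasterRCNN()                                   # hypothetical model wrapper
net.load_state_dict(torch.load('faster_rcnn.pth'))   # hypothetical checkpoint path
net.cuda().eval()

image = cv2.imread('demo.jpg')                       # H x W x 3 BGR ndarray
scores, pred_boxes, runtime = im_detect(net, image)
print('got {} proposals in {:.3f}s'.format(scores.shape[0], runtime))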
    def interpret_faster_rcnn(self, cls_prob, bbox_pred, rois, im_info, nms=True, min_score=0.0, nms_thresh=0.3):
        scores = cls_prob.data.squeeze()
        # find class
        scores, inds = scores.max(1)
        keep = ((inds > 0) & (scores >= min_score)).nonzero().squeeze()
        scores, inds = scores[keep], inds[keep]

        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data.squeeze()[keep]
        boxes = rois.data.squeeze()[:, 1:5][keep]

        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()

        box_deltas = box_deltas.view(-1, 4 * self.n_classes)
        box_deltas = torch.cat([box_deltas[i, (inds[i] * 4): (inds[i] * 4 + 4)] \
                                for i in range(len(inds))], 0)
        box_deltas = box_deltas.view(-1, 4)
        boxes, box_deltas = boxes.unsqueeze(0), box_deltas.unsqueeze(0)
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        pred_boxes = pred_boxes.squeeze()
        pred_boxes /= im_info.data[0][2]
        # nms
        if nms and pred_boxes.size(0) > 0:
            pred_boxes, scores, inds = nms_detections(pred_boxes, scores, nms_thresh, inds=inds)
        pred_boxes = pred_boxes.cpu().numpy()
        scores = scores.cpu().numpy()
        inds = inds.cpu().numpy()

        return pred_boxes, scores, self.classes[inds]
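
The per-row Python loop above (torch.cat over box_deltas[i, inds[i]*4 : inds[i]*4+4]) can be replaced by a single torch.gather call. A vectorized sketch under the same shapes as the method above; this is an equivalent rewrite, not the repo's code:

import torch

def select_class_deltas(box_deltas, inds, n_classes):
    # box_deltas: (P, 4 * n_classes); inds: (P,) predicted class per proposal.
    deltas = box_deltas.view(-1, n_classes, 4)    # (P, n_classes, 4)
    idx = inds.view(-1, 1, 1).expand(-1, 1, 4)    # (P, 1, 4) class index per row
    return deltas.gather(1, idx).squeeze(1)       # (P, 4)

This returns the same (P, 4) tensor as the loop, without Python-level iteration.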
def im_detect(net_x, image_0, image_1):
    """Detect object classes in an image given object proposals.
    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """

    im_data_0, im_scales_0 = net_x.get_image_blob(image_0)
    im_data_1, im_scales_1 = net_x.get_image_blob(image_1)

    im_info = np.array(
        [[im_data_0.shape[1], im_data_0.shape[2], im_scales_0[0]]],
        dtype=np.float32)

    cls_prob_0, bbox_pred_0, rois = net_x(im_data_0, im_data_1, im_info)
    scores_0 = cls_prob_0.data.cpu().numpy()
    boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas_0 = bbox_pred_0.data.cpu().numpy()
        pred_boxes_0 = bbox_transform_inv(boxes, box_deltas_0)
        pred_boxes_0 = clip_boxes(pred_boxes_0, image_0.shape)

    else:
        print "bbox reg compulsory"
        exit(1)

    return scores_0, pred_boxes_0
Example 4
def im_detect(net, im_data, im_info):
    features, pooled_features, cls_score, cls_prob, bbox_pred, rois, score = net(
        im_data, im_info, gt_boxes=None)

    scores = cls_prob.data.cpu().numpy()
    # boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
    boxes = rois.data.cpu().numpy()[:, 1:5]
    # Apply bounding-box regression deltas
    box_deltas = bbox_pred.data.cpu().numpy()
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    # pred_boxes = clip_boxes(pred_boxes, im_info[0][:2] / im_info[0][2])
    pred_boxes = clip_boxes(pred_boxes, im_info[0][:2])

    return scores, pred_boxes, rois
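
clip_boxes is called throughout these snippets but never defined here. For reference, the canonical py-faster-rcnn version clamps every class's box columns into the image; a sketch for (R, 4*K) numpy boxes:

import numpy as np

def clip_boxes(boxes, im_shape):
    """Clip (R, 4*K) boxes to lie inside an image of shape (H, W, ...)."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)  # x1 >= 0
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)  # y1 >= 0
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)  # x2 < W
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)  # y2 < H
    return boxes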
    def predict_image(self, image, threshold, eval_mode=False):
        """
        Infer buildings for a single image.
        Inputs:
            image :: n x m x 3 ndarray - Should be in RGB format
        """

        if type(image) is str:
            image = cv2.imread(image)
        else:
            image = image[:, :, (2, 1, 0)]  # RGB -> BGR

        im_data, im_scales = self.model.get_image_blob(image)
        im_info = np.array(
            [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
            dtype=np.float32)

        t0 = time.time()
        cls_prob, bbox_pred, rois = self.model(im_data, im_info)
        runtime = time.time() - t0

        scores = cls_prob.data.cpu().numpy()
        boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data.cpu().numpy()
            pred_boxes = bbox_transform_inv(boxes, box_deltas)
            pred_boxes = clip_boxes(pred_boxes, image.shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        current = np.concatenate(
            [
                pred_boxes[:, 4:8],  # (skip the background class)
                np.expand_dims(scores[:, 1], 1)
            ],
            axis=1)

        suppressed = current[py_cpu_nms(current.astype(np.float32), 0.3)]
        suppressed = pandas.DataFrame(
            suppressed, columns=['x1', 'y1', 'x2', 'y2', 'score'])
        if eval_mode:
            return suppressed[
                suppressed['score'] >= threshold], suppressed, runtime
        else:
            return suppressed[suppressed['score'] >= threshold]
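
A hypothetical usage sketch for predict_image; `detector` is assumed to be an already-constructed instance of the class above, and the tile path is made up:

detections = detector.predict_image('tile.png', threshold=0.5)
print(detections.head())  # pandas DataFrame with columns x1, y1, x2, y2, score

# eval_mode=True additionally returns the unthresholded detections and the runtime:
kept, all_dets, runtime = detector.predict_image('tile.png', 0.5, eval_mode=True)
print('forward pass took {:.3f}s'.format(runtime))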
    def interpret_faster_rcnn(self,
                              cls_prob,
                              bbox_pred,
                              rois,
                              im_info,
                              im_shape,
                              nms=True,
                              clip=True,
                              min_score=0.0):
        # find class
        scores, inds = cls_prob.data.max(1)
        scores, inds = scores.cpu().numpy(), inds.cpu().numpy()

        keep = np.where((inds > 0) & (scores >= min_score))
        scores, inds = scores[keep], inds[keep]

        # Apply bounding-box regression deltas
        keep = keep[0]
        box_deltas = bbox_pred.data.cpu().numpy()[keep]
        box_deltas = np.asarray(
            [box_deltas[i, (inds[i] * 4):(inds[i] * 4 + 4)]
             for i in range(len(inds))],
            dtype=np.float64)
        boxes = rois.data.cpu().numpy()[keep, 1:5] / im_info[0][2]
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        if clip:
            pred_boxes = clip_boxes(pred_boxes, im_shape)

        # nms
        if nms and pred_boxes.shape[0] > 0:
            pred_boxes, scores, inds = nms_detections(pred_boxes,
                                                      scores,
                                                      0.3,
                                                      inds=inds)

        return pred_boxes, scores, self.classes[inds]
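
The numpy list comprehension above can also be vectorized with fancy indexing; a sketch under the same shapes (box_deltas already indexed by keep):

import numpy as np

def select_class_deltas_np(box_deltas, inds):
    # box_deltas: (P, 4 * n_classes); inds: (P,) class index per proposal.
    deltas = box_deltas.reshape(len(inds), -1, 4)   # (P, n_classes, 4)
    return deltas[np.arange(len(inds)), inds]       # (P, 4)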
Example 7
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   cfg_key,
                   _feat_stride,
                   anchor_scales,
                   anchor_ratios,
                   is_relationship=False):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    # layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(scales=anchor_scales, ratios=anchor_ratios)
    _num_anchors = _anchors.shape[0]
    # don't need transpose for pytorch
    # rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    # rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    # rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1])
    # rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
    # cfg_key = 'TEST'
    if is_relationship:
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N_REGION
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N_REGION
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH_REGION
        min_size = cfg[cfg_key].RPN_MIN_SIZE_REGION
    else:
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    # im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image

    if cfg.TEST.RPN_DROPOUT_BOXES_RUNOFF_IMAGE:
        _allowed_border = 16
        inds_inside = np.where(
            (proposals[:, 0] >= -_allowed_border)
            & (proposals[:, 1] >= -_allowed_border)
            & (proposals[:, 2] < im_info[1] + _allowed_border) &  # width
            (proposals[:, 3] < im_info[0] + _allowed_border)  # height
        )[0]
        proposals = proposals[inds_inside, :]

    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 2000)
    # 8. return the top proposals (-> RoIs top)
    # print 'proposals', proposals
    # print 'scores', scores
    keep = nms(np.hstack((proposals, scores)).astype(np.float32), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    # print(blob.shape)

    return blob, scores.reshape(-1)
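
To make the shift/anchor broadcast concrete, a tiny self-contained example with toy values (one hand-made 16x16 anchor and a 2x2 feature map; generate_anchors produces different base anchors):

import numpy as np

_anchors = np.array([[-8, -8, 8, 8]])   # A = 1 toy anchor centered at the origin
feat_stride, height, width = 16, 2, 2

shift_x, shift_y = np.meshgrid(np.arange(width) * feat_stride,
                               np.arange(height) * feat_stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # (K, 4), K = 4

anchors = (_anchors.reshape((1, 1, 4)) +
           shifts.reshape((1, 4, 4)).transpose((1, 0, 2))).reshape((-1, 4))
print(anchors)
# [[-8 -8  8  8]
#  [ 8 -8 24  8]
#  [-8  8  8 24]
#  [ 8  8 24 24]]  -- one anchor per feature-map cell, stepped by the stride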
Example 8
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride=[16, ],
                   anchor_scales=[4, 8, 16, 32]):
    """
    Parameters
    ----------
    rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg
                         NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN
    im_info: a list of [image_height, image_width, scale_ratios]
    cfg_key: 'TRAIN' or 'TEST'
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)

    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    # rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape,[0,3,1,2]) #-> (1 , 2xA, H , W)
    # rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,1,2])              # -> (1 , Ax4, H , W)

    # rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1])
    # rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
    # cfg_key = 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    # im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # # remove irregular boxes, too fat too tall
    # keep = _filter_irregular_boxes(proposals)
    # proposals = proposals[keep, :]
    # scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
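
Neither _filter_boxes nor bbox_transform_inv is defined in these snippets. The canonical py-faster-rcnn implementations look roughly like this (a sketch; the original also guards against empty inputs):

import numpy as np

def _filter_boxes(boxes, min_size):
    """Keep boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]

def bbox_transform_inv(boxes, deltas):
    """Apply (dx, dy, dw, dh) deltas to (x1, y1, x2, y2) boxes."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy = deltas[:, 0::4], deltas[:, 1::4]
    dw, dh = deltas[:, 2::4], deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes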
Example 9
            det_tic = t.tic()
            cls_prob, bbox_pred, rois = net(im_data, im_info, gt_boxes,
                                            num_boxes)

            scores = cls_prob.data
            box_deltas = bbox_pred.data
            boxes = rois.data[:, :, 1:5]
            del cls_prob, bbox_pred, rois

            if cfg.TEST.BBOX_REG:
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

            else:
                # scores.shape[1] is (cfg)BATCH_SIZE = P
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            pred_boxes /= data[1][0][2]

            # P x n_classes
            scores = scores.squeeze()
            # P x n_classes*4
            pred_boxes = pred_boxes.squeeze()
            det_toc = t.tic()
            detect_time = det_toc - det_tic
            misc_tic = t.tic()
Example 10
    def interpret_faster_rcnn(self,
                              cls_prob,
                              bbox_pred,
                              rois,
                              im_info,
                              im_shape,
                              nms=True,
                              clip=True,
                              min_score=0.0):
        """
        1. Filter proposals with (proposal_max_class_not_background && proposal_max_class_score>=thresh).
        2. Combine bbox_pred(p', 4*21_cls) and rois(p', 5) to regression bbox(p', 4*21_cls).
        3. Clip bbox.
        4. Use nms to filter overlap bbox.

        :param cls_prob:    (proposals, 21_cls)     tensor
        :param bbox_pred:   (proposals, 4*21_cls)   tensor
        :param rois:        (proposals, 5)          tensor
        :param im_info:
        :param im_shape:
        :param nms:
        :param clip:
        :param min_score:
        :return:    pred_boxes (p", 4)
                    scores (p",)
                    classes_string (p",)
        """

        # ============================= filter proposals =============================
        # find the max score class
        # (proposals,)  numpy
        # (proposals,)  numpy
        scores, inds = cls_prob.data.max(1)
        scores, inds = scores.cpu().numpy(), inds.cpu().numpy()

        # keep foreground proposals with score >= min_score.
        # ([index, index, ...], )
        keep = np.where((inds > 0) & (scores >= min_score))
        # p'=len(keep[0])
        # (p', ) numpy
        # (p', ) numpy
        scores, inds = scores[keep], inds[keep]

        # =================== Apply bounding-box regression deltas ======================================
        keep = keep[0]
        box_deltas = bbox_pred.data.cpu().numpy()[keep]
        box_deltas = np.asarray(
            [box_deltas[i, (inds[i] * 4):(inds[i] * 4 + 4)]
             for i in range(len(inds))],
            dtype=np.float64)
        boxes = rois.data.cpu().numpy()[keep, 1:5] / im_info[0][2]
        # (p', 4)
        pred_boxes = bbox_transform_inv(boxes, box_deltas)

        # =================================== clip and nms =========================================
        if clip:
            pred_boxes = clip_boxes(pred_boxes, im_shape)

        # (p", 4) numpy
        # (p",)   numpy
        # (p",)   numpy
        if nms and pred_boxes.shape[0] > 0:
            pred_boxes, scores, inds = nms_detections(pred_boxes,
                                                      scores,
                                                      0.3,
                                                      inds=inds)

        return pred_boxes, scores, self.classes[inds]