Example #1
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # blobs['data'] now holds the image data and blobs['rois'] the proposals.
    # The placeholder below is filled in following the pattern in Example #2:
    # reshape the network inputs, run a forward pass, then read the softmax output.

    # reshape network inputs to match blobs['data'].shape and blobs['rois'].shape
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward with blobs['data'] and blobs['rois']
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False),
                      'rois': blobs['rois'].astype(np.float32, copy=False)}
    blobs_out = net.forward(**forward_kwargs)

    # use softmax estimated probabilities (net output)
    scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
Example #2
def im_detect(net, im, boxes):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    # use softmax estimated probabilities
    scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
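Both examples above (and most of the ones below) import bbox_transform_inv and clip_boxes without showing them. For reference, a minimal sketch consistent with the standard py-faster-rcnn bbox_transform.py helpers looks like this (the examples import their own copies; this is only a reading aid):

import numpy as np

def bbox_transform_inv(boxes, deltas):
    # Recover predicted boxes from proposal/anchor boxes and regression deltas
    # (dx, dy scale the center shift; dw, dh are log-space size changes).
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy = deltas[:, 0::4], deltas[:, 1::4]
    dw, dh = deltas[:, 2::4], deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes

def clip_boxes(boxes, im_shape):
    # Clip boxes to the image extent; im_shape is (height, width, ...).
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes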
Example #3
def compute_rois_offset(rois, offset, im_info=None):
    """Compute bounding-box offset for region of interests"""

    
    assert rois.shape[1] == 4
    assert offset.shape[1] == 4
    
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev -- reverse the transformation
        offset_unnorm = offset * np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS) + np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
    else:
        offset_unnorm = offset.copy()
    rois_offset = bbox_transform_inv(rois, offset_unnorm)
    if im_info is not None:
        rois_offset = clip_boxes(rois_offset, im_info[:2])
    return rois_offset
    def forward(self, bottom, top):
        # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
        # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
        all_rois = bottom[0].data
        aaa = all_rois[:]
        # GT boxes (x1, y1, x2, y2, label)
        # TODO(rbg): it's annoying that sometimes I have extra info before
        # and other times after box coordinates -- normalize to one format
        gt_boxes = bottom[1].data
        im = bottom[2].data
        # Include ground-truth boxes in the set of candidate rois
        zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1]))))

        num_images = 1
        rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
        fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)

        rois, labels, bbox_targets, bbox_weights, layer_indexs = _sample_rois(
            all_rois,
            gt_boxes,
            fg_rois_per_image,
            rois_per_image,
            self._num_classes,
            sample_type='fpn',
            k0=4)
        vis = False
        if vis:
            ind = np.where(labels != 0)[0]
            im_shape = im.shape
            means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
                            (21, 1)).ravel()
            stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS),
                           (21, 1)).ravel()
            bbox_targets = bbox_targets * stds + means

            pred_boxes = bbox_transform_inv(rois[:, 1:], bbox_targets)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
            l = labels[ind]
            ro = rois[ind, 1:]
            b = bbox_targets[ind, :]
            p = pred_boxes[ind, :] * bbox_weights[ind, :]
            r = []
            for i in range(p.shape[0]):
                r.append(p[i, l[i] * 4:l[i] * 4 + 4])
            r_ = np.vstack(r)

            #  Optionally normalize targets by a precomputed mean and stdev

            vis_all_detection(im, aaa[:, 1:], l, 1)

        labels_all = []
        bbox_targets_all = []
        bbox_weights_all = []
        rois_all = []
        for i in range(4):
            index = (layer_indexs == (i + 2))
            num_index = sum(index)
            if num_index == 0:
                rois_ = np.zeros((1 * 4, 5), dtype=rois.dtype)
                labels_ = np.ones((1 * 4, ), dtype=labels.dtype) * -1
                bbox_targets_ = np.zeros((1 * 4, self._num_classes * 4),
                                         dtype=bbox_targets.dtype)
                bbox_weights_ = np.zeros((1 * 4, self._num_classes * 4),
                                         dtype=bbox_weights.dtype)
            else:
                rois_ = rois[index, :]
                labels_ = labels[index]
                bbox_weights_ = bbox_weights[index, :]
                bbox_targets_ = bbox_targets[index, :]

            rois_all.append(rois_)
            labels_all.append(labels_)
            bbox_targets_all.append(bbox_targets_)
            bbox_weights_all.append(bbox_weights_)

        rois_p2 = rois_all[0]
        rois_p3 = rois_all[1]
        rois_p4 = rois_all[2]
        rois_p5 = rois_all[3]
        labels_all = np.concatenate(labels_all)
        bbox_targets_all = np.concatenate(bbox_targets_all, axis=0)
        bbox_weights_all = np.concatenate(bbox_weights_all, axis=0)
        #  print bbox_targets_all.shape,bbox_weights_all.shape, rois_p2.shape,rois_p3.shape,rois_p4.shape,rois_p5.shape,labels_all.shape

        top[0].reshape(*rois_p2.shape)
        top[0].data[...] = rois_p2

        top[1].reshape(*rois_p3.shape)
        top[1].data[...] = rois_p3

        top[2].reshape(*rois_p4.shape)
        top[2].data[...] = rois_p4

        top[3].reshape(*rois_p5.shape)
        top[3].data[...] = rois_p5

        # classification labels
        top[4].reshape(*labels_all.shape)
        top[4].data[...] = labels_all

        # bbox_targets
        top[5].reshape(*bbox_targets_all.shape)
        top[5].data[...] = bbox_targets_all

        # bbox_inside_weights
        top[6].reshape(*bbox_weights_all.shape)
        top[6].data[...] = bbox_weights_all

        # bbox_outside_weights
        top[7].reshape(*bbox_weights_all.shape)
        top[7].data[...] = np.array(bbox_weights_all > 0).astype(np.float32)
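The forward pass above splits the sampled ROIs across four pyramid levels via layer_indexs in {2..5} (note the k0=4 argument to _sample_rois). This matches the usual FPN level-assignment rule; a minimal sketch of that rule, assuming _sample_rois applies the standard formula from the Feature Pyramid Networks paper, is:

import numpy as np

def assign_pyramid_level(rois, k0=4, canonical_size=224.0):
    # FPN assignment: k = floor(k0 + log2(sqrt(w * h) / 224)),
    # clipped to the available levels P2..P5.
    # rois are (batch_ind, x1, y1, x2, y2).
    w = rois[:, 3] - rois[:, 1] + 1.0
    h = rois[:, 4] - rois[:, 2] + 1.0
    k = np.floor(k0 + np.log2(np.sqrt(w * h) / canonical_size + 1e-6))
    return np.clip(k, 2, 5).astype(np.int32)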
Example #5
def run_batch(sess, net, inputs, outputs, ims, batch_boxes, batch_relations,
              bbox_reg, multi_iter):
    mi = multi_iter[-1]
    results = list()

    #t_start = time.time()
    # Convert an image and RoIs within that image into network inputs.
    im_scaled_list, rois_list = list(), list()
    for im, boxes in zip(ims, batch_boxes):
        im_scaled, im_scale_factors = _get_image_blob(im)
        rois = _get_rois_blob(boxes, im_scale_factors)
        im_scaled_list.append(im_scaled[0])
        rois_list.append(rois)

    conv_outs = sess.run(net.layers['conv_out'],
                         feed_dict={
                             inputs['ims']: np.stack(im_scaled_list, axis=0),
                             net.keep_prob: 1,
                         })
    #print 'VGG takes', time.time() - t_start

    #t_start = time.time()
    for i in range(len(rois_list)):
        conv_out, rois, relations = np.expand_dims(
            conv_outs[i], axis=0), rois_list[i], batch_relations[i]
        relations = np.array(relations,
                             dtype=np.int32)  # all possible combinations
        num_roi = rois.shape[0]
        num_rel = relations.shape[0]

        feed_dict = {
            net.layers['conv_out']:
            conv_out,
            inputs['rois']:
            rois,
            inputs['relations']:
            relations,
            inputs['rel_rois']:
            data_utils.compute_rel_rois(num_rel, rois, relations),
            net.keep_prob:
            1
        }

        inputs_feed = data_utils.create_graph_data(num_roi, num_rel, relations)
        for k in inputs_feed:
            feed_dict[inputs[k]] = inputs_feed[k]

        ops_value = sess.run(outputs, feed_dict=feed_dict)

        rel_probs = None
        rel_probs_flat = ops_value['rel_probs'][mi]
        rel_probs = np.zeros([num_roi, num_roi, rel_probs_flat.shape[1]])
        for i, rel in enumerate(relations):
            rel_probs[rel[0], rel[1], :] = rel_probs_flat[i, :]

        cls_probs = ops_value['cls_probs'][mi]

        if bbox_reg:
            # Apply bounding-box regression deltas
            pred_boxes = bbox_transform_inv(boxes,
                                            ops_value['bbox_deltas'][mi])
            pred_boxes = clip_boxes(pred_boxes, im.shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, cls_probs.shape[1]))

        results.append({
            'scores': cls_probs,
            'boxes': pred_boxes,
            'relations': rel_probs
        })

    #print 'Scene takes', time.time() - t_start

    return results
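data_utils.compute_rel_rois is not shown in this example. A common choice for relation ROIs in scene-graph models is the union bounding box of each subject/object pair, so a plausible sketch of that helper is below (this is an assumption about its behavior, not its actual code):

import numpy as np

def compute_rel_rois(num_rel, rois, relations):
    # For each (subject, object) pair, take the union box of the two ROIs;
    # keep the batch index (column 0) from the subject ROI.
    rel_rois = np.zeros((num_rel, 5), dtype=np.float32)
    for i, (sub, obj) in enumerate(relations):
        sub_roi, obj_roi = rois[sub], rois[obj]
        rel_rois[i, 0] = sub_roi[0]
        rel_rois[i, 1] = min(sub_roi[1], obj_roi[1])  # x1
        rel_rois[i, 2] = min(sub_roi[2], obj_roi[2])  # y1
        rel_rois[i, 3] = max(sub_roi[3], obj_roi[3])  # x2
        rel_rois[i, 4] = max(sub_roi[4], obj_roi[4])  # y2
    return rel_rois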
Example #6
    def visualize(self, net, filename):

        blobs_out = net.forward()

        try:
            im = net.blobs['data'].data[0].copy()
        except:
            im = net.blobs['image'].data[0].copy()

        im = im.transpose((1, 2, 0))  # ch x h x w -> h x w x ch
        im += cfg.PIXEL_MEANS
        im = im[:, :, (2, 1, 0)]

        im_scale = float(cfg.TEST.SCALES[0]) / float(min(im.shape[:2]))

        if cfg.TEST.HAS_RPN:
            # assert len(im_scale) == 1, "Only single-image batch implemented"
            rois = net.blobs['rois'].data.copy()
            # unscale back to raw image space
            boxes = rois[:, 1:5] / im_scale
        elif cfg.DEDUP_BOXES > 0:
            raise NotImplementedError
            # When mapping from image ROIs to feature map ROIs, there's some aliasing
            # (some distinct image ROIs get mapped to the same feature ROI).
            # Here, we identify duplicate feature ROIs, so we only compute features
            # on the unique subset.
            # v = np.array([1, 1e3, 1e6, 1e9, 1e12])
            # hashes = np.round(net.blobs['rois'].data.copy() * cfg.DEDUP_BOXES).dot(v)
            # _, index, inv_index = np.unique(hashes, return_index=True,
            #                                 return_inverse=True)
            # rois = net.blobs['rois'][index, :]
            # boxes = boxes[index, :]

        # use softmax estimated probabilities
        scores = net.blobs['cls_score'].data.copy()
        scores = np.exp(scores)
        scores_sum = np.sum(scores, axis=1)[:, np.newaxis]
        scores /= scores_sum

        # scores = scores.max(axis=1)
        # scores = blobs_out['cls_score']

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            try:
                box_deltas = net.blobs['bbox_pred'].data.copy()
            except:
                box_deltas = net.blobs['bbox_pred_depth'].data.copy()

            box_deltas = box_deltas * self.bbox_stds + self.bbox_means
            # box_deltas = blobs_out['bbox_pred']
            pred_boxes = bbox_transform_inv(boxes, box_deltas)
            pred_boxes = clip_boxes(pred_boxes, im.shape)
        else:
            print '[Warning] Bounding-box regression is not applied at test phase.'
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        #     # Map scores and predictions back to the original set of boxes
        #     scores = scores[inv_index, :]
        #     pred_boxes = pred_boxes[inv_index, :]

        # Post-processing
        imdb = self.imdb
        thresh = 0.8

        clrs = sns.color_palette("Set2", imdb.num_classes)
        plt.figure(1, figsize=(15, 10))
        plt.clf()
        plt.imshow(im.astype(np.uint8))
        plt.gca().axis('off')

        # skip j = 0, because it's the background class
        n_det = 0
        for j in xrange(1, imdb.num_classes - 1):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = pred_boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)

            # CPU NMS is much faster than GPU NMS when the number of boxes
            # is relative small (e.g., < 10k)
            keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
            cls_dets = cls_dets[keep, :]
            n_det += len(inds)
            self.vis_detections(imdb.classes[j], cls_dets, clrs[j])

        plt.title('%d objects are detected.' % n_det)
        plt.gca().legend()
        plt.savefig(filename)
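The manual softmax in visualize (np.exp followed by row normalization) can overflow for large logits. A numerically stable variant that yields the same probabilities:

import numpy as np

def stable_softmax(logits):
    # Subtract the per-row max before exponentiating; this leaves the
    # probabilities unchanged but avoids overflow in np.exp.
    shifted = logits - logits.max(axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / exp.sum(axis=1, keepdims=True)

# e.g. scores = stable_softmax(net.blobs['cls_score'].data.copy())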
Example #7
def im_detect_ori(sess, net, im, boxes=None):
    """Detect object classes in an image given object proposals, along with the stroke orientation and facial area.
    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals
    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
        strokes (ndarray): R x 3 array of stroke orientation class prob
        areas (ndarray): R x 9 array of facial area prob
    """

    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)
    # forward pass
    if cfg.TEST.HAS_RPN:
        feed_dict = {
            net.data: blobs['data'],
            net.im_info: blobs['im_info'],
            net.keep_prob: 1.0
        }
    else:
        feed_dict = {
            net.data: blobs['data'],
            net.rois: blobs['rois'],
            net.keep_prob: 1.0
        }

    cls_score, cls_prob, bbox_pred, rois, eye, smile = sess.run(
        [
            net.get_output('cls_score'),
            net.get_output('cls_prob'),
            net.get_output('bbox_pred'),
            net.get_output('rois'),
            net.get_output('eye_prob'),
            net.get_output('smile_prob')
        ],
        feed_dict=feed_dict)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = cls_score
    else:
        # use softmax estimated probabilities
        scores = cls_prob

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, eye, smile
Example #8
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = self.phase # either 'TRAIN' or 'TEST'
        if cfg_key == 0:
          cfg_ = cfg.TRAIN
        else:
          cfg_ = cfg.TEST
        pre_nms_topN  = cfg_.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg_.RPN_POST_NMS_TOP_N
        nms_thresh    = cfg_.RPN_NMS_THRESH
        min_size      = cfg_.RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
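This proposal layer (and the ones further down) calls _filter_boxes without defining it; in the reference RPN code it simply drops proposals whose width or height falls below min_size after scaling to the input resolution. A minimal version along those lines:

import numpy as np

def _filter_boxes(boxes, min_size):
    # Keep proposals whose width and height are both at least min_size.
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep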
Example #9
def im_detect(feature_net, embed_net, recurrent_net, im, boxes=None, use_box_at = -1):
    """Detect object classes in an image given object proposals.

    Arguments:
        feature_net (caffe.Net): CNN model for extracting features
        embed_net (caffe.Net): A word embedding layer
        recurrent_net (caffe.Net): Recurrent model for generating captions and locations
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        use_box_at (int32): Use predicted box at a given timestep, default to the last one (use_box_at=-1)
    Returns:
        scores (ndarray): R x 1 array of object class scores 
        pred_boxes (ndarray)): R x 4 array of predicted bounding boxes
        captions (list): length R list of list of word tokens (captions)
    """

    # for bbox unnormalization
    bbox_mean = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape((1,4))
    bbox_stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS).reshape((1,4))

    blobs, im_scales = _get_blobs(im, boxes)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
        dtype=np.float32)

    # reshape network inputs
    feature_net.blobs['data'].reshape(*(blobs['data'].shape))
    feature_net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    
    feature_net.forward(data = im_blob, im_info = blobs['im_info'])
    region_features = feature_net.blobs['region_features'].data.copy()
    rois = feature_net.blobs['rois'].data.copy()
    # detection scores
    scores = feature_net.blobs['cls_probs'].data[:,1].copy()
    # proposal boxes
    boxes = rois[:, 1:5] / im_scales[0]
    proposal_n = rois.shape[0]
    feat_args = {'input_features': region_features}
    opt_args = {}
    # global feature as an optional input: context
    if 'global_features' in feature_net.blobs and 'global_features' in recurrent_net.blobs:
        #changed according to the global feature shape
        opt_args['global_features'] = np.tile(feature_net.blobs['global_features'].data, (1,proposal_n,1)) 
    
    bbox_pred_direct = ('bbox_pred' in feature_net.blobs)

    if bbox_pred_direct:
        # do greedy search
        captions, _, logprobs = _greedy_search(embed_net, recurrent_net, feat_args, opt_args, proposal_n, pred_bbox = False)
        #bbox target unnormalization
        box_offsets = feature_net.blobs['bbox_pred'].data
    else:

        captions, box_offsets, logprobs = _greedy_search(embed_net, recurrent_net, feat_args, opt_args, proposal_n, \
            pred_bbox = True, use_box_at = use_box_at)

    #bbox target unnormalization
    box_deltas = box_offsets * bbox_stds + bbox_mean

    #do the transformation
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.shape)
    
    return scores, pred_boxes, captions
    def __call__(self, x, bbox_deltas, im_info):
        if isinstance(bbox_deltas.data, chainer.cuda.ndarray):
            bbox_deltas = chainer.cuda.to_cpu(bbox_deltas.data)
        if isinstance(x.data, chainer.cuda.ndarray):
            x = chainer.cuda.to_cpu(x.data)

        assert x.shape[0] == 1, 'Only single item batches are supported'

        if self.train:
            pre_nms_topN = self.TRAIN_RPN_PRE_NMS_TOP_N
            post_nms_topN = self.TRAIN_RPN_POST_NMS_TOP_N
            nms_thresh = self.TRAIN_RPN_NMS_THRESH
            min_size = self.TRAIN_RPN_MIN_SIZE
        else:
            pre_nms_topN = self.TEST_RPN_PRE_NMS_TOP_N
            post_nms_topN = self.TEST_RPN_POST_NMS_TOP_N
            nms_thresh = self.TEST_RPN_NMS_THRESH
            min_size = self.TEST_RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = x[:, self.num_anchors:, :, :]

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self.feat_stride
        shift_y = np.arange(0, height) * self.feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self.num_anchors
        K = shifts.shape[0]
        anchors = self.anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack(
            (batch_inds, proposals.astype(np.float32, copy=False)))
        blob = chainer.cuda.cupy.asarray(blob, np.float32)
        rois = chainer.Variable(blob, volatile=not self.train)

        return rois
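The anchors-plus-shifts broadcast used in this Chainer layer (and in the Caffe proposal layer of Example #8) is easy to verify on a tiny feature map. The sketch below uses made-up anchors and a 2x2 map to show how (1, A, 4) + (K, 1, 4) expands to K*A shifted anchors:

import numpy as np

feat_stride = 16
base_anchors = np.array([[-8, -8, 8, 8],      # toy square anchor
                         [-16, -4, 16, 4]])   # toy wide anchor
A = base_anchors.shape[0]

height, width = 2, 2                          # toy feature-map size
shift_x = np.arange(width) * feat_stride
shift_y = np.arange(height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()
K = shifts.shape[0]                           # K = height * width = 4

# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4): every anchor at every cell.
anchors = base_anchors.reshape((1, A, 4)) + \
          shifts.reshape((1, K, 4)).transpose((1, 0, 2))
anchors = anchors.reshape((K * A, 4))
print(anchors.shape)                          # (8, 4)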
Example #11
def _im_detect(net, im, roidb, blob_names=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        roidb (an roidb item): to provide gt_boxes if necessary
        blob_names (list of str): list of feature blob names to be extracted

    Returns:
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        features (dict of ndarray): {blob name: R x D array of features}
    """
    im_blob, im_scales = get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    blobs = {
        'data': im_blob,
        'im_info': np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32),
    }

    if 'gt_boxes' in net.blobs:
        # Supply gt_boxes as input. Used to get pid_labels for proposals.
        blobs['gt_boxes'] = get_gt_boxes_blob(
            roidb['boxes'], roidb['gt_classes'], roidb['gt_pids'], im_scales)

    # reshape network inputs
    for k, v in blobs.iteritems():
        net.blobs[k].reshape(*(v.shape))

    # do forward
    forward_kwargs = {k: v.astype(np.float32, copy=False)
                      for k, v in blobs.iteritems()}
    blobs_out = net.forward(**forward_kwargs)

    # unscale rois back to raw image space
    rois = net.blobs['rois'].data.copy()
    boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # the first column of the pid_prob is the non-person box score
        scores = blobs_out['pid_prob'][:, 0]
        scores = scores[:, np.newaxis]
        scores = np.hstack([scores, 1. - scores])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        # As we no longer scale and shift the bbox_pred weights when snapshot,
        # we need to manually do this during test.
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS and \
                cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            num_classes = box_deltas.shape[1] // 4
            stds = np.tile(cfg.TRAIN.BBOX_NORMALIZE_STDS, num_classes)
            means = np.tile(cfg.TRAIN.BBOX_NORMALIZE_MEANS, num_classes)
            box_deltas = box_deltas * stds + means
        boxes = bbox_transform_inv(boxes, box_deltas)
        boxes = clip_boxes(boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        boxes = np.tile(boxes, (1, scores.shape[1]))

    features = {blob: net.blobs[blob].data.copy() for blob in blob_names} \
               if blob_names is not None else {}

    return boxes, scores, features
Example #12
def im_detect(net, im1, im2, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im1, im2, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data_visible']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data_visible'].reshape(*(blobs['data_visible'].shape))
    net.blobs['data_lwir'].reshape(*(blobs['data_lwir'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {
        'data_visible': blobs['data_visible'].astype(np.float32, copy=False),
        'data_lwir': blobs['data_lwir'].astype(np.float32, copy=False)
    }
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    ######################################## Draw RPN ########################################

    scores = net.blobs['rpn_scores'].data.copy()
    rois = net.blobs['rois'].data.copy()

    idx = np.where(scores > 0.9)

    if len(idx[0]) == 0:
        import pdb
        pdb.set_trace()

    proposals = rois[idx[0], 1:]

    img = net.blobs['data_visible'].data.copy()[0]
    img = img.transpose((1, 2, 0))
    img += cfg.PIXEL_MEANS[:, :, :3]
    img = img[:, :, (2, 1, 0)]

    plt.figure(11)
    plt.clf()
    plt.title('Proposals, score >= 0.9')
    ax = plt.gca()
    ax.imshow(img.astype(np.uint8))
    for pr in proposals:
        ax.add_patch(
            plt.Rectangle((pr[0], pr[1]),
                          pr[2] - pr[0],
                          pr[3] - pr[1],
                          fill=False,
                          edgecolor='r'))

    ###########################################################################################

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im1.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   cfg_key,
                   _feat_stride=[
                       16,
                   ],
                   anchor_scales=[8, 16, 32],
                   anchor_ratios=[0.5, 1, 2]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(ratios=anchor_ratios,
                                scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    #print('rpn_bbox_pred 1',rpn_bbox_pred)
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])
    #print('rpn_bbox_pred 2',rpn_bbox_pred)
    #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1])
    #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
    #cfg_key = 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    #print('bbox1',bbox_deltas)
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    #print('anchors',anchors)
    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    #print('bbox_deltas',bbox_deltas)

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    #print('proposals1',proposals)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])
    #print('proposals2',proposals)

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # remove_option = 1
    # if ('TEST' == cfg_key and remove_option in [1, 2]):
    #     # get rid of boxes that are completely inside other boxes
    #     # with options as to which one to get rid of
    #     # 1. always the one with lower scores, 2. always the one inside
    #     new_proposals = []
    #     removed_indices = set()
    #     num_props = proposals.shape[0]
    #     for i in range(num_props):
    #         if (i in removed_indices):
    #             continue
    #         bxA = proposals[i, :]
    #         for j in range(num_props):
    #             if ((j == i) or (j in removed_indices)):
    #                 continue
    #             bxB = proposals[j, :]
    #             if (bbox_contains(bxA, bxB)):
    #                 if ((1 == remove_option) and (scores[i] != scores[j])):
    #                     if (scores[i] > scores[j]):
    #                         removed_indices.add(j)
    #                     else:
    #                         removed_indices.add(i)
    #                 else: # remove_option == 2 or scores[i] == scores[j]
    #                     removed_indices.add(j)
    #     nr = len(removed_indices)
    #     if (nr > 0):
    #         new_proposals = sorted(set(range(num_props)) - removed_indices)
    #         proposals = proposals[new_proposals, :]
    #         scores = scores[new_proposals]
    #         # padding to make the total number of proposals == post_nms_topN
    #         proposals = np.vstack((proposals, [proposals[-1, :]] * nr))
    #         scores = np.vstack((scores, [scores[-1]] * nr))

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    # batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    # BUT we NOW (18-Sep-2017) abuse batch inds, and use it for carrying scores
    if ('TEST' == cfg_key):
        batch_inds = np.reshape(scores, [proposals.shape[0], 1])
    else:
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)

    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    if (DEBUG):
        print('blob shape: {0}'.format(blob.shape))
        print('proposal shape: {0}'.format(proposals.shape))
    return blob
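Because this proposal_layer stores scores in column 0 at TEST time (instead of the usual all-zero batch indices), a downstream consumer has to split them back out. A small sketch of that unpacking (variable names are illustrative):

# blob has shape (N, 5): at TEST time column 0 carries the score,
# columns 1:5 carry (x1, y1, x2, y2).
scores = blob[:, 0:1]
proposals = blob[:, 1:5]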
Example #14
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   cfg_key,
                   _feat_stride=[
                       16,
                   ],
                   anchor_scales=[8, 16, 32]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    if DEBUG:
        print "Proposal Layer number of anchors: {}".format(_num_anchors)

    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])
    if DEBUG:
        print "Proposal Layer rpn_cls_prob_reshape shape: {}".format(
            rpn_cls_prob_reshape.shape)
        print "Proposal Layer rpn_bbox_pred shape: {}".format(
            rpn_bbox_pred.shape)

    #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1])
    #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
    #cfg_key = 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print 'Proposal Layer im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'Proposal Layer scale: {}'.format(im_info[2])

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print 'Proposal Layer score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    # Apply to all anchors 9*h*w (h is the height of feature map, w is the
    # width of feature map)
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
Example #15
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)
    #blobs['foreverone'] = 0.0000001*np.ones((blobs['rois'].shape[0],cfg.TRAIN.mask_num,cfg.TRAIN.hidden_size),dtype = np.float32)
    #blobs['sample'] = np.random.normal(size=(blobs['rois'].shape[0],cfg.TRAIN.mask_num , cfg.TRAIN.hidden_size))
    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    #net.blobs['foreverone'].reshape(*(blobs['foreverone'].shape))
    #net.blobs['sample'].reshape(*(blobs['sample'].shape))
    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)

    #forward_kwargs['foreverone'] = blobs['foreverone'].astype(np.float32, copy=False)
    #forward_kwargs['sample'] = blobs['sample'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)
    '''
    saveto = 'test.mat'
    netdata = dict()
    tmp = net.blobs['proposal'].data
    netdata['gpu'] = tmp
    sio.savemat(saveto,netdata)
    '''

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        bbox_means = [
            0.0, 0.0, 0.0, 0.0, 1.03960042775271e-10, 0.00622199373803706,
            0.0207805908339361, 0.0524860248101128
        ]
        bbox_stds = [
            0.0, 0.0, 0.0, 0.0, 0.131444678954748, 0.125309184804088,
            0.249703604170591, 0.216150527133179
        ]
        # Broadcast the per-target stds and means across all rows; the original
        # repeat/reshape laid the values out along the wrong axis.
        box_deltas = box_deltas * np.array(bbox_stds) + np.array(bbox_means)
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
    '''
    if net.blobs.has_key('deterministic_prob') and cfg.TEST.deterministic_prob:
        objectness = net.blobs['deterministic_prob'].data
        objectness = objectness[inv_index, :]
        for i in xrange(objectness.shape[0]):
            if objectness[i,0] > 0.5:
                scores[i,0] = 0.0
    '''
    return scores, pred_boxes
Example #16
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
예제 #17
0
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride=[16, ], anchor_scales=[8, 16, 32]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)
    # Anchors are generated so that the RPN-predicted offsets (deltas in the
    # code) can be inverse-transformed back into box coordinates.
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape,[0,3,1,2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,1,2])
    #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1])
    #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
    #cfg_key = 'TEST'
    pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
    min_size      = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    # Note: anchor_target_layer samples a total of 256 pos/neg anchors, but only
    # for training the RPN (only labels 0 and 1 contribute to the loss); anchors
    # labeled -1 still produce proposals, so the proposal count here is on the
    # order of 20000. The overall flow is anchor -> proposal -> (mapped back to
    # the original image) RoI. Proposals are not necessarily foreground boxes:
    # the top 6000 by score go into NMS, which keeps 2000, so if there are fewer
    # than 2000 foreground proposals some background boxes remain. Fast R-CNN
    # filters them further downstream.

    # clip proposals to the image size
    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])
    # min_size * im_info[2] = 16 * (1/16): proposals must be at least the configured minimum size
    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]
    # NMS filtering: ~20000 proposals -> 2000
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    # take the top 6000 proposals by score (foreground probability) and feed them to NMS
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    # keep the top 2000 (post_nms_topN = 2000)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    # In the returned blob the first column is all zeros (the batch index);
    # columns 1-4 are x1, y1, x2, y2. The score is only used as the NMS ranking
    # criterion and is not used anywhere else.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
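The shift/anchor arithmetic above is the heart of proposal generation: A base anchors are replicated at every one of the K feature-map cells by broadcasting a (1, A, 4) array against a (K, 1, 4) array. A small self-contained sketch with a made-up 2x3 feature map, a 16-pixel stride and 3 hypothetical base anchors:

import numpy as np

feat_stride = 16
height, width = 2, 3                       # tiny feature map for illustration
base_anchors = np.array([[-8, -8, 8, 8],   # 3 hypothetical base anchors
                         [-16, -8, 16, 8],
                         [-8, -16, 8, 16]], dtype=np.float32)

shift_x = np.arange(0, width) * feat_stride
shift_y = np.arange(0, height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
# one (x1, y1, x2, y2) shift per feature-map cell
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()

A = base_anchors.shape[0]                  # anchors per cell
K = shifts.shape[0]                        # number of cells (height * width)
# (K, 1, 4) + (1, A, 4) broadcasts to (K, A, 4): every anchor at every cell
anchors = (shifts.reshape((K, 1, 4)) +
           base_anchors.reshape((1, A, 4))).reshape((K * A, 4))
print(anchors.shape)                       # (18, 4) == (K * A, 4)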
예제 #18
0
def im_detect(sess, net, im, boxes=None):
    """Detect object classes in an image given object proposals.
    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals
    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """

    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)
    # forward pass
    if cfg.TEST.HAS_RPN:
        feed_dict={net.data: blobs['data'], net.im_info: blobs['im_info'], net.keep_prob: 1.0}
    else:
        feed_dict={net.data: blobs['data'], net.rois: blobs['rois'], net.keep_prob: 1.0}

    run_options = None
    run_metadata = None
    if cfg.TEST.DEBUG_TIMELINE:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

    cls_score, cls_prob, bbox_pred, rois, mask_prob = sess.run(
        [net.get_output('cls_score'), net.get_output('cls_prob'),
         net.get_output('bbox_pred'), net.get_output('rois'),
         net.get_output('mask_prob')],
        feed_dict=feed_dict,
        options=run_options,
        run_metadata=run_metadata)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5] # / im_scales[0]


    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = cls_score
    else:
        # use softmax estimated probabilities
        scores = cls_prob

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        if cfg.DEBUG:
            # print box_deltas[0, :]
            # print 'cls_prob'
            # print cls_prob[0:10, :]
            # print 'boxes:'
            # print boxes[0]
            # print 'mask'
            # print mask_prob[0, :, :, 1]
            pass
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        # outputs are in the scaled image space; map back to the raw image
        pred_boxes /= im_scales[0]
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.DEBUG_TIMELINE:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        trace_file = open(str(long(time.time() * 1000)) + '-test-timeline.ctf.json', 'w')
        trace_file.write(trace.generate_chrome_trace_format(show_memory=False))
        trace_file.close()

    score = np.amax(scores, axis=1)
    label = np.argmax(scores, axis=1)
    pred_box = np.zeros((pred_boxes.shape[0],4))
    mask = np.zeros(mask_prob.shape[0:3])
    for i in range(len(label)):
        l = label[i]
        pred_box[i,:] = pred_boxes[i,4*l:4*(l + 1)]
        mask[i,:,:] = mask_prob[i,:,:,l]
    if cfg.DEBUG:
        print 'scores shape: '
        print scores.shape
        print 'mask shape: '
        print mask.shape
    return score, label, pred_box, mask
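Example #18 picks, for every detection, the box columns belonging to its argmax class: per-class boxes are laid out side by side, so columns [4*l, 4*l+4) belong to class l. A tiny illustration with made-up numbers:

import numpy as np

num_classes = 3
scores = np.array([[0.1, 0.7, 0.2]])              # one detection, 3 classes
pred_boxes = np.arange(12, dtype=np.float32).reshape(1, 4 * num_classes)
label = np.argmax(scores, axis=1)                 # -> [1]
# per-class boxes are stored side by side: columns [4*l, 4*l+4) belong to class l
best_box = pred_boxes[0, 4 * label[0]:4 * (label[0] + 1)]
print(best_box)  # [4. 5. 6. 7.]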


def vis_detections(im, class_name, dets, thresh=0.8):
    """Visual debugging of detections."""
    import matplotlib.pyplot as plt
    #im = im[:, :, (2, 1, 0)]
    for i in xrange(np.minimum(10, dets.shape[0])):
        bbox = dets[i, :4]
        score = dets[i, -1]
        if score > thresh:
            #plt.cla()
            #plt.imshow(im)
            plt.gca().add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1], fill=False,
                              edgecolor='g', linewidth=3)
                )
            plt.gca().text(bbox[0], bbox[1] - 2,
                 '{:s} {:.3f}'.format(class_name, score),
                 bbox=dict(facecolor='blue', alpha=0.5),
                 fontsize=14, color='white')

            plt.title('{}  {:.3f}'.format(class_name, score))
    #plt.show()

def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(num_classes)]
    for cls_ind in xrange(num_classes):
        for im_ind in xrange(num_images):
            dets = all_boxes[cls_ind][im_ind]
            if len(dets) == 0:
                continue

            x1 = dets[:, 0]
            y1 = dets[:, 1]
            x2 = dets[:, 2]
            y2 = dets[:, 3]
            scores = dets[:, 4]
            inds = np.where((x2 > x1) & (y2 > y1) & (scores > cfg.TEST.DET_THRESHOLD))[0]
            dets = dets[inds,:]
            if len(dets) == 0:
                continue

            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
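apply_nms delegates to the compiled nms helper (dets is an N x 5 array of (x1, y1, x2, y2, score), thresh is an IoU threshold, and the result is a list of kept row indices). As a rough reference, a plain-NumPy sketch of the same greedy suppression could look like this (py_nms is an illustrative stand-in, not the compiled module):

import numpy as np

def py_nms(dets, thresh):
    # Greedy NMS: keep the highest-scoring box, drop overlaps above thresh.
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes whose IoU with the kept box is <= thresh
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep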


def test_net(sess, net, imdb, weights_filename, output_dir , max_per_image=300, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    # output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select those the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(sess, net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        if vis:
            image = im[:, :, (2, 1, 0)]
            plt.cla()
            plt.imshow(image)

        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(image, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets
        if vis:
           plt.show()
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
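The max_per_image limit in test_net works by pooling the scores of all classes for one image, taking the score of the max_per_image-th best detection as a cut-off, and keeping everything at or above it. A small worked example with made-up scores:

import numpy as np

# scores of all detections in one image, pooled over classes
image_scores = np.array([0.9, 0.3, 0.75, 0.6, 0.2])
max_per_image = 3
# the score of the 3rd best detection becomes the cut-off
image_thresh = np.sort(image_scores)[-max_per_image]
keep = np.where(image_scores >= image_thresh)[0]
print(image_thresh)  # 0.6
print(keep)          # [0 2 3]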
예제 #19
0
    def forward(self, bottom, top):
        # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
        # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
        all_rois = bottom[0].data
        aaa = all_rois[:]
        # GT boxes (x1, y1, x2, y2, label)
        # TODO(rbg): it's annoying that sometimes I have extra info before
        # and other times after box coordinates -- normalize to one format
        gt_boxes = bottom[1].data
        im = bottom[2].data
        # Include ground-truth boxes in the set of candidate rois
        zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        all_rois = np.vstack(
            (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
        )
        
        num_images = 1
        rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
        fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)

        rois, labels, bbox_targets, bbox_weights ,layer_indexs = _sample_rois(
            all_rois, gt_boxes, fg_rois_per_image,
            rois_per_image, self._num_classes,sample_type='fpn', k0 = 4)
        vis =False
        if vis:
            ind = np.where(labels!=0)[0]
            im_shape = im.shape
            means = np.tile(
                     np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (21, 1)).ravel()
            stds = np.tile(
                    np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (21, 1)).ravel()
            bbox_targets = bbox_targets*stds +means
            
            pred_boxes = bbox_transform_inv(rois[:,1:], bbox_targets)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
            l =labels[ind]
            ro = rois[ind,1:]
            b = bbox_targets[ind,:]
            p = pred_boxes[ind,:]*bbox_weights[ind,:]
            r = []
            for i in range(p.shape[0]):
                r.append(p[i,l[i]*4:l[i]*4+4])
            r_ =  np.vstack(r)

      #  Optionally normalize targets by a precomputed mean and stdev

            vis_all_detection(im, aaa[:,1:], l, 1)

        rois_ = np.zeros((self._batch_rois*4, 5), dtype=rois.dtype)
        labels_all = np.ones((self._batch_rois*4, ), dtype=labels.dtype)*-1
        bbox_targets_all = np.zeros((self._batch_rois*4, self._num_classes * 4), dtype=bbox_targets.dtype)
        bbox_weights_all = np.zeros((self._batch_rois*4, self._num_classes * 4), dtype=bbox_weights.dtype)
        rois_all =[]
        for i in range(4):
            index = (layer_indexs == (i + 2))
            num_index = sum(index)
           
            start = self._batch_rois*i
            end = start+num_index
            index_range = range(start, end)
            rois_[index_range, :] = rois[index, :]
            rois_all.append(rois_[range(start,start + self._batch_rois), :])
            labels_all[index_range] = labels[index]  
            bbox_targets_all[index_range,:] = bbox_targets[index, :]
            bbox_weights_all[index_range,:] = bbox_weights[index, :]


        rois_p2 = rois_all[0]
        rois_p3 = rois_all[1]
        rois_p4 = rois_all[2]
        rois_p5 = rois_all[3]    
  


        top[0].reshape(*rois_p2.shape)
        top[0].data[...] = rois_p2
    
        top[1].reshape(*rois_p3.shape)
        top[1].data[...] = rois_p3

        top[2].reshape(*rois_p4.shape)
        top[2].data[...] = rois_p4
        
        top[3].reshape(*rois_p5.shape)
        top[3].data[...] = rois_p5
        
        # classification labels
        top[4].reshape(*labels_all.shape)
        top[4].data[...] = labels_all

        # bbox_targets
        top[5].reshape(*bbox_targets_all.shape)
        top[5].data[...] = bbox_targets_all

        # bbox_inside_weights
        top[6].reshape(*bbox_weights_all.shape)
        top[6].data[...] = bbox_weights_all

        # bbox_outside_weights
        top[7].reshape(*bbox_weights_all.shape)
        top[7].data[...] = np.array(bbox_weights_all > 0).astype(np.float32)
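In example #19 the layer_indexs returned by _sample_rois (values 2-5) decide which feature-pyramid level each RoI is pooled from. That helper is not shown here; the usual FPN heuristic, which code like this typically follows, assigns k = floor(k0 + log2(sqrt(w*h)/224)) clamped to [2, 5]. A sketch of that assumed rule (assign_fpn_levels is a hypothetical helper):

import numpy as np

def assign_fpn_levels(rois, k0=4, canonical_size=224.0):
    # Map RoIs (x1, y1, x2, y2) to pyramid levels P2..P5 (FPN heuristic).
    # Hypothetical helper; the actual _sample_rois implementation is not shown above.
    w = rois[:, 2] - rois[:, 0] + 1
    h = rois[:, 3] - rois[:, 1] + 1
    # larger RoIs go to coarser levels; a ~224x224 RoI lands on P(k0)
    k = np.floor(k0 + np.log2(np.sqrt(w * h) / canonical_size))
    return np.clip(k, 2, 5).astype(np.int32)

# a 112x112 box maps to P3, a 448x448 box to P5
rois = np.array([[0, 0, 111, 111], [0, 0, 447, 447]], dtype=np.float32)
print(assign_fpn_levels(rois))  # [3 5]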
예제 #20
0
def im_detect(net, im, feat_blob, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        feat_blob (str): name of the feature blob to be extracted
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
        features (ndarray): R x D array of features
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # the last column of the pid_prob is the non-person box score
        scores = blobs_out['pid_prob'][:, -1]
        scores = scores[:, np.newaxis]
        scores = np.hstack([scores, 1. - scores])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    features = net.blobs[feat_blob].data.copy()

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
        features = features[inv_index, :]

    return scores, pred_boxes, features
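Example #20 turns the last column of the pid_prob output (the non-person probability) into a two-column score matrix whose column 0 is background and column 1 is person, so it plugs into the usual K-class post-processing. A toy illustration with made-up probabilities:

import numpy as np

pid_prob = np.array([[0.1, 0.2, 0.7],      # hypothetical per-identity probs,
                     [0.05, 0.05, 0.9]])   # last column = non-person score
bg = pid_prob[:, -1][:, np.newaxis]        # R x 1 background score
scores = np.hstack([bg, 1. - bg])          # R x 2: [background, person]
print(scores)  # rows: [0.7, 0.3] and [0.9, 0.1]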
예제 #21
0
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    # blobs: dict of {data, rois}; here we only have 'data', the preprocessed
    #        network input in BGR order and N x C x H x W layout, with H and W
    #        already matching the expected input size.
    # im_scales: the factor by which the input image was scaled; the output
    #        bounding boxes must be scaled back accordingly.
    #        im_scale = 600 / min side, or 1000 / max side if that would exceed MAX_SIZE.
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.

    # this part is skipped, since the RPN is used at test time
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # im_info is [h, w, scale]; h and w refer to the scaled image
    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    # here the forward pass has finished and the results can be read out:
    #   rois: the 300 proposals output by the RPN
    #   box_deltas: box refinements from the regression branch
    #   pred_boxes: the shifted boxes, i.e. the final bounding boxes
    #   scores: per-class score for each box (e.g. 21 for PASCAL VOC: 20 classes plus background)
    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # print "Num of proposals is %d" % rois.shape[0]
        # rois = net.blobs['rois_1X'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
    return scores, pred_boxes
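The im_scale mentioned in the comments of example #21 (and computed inside _get_blobs) scales the image so its short side becomes the target size, capped so the long side does not exceed MAX_SIZE. A minimal sketch assuming the common 600/1000 test settings (cfg.TEST.SCALES / cfg.TEST.MAX_SIZE); compute_im_scale is an illustrative helper:

import numpy as np

def compute_im_scale(im_shape, target_size=600, max_size=1000):
    # Scale so the short side becomes target_size, capped by max_size.
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # prevent the longer side from exceeding max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    return im_scale

print(compute_im_scale((480, 640, 3)))   # 1.25   (short side 480 -> 600)
print(compute_im_scale((500, 1500, 3)))  # ~0.667 (long side capped at 1000)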
예제 #22
0
def my_im_detect(net, im):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs = {'data': None, 'rois': None}

    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    processed_ims = []
    im_scale_factors = []
    ## here all images are the same size, so no rescaling is needed
    #    for target_size in cfg.TEST.SCALES:
    #        im_scale = float(target_size) / float(im_size_min)
    #        # Prevent the biggest axis from being more than MAX_SIZE
    #        if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
    #            im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
    #        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
    #                        interpolation=cv2.INTER_LINEAR)
    #        im_scale_factors.append(im_scale)
    #        processed_ims.append(im)

    im_scale = 1.0
    im = cv2.resize(im_orig,
                    None,
                    None,
                    fx=im_scale,
                    fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)
    im_scale_factors.append(im_scale)
    processed_ims.append(im)

    max_shape = np.array([imn.shape for imn in processed_ims]).max(axis=0)
    num_images = len(processed_ims)
    blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    for i in xrange(num_images):
        imn = processed_ims[i]
        blob[i, 0:imn.shape[0], 0:imn.shape[1], :] = imn
    # Move channels (axis 3) to axis 1
    # Axis order will become: (batch elem, channel, height, width)
    channel_swap = (0, 3, 1, 2)
    blob = blob.transpose(channel_swap)

    blobs['data'] = blob
    im_scales = np.array(im_scale_factors)

    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['im_info'].reshape(*(blobs['im_info'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)

    blobs_out = net.forward(**forward_kwargs)

    assert len(im_scales) == 1, "Only single-image batch implemented"
    rois = net.blobs['rois'].data.copy()
    # unscale back to raw image space
    boxes = rois[:, 1:5] / im_scales[0]

    scores = blobs_out['cls_prob']

    box_deltas = blobs_out['bbox_pred']
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.shape)

    return scores, pred_boxes
예제 #23
0
    def forward(self, bottom, top):
        # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
        # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
        rois = bottom[0].data

        # print ("==================================rois====================")
        # print rois

        ######## bbox_pred
        box_deltas = bottom[1].data
        ########class score
        scores = bottom[2].data
        ########image info
        im_info = bottom[3].data
        im_scale = im_info[0][2]

        # unscale back to raw image space
        boxes_0 = rois[:, 1:5] / im_scale
        pred_boxes = bbox_transform_inv(boxes_0, box_deltas)
        # original size of the input image
        im_shape = np.array([im_info[0][0], im_info[0][1]]) / im_scale
        # clip predicted boxes using the original input size
        boxes = clip_boxes(pred_boxes, im_shape)

        # print("=========================rois from rpn.proposal_layer")
        # print("=========================shape: " + str(rois.shape))
        # print rois

        # print("=========================rois from rpn.proposal_layer")
        # print("=========================shape: " + str(boxes.shape))
        # print boxes

        max_per_image = self._max_per_image
        thresh = self._thresh
        num_classes = scores.shape[1]
        i = 0  #only support single image
        num_images = 1
        #    all_boxes[cls][image] = N x 5 array of detections in
        #    (x1, y1, x2, y2, score)
        all_boxes = [[[] for _ in xrange(num_images)]
                     for _ in xrange(num_classes)]

        # print ("=========================num_classes: " + str(num_classes))
        # print ("=========================image size: " + str(im_shape))

        ## for each class (ignoring background class)
        for j in xrange(1, num_classes):

            # if j == 23:
            #     print ("=========================scores[:,j]. j = " + str(j))
            #     print scores[:, j]

            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            # get the boxes corresponding to class j
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]

            cls_dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32,
                                                               copy=False)
            # print ("===============================size of dets before nms: " + str(cls_dets.shape))
            # cfg.TEST.NMS = 0.3
            keep = nms(cls_dets, cfg.TEST.NMS)
            # print ("===============keep in rpn/pred_to_proposal_layer.py======: " + str(keep))
            cls_dets = cls_dets[keep, :]
            # print ("===============================size of dets after nms: " + str(cls_dets.shape))
            all_boxes[j][i] = cls_dets

            # print ("===================image: " + str(i) + " class: " + str(j))
            # print ("===================shape of all_boxes[j][i]: " + str(all_boxes[j][i].shape))
            # print all_boxes[j][i]

            # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
                    # print ("===================image: " + str(i) + "class: " + str(j))
                    # print ("===================shape of all_boxes[j][i]: " + str(all_boxes[j][i].shape))

        num_boxes = 0
        for j in xrange(1, num_classes):
            num_boxes = num_boxes + all_boxes[j][i].shape[0]

        # print ("===========num_boxes========:" + str(num_boxes))
        # avoid 'Floating point exception (core dumped)' when there are no boxes
        num_boxes = max(num_boxes, 1)

        rois_for_mask = np.zeros((num_boxes, 5), dtype=np.float32)
        rois_class_score = -1 * np.ones((num_boxes, 1), dtype=np.float32)
        rois_class_ind = -1 * np.ones((num_boxes, 1), dtype=np.float32)
        rois_final = np.zeros((num_boxes, 5), dtype=np.float32)

        count = 0
        for j in xrange(1, num_classes):
            all_boxes_j = all_boxes[j][i]  #boxes correspond to class j
            c = all_boxes_j.shape[0]
            if c > 0:
                coors = all_boxes_j[:, 0:4]
                cl_scores = all_boxes_j[:, 4:5]

                # w.r.t. the scaled input size, e.g. 600x1000
                rois_for_mask[count:count + c, 1:5] = coors * im_scale
                # w.r.t. the original image size; rois_final is the same as
                # rois_for_mask but at a different scale
                rois_final[count:count + c, 1:5] = coors
                rois_class_score[count:count + c, 0:1] = cl_scores
                rois_class_ind[count:count + c, 0:1] = np.tile(j, [c, 1])
                count = count + c

        # print ("===================================rois_for_mask")
        # print ("===================================shape: " + str(rois_for_mask.shape))
        # print rois_for_mask

        # rois_for_mask
        # print ("===========OK or NOT========")
        top[0].reshape(*rois_for_mask.shape)
        top[0].data[...] = rois_for_mask
        # print ("===========OK or NOT========")
        # classification score
        top[1].reshape(*rois_class_score.shape)
        top[1].data[...] = rois_class_score

        # class index
        top[2].reshape(*rois_class_ind.shape)
        top[2].data[...] = rois_class_ind

        # rois_final
        top[3].reshape(*rois_final.shape)
        top[3].data[...] = rois_final
예제 #24
0
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = 'TRAIN' if self.phase == caffe.TRAIN else 'TEST'  # either 'TRAIN' or 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]
        # This is for the extra RoIs: if not used, everything should be
        # the same than Faster R-CNN with RPN
        if len(bottom) > 3:
            extra_rois = bottom[3].data
            n_extra_rois = extra_rois.shape[0]
            if n_extra_rois == 1 and np.all(extra_rois[0, :] == 0):
                n_extra_rois = 0
                extra_rois = np.empty((0, 4), dtype=np.float32)
            if len(bottom) > 4:
                dontcare_rois = bottom[4].data
                n_dontcare_rois = dontcare_rois.shape[0]
                if n_dontcare_rois == 1 and np.all(dontcare_rois[0, :] == 0):
                    n_dontcare_rois = 0
                    dontcare_rois = np.empty((0, 4), dtype=np.float32)
        else:
            n_extra_rois = 0
            extra_rois = np.empty((0, 4), dtype=np.float32)
            n_dontcare_rois = 0
            dontcare_rois = np.empty((0, 4), dtype=np.float32)

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])
            print 'external_rois: {}'.format(n_extra_rois)

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # External DontCare miniboxes
        if len(dontcare_rois) > 0:
            dontcare_rois = clip_boxes(dontcare_rois, im_info[:2])
            dc_overlaps = np.empty((len(proposals), len(dontcare_rois)),
                                   dtype=np.float)
            #s1 = time.time()
            bbox_overlaps(np.ascontiguousarray(proposals, dtype=np.float),
                          np.ascontiguousarray(dontcare_rois, dtype=np.float),
                          dc_overlaps, 1)
            #e1 = time.time()
            #print 'bbox_overlaps (proposal_layer) {:f} with input1: {:d}, input2: {:d} '.format(e1 - s1, len(proposals), len(dontcare_rois))

            n_squares = dc_overlaps.sum(axis=1)

            dc_roi_area = (cfg.TRAIN.DONTCARE_BOX_SIDE * cfg.TRAIN.DONTCARE_BOX_SIDE *
                           im_info[2] * im_info[2])

            proposal_areas = (proposals[:, 2] - proposals[:, 0] + 1) * \
                           (proposals[:, 3] - proposals[:, 1] + 1)

            overlapped_area = np.divide(np.multiply(n_squares, dc_roi_area),
                                        proposal_areas)

            keep = np.where(
                overlapped_area <= cfg.TRAIN.MIN_DONTCARE_OVERLAP)[0]

            if DEBUG:
                print 'Proposal layer saving:', len(keep), 'proposals out of', len(proposals)

            proposals = proposals[keep, :]
            scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:(post_nms_topN - n_extra_rois)]
        elif post_nms_topN == 0 and n_extra_rois > 0:
            keep = []
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        if DEBUG:
            print 'n_extra_rois', n_extra_rois
        if n_extra_rois > 0:
            batch_inds = np.zeros((n_extra_rois, 1), dtype=np.float32)
            a_extra_rois = np.hstack(
                (batch_inds, extra_rois.astype(np.float32, copy=False)))

        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        a_proposals = np.hstack(
            (batch_inds, proposals.astype(np.float32, copy=False)))

        if n_extra_rois > 0:
            blob = np.vstack((a_extra_rois, a_proposals))
        else:
            blob = a_proposals

        #TODO: ablation experiments
        # if n_extra_rois>0:
        #     blob = a_extra_rois

        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
예제 #25
0
def im_detect(net, im, boxes=None, svm=False, layer_name='cls_prob'):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """

    # blobs, unused_im_scale_factors = _get_blobs(im, boxes)
    # rois_image = [0, 0, 0, im.shape[1], im.shape[0]] * unused_im_scale_factors
    # blobs['rois'] = np.vstack((rois_image, blobs['rois']))
    # net.blobs['data'].reshape(*(blobs['data'].shape))
    # net.blobs['rois'].reshape(*(blobs['rois'].shape))
    # blobs_out = net.forward(data=blobs['data'].astype(np.float32, copy=False),
    #  rois=blobs['rois'].astype(np.float32, copy=False))

    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    #zwang context code
    #rois_image = [0, 0, 0, im.shape[1], im.shape[0]] * im_scales
    #blobs['rois'] = np.vstack((rois_image, blobs['rois']))
    ####end

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    #print "display shapes:"
    #print blobs['data'].shape
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        #print blobs['im_info'].shape
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))
        #print blobs['rois'].shape

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    #print "net.forward"
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM or svm:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = net.blobs[layer_name].data

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred'].copy()
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.TEST.LR_FLIP:
        flip_im = np.fliplr(im)
        im_height, im_width, _ = im.shape
        flip_boxes = boxes.copy()
        flip_boxes[:, 2] = im_width - 1 - boxes[:, 0]
        flip_boxes[:, 0] = im_width - 1 - boxes[:, 2]

        flip_blobs, im_scales = _get_blobs(flip_im, flip_boxes)

        # reshape network inputs
        net.blobs['data'].data[...] = flip_blobs['data']
        net.blobs['rois'].data[...] = flip_blobs['rois']
        flip_blobs_out = net.forward()

        flip_scores = flip_blobs_out['cls_prob']
        flip_box_deltas = flip_blobs_out['bbox_pred']
        flip_box_deltas[:, 0::4] = -flip_box_deltas[:, 0::4]

        scores = (scores + flip_scores) / 2
        box_deltas = (box_deltas + flip_box_deltas) / 2
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return net, scores, pred_boxes
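The cfg.TEST.LR_FLIP branch in example #25 averages the predictions of the original and a horizontally flipped image. Flipping a box only mirrors its x coordinates (and the x-translation delta changes sign, hence the negated flip_box_deltas[:, 0::4]). A minimal sketch of the box mirroring, assuming 0-indexed pixel coordinates (flip_boxes_horizontally is an illustrative helper):

import numpy as np

def flip_boxes_horizontally(boxes, im_width):
    # Mirror (x1, y1, x2, y2) boxes across the vertical image axis.
    flipped = boxes.copy()
    flipped[:, 0] = im_width - 1 - boxes[:, 2]   # new x1 from old x2
    flipped[:, 2] = im_width - 1 - boxes[:, 0]   # new x2 from old x1
    return flipped

boxes = np.array([[10, 20, 50, 80]], dtype=np.float32)
print(flip_boxes_horizontally(boxes, im_width=100))  # [[49. 20. 89. 80.]]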
예제 #26
0
def im_detect(sess, net, inputs, im, boxes, relations, bbox_reg, multi_iter):
    blobs, im_scales = _get_blobs(im, boxes)

    relations = np.array(relations,
                         dtype=np.int32)  # all possible combinations
    num_roi = blobs['rois'].shape[0]
    num_rel = relations.shape[0]

    inputs_feed = data_utils.create_graph_data(num_roi, num_rel, relations)

    feed_dict = {
        inputs['ims']: blobs['data'],
        inputs['rois']: blobs['rois'],
        inputs['relations']: relations,
        net.keep_prob: 1
    }

    for k in inputs_feed:
        feed_dict[inputs[k]] = inputs_feed[k]

    # compute relation rois
    feed_dict[inputs['rel_rois']] = \
        data_utils.compute_rel_rois(num_rel, blobs['rois'], relations)

    # Stage 1: VGG feature extraction.
    #ops_vgg = {
    #  'conv_out': net.layers['conv_out'],
    #}
    # Stage 2.
    ops = {
        'bbox_deltas': net.bbox_pred_output(multi_iter),
        'rel_probs': net.rel_pred_output(multi_iter),
        'cls_probs': net.cls_pred_output(multi_iter)
    }

    #t_start = time.time()
    #ops_vgg_value = sess.run(ops_vgg, feed_dict={
    #  inputs['ims']: blobs['data'],
    #  net.keep_prob: 1,
    #})
    #print 'VGG takes', time.time() - t_start

    #del feed_dict[inputs['ims']]
    #feed_dict[ops_vgg['conv_out']] = ops_vgg_value['conv_out']

    #t_start = time.time()
    ops_value = sess.run(ops, feed_dict=feed_dict)
    #print 'Scene takes', time.time() - t_start

    out_dict = {}
    for mi in multi_iter:
        rel_probs = None
        rel_probs_flat = ops_value['rel_probs'][mi]
        rel_probs = np.zeros([num_roi, num_roi, rel_probs_flat.shape[1]])
        for i, rel in enumerate(relations):
            rel_probs[rel[0], rel[1], :] = rel_probs_flat[i, :]

        cls_probs = ops_value['cls_probs'][mi]

        if bbox_reg:
            # Apply bounding-box regression deltas
            pred_boxes = bbox_transform_inv(boxes,
                                            ops_value['bbox_deltas'][mi])
            pred_boxes = clip_boxes(pred_boxes, im.shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, cls_probs.shape[1]))

        out_dict[mi] = {
            'scores': cls_probs.copy(),
            'boxes': pred_boxes.copy(),
            'relations': rel_probs.copy()
        }
    return out_dict
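Example #26 scatters the flat per-pair relation scores back into a dense num_roi x num_roi x num_predicates tensor indexed by (subject, object). A toy illustration of that scatter with made-up numbers:

import numpy as np

num_roi = 3
relations = np.array([[0, 1], [1, 2]])           # (subject, object) index pairs
rel_probs_flat = np.array([[0.9, 0.1],           # per-pair predicate scores
                           [0.2, 0.8]])
rel_probs = np.zeros((num_roi, num_roi, rel_probs_flat.shape[1]))
for i, rel in enumerate(relations):
    # scatter each pair's scores into the dense subject x object grid
    rel_probs[rel[0], rel[1], :] = rel_probs_flat[i, :]
print(rel_probs[0, 1])  # [0.9 0.1]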
예제 #27
0
def im_detect2(net, im, boxes=None):

    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)

    blobs_out = net.forward(**forward_kwargs)
    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        boxes = rois[:, 1:5] / im_scales[0]

    #################### VISUALIZATION BLOB #######################

    # import matplotlib.pyplot as plt
    # conv5_2 = net.blobs['conv5_2'].data.copy()
    #
    # fig = plt.figure(figsize=(20, 20))
    # columns = 10
    # rows = 10
    # for i in range(1, columns * rows + 1):
    #     img = conv5_2[0,i,:,:]
    #     fig.add_subplot(rows, columns, i)
    #     plt.imshow(img)
    #
    # plt.savefig('gazebo2UMD_gazebo_conv5_2_100.png')

    # import matplotlib.pyplot as plt
    # fc7 = net.blobs['fc7'].data.copy()
    #
    # fig = plt.figure(figsize=(20, 20))
    # img = fc7[0:100,0:500]
    # plt.imshow(img)
    # plt.savefig('gazebo_gazebo_fc7_100X500.png')

    #################### VISUALIZATION BLOB #######################

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        # scores = net.blobs['cls_score'].data
        pass
    else:
        # use softmax estimated probabilities
        # scores = blobs_out['cls_prob']
        scores = net.blobs['cls_prob'].data.copy()
        if cfg.TEST.MASK_REG:
            rois_class_score = blobs_out['rois_class_score']
            rois_class_ind = blobs_out['rois_class_ind']
            rois_final = blobs_out['rois_final']

    if cfg.TEST.BBOX_REG:
        box_deltas = net.blobs['bbox_pred'].data.copy()
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
        if cfg.TEST.MASK_REG:
            # N x 2 x 14 x 14, where N is the number of boxes
            masks_out = blobs_out['mask_prob']
            #print '------------------ MASKS OUT SHAPE: ', masks_out.shape

            #masks_out = masks_out[:, 1, :, :]  # masks = Nx14x14 ## DO NOT remove #channel class
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.MASK_REG:
        #return scores, pred_boxes, pred_boxes_before_clip, masks
        return rois_final, rois_class_score, rois_class_ind, masks_out, scores, pred_boxes
    else:
        return scores, pred_boxes
예제 #28
0
    def forward(self, bottom, top):
        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = self._min_sizes
        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want

        im_info = bottom[0].data[0, :]
        batch_size = bottom[1].data.shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images per device is not implemented")

        cls_prob_dict = {
            'stride64': bottom[10].data,
            'stride32': bottom[9].data,
            'stride16': bottom[8].data,
            'stride8': bottom[7].data,
            'stride4': bottom[6].data,
        }
        bbox_pred_dict = {
            'stride64': bottom[5].data,
            'stride32': bottom[4].data,
            'stride16': bottom[3].data,
            'stride8': bottom[2].data,
            'stride4': bottom[1].data,
        }

        proposal_list = []
        score_list = []
        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride,
                                           scales=self._scales,
                                           ratios=self._ratios)

            scores = cls_prob_dict['stride' + str(s)][:,
                                                      self._num_anchors:, :, :]
            bbox_deltas = bbox_pred_dict['stride' + str(s)]

            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)
            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride

            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]

            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = _clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = _clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_transform_inv(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = _filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det, nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            try:
                pad = npr.choice(keep, size=post_nms_topN - len(keep))
            except ValueError:
                # keep is empty: fall back to dummy 16x16 proposals so the
                # output blob still has post_nms_topN rows
                proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
                proposals[:, 2] = 16
                proposals[:, 3] = 16
                batch_inds = np.zeros((proposals.shape[0], 1),
                                      dtype=np.float32)
                blob = np.hstack(
                    (batch_inds, proposals.astype(np.float32, copy=False)))
                top[0].reshape(*(blob.shape))
                top[0].data[...] = blob
                return
            keep = np.hstack((keep, pad))

        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        # if is_train:

        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob
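The forward() above calls a _filter_boxes helper that is not part of the snippet. For reference, the usual py-faster-rcnn-style definition (an assumption about this particular fork) is:

def _filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep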
Example #29
class ProposalLayer(caffe.Layer):
    """
    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").
    """

    def setup(self, bottom, top):
        # parse the layer parameter string, which must be valid YAML
        layer_params = yaml.load(self.param_str_)

        self._feat_stride = layer_params['feat_stride']
        anchor_scales = layer_params.get('scales', (8, 16, 32))
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        self._num_anchors = self._anchors.shape[0]

        if DEBUG:
            print 'feat_stride: {}'.format(self._feat_stride)
            print 'anchors:'
            print self._anchors

        # rois blob: holds R regions of interest, each is a 5-tuple
        # (n, x1, y1, x2, y2) specifying an image batch index n and a
        # rectangle (x1, y1, x2, y2)
        top[0].reshape(1, 5)

        # scores blob: holds scores for R regions of interest
        if len(top) > 1:
            top[1].reshape(1, 1, 1, 1)

    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # detection_output: 1*1*num_proposals*7
        det_label = bottom[0].data[0, 0, :, 1]
        det_conf = bottom[0].data[0, 0, :, 2]
        det_xmin = bottom[0].data[0, 0, :, 3]
        det_ymin = bottom[0].data[0, 0, :, 4]
        det_xmax = bottom[0].data[0, 0, :, 5]
        det_ymax = bottom[0].data[0, 0, :, 6]

        ground_truth = bottom[1].data

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = str('TRAIN' if self.phase == 0 else 'TEST')
        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size      = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
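Every proposal layer in these examples relies on bbox_transform_inv to apply (dx, dy, dw, dh) deltas to boxes. The standard py-faster-rcnn definition is reproduced below for reference:

def bbox_transform_inv(boxes, deltas):
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w   # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h   # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w   # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h   # y2
    return pred_boxes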
Example #30
def im_detect(sess, net, im, boxes=None):
    """Detect object classes in an image given object proposals.
    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals
    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """

    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    # forward pass
    if cfg.TEST.HAS_RPN:
        feed_dict = {
            net.data: blobs['data'],
            net.im_info: blobs['im_info'],
            net.keep_prob: 1.0
        }
    else:
        feed_dict = {
            net.data: blobs['data'],
            net.rois: blobs['rois'],
            net.keep_prob: 1.0
        }

    run_options = None
    run_metadata = None
    if cfg.TEST.DEBUG_TIMELINE:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

    cls_score, cls_prob, bbox_pred, rois = sess.run([
        net.get_output('cls_score'),
        net.get_output('cls_prob'),
        net.get_output('bbox_pred'),
        net.get_output('rois')
    ],
                                                    feed_dict=feed_dict,
                                                    options=run_options,
                                                    run_metadata=run_metadata)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = cls_score
    else:
        # use softmax estimated probabilities
        scores = cls_prob

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.DEBUG_TIMELINE:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        trace_file = open(
            str(long(time.time() * 1000)) + '-test-timeline.ctf.json', 'w')
        trace_file.write(trace.generate_chrome_trace_format(show_memory=False))
        trace_file.close()

    return scores, pred_boxes
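clip_boxes / _clip_boxes simply clamps predicted coordinates to the image extent; the usual definition, shown for reference and assuming the standard 4*K box-column layout, is:

def clip_boxes(boxes, im_shape):
    """Clip boxes to image boundaries; im_shape is (height, width, ...)."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)  # x1
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)  # y1
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)  # x2
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)  # y2
    return boxes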
Example #31
def im_detect(net, im, _t, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    _t['im_preproc'].tic()
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [np.hstack((im_blob.shape[2], im_blob.shape[3], im_scales[0]))],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))

    # do forward
    net.blobs['data'].data[...] = blobs['data']
    #forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].data[...] = blobs['im_info']
        #forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    _t['im_preproc'].toc()

    _t['im_net'].tic()
    blobs_out = net.forward()
    _t['im_net'].toc()
    #blobs_out = net.forward(**forward_kwargs)

    _t['im_postproc'].tic()

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        '''
        print('rois', rois.shape)
        print('blobs_out', blobs_out['cls_prob'].shape)
        exit(0)
        '''

        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    # use softmax estimated probabilities
    scores = blobs_out['cls_prob']


    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']

        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)

        part_boxes = []
        for i in range(3):
            part_deltas = blobs_out['part' + str(i+1) + '_pred']
            parts = bbox_transform_inv(boxes, part_deltas)
            parts = clip_boxes(parts, im.shape)
            part_boxes.append(parts)

    _t['im_postproc'].toc()

    return scores, pred_boxes, part_boxes
Example #32
def im_detect(sess,
              net,
              im,
              boxes=None,
              save_vis_dir=None,
              img_name='',
              include_rpn_score=False):
    """Detect object classes in an image given object proposals.
    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals
    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """

    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)
    # forward pass
    if cfg.TEST.HAS_RPN:
        feed_dict = {
            net.data: blobs['data'],
            net.im_info: blobs['im_info'],
            net.keep_prob: 1.0
        }
    else:
        feed_dict = {
            net.data: blobs['data'],
            net.rois: blobs['rois'],
            net.keep_prob: 1.0
        }

    run_options = None
    run_metadata = None
    if cfg.TEST.DEBUG_TIMELINE:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

    #theta_tensor = tf.get_default_graph().get_tensor_by_name('spt_trans_theta')
    cls_score, cls_prob, bbox_pred, rois = sess.run([
        net.get_output('cls_score'),
        net.get_output('cls_prob'),
        net.get_output('bbox_pred'),
        net.get_output('rois')
    ],
                                                    feed_dict=feed_dict,
                                                    options=run_options,
                                                    run_metadata=run_metadata)

    if (save_vis_dir is not None and os.path.exists(save_vis_dir)):
        # first get the weights out
        with tf.variable_scope('conv5_3', reuse=True) as scope:
            conv5_3_weights = tf.get_variable("weights")

        conv5_3_weights_np, conv5_3_features, st_pool_features =\
        sess.run([conv5_3_weights, net.get_output('conv5_3'), net.get_output('pool_5')],
                  feed_dict=feed_dict,
                  options=run_options,
                  run_metadata=run_metadata)
        np.save(os.path.join(save_vis_dir, '%s_conv5_3_w.npy' % img_name),
                conv5_3_weights_np)
        np.save(os.path.join(save_vis_dir, '%s_conv5_3_f.npy' % img_name),
                conv5_3_features)
        np.save(os.path.join(save_vis_dir, '%s_st_pool_f.npy' % img_name),
                st_pool_features)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = cls_score
    else:
        # use softmax estimated probabilities
        scores = cls_prob

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        #project_bbox_inv(pred_boxes, theta) # project spatially transformed box back
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.DEBUG_TIMELINE:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        trace_file = open(
            str(int(time.time() * 1000)) + '-test-timeline.ctf.json', 'w')
        trace_file.write(trace.generate_chrome_trace_format(show_memory=False))
        trace_file.close()

    if (include_rpn_score):
        # score is a joint prob instead of conditional prob
        scores *= np.reshape(rois[:, 0], [-1, 1])
    return scores, pred_boxes
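The DEDUP_BOXES hashing in the snippets above is easiest to see on a tiny made-up example: scaling ROIs by cfg.DEDUP_BOXES (e.g. 1/16) and rounding maps every image ROI that lands on the same feature-map cell to the same integer 4-tuple, and the dot product with [1, 1e3, 1e6, 1e9, 1e12] packs that tuple into a single hash value. The numbers below are illustrative only:

rois = np.array([[0., 17., 33., 65., 129.],
                 [0., 18., 34., 66., 130.]])      # two distinct image ROIs
v = np.array([1, 1e3, 1e6, 1e9, 1e12])
hashes = np.round(rois * (1.0 / 16)).dot(v)       # cfg.DEDUP_BOXES = 1/16
# both rows round to [0, 1, 2, 4, 8] and hash to the same value, so only one
# ROI is pushed through the net; inv_index maps the result back to both boxes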
Example #33
    samp_idx = random.sample(range(n_all_samples), min(n_samp, n_all_samples))
    print 'sampled {} images.'.format(len(samp_idx))
    body_boxes = np.vstack([roidb[i]['body_boxes'] for i in samp_idx])
    head_boxes = np.vstack([roidb[i]['head_boxes'] for i in samp_idx])
    print 'loaded {} boxes.'.format(len(body_boxes))

    # valid
    valid_inds = where_valid(head_boxes, body_boxes)
    body_boxes = body_boxes[valid_inds]
    head_boxes = head_boxes[valid_inds]

    # transform
    trans_params = bbox_transform(body_boxes, head_boxes)
    print '> trans_params:\n', trans_params
    trans_params = trans_params.mean(axis=0)
    print '> mean:\n', trans_params

    # show transformed example
    show_idx = 30
    body_boxes = roidb[show_idx]['body_boxes']
    head_trans_boxes = bbox_transform_inv(body_boxes,
        np.tile(trans_params, (len(body_boxes), 1)))

    image_path = imdb.image_path_at(show_idx)
    im = cv2.imread(image_path)[:, :, [2, 1, 0]]
    plt.imshow(im)
    ax = plt.gca()
    show_box(ax, body_boxes, head_trans_boxes)
    plt.title(image_path)
    plt.show()
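bbox_transform, called above, is the forward counterpart of bbox_transform_inv: it computes the (dx, dy, dw, dh) targets that map body_boxes onto head_boxes. The standard definition is included here for reference:

def bbox_transform(ex_rois, gt_rois):
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)

    targets = np.vstack(
        (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
    return targets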
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = 'TEST'  # str(self.phase) # either 'TRAIN' or 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
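All of these layers call nms(dets, thresh) on an N x 5 array of [x1, y1, x2, y2, score]. A pure-Python reference equivalent to py_cpu_nms is shown below for clarity; the real code normally uses the compiled CPU/GPU kernel:

def py_cpu_nms(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the highest-scoring box with the rest
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes whose overlap with box i is below the threshold
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]
    return keep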
Example #35
def im_detect(feature_net,
              embed_net,
              recurrent_net,
              im,
              boxes=None,
              use_box_at=-1):
    """Detect object classes in an image given object proposals.

    Arguments:
        feature_net (caffe.Net): CNN model for extracting features
        embed_net (caffe.Net): A word embedding layer
        recurrent_net (caffe.Net): Recurrent model for generating captions and locations
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        use_box_at (int32): Use predicted box at a given timestep, default to the last one (use_box_at=-1)
    Returns:
        scores (ndarray): R x 1 array of object class scores 
        pred_boxes (ndarray)): R x 4 array of predicted bounding boxes
        captions (list): length R list of list of word tokens (captions)
    """

    # for bbox unnormalization
    bbox_mean = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape((1, 4))
    bbox_stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS).reshape((1, 4))

    blobs, im_scales = _get_blobs(im, boxes)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], dtype=np.float32)

    # reshape network inputs
    feature_net.blobs['data'].reshape(*(blobs['data'].shape))
    feature_net.blobs['im_info'].reshape(*(blobs['im_info'].shape))

    feature_net.forward(data=im_blob, im_info=blobs['im_info'])
    region_features = feature_net.blobs['region_features'].data.copy()
    rois = feature_net.blobs['rois'].data.copy()
    # detection scores
    scores = feature_net.blobs['cls_probs'].data[:, 1].copy()
    # proposal boxes
    boxes = rois[:, 1:5] / im_scales[0]
    proposal_n = rois.shape[0]
    feat_args = {'input_features': region_features}
    opt_args = {}
    # global feature as an optional input: context
    if 'global_features' in feature_net.blobs and 'global_features' in recurrent_net.blobs:
        #changed according to the global feature shape
        opt_args['global_features'] = np.tile(
            feature_net.blobs['global_features'].data, (1, proposal_n, 1))

    bbox_pred_direct = ('bbox_pred' in feature_net.blobs)

    if bbox_pred_direct:
        # do greedy search
        captions, _, logprobs = _greedy_search(embed_net,
                                               recurrent_net,
                                               feat_args,
                                               opt_args,
                                               proposal_n,
                                               pred_bbox=False)
        #bbox target unnormalization
        box_offsets = feature_net.blobs['bbox_pred'].data
    else:

        captions, box_offsets, logprobs = _greedy_search(embed_net, recurrent_net, feat_args, opt_args, proposal_n, \
            pred_bbox = True, use_box_at = use_box_at)

    #bbox target unnormalization
    box_deltas = box_offsets * bbox_stds + bbox_mean

    #do the transformation
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.shape)

    return scores, pred_boxes, captions
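A tiny worked example of the unnormalization step above, assuming the common defaults BBOX_NORMALIZE_MEANS = (0, 0, 0, 0) and BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2); the input values are purely illustrative:

raw = np.array([[0.5, -0.2, 1.0, 0.3]])            # raw network output
stds = np.array([0.1, 0.1, 0.2, 0.2]).reshape((1, 4))
means = np.zeros((1, 4))
deltas = raw * stds + means                        # -> [[0.05, -0.02, 0.2, 0.06]]
# these unnormalized (dx, dy, dw, dh) are what bbox_transform_inv then applies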
Example #36
def im_detect(sess, net, im, boxes=None):
    """Detect object classes in an image given object proposals.
    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals
    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """

    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)
    # forward pass
    if cfg.TEST.HAS_RPN:
        feed_dict={net.data: blobs['data'], net.im_info: blobs['im_info'], net.keep_prob: 1.0}
    else:
        feed_dict={net.data: blobs['data'], net.rois: blobs['rois'], net.keep_prob: 1.0}

    run_options = None
    run_metadata = None
    if cfg.TEST.DEBUG_TIMELINE:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

    cls_score, cls_prob, bbox_pred, rois = sess.run([net.get_output('cls_score'), net.get_output('cls_prob'), net.get_output('bbox_pred'),net.get_output('rois')],
                                                    feed_dict=feed_dict,
                                                    options=run_options,
                                                    run_metadata=run_metadata)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5] / im_scales[0]


    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = cls_score
    else:
        # use softmax estimated probabilities
        scores = cls_prob

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.DEBUG_TIMELINE:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        trace_file = open(str(long(time.time() * 1000)) + '-test-timeline.ctf.json', 'w')
        trace_file.write(trace.generate_chrome_trace_format(show_memory=False))
        trace_file.close()

    return scores, pred_boxes
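A hedged usage sketch for the TensorFlow variant above, in the style of a Faster-RCNN_TF demo script; get_network and model_path are assumptions about the surrounding project:

import cv2
import tensorflow as tf

net = get_network('VGGnet_test')                   # assumed network factory
saver = tf.train.Saver()
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    saver.restore(sess, model_path)                # model_path is assumed
    im = cv2.imread('demo.jpg')
    scores, pred_boxes = im_detect(sess, net, im)  # RPN path, boxes=None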
def im_detect(sess, net, inputs, im, boxes, bbox_reg, multi_iter):
    blobs, im_scales = _get_blobs(im, boxes)
    im_orig = im.astype(np.float32, copy=True)

    im_orig -= cfg.PIXEL_MEANS
    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    relations = []
    num_box_per_image = int(boxes.shape[0] / 10)
    for act_i in range(10):
        curr_box = boxes[act_i * num_box_per_image:(act_i + 1) *
                         num_box_per_image]
        for i in range(num_box_per_image):
            for j in range(num_box_per_image):
                # if i != j: # and i<j
                x1 = curr_box[i][0]
                y1 = curr_box[i][1]
                x2 = curr_box[j][0]
                y2 = curr_box[j][1]
                if i < j and math.sqrt(
                    (x1 - x2)**2 + (y1 - y2)**2
                ) < im_size_min / 2 and i != num_box_per_image - 1 and j != num_box_per_image - 1:
                    relations.append([
                        i + act_i * num_box_per_image,
                        j + act_i * num_box_per_image
                    ])
    if len(relations) == 0:
        relations.append([0, 1])
    print(len(relations))
    relations = np.array(relations,
                         dtype=np.int32)  # all possible combinations

    spa_relations = relations.copy()
    num_roi = blobs['rois'].shape[0]
    num_rel = relations.shape[0]
    num_spa_rel = spa_relations.shape[0]
    inputs_feed = data_utils.create_graph_data(num_roi, num_rel, relations)

    feed_dict = {
        inputs['ims']: blobs['data'],
        inputs['rois']: blobs['rois'],
        inputs['relations']: relations,
        net.keep_prob: 1
    }

    for k in inputs_feed:
        feed_dict[inputs[k]] = inputs_feed[k]

    # compute relation rois
    feed_dict[inputs['rel_rois']] = \
        data_utils.compute_rel_rois(num_spa_rel, blobs['rois'], spa_relations)

    ops = {}

    # ops['bbox_deltas'] = net.bbox_pred_output(multi_iter)
    ops['rel_probs'] = net.rel_pred_output(multi_iter)
    ops['cls_probs'] = net.cls_pred_output(multi_iter)
    ops['vert'] = net.getver()

    ops_value = sess.run(ops, feed_dict=feed_dict)

    out_dict = {}
    for mi in multi_iter:
        rel_probs = None
        rel_probs_flat = ops_value['rel_probs'][mi]
        rel_probs = np.zeros([num_roi, num_roi, rel_probs_flat.shape[1]])
        for i, rel in enumerate(relations):
            rel_probs[rel[0], rel[1], :] = rel_probs_flat[i, :]

        cls_probs = ops_value['cls_probs'][mi]

        if bbox_reg:
            # Apply bounding-box regression deltas
            pred_boxes = bbox_transform_inv(boxes,
                                            ops_value['bbox_deltas'][mi])
            pred_boxes = clip_boxes(pred_boxes, im.shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, cls_probs.shape[1]))

        out_dict[mi] = {
            'scores': cls_probs.copy(),
            'boxes': pred_boxes.copy(),
            'relations': rel_probs.copy(),
            'vert': ops_value['vert'].copy()
        }

    return out_dict
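data_utils.compute_rel_rois is not included in this snippet. A plausible sketch of what such a helper computes (the union box of each subject/object ROI pair) is given below; the name compute_union_rois is deliberately different to mark it as hypothetical:

def compute_union_rois(num_rel, rois, relations):
    """For each relation (i, j), return the union of ROI i and ROI j."""
    rel_rois = np.zeros((num_rel, 5), dtype=np.float32)
    for k in range(num_rel):
        i, j = relations[k]
        rel_rois[k, 0] = rois[i, 0]                   # shared image index
        rel_rois[k, 1] = min(rois[i, 1], rois[j, 1])  # x1
        rel_rois[k, 2] = min(rois[i, 2], rois[j, 2])  # y1
        rel_rois[k, 3] = max(rois[i, 3], rois[j, 3])  # x2
        rel_rois[k, 4] = max(rois[i, 4], rois[j, 4])  # y2
    return rel_rois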
Example #38
    def forward(self, bottom, top):
        cfg_key = str('TRAIN' if self.phase == 0 else 'TEST')
        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size = self._min_sizes
        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
    
        im_info = bottom[0].data[0, :]
        batch_size = bottom[1].data.shape[0]
        if batch_size > 1:
            raise ValueError("Multiple images per device are not implemented")

        cls_prob_dict = {
            'stride64': bottom[10].data,
            'stride32': bottom[9].data,
            'stride16': bottom[8].data,
            'stride8': bottom[7].data,
            'stride4': bottom[6].data,
        }
        bbox_pred_dict = {
            'stride64': bottom[5].data,
            'stride32': bottom[4].data,
            'stride16': bottom[3].data,
            'stride8': bottom[2].data,
            'stride4': bottom[1].data,
        }
      
        proposal_list = []
        score_list = []
        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios)
    
            scores = cls_prob_dict['stride' + str(s)][:, self._num_anchors:, :, :]
            bbox_deltas = bbox_pred_dict['stride' + str(s)]
          
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]
    
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = _clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = _clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_transform_inv(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = _filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det,nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            try:
                pad = npr.choice(keep, size=post_nms_topN - len(keep))
            except ValueError:
                # keep is empty: fall back to dummy 16x16 proposals so the
                # output blob still has post_nms_topN rows
                proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
                proposals[:, 2] = 16
                proposals[:, 3] = 16
                batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
                blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
                top[0].reshape(*(blob.shape))
                top[0].data[...] = blob
                return
            keep = np.hstack((keep, pad))
           
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        # if is_train:
    
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob
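_clip_pad, used by the multi-stride layer above, crops the padded score/delta maps back to the real feature-map size. A reference definition in the Deformable-ConvNets style, shown here for completeness:

def _clip_pad(tensor, pad_shape):
    """Crop an (N, C, H, W) tensor down to pad_shape = (h, w)."""
    H, W = tensor.shape[2:]
    h, w = pad_shape
    if h < H or w < W:
        tensor = tensor[:, :, :h, :w].copy()
    return tensor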
Example #39
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    ########################### used for DEBUG, ADDED BY LJ ########################
    for blob_name, blob in net.blobs.iteritems():
        print blob_name + '\t' + str(blob.data.shape)
    for para_name, para in net.params.iteritems():
        print para_name + '\t' + str(para[0].data.shape) + str(
            para[1].data.shape)

    def vis_square(data, show_type):
        '''
        Take an array of shape (n, height, width) (grayscale) and visualize
        each (height, width) slice in a grid of size approx. sqrt(n) by sqrt(n).
        '''
        import matplotlib.pyplot as plt
        #normalize data for display
        data = (data - data.min()) / (data.max() - data.min())
        #force the number of filters to be square
        n = int(np.ceil(np.sqrt(data.shape[0])))
        if show_type == 'feature':
            padding = ((0, n**2 - data.shape[0]), (0, 5), (0, 5))
        else:
            padding = ((0, n**2 - data.shape[0]), (0, 1), (0, 1))
        data = np.pad(data, padding, mode='constant', constant_values=0)

        data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3))
        data = data.reshape((n * data.shape[1], n * data.shape[3]))

        plt.imshow(data)
        plt.axis('off')

    for blob_name, blob in net.blobs.iteritems():
        print blob_name + '\t' + str(blob.data.shape)
        try:
            shape_val = blob.data.shape
            vis_square(blob.data.reshape(shape_val[0] * shape_val[1],
                                         shape_val[2], shape_val[3])[:64],
                       show_type='feature')
        except:
            pass
    for para_name, para in net.params.iteritems():
        print para_name + '\t' + str(para[0].data.shape) + str(
            para[1].data.shape)
        try:
            shape_val = para[0].data.shape
            vis_square(para[0].data.reshape(shape_val[0] * shape_val[1],
                                            shape_val[2], shape_val[3])[:256],
                       show_type='params')
        except:
            pass
    #################################################################################
    return scores, pred_boxes
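If the debug block above is kept, vis_square is easier to reuse once hoisted to module level. A minimal usage sketch; the layer name 'conv1_1' and the output file name are assumptions:

import matplotlib.pyplot as plt

filters = net.params['conv1_1'][0].data            # e.g. (64, 3, 3, 3) for VGG16
vis_square(filters.reshape(-1, filters.shape[2], filters.shape[3])[:64],
           show_type='params')
plt.savefig('conv1_1_filters.png')                 # vis_square itself only draws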
Example #40
def im_detect_array(net, imgs):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(imgs)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']

        images = []
        for i in xrange(blobs['data'].shape[0]):
            images.append(np.array(
                [[im_blob.shape[2], im_blob.shape[3], im_scales[i]]],
                dtype=np.float32))

        blobs['im_info'] = np.array(images)


    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    deviders = []
    boxes = []
    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == len(imgs), "Only one scale per image implemented"
        rois = net.blobs['rois'].data.copy()


        for idx in xrange(im_scales.shape[0] - 1):
            deviders.append(np.searchsorted(rois[:, 0], idx + 1, 'left'))

        boxes = np.split(rois[:, 1:5], deviders)

        # unscale back to raw image space (each image has its own scale)
        for idx in xrange(len(imgs)):
            boxes[idx] = boxes[idx] / im_scales[idx]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = np.split(net.blobs['cls_score'].data, deviders)

    else:
        # use softmax estimated probabilities
        scores = np.split(blobs_out['cls_prob'], deviders)

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = np.split(blobs_out['bbox_pred'], deviders)

        pred_boxes = []
        for idx in xrange(im_scales.shape[0]):
            pred_boxes.append(bbox_transform_inv(boxes[idx], box_deltas[idx]))
            pred_boxes[idx] = clip_boxes(pred_boxes[idx], imgs[idx].shape)

    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
Example #41
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = str('TRAIN' if self.phase == 0 else 'TEST') # either 'TRAIN' or 'TEST'
        if cfg.TRAIN.FrozenTraing:
            cfg_key = 'TEST'
        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size      = cfg[cfg_key].RPN_MIN_SIZE
        if cfg.TRAIN.MULTI_SCALE_RPN == 1:
            rpn_proposals = []
            rpn_scores = []
            for i, ANCHOR_SCALES in enumerate(cfg.TRAIN.MULTI_SCALE_RPN_SCALE):
                pre_nms_topN = cfg[cfg_key].MULTI_SCALE_RPN_PRE_NMS_TOP_Ns[i]
                post_nms_topN = cfg[cfg_key].MULTI_SCALE_RPN_POST_NMS_TOP_Ns[i]
                # the first set of _num_anchors channels are bg probs
                # the second set are the fg probs, which we want
                scores = bottom[0+i*2].data[:, self._num_anchors[i]:, :, :]
                bbox_deltas = bottom[1+i*2].data
                im_info = bottom[-1].data[0, :]

                # 1. Generate proposals from bbox deltas and shifted anchors
                height, width = scores.shape[-2:]

                # Enumerate all shifts
                shift_x = np.arange(0, width) * self._feat_stride[i]
                shift_y = np.arange(0, height) * self._feat_stride[i]
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                    shift_x.ravel(), shift_y.ravel())).transpose()

                # Enumerate all shifted anchors:
                #
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = self._num_anchors[i]
                K = shifts.shape[0]
                anchors = self._anchors[i].reshape((1, A, 4)) + \
                          shifts.reshape((1, K, 4)).transpose((1, 0, 2))
                anchors = anchors.reshape((K * A, 4))

                # Transpose and reshape predicted bbox transformations to get them
                # into the same order as the anchors:
                #
                # bbox deltas will be (1, 4 * A, H, W) format
                # transpose to (1, H, W, 4 * A)
                # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
                # in slowest to fastest order
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

                # Same story for the scores:
                #
                # scores are (1, A, H, W) format
                # transpose to (1, H, W, A)
                # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                # Convert anchors into proposals via bbox transformations
                proposals = bbox_transform_inv(anchors, bbox_deltas)

                # 2. clip predicted boxes to image
                proposals = clip_boxes(proposals, im_info[:2])

                # 3. remove predicted boxes with either height or width < threshold
                # (NOTE: convert min_size to input image scale stored in im_info[2])
                keep = _filter_boxes(proposals, min_size * im_info[2])
                proposals = proposals[keep, :]
                scores = scores[keep]

                # 4. sort all (proposal, score) pairs by score from highest to lowest
                # 5. take top pre_nms_topN (e.g. 6000)
                order = scores.ravel().argsort()[::-1]
                if pre_nms_topN > 0:
                    order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                scores = scores[order]

                # 6. apply nms (e.g. threshold = 0.7)
                # 7. take after_nms_topN (e.g. 300)
                # 8. return the top proposals (-> RoIs top)
                nms_keep = nms(np.hstack((proposals, scores)), nms_thresh)
                if post_nms_topN > 0:
                    nms_keep = nms_keep[:post_nms_topN]
                proposals = proposals[nms_keep, :]
                scores = scores[nms_keep]
                rpn_proposals.append(proposals)
                rpn_scores.append(scores)

            # concatenate the proposals and scores collected from all RPN scales
            proposals = np.vstack(rpn_proposals)
            scores = np.vstack(rpn_scores)

            # Output rois blob
            # Our RPN implementation only supports a single input image, so all
            # batch inds are 0
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
            # print blob.shape
            top[0].reshape(*(blob.shape))
            top[0].data[...] = blob

            if cfg.TRAIN.RPN_KP_REGRESSION:
                # timer = Timer()
                # timer.tic()
                keyPoint_deltas = bottom[3].data
                keyPoints_num = cfg.TRAIN.ATTRIBUTES[0]['gt_keyPoints']
                keyPoint_deltas = keyPoint_deltas.transpose((0, 2, 3, 1)).reshape((-1, keyPoints_num))

                # method 1 (disabled): decode keypoints for every anchor, then filter
                # keyPoint_proposals = keyPoints_transform_inv(anchors, keyPoint_deltas)
                # keyPoint_proposals = keyPoint_proposals[keep, :]
                # keyPoint_proposals = keyPoint_proposals[order, :]
                # keyPoint_proposals = keyPoint_proposals[nms_keep, :]

                # method 2: apply the same keep/order/nms_keep filtering to the anchors
                # and keypoint deltas so they stay aligned with the surviving proposals,
                # then decode
                anchors_t = anchors[keep, :]
                anchors_t = anchors_t[order, :]
                anchors_t = anchors_t[nms_keep, :]
                keyPoint_deltas_t = keyPoint_deltas[keep, :]
                keyPoint_deltas_t = keyPoint_deltas_t[order, :]
                keyPoint_deltas_t = keyPoint_deltas_t[nms_keep, :]
                keyPoint_proposals = keyPoints_transform_inv(anchors_t, keyPoint_deltas_t)

                blob = np.hstack((batch_inds, keyPoint_proposals.astype(np.float32, copy=False)))
                # print blob.shape
                top[1].reshape(*(blob.shape))
                top[1].data[...] = blob

                # [Optional] output scores blob
                if len(top) > 2:
                    top[2].reshape(*(scores.shape))
                    top[2].data[...] = scores
                # timer.toc()
                # print ('proposal took {:.3f}s').format(timer.total_time)
            else:
                # [Optional] output scores blob
                if len(top) > 1:
                    top[1].reshape(*(scores.shape))
                    top[1].data[...] = scores
        else:
            # the first set of _num_anchors channels are bg probs
            # the second set are the fg probs, which we want
            scores = bottom[0].data[:, self._num_anchors:, :, :]
            bbox_deltas = bottom[1].data
            im_info = bottom[2].data[0, :]

            if DEBUG:
                print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
                print 'scale: {}'.format(im_info[2])

            # 1. Generate proposals from bbox deltas and shifted anchors
            height, width = scores.shape[-2:]

            if DEBUG:
                print 'score map size: {}'.format(scores.shape)

            # Enumerate all shifts
            shift_x = np.arange(0, width) * self._feat_stride
            shift_y = np.arange(0, height) * self._feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = self._anchors.reshape((1, A, 4)) + \
                      shifts.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_transform_inv(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
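            # e.g. (illustrative values) with min_size = 16 and im_info[2] = 1.6 the
            # filter drops boxes narrower or shorter than 25.6 px in the scaled image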
            keep = _filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            if len(bottom) > 30:
                # forward anchors from conv4_3 and combine all anchors
                # (note: the len(bottom) > 30 guard effectively disables this branch)

                # the first set of _num_anchors channels are bg probs
                # the second set are the fg probs, which we want
                scores_from_conv4_3 = bottom[3].data[:, self._num_anchors_from_conv4_3:, :, :]
                bbox_deltas_from_conv4_3 = bottom[4].data
                im_info = bottom[2].data[0, :]

                if DEBUG:
                    print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
                    print 'scale: {}'.format(im_info[2])

                # 1. Generate proposals from bbox deltas and shifted anchors
                height_from_conv4_3, width_from_conv4_3 = scores_from_conv4_3.shape[-2:]

                if DEBUG:
                    print 'score map size: {}'.format(scores_from_conv4_3.shape)

                # Enumerate all shifts
                shift_x = np.arange(0, width_from_conv4_3) * self._feat_stride_from_conv4_3
                shift_y = np.arange(0, height_from_conv4_3) * self._feat_stride_from_conv4_3
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts_from_conv4_3 = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                    shift_x.ravel(), shift_y.ravel())).transpose()

                # Enumerate all shifted anchors:
                #
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = self._num_anchors_from_conv4_3
                K = shifts_from_conv4_3.shape[0]
                anchors_from_conv4_3 = self._anchors_from_conv4_3.reshape((1, A, 4)) + \
                          shifts_from_conv4_3.reshape((1, K, 4)).transpose((1, 0, 2))
                anchors_from_conv4_3 = anchors_from_conv4_3.reshape((K * A, 4))

                # Transpose and reshape predicted bbox transformations to get them
                # into the same order as the anchors:
                #
                # bbox deltas will be (1, 4 * A, H, W) format
                # transpose to (1, H, W, 4 * A)
                # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
                # in slowest to fastest order
                bbox_deltas_from_conv4_3 = bbox_deltas_from_conv4_3.transpose((0, 2, 3, 1)).reshape((-1, 4))

                # Same story for the scores:
                #
                # scores are (1, A, H, W) format
                # transpose to (1, H, W, A)
                # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
                scores_from_conv4_3 = scores_from_conv4_3.transpose((0, 2, 3, 1)).reshape((-1, 1))

                # Convert anchors into proposals via bbox transformations
                proposals_from_conv4_3 = bbox_transform_inv(anchors_from_conv4_3, bbox_deltas_from_conv4_3)

                # 2. clip predicted boxes to image
                proposals_from_conv4_3 = clip_boxes(proposals_from_conv4_3, im_info[:2])

                # 3. remove predicted boxes with either height or width < threshold
                # (NOTE: convert min_size to input image scale stored in im_info[2])
                keep = _filter_boxes(proposals_from_conv4_3, min_size * im_info[2])
                proposals_from_conv4_3 = proposals_from_conv4_3[keep, :]
                scores_from_conv4_3 = scores_from_conv4_3[keep]

                # 3.1 combine all anchors from conv4_3 and conv5_3
                # strategy 1
                # proposals = np.vstack((proposals, proposals_from_conv4_3))
                # scores = np.vstack((scores, scores_from_conv4_3))

                # strategy 2
                # sort all anchors from conv5_3 and conv4_3 and apply NMS before combining them
                # order = scores.ravel().argsort()[::-1]
                # if pre_nms_topN > 0:
                #     order = order[:pre_nms_topN]
                # proposals = proposals[order, :]
                # scores = scores[order]
                # keep = nms(np.hstack((proposals, scores)), nms_thresh)
                # if post_nms_topN > 0:
                #     keep = keep[:post_nms_topN]
                # proposals = proposals[keep, :]
                # scores = scores[keep]
                #
                # order = scores_from_conv4_3.ravel().argsort()[::-1]
                # if pre_nms_topN > 0:
                #     order = order[:pre_nms_topN]
                # proposals_from_conv4_3 = proposals_from_conv4_3[order, :]
                # scores_from_conv4_3 = scores_from_conv4_3[order]
                # keep = nms(np.hstack((proposals_from_conv4_3, scores_from_conv4_3)), nms_thresh)
                # if post_nms_topN > 0:
                #     keep = keep[:post_nms_topN]
                # proposals_from_conv4_3 = proposals_from_conv4_3[keep, :]
                # scores_from_conv4_3 = scores_from_conv4_3[keep]
                #
                # proposals = np.vstack((proposals, proposals_from_conv4_3))
                # scores = np.vstack((scores, scores_from_conv4_3))

                # strategy 3
                # proposals = proposals_from_conv4_3
                # scores = scores_from_conv4_3
                # ------------------------------

            # 4. sort all (proposal, score) pairs by score from highest to lowest
            # 5. take top pre_nms_topN (e.g. 6000)
            order = scores.ravel().argsort()[::-1]
            if pre_nms_topN > 0:
                order = order[:pre_nms_topN]
            proposals = proposals[order, :]
            scores = scores[order]

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
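            # nms() is assumed to be the py-faster-rcnn utility: given (N, 5) rows of
            # [x1, y1, x2, y2, score] it returns the indices of the boxes it keeps;
            # since the input is already sorted by score, the kept indices are too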
            nms_keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                nms_keep = nms_keep[:post_nms_topN]
            proposals = proposals[nms_keep, :]
            scores = scores[nms_keep]

            # if cfg.RPN_FILTER:
            #     scores_i = np.where(scores[:, 0] > cfg.RPN_FILTER_thresh)
            #     if len(scores_i[0]) == 0:
            #         proposals = proposals[:5]
            #         scores = scores[:5]
            #     else:
            #         proposals = proposals[scores_i]
            #         scores = scores[scores_i]
            #     areas = (proposals[:,2]-proposals[:,0])*(proposals[:,3]-proposals[:,1])
            #     argmax_area_i = np.argmax(areas)
            #     proposals = proposals[argmax_area_i, np.newaxis]
            #     scores = scores[argmax_area_i, np.newaxis]

            # concatenate proposals from additional RPN pyramid maps, if enabled
            if cfg.RPN_PYRAMID_MORE:
                RPN_PYRAMID_NUM = cfg.RPN_PYRAMID_NUM
                for j in range(1, RPN_PYRAMID_NUM):
                    # the first set of _num_anchors channels are bg probs
                    # the second set are the fg probs, which we want
                    scores_extend = bottom[1+2*j].data[:, self._num_anchors:, :, :]
                    bbox_deltas = bottom[2+2*j].data

                    if DEBUG:
                        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
                        print 'scale: {}'.format(im_info[2])

                    # 1. Generate proposals from bbox deltas and shifted anchors
                    height_extend, width_extend = scores_extend.shape[-2:]

                    if DEBUG:
                        print 'score map size: {}'.format(scores_extend.shape)

                    # Enumerate all shifts
                    shift_x = np.arange(0, width_extend) * self._feat_stride
                    shift_y = np.arange(0, height_extend) * self._feat_stride
                    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                        shift_x.ravel(), shift_y.ravel())).transpose()

                    # Enumerate all shifted anchors:
                    #
                    # add A anchors (1, A, 4) to
                    # cell K shifts (K, 1, 4) to get
                    # shift anchors (K, A, 4)
                    # reshape to (K*A, 4) shifted anchors
                    A = self._num_anchors
                    K = shifts.shape[0]
                    anchors = self._anchors.reshape((1, A, 4)) + \
                              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
                    anchors = anchors.reshape((K * A, 4))

                    # Transpose and reshape predicted bbox transformations to get them
                    # into the same order as the anchors:
                    #
                    # bbox deltas will be (1, 4 * A, H, W) format
                    # transpose to (1, H, W, 4 * A)
                    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
                    # in slowest to fastest order
                    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

                    # Same story for the scores:
                    #
                    # scores are (1, A, H, W) format
                    # transpose to (1, H, W, A)
                    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
                    scores_extend = scores_extend.transpose((0, 2, 3, 1)).reshape((-1, 1))

                    # Convert anchors into proposals via bbox transformations
                    proposals_extend = bbox_transform_inv(anchors, bbox_deltas)

                    # 2. clip predicted boxes to image
                    proposals_extend = clip_boxes(proposals_extend, im_info[:2])

                    # 3. remove predicted boxes with either height or width < threshold
                    # (NOTE: convert min_size to input image scale stored in im_info[2])
                    keep = _filter_boxes(proposals_extend, min_size * im_info[2])
                    proposals_extend = proposals_extend[keep, :]
                    scores_extend = scores_extend[keep]

                    # 4. sort all (proposal, score) pairs by score from highest to lowest
                    # 5. take top pre_nms_topN (e.g. 6000)
                    order = scores_extend.ravel().argsort()[::-1]
                    if pre_nms_topN > 0:
                        order = order[:pre_nms_topN]
                    proposals_extend = proposals_extend[order, :]
                    scores_extend = scores_extend[order]

                    # 6. apply nms (e.g. threshold = 0.7)
                    # 7. take after_nms_topN (e.g. 300)
                    # 8. return the top proposals (-> RoIs top)
                    keep = nms(np.hstack((proposals_extend, scores_extend)), nms_thresh)
                    if post_nms_topN > 0:
                        keep = keep[:post_nms_topN]
                    proposals_extend = proposals_extend[keep, :]
                    scores_extend = scores_extend[keep]

                    # 9. concatenate all proposals
                    proposals = np.vstack((proposals, proposals_extend))
                    scores = np.vstack((scores, scores_extend))

            # Output rois blob
            # Our RPN implementation only supports a single input image, so all
            # batch inds are 0
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
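            # each row of the rois blob is [batch_ind, x1, y1, x2, y2], the layout the
            # downstream ROI pooling layer expects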
            # print blob.shape
            top[0].reshape(*(blob.shape))
            top[0].data[...] = blob

            if cfg.TRAIN.RPN_KP_REGRESSION:
                # timer = Timer()
                # timer.tic()
                keyPoint_deltas = bottom[3].data
                keyPoints_num = cfg.TRAIN.ATTRIBUTES[0]['gt_keyPoints']
                keyPoint_deltas = keyPoint_deltas.transpose((0, 2, 3, 1)).reshape((-1, keyPoints_num))

                # method 1 (disabled): decode keypoints for every anchor, then filter
                # keyPoint_proposals = keyPoints_transform_inv(anchors, keyPoint_deltas)
                # keyPoint_proposals = keyPoint_proposals[keep, :]
                # keyPoint_proposals = keyPoint_proposals[order, :]
                # keyPoint_proposals = keyPoint_proposals[nms_keep, :]

                # method 2: apply the same keep/order/nms_keep filtering to the anchors
                # and keypoint deltas so they stay aligned with the surviving proposals,
                # then decode
                anchors_t = anchors[keep, :]
                anchors_t = anchors_t[order, :]
                anchors_t = anchors_t[nms_keep, :]
                keyPoint_deltas_t = keyPoint_deltas[keep, :]
                keyPoint_deltas_t = keyPoint_deltas_t[order, :]
                keyPoint_deltas_t = keyPoint_deltas_t[nms_keep, :]
                keyPoint_proposals = keyPoints_transform_inv(anchors_t, keyPoint_deltas_t)

                blob = np.hstack((batch_inds, keyPoint_proposals.astype(np.float32, copy=False)))
                # print blob.shape
                top[1].reshape(*(blob.shape))
                top[1].data[...] = blob

                # [Optional] output scores blob
                if len(top) > 2:
                    top[2].reshape(*(scores.shape))
                    top[2].data[...] = scores
                # timer.toc()
                # print ('proposal took {:.3f}s').format(timer.total_time)
            else:
                # [Optional] output scores blob
                if len(top) > 1:
                    top[1].reshape(*(scores.shape))
                    top[1].data[...] = scores

        if cfg.PYRAMID_MORE:
            PYRAMID_NUM = len(cfg.PYRAMID_MORE_ANCHORS)
            for i in range(1, PYRAMID_NUM):
                # the first set of _num_anchors channels are bg probs
                # the second set are the fg probs, which we want
                scores = bottom[2+2*i-1].data[:, self._num_anchors_from_extends[i-1]:, :, :]
                bbox_deltas = bottom[2+2*i].data

                # 1. Generate proposals from bbox deltas and shifted anchors
                height, width = scores.shape[-2:]

                if DEBUG:
                    print 'score map size: {}'.format(scores.shape)

                # Enumerate all shifts
                shift_x = np.arange(0, width) * self._feat_stride_from_extends[i-1]
                shift_y = np.arange(0, height) * self._feat_stride_from_extends[i-1]
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                    shift_x.ravel(), shift_y.ravel())).transpose()

                # Enumerate all shifted anchors:
                #
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = self._num_anchors_from_extends[i-1]
                K = shifts.shape[0]
                anchors = self._anchors_from_extends[i-1].reshape((1, A, 4)) + \
                          shifts.reshape((1, K, 4)).transpose((1, 0, 2))
                anchors = anchors.reshape((K * A, 4))

                # Transpose and reshape predicted bbox transformations to get them
                # into the same order as the anchors:
                #
                # bbox deltas will be (1, 4 * A, H, W) format
                # transpose to (1, H, W, 4 * A)
                # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
                # in slowest to fastest order
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

                # Same story for the scores:
                #
                # scores are (1, A, H, W) format
                # transpose to (1, H, W, A)
                # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                # Convert anchors into proposals via bbox transformations
                proposals = bbox_transform_inv(anchors, bbox_deltas)

                # 2. clip predicted boxes to image
                proposals = clip_boxes(proposals, im_info[:2])

                # 3. remove predicted boxes with either height or width < threshold
                # (NOTE: convert min_size to input image scale stored in im_info[2])
                keep = _filter_boxes(proposals, min_size * im_info[2])
                proposals = proposals[keep, :]
                scores = scores[keep]

                # 4. sort all (proposal, score) pairs by score from highest to lowest
                # 5. take top pre_nms_topN (e.g. 6000)
                order = scores.ravel().argsort()[::-1]
                if pre_nms_topN > 0:
                    order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                scores = scores[order]

                # 6. apply nms (e.g. threshold = 0.7)
                # 7. take after_nms_topN (e.g. 300)
                # 8. return the top proposals (-> RoIs top)
                keep = nms(np.hstack((proposals, scores)), nms_thresh)
                if post_nms_topN > 0:
                    keep = keep[:post_nms_topN]
                proposals = proposals[keep, :]
                scores = scores[keep]

                # Output rois blob
                # Our RPN implementation only supports a single input image, so all
                # batch inds are 0
                batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
                blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
                # print blob.shape
                top[i].reshape(*(blob.shape))
                top[i].data[...] = blob
Example #42
    def forward(self, bottom, top):

        #conv5_3 = np.copy(bottom[0].data)
        assert(bottom[0].data.shape[0] == 1)
        box_deltas = np.copy(bottom[1].data)
        scores =  np.copy(bottom[2].data)
        gt_mask_fg =  np.copy(bottom[3].data)
        #print(np.mean(gt_mask_fg))
        onlyface = np.copy(bottom[0].data)
        rois = np.copy(bottom[4].data)
        im_info = np.copy(bottom[5].data)

        boxes = rois[:, 1:5] 
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        # boxes = clip_boxes(pred_boxes, gt_mask_fg[0,0,:,:].shape[::-1])
        boxes = clip_boxes(pred_boxes, (int(im_info[0][0]),int(im_info[0][1])))


        if np.all(np.unique(gt_mask_fg) == 1):
            ## images other than occluded ones come with all-ones gt masks;
            ## in that case zero out both outputs
            onlyface = np.zeros(onlyface.shape)
            gt_mask_fg = np.zeros(gt_mask_fg.shape)
            #print(np.sum(gt_mask_fg))
        else:
            #print('nonzero input !!!')
            CONF_THRESH = 0.6
            NMS_THRESH = 0.25
            zoom = 16
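            # zoom = 16 corresponds to the effective stride of conv5_3 in VGG16, so
            # dividing image-space coordinates by it maps boxes onto the feature map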

            #find face areas
            cls_ind = 1
            cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes,
                    cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]

            keep = np.where(dets[:, 4] > CONF_THRESH)
            dets = dets[keep]  # shape (n, 5): n predicted boxes, each row is [x1, y1, x2, y2, score]
            #enlarge boxes 
            # dets[:,:4] *= 1.1 
        #    print(dets)
        #    print(dets.shape)
        #    print(bottom[3].data.shape)
        #    print(bottom[0].data.shape)
            #generate a mask for gt mask
            mask4gt = np.zeros(bottom[3].data.shape)
            for each in dets:
                mask4gt[:, :, int(each[1]):int(each[3])+1, int(each[0]):int(each[2])+1] = 1  # cast to int: dets holds float coords

      #      gt_mask_fg *= mask4gt

            # map to conv5_3
            dets[:,:4] //= zoom 

            #generate a mask for conv5_3
            mask4conv = np.zeros(bottom[0].data.shape)
            for each in dets:
                mask4conv[:, :, int(each[1]):int(each[3])+1, int(each[0]):int(each[2])+1] = 1  # cast to int: dets holds float coords

     #       onlyface *= mask4conv
            self.onlyface_mask = mask4conv

#        print(np.sum(onlyface))

        top_ind = self._name_to_top_map['onlyface']
        top[top_ind].reshape(*(onlyface.shape))
        top[top_ind].data[...] = onlyface.astype(np.float32, copy=False)

        top_ind = self._name_to_top_map['gt_mask_fg']
        top[top_ind].reshape(*(gt_mask_fg.shape))
        top[top_ind].data[...] = gt_mask_fg.astype(np.float32, copy=False)
Example #43
    def forward(self, bottom, top):

        #conv5_3 = np.copy(bottom[0].data)
        assert(bottom[0].data.shape[0] == 1)
        box_deltas = np.copy(bottom[1].data)
        scores =  np.copy(bottom[2].data)
        onlyface = np.copy(bottom[0].data)
        rois = np.copy(bottom[3].data)
        im_info = np.copy(bottom[4].data)
        #print('layer rois: ',rois)
        boxes = rois[:, 1:5] 
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        # boxes = clip_boxes(pred_boxes, gt_mask_fg[0,0,:,:].shape[::-1])
        boxes = clip_boxes(pred_boxes, (int(im_info[0][0]),int(im_info[0][1])))
        #print('im_info',(int(im_info[0][0]),int(im_info[0][1]),int(im_info[0][2])))


        CONF_THRESH = 0.65
        NMS_THRESH = 0.15
        zoom = 16


       # print('layerbox:', boxes)
        #find face areas
        cls_ind = 1
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]

        keep = np.where(dets[:, 4] > CONF_THRESH)
        dets = dets[keep]  # shape (n, 5): n predicted boxes, each row is [x1, y1, x2, y2, score]
        #enlarge boxes 
        #print('dddets: ',dets)
    #    dets[:,:4] *= 1 
    #    print(dets)
    #    print(dets.shape)
    #    print(bottom[3].data.shape)
    #    print(bottom[0].data.shape)
        #generate a mask for gt mask
        # mask4gt = np.zeros(bottom[3].data.shape)
        # for each in dets:
        #     mask4gt[:,:,each[0]:each[2]+1,each[1]:each[3]+1] = 1

        # gt_mask_fg *= mask4gt
        
     
        # map to conv5_3
        dets[:,:4] //= zoom 
        #print('conv53:', bottom[0].data.shape)
        #print('premask: ',dets.shape)
        #generate a mask for conv5_3
        mask4conv = np.zeros(bottom[0].data.shape)
        for each in dets:
            mask4conv[:, :, int(each[1]):int(each[3])+1, int(each[0]):int(each[2])+1] = 1  # cast to int: dets holds float coords

#        pickle.dump(mask4conv, open("vis.txt", "w"))
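        # the mask zeroes conv5_3 activations outside the detected face boxes; it is
        # also cached on self.onlyface_mask, presumably for reuse in backward()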
        onlyface *= mask4conv
        self.onlyface_mask = mask4conv

#        print(np.sum(onlyface))

        top_ind = self._name_to_top_map['onlyface']
        top[top_ind].reshape(*(onlyface.shape))
        top[top_ind].data[...] = onlyface.astype(np.float32, copy=False)
Example #44
File: test.py  Project: attendfov/FPN-1
def im_detect(net, im, boxes=None, num_classes=21):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        num_classes (int): number of object classes, including background

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)
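        # each im_info row is [blob height, blob width, image-to-blob scale factor]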

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)



    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # note: un-scaling back to raw image space is deferred to the return statement
        boxes = rois[:, 1:5]
        # drop RoIs whose coordinates are all zero
        index = np.where(np.sum(boxes, axis=1) != 0)[0]
        boxes = boxes[index, :]
    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']
        scores = scores[index]

      #  print scores[0:10]
    
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
    
        box_deltas = box_deltas[index,:]
     

        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            means = np.tile(
                    np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)).ravel()
            stds = np.tile(
                    np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)).ravel()
            # un-normalize targets using the precomputed means and stds
            box_deltas = box_deltas * stds + means
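            # this is the inverse of the training-time normalization:
            # delta_raw = delta_norm * std + mean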
        

      #  print boxes.shape,box_deltas.shape
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        data_shape = blobs['data'].shape
        s = (data_shape[2], data_shape[3], data_shape[1])
 
        pred_boxes = clip_boxes(pred_boxes, s)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    vis = False
    if vis:
        vis_rois_detection(blobs['data'].astype(np.float32, copy=False),pred_boxes/ im_scales[0])
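    # dividing by im_scales[0] maps the predicted boxes from the scaled network input
    # back to the original image's coordinate frame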
  

    return scores, pred_boxes/ im_scales[0]
Example #45
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        cfg_key = 'TRAIN' if self.phase == 0 else 'TEST'  # self.phase: 0 = TRAIN, 1 = TEST
        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size      = cfg[cfg_key].RPN_MIN_SIZE

        self._num_anchors = bottom[0].shape[0] / 2
        # channel 0 holds the bg probs; the remaining channels hold the fg probs, which we want
        scores = bottom[1].data[:, 1:, :, :]
        sio.savemat('scores',{'scores':scores})
        bbox_deltas = bottom[2].data
        im_info = bottom[3].data[0, :]
        anchors = bottom[0].data
        # Reshape predicted bbox transformations to get them into the same
        # order as the anchors. Unlike the standard proposal layer, the deltas
        # here do not arrive in (1, 4 * A, H, W) layout: the leading and
        # trailing singleton axes are dropped and the result is reshaped
        # to (-1, 4).
        bbox_deltas = bbox_deltas[0, :, :, 0]
        bbox_deltas = bbox_deltas.reshape((-1, 4))
        #bbox_deltas = bbox_deltas.transpose(1, 0)

        # Same story for the scores: flatten to a single column, one score per
        # anchor (note that, unlike the standard proposal layer, no transpose
        # is applied here)
        scores = scores.reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores