Esempio n. 1
0
 def __init__(self, feat_stride, scales, ratios, output_score,
              rpn_pre_nms_top_n, rpn_post_nms_top_n, nms_threshold,
              rpn_min_size):
     """Set up the RPN proposal operator.

     ``scales`` and ``ratios`` arrive as bracketed comma-separated
     strings (e.g. ``'(8,16,32)'``); the surrounding brackets are
     stripped before parsing them into float arrays.
     """
     super(ProposalOperator, self).__init__()

     def _parse(spec):
         # drop the leading/trailing bracket, then split on commas
         return np.fromstring(spec[1:-1], dtype=float, sep=',')

     self._feat_stride = feat_stride
     self._scales = _parse(scales)
     self._ratios = _parse(ratios)
     # reference anchors at the base stride; one row per anchor shape
     self._anchors = generate_anchors(
         base_size=feat_stride, scales=self._scales, ratios=self._ratios)
     self._num_anchors = self._anchors.shape[0]
     # proposal selection / NMS configuration
     self._output_score = output_score
     self._rpn_pre_nms_top_n = rpn_pre_nms_top_n
     self._rpn_post_nms_top_n = rpn_post_nms_top_n
     self._nms_thresh = nms_threshold
     self._rpn_min_size = rpn_min_size
    def __init__(self, feat_stride, scales, ratios, output_score,
                 rpn_pre_nms_top_n, rpn_post_nms_top_n, threshold, rpn_min_size):
        super(ProposalOperator, self).__init__()
        self._feat_stride = feat_stride
        self._scales = np.fromstring(scales[1:-1], dtype=float, sep=',')
        self._ratios = np.fromstring(ratios[1:-1], dtype=float, sep=',')
        self._anchors = generate_anchors(base_size=self._feat_stride, scales=self._scales, ratios=self._ratios)
        self._num_anchors = self._anchors.shape[0]
        self._output_score = output_score
        self._rpn_pre_nms_top_n = rpn_pre_nms_top_n
        self._rpn_post_nms_top_n = rpn_post_nms_top_n
        self._threshold = threshold
        self._rpn_min_size = rpn_min_size

        if DEBUG:
            print 'feat_stride: {}'.format(self._feat_stride)
            print 'anchors:'
            print self._anchors
Esempio n. 3
0
    def __init__(self, feat_stride, scales, ratios, output_score,
                 rpn_pre_nms_top_n, rpn_post_nms_top_n, threshold,
                 rpn_min_size):
        super(ProposalQuadrangleOperator, self).__init__()
        self._feat_stride = feat_stride
        self._scales = np.fromstring(scales[1:-1], dtype=float, sep=',')
        self._ratios = np.fromstring(ratios[1:-1], dtype=float, sep=',')
        self._anchors = generate_anchors(base_size=self._feat_stride,
                                         scales=self._scales,
                                         ratios=self._ratios)
        self._num_anchors = self._anchors.shape[0]
        self._output_score = output_score
        self._rpn_pre_nms_top_n = rpn_pre_nms_top_n
        self._rpn_post_nms_top_n = rpn_post_nms_top_n
        self._threshold = threshold
        self._rpn_min_size = rpn_min_size

        if DEBUG:
            print 'feat_stride: {}'.format(self._feat_stride)
            print 'anchors:'
            print self._anchors
Esempio n. 4
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """Generate RoI proposals from pyramid RPN outputs.

        Debug-instrumented variant: carries timing probes, DEBUG prints,
        a hard-coded layer-count override and a channels.txt dump.
        in_data holds per-stride cls probabilities and bbox deltas (the
        layout is selected by LAYER_NUM below) with im_info last; the
        (batch_ind, x1, y1, x2, y2) RoI blob is written to out_data[0]
        and, if self._output_score, scores to out_data[1].
        """
        before_pyramid_proposal = datetime.now()
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)
        LAYER_NUM = len(in_data) / 2
        # NOTE(review): the value computed above is immediately clobbered by
        # this hard-coded override, so only the LAYER_NUM == 11 branch (a
        # single cls/bbox pair at stride64) is ever taken. Looks like a
        # debugging leftover -- confirm before relying on any other branch.
        LAYER_NUM = 11
        if LAYER_NUM == 7:
            cls_prob_dict = {
                'stride64': in_data[6],
                'stride32': in_data[5],
                'stride16': in_data[4],
                'stride8': in_data[3],
                'stride4': in_data[2],
                'stride2': in_data[1],
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[13],
                'stride32': in_data[12],
                'stride16': in_data[11],
                'stride8': in_data[10],
                'stride4': in_data[9],
                'stride2': in_data[8],
                'stride1': in_data[7],
            }

        elif LAYER_NUM == 6:
            cls_prob_dict = {
                'stride64': in_data[5],
                'stride32': in_data[4],
                'stride16': in_data[3],
                'stride8': in_data[2],
                'stride4': in_data[1],
                'stride2': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[11],
                'stride32': in_data[10],
                'stride16': in_data[9],
                'stride8': in_data[8],
                'stride4': in_data[7],
                'stride2': in_data[6],
            }

        elif LAYER_NUM == 5:
            cls_prob_dict = {
                'stride64': in_data[4],
                'stride32': in_data[3],
                'stride16': in_data[2],
                'stride8': in_data[1],
                'stride4': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[9],
                'stride32': in_data[8],
                'stride16': in_data[7],
                'stride8': in_data[6],
                'stride4': in_data[5],
            }
        elif LAYER_NUM == 2:
            cls_prob_dict = {
                'stride64': in_data[4],
                'stride32': in_data[3],
            }
            bbox_pred_dict = {
                'stride64': in_data[9],
                'stride32': in_data[8],
            }
        elif LAYER_NUM == 11:
            cls_prob_dict = {
                'stride64': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[1],
            }
        elif LAYER_NUM == 1:
            cls_prob_dict = {
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride1': in_data[1],
            }
        elif LAYER_NUM == 3:
            cls_prob_dict = {
                'stride64': in_data[2],
                'stride32': in_data[1],
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[5],
                'stride32': in_data[4],
                'stride1': in_data[3],
            }
        '''
        cls_prob_dict = {
            'stride8': in_data[3],
            'stride4': in_data[2],
            'stride2': in_data[1],
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride8': in_data[7],
            'stride4': in_data[6],
            'stride2': in_data[5],
            'stride1': in_data[4],
        }
        '''
        '''
        cls_prob_dict = {
            'stride2': in_data[1],
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride2': in_data[3],
            'stride1': in_data[2],
        }        
        '''
        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        proposal_list = []
        score_list = []

        # parallel list recording which stride produced each kept score row;
        # used only for the DEBUG breakdowns and the channels.txt dump below
        channel_list = []

        before_feat = datetime.now()

        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride,
                                           scales=self._scales,
                                           ratios=self._ratios)
            #print "cls_prob_dict['stride' + str(s)].shape:"+str(cls_prob_dict['stride' + str(s)].shape)
            # keep only the trailing A channels of the 2A-channel cls output
            # (presumably the foreground probabilities -- verify against the
            # symbol that produced cls_prob)
            scores = cls_prob_dict['stride' +
                                   str(s)].asnumpy()[:,
                                                     self._num_anchors:, :, :]

            if DEBUG:
                scores1 = cls_prob_dict['stride' + str(s)].asnumpy()
                print "scores.shape:" + str(scores.shape)
                print "scores1.shape:" + str(scores1.shape)

            #print "scores.shape:"+str(scores.shape)
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            #print "bbox_deltas.shape:"+str(bbox_deltas.shape)
            im_info = in_data[-1].asnumpy()[0, :]
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts

            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            before_enume = datetime.now()
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))
            after_enume = datetime.now()
            #print "enume time:"+str((after_enume-before_enume).seconds)
            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order

            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))

            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
            if DEBUG:
                print "scores[:100]:" + str(scores[:50])
            # tag every flattened score row with the stride it came from
            channels = np.ones((scores.shape)) * stride

            # Convert anchors into proposals via bbox transformations
            before_pred = datetime.now()
            proposals = bbox_pred(anchors, bbox_deltas)
            after_pred = datetime.now()
            #print "pred_time:"
            #print (after_pred-before_pred).seconds
            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])
            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            if DEBUG:
                print str(min_size)
                print str(im_info[2])
            keep = self._filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            if DEBUG:
                print "proposals3:" + str(proposals[0:10])
            scores = scores[keep]

            channels = channels[keep]

            proposal_list.append(proposals)
            score_list.append(scores)
            channel_list.append(channels)
        after_feat = datetime.now()
        #print "feat time:"
        #print (after_feat-before_feat).seconds

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)
        channels = np.vstack(channel_list)
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        before_sort = datetime.now()
        order = scores.ravel().argsort()[::-1]
        after_sort = datetime.now()
        #print "sort time:"
        #print (after_sort-before_sort).seconds
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        channels = channels[order]
        if DEBUG:
            print '-------1-------'
            print channels.shape
            for s in self._feat_stride:
                print "stride:" + str(s)
                print len(np.where(channels == float(s))[0])
            print "proposals:" + str(proposals[0:20])
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)

        keep = nms(det)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        # NOTE(review): npr.choice raises ValueError when `keep` is empty
        # (e.g. every proposal filtered out on a degenerate input), so this
        # padding path assumes NMS always returns at least one box.
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]
        channels = channels[keep]
        if DEBUG:
            print '-------2-------'
            print channels.shape
            for s in self._feat_stride:
                print "stride:" + str(s)
                print len(np.where(channels == float(s))[0])
            print "proposals:" + str(proposals[0:20])
            print "scores:" + str(scores[0:20])
        # debugging dump: overwrites channels.txt in the CWD on every call
        f_chan = open('channels.txt', 'w')
        for ii in range(channels.shape[0]):
            f_chan.write(str(channels[ii][0]) + ' ')
        f_chan.close()

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        # if is_train:
        self.assign(out_data[0], req[0], blob)
        #print "out_data[0].shape"+str(out_data[0].shape)
        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
        after_pyramid_proposal = datetime.now()
Esempio n. 5
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """Generate RoI proposals from five-level FPN RPN outputs.

        Expected in_data layout (taken from the dicts below; the symbol
        wiring lives outside this file): in_data[0..4] are per-stride
        class probabilities, in_data[5..9] the matching bbox deltas, and
        in_data[-1] is im_info = (height, width, scale). Writes a
        (post_nms_topN, 5) blob of (batch_ind, x1, y1, x2, y2) rows to
        out_data[0] and, if self._output_score, scores to out_data[1].

        Fix: guards against the degenerate case where every proposal is
        filtered out, which previously crashed both nms() (empty det)
        and npr.choice() (empty population raises ValueError).
        """
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        cls_prob_dict = {
            'stride64': in_data[4],
            'stride32': in_data[3],
            'stride16': in_data[2],
            'stride8': in_data[1],
            'stride4': in_data[0],
        }
        bbox_pred_dict = {
            'stride64': in_data[9],
            'stride32': in_data[8],
            'stride16': in_data[7],
            'stride8': in_data[6],
            'stride4': in_data[5],
        }

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        proposal_list = []
        score_list = []
        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride,
                                           scales=self._scales,
                                           ratios=self._ratios)
            # keep only the trailing A channels of the 2A-channel cls output
            # (presumably the foreground probabilities -- verify against the
            # symbol that produced cls_prob)
            scores = cls_prob_dict['stride' +
                                   str(s)].asnumpy()[:,
                                                     self._num_anchors:, :, :]
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            im_info = in_data[-1].asnumpy()[0, :]
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            # add A anchors (1, A, 4) to K cell shifts (K, 1, 4) to get
            # (K, A, 4), then flatten to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # bbox deltas: (1, 4*A, H, W) -> (1, H, W, 4*A) -> (H*W*A, 4),
            # rows ordered (h, w, a) slowest-to-fastest to match the anchors
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # scores: (1, A, H, W) -> (1, H, W, A) -> (H*W*A, 1), same order
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_pred(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = self._filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # Degenerate case: every proposal was filtered out (e.g. a tiny
        # image relative to min_size). nms() and the npr.choice padding
        # below both fail on an empty set, so fall back to one dummy box.
        if proposals.shape[0] == 0:
            proposals = np.zeros((1, 4), dtype=np.float32)
            scores = np.zeros((1, 1), dtype=np.float32)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad by resampling kept indices so the output size stays fixed
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        # if is_train:
        self.assign(out_data[0], req[0], blob)
        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
    def forward(self, is_train, req, in_data, out_data, aux):
        """Generate RoI proposals from five-level FPN RPN outputs.

        in_data[0..4] hold per-stride class probabilities and
        in_data[5..9] the matching bbox deltas (see the dicts below);
        in_data[-1] is im_info = (height, width, scale). Writes the
        (batch_ind, x1, y1, x2, y2) RoI blob to out_data[0] and, if
        self._output_score is set, the scores to out_data[1].

        Fix: guards against the degenerate case where the min-size
        filter removes every proposal; previously nms() then received an
        empty det and npr.choice() raised ValueError on the empty
        `keep` population.
        """
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError("Sorry, multiple images each device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        cls_prob_dict = {
            'stride64': in_data[4],
            'stride32': in_data[3],
            'stride16': in_data[2],
            'stride8': in_data[1],
            'stride4': in_data[0],
        }
        bbox_pred_dict = {
            'stride64': in_data[9],
            'stride32': in_data[8],
            'stride16': in_data[7],
            'stride8': in_data[6],
            'stride4': in_data[5],
        }

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        proposal_list = []
        score_list = []
        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios)
            # trailing A channels of the 2A-channel cls output (presumably
            # the foreground probabilities -- verify against the symbol)
            scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :]
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            im_info = in_data[-1].asnumpy()[0, :]
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors: add A anchors (1, A, 4) to
            # K cell shifts (K, 1, 4) -> (K, A, 4) -> flatten to (K*A, 4)
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # bbox deltas: (1, 4*A, H, W) -> (1, H, W, 4*A) -> (H*W*A, 4),
            # rows ordered (h, w, a) slowest-to-fastest to match the anchors
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # scores: (1, A, H, W) -> (1, H, W, A) -> (H*W*A, 1), same order
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_pred(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = self._filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # Degenerate case: no proposal survived the size filter; insert a
        # single dummy box so nms() and the padding below stay well-defined.
        if proposals.shape[0] == 0:
            proposals = np.zeros((1, 4), dtype=np.float32)
            scores = np.zeros((1, 1), dtype=np.float32)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad by resampling kept indices so the output size stays fixed
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        # if is_train:
        self.assign(out_data[0], req[0], blob)
        if self._output_score:
            self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
Esempio n. 7
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """Decode multi-stride RPN outputs into the top-K scored proposals.

        in_data[0..3] hold per-stride bbox deltas and in_data[4..7] the
        matching class probabilities (see the dicts below); in_data[8]
        is im_info = (height, width, scale) and in_data[9] the raw image
        (only used by the optional visualization hook). The top
        self._keep_num proposals ranked by their best per-class score
        are written to out_data[0], their per-class scores to
        out_data[1].

        Cleanups: removed the unused locals (`cls_pro`, `proposal_list`,
        `score_list`) and stopped reusing the loop variable `s` for
        three unrelated values.
        """
        bbox_pred_dict = {
            'stride128': in_data[3],
            'stride64': in_data[2],
            'stride32': in_data[1],
            'stride16': in_data[0],
        }
        cls_prob_dict = {
            'stride128': in_data[7],
            'stride64': in_data[6],
            'stride32': in_data[5],
            'stride16': in_data[4],
        }
        im_info = in_data[8].asnumpy()[0, :]
        im = in_data[9].asnumpy()

        destore_rois_list = []
        destore_cls_list = []

        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride,
                                           scales=self._scales,
                                           ratios=self._ratios)
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            # Feature-map size comes straight from the delta blob; anchors
            # are shifted over every (h, w) cell of that map.
            height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors: add A anchors (1, A, 4) to
            # K cell shifts (K, 1, 4) -> (K, A, 4) -> flatten to (K*A, 4)
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # bbox deltas: (1, 4*A, H, W) -> (1, H, W, 4*A) -> (H*W*A, 4),
            # rows ordered (h, w, a) slowest-to-fastest to match the anchors
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Convert anchors into proposals via bbox transformations, then
            # clip the predicted boxes to the image boundary
            proposals = bbox_pred(anchors, bbox_deltas)
            proposals = clip_boxes(proposals, im_info[:2])

            # Scores are (1, num_classes*A, H, W); slice each class's A
            # channels and flatten them the same way as the deltas so row i
            # of `scores` lines up with row i of `proposals`.
            scores = cls_prob_dict['stride' + str(s)].asnumpy()
            per_class = []
            start = 0
            for i in range(self._num_classes):
                cls_block = scores[:, start:start + self._num_anchors, :, :]
                start = start + self._num_anchors
                cls_block = self._clip_pad(cls_block, (height, width))
                cls_block = cls_block.transpose((0, 2, 3, 1)).reshape((-1, 1))
                per_class.append(cls_block)
            scores = np.concatenate(per_class, axis=1)

            destore_rois_list.append(proposals)
            destore_cls_list.append(scores)

        destore_rois = np.concatenate(destore_rois_list, axis=0)
        destore_cls = np.concatenate(destore_cls_list, axis=0)

        # Rank proposals by their best per-class score; keep the top K.
        best_scores = np.max(destore_cls, axis=1)
        order = best_scores.ravel().argsort()[::-1]
        order = order[:self._keep_num]
        destore_cls = destore_cls[order, :]
        destore_rois = destore_rois[order, :]

        # debug-only visualization hook; disabled by default
        vis = False
        if vis:
            vis_all_detection(im, destore_rois[:, :])

        self.assign(out_data[0], req[0], mx.nd.array(destore_rois))

        self.assign(out_data[1], req[1], mx.nd.array(destore_cls))
Esempio n. 8
0
def assign_pyramid_anchor(feat_shapes, gt_boxes, im_info, cfg, feat_strides=(4, 8, 16, 16, 16),
                          scales=(8, 8, 8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0, balance_scale_bg=False):
    """Assign RPN labels and bbox-regression targets to FPN pyramid anchors.

    For every pyramid level an anchor grid is generated, anchors falling
    outside the image (plus ``allowed_border`` slack) are discarded, and the
    surviving anchors of all levels are labelled jointly against the
    ground-truth boxes (1 = foreground, 0 = background, -1 = ignored), then
    subsampled to the configured RPN batch composition.

    Args:
        feat_shapes: per-level feature shapes; feat_shapes[i][0][-2:] must
            yield (height, width) of level i.
        gt_boxes: (num_gt, >=4) ground-truth boxes [x1, y1, x2, y2, ...].
        im_info: array whose first row holds (height, width, ...).
        cfg: config providing the TRAIN.RPN_* overlap/sampling settings.
        feat_strides: stride of each pyramid level.
        scales: one anchor scale per pyramid level.
        ratios: anchor aspect ratios shared by all levels.
        allowed_border: pixel slack for the inside-image test.
        balance_scale_bg: if True, subsample background anchors evenly per
            pyramid level instead of globally.

    Returns:
        dict with 'label' of shape (1, sum_i A*Hi*Wi) and 'bbox_target' /
        'bbox_weight' each of shape (1, 4A, sum_i Hi*Wi).
    """
    def _unmap(data, count, inds, fill=0):
        # Scatter `data` (defined over the kept anchors `inds`) back onto the
        # full anchor set of size `count`; everything else is set to `fill`.
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # One scale per level: level `feat_id` only uses scales[feat_id].
        base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios, scales=[scales[feat_id]])

        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]
        # Shift the base anchors to every feature-map cell (image coordinates).
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image (with `allowed_border` slack)
        inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                               (all_anchors[:, 1] >= -allowed_border) &
                               (all_anchors[:, 2] < im_info[1] + allowed_border) &
                               (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 = positive, 0 = negative, -1 = don't care (default)
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlaps between all kept anchors and gt boxes: (#anchors, #gt).
        # Use builtin `float` (the `np.float` alias was removed in NumPy 1.24).
        overlaps = bbox_overlaps(fpn_anchors.astype(float), gt_boxes.astype(float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so positives below can overwrite them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg: for each gt, the anchor(s) with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg: every anchor above the positive-overlap threshold
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so negatives can overwrite positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    else:
        fpn_labels[:] = 0

    # subsample positive labels if there are too many
    num_fg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if there are too many
    num_bg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # prefix sums: level i's anchors occupy [fid[i], fid[i+1]) in fpn_* arrays
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # spread the background quota evenly across pyramid levels;
        # `//` keeps integer semantics under Python 3 as well (plain `/`
        # would make num_bg_scale a float and break npr.choice's `size`)
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) & (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale, size=(len(bg_ind_scale) - num_bg_scale), replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets/weights: non-zero only for foreground anchors
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fpn_labels >= 1, :] = bbox_transform(fpn_anchors[fpn_labels >= 1, :], gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])

    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    # un-map each level's slice back onto its full anchor grid and reshape
    # into the (1, C, H*W) layout the RPN heads expect
    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        labels = _unmap(fpn_labels[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id + 1]], total_anchors, fpn_inds_inside[feat_id], fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id + 1]], total_anchors, fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id + 1]], total_anchors, fpn_inds_inside[feat_id], fill=0)

        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    # label: (1, A*h1*w1 + A*h2*w2 + ...); bbox_*: (1, 4A, h1*w1 + h2*w2 + ...)
    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
Esempio n. 9
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """Generate object proposals from multi-level (FPN) RPN outputs.

        in_data layout:
            [0] RPN scores, flattened over all levels   (1, sum_i 2*A*Hi*Wi)
            [1] RPN bbox deltas, flattened over levels  (1, sum_i 4*A*Hi*Wi)
            [2] im_info row: (height, width, scale)
            [3..7] P2..P6 feature maps (only their shapes are read)

        Per level: build the anchor grid, apply predicted deltas, clip to the
        image and drop boxes smaller than min_size. Proposals of all levels
        are merged, sorted by score, reduced by GPU NMS and padded/truncated
        to exactly post_nms_topN rows.

        out_data[0] gets (post_nms_topN, 5) rois (batch_idx, x1, y1, x2, y2);
        out_data[1] (if output_score) gets the matching scores.
        """
        nms = gpu_nms_wrapper(self._threshold, 0)
        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        # NOTE(review): min_size is indexed per level below (min_size[i]) —
        # assumes self._rpn_min_size is a per-level sequence; confirm at caller.
        min_size = self._rpn_min_size

        # per level the first A score channels are background probabilities;
        # only the second half (foreground) is kept further down
        scores_list = in_data[0].asnumpy()  # (1, sum_i 2*A*Hi*Wi)
        bbox_deltas_list = in_data[1].asnumpy()  # (1, sum_i 4*A*Hi*Wi)
        im_info = in_data[2].asnumpy()[0, :]
        p2_shape = in_data[3].asnumpy().shape
        p3_shape = in_data[4].asnumpy().shape
        p4_shape = in_data[5].asnumpy().shape
        p5_shape = in_data[6].asnumpy().shape
        p6_shape = in_data[7].asnumpy().shape
        feat_shape = []
        feat_shape.append(p2_shape)
        feat_shape.append(p3_shape)
        feat_shape.append(p4_shape)
        feat_shape.append(p5_shape)
        feat_shape.append(p6_shape)
        num_feat = len(feat_shape)
        score_index_start = 0
        bbox_index_start = 0
        keep_proposal = []
        keep_scores = []

        for i in range(num_feat):
            feat_stride = int(self._feat_stride[i])  # 4, 8, 16, 32, 64
            anchor = generate_anchors(feat_stride,
                                      scales=self._scales,
                                      ratios=self._ratios)
            num_anchors = anchor.shape[0]  # A

            height = feat_shape[i][2]
            width = feat_shape[i][3]

            # shift the base anchors to every feature-map cell (image coords)
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            A = num_anchors
            K = shifts.shape[0]  # height * width
            anchors = anchor.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))  # (A*height*width, 4)
            # slice this level's scores out of the flat buffer:
            # (1, 2A, h, w), then keep only the foreground half -> (1, A, h, w)
            scores = (scores_list[
                0,
                int(score_index_start):int(score_index_start +
                                           K * A * 2)]).reshape(
                                               (1, int(2 * num_anchors), -1,
                                                int(width)))
            scores = scores[:, num_anchors:, :, :]
            # slice this level's deltas: (1, 4A, h, w)
            bbox_deltas = (bbox_deltas_list[
                0, int(bbox_index_start):int(bbox_index_start +
                                             K * A * 4)]).reshape(
                                                 (1, int(4 * num_anchors), -1,
                                                  int(width)))
            score_index_start += K * A * 2
            bbox_index_start += K * A * 4
            # reorder to match the (h, w, a) anchor ordering:
            # (1, 4A, h, w) -> (1, h, w, 4A) -> (h*w*A, 4)
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
            # same for scores: (1, A, h, w) -> (h*w*A, 1)
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
            # convert anchors into proposals via bbox transformations
            proposals = bbox_pred(anchors, bbox_deltas)
            proposals = clip_boxes(proposals, im_info[:2])
            # drop boxes smaller than min_size (scaled to input resolution)
            keep = self._filter_boxes(proposals, min_size[i] * im_info[2])
            keep_proposal.append(proposals[keep, :])
            keep_scores.append(scores[keep])

        # merge the per-level survivors into one candidate set
        proposals = keep_proposal[0]
        scores = keep_scores[0]
        for i in range(1, num_feat):
            proposals = np.vstack((proposals, keep_proposal[i]))
            scores = np.vstack((scores, keep_scores[i]))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            try:
                # sample (with replacement) from the kept indices to pad;
                # npr.choice raises ValueError when `keep` is empty — catch
                # only that, not a blanket `except:` which hides real bugs
                pad = npr.choice(keep, size=post_nms_topN - len(keep))
            except ValueError:
                # nothing survived NMS: emit dummy 16x16 boxes at the origin
                proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
                proposals[:, 2] = 16
                proposals[:, 3] = 16
                batch_inds = np.zeros((proposals.shape[0], 1),
                                      dtype=np.float32)
                blob = np.hstack(
                    (batch_inds, proposals.astype(np.float32, copy=False)))
                self.assign(out_data[0], req[0], blob)

                if self._output_score:
                    # NOTE(review): `scores` here is not post_nms_topN rows
                    # long, unlike the normal path — confirm out_data[1]
                    # tolerates the shorter array
                    self.assign(out_data[1], req[1],
                                scores.astype(np.float32, copy=False))
                return
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array.
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))