Example #1
    def refine_poses(self,
                     keypoint_thresh=10,
                     score_thresh=0.5,
                     neck_thresh=0.59,
                     margin=0.0):
        W, H = 104.73, 67.74  # field dimensions in meters (a soccer pitch)

        for i, basename in enumerate(tqdm(self.frame_basenames)):
            poses = self.poses[basename]

            # remove poses with too few valid keypoints, a low total score,
            # or a weak neck detection
            keep = []
            for ii in range(len(poses)):
                keypoints = poses[ii]
                valid = (keypoints[:, 2] > 0.).nonzero()[0]
                score = np.sum(keypoints[valid, 2])

                if (len(valid) > keypoint_thresh and score > score_thresh
                        and keypoints[1, 2] > neck_thresh):
                    keep.append(ii)

            poses = [poses[ii] for ii in keep]

            root_part = 1  # index of the neck keypoint
            root_box = []
            for ii in range(len(poses)):
                root_tmp = poses[ii][root_part, :]
                valid_keypoints = (poses[ii][:, 2] > 0).nonzero()
                root_box.append([
                    root_tmp[0] - 10, root_tmp[1] - 10, root_tmp[0] + 10,
                    root_tmp[1] + 10,
                    np.sum(poses[ii][valid_keypoints, 2])
                ])
            root_box = np.array(root_box)

            # Perform Neck NMS
            if len(root_box.shape) == 1:
                root_box = root_box[None, :]
                keep2 = [0]
            else:
                keep2 = nms(root_box.astype(np.float32), 0.1)

            poses = [poses[ii] for ii in keep2 if ii < len(poses)]

            # Remove poses outside of field
            keep3 = []
            cam_mat = self.calib[basename]
            cam = cam_utils.Camera(basename, cam_mat['A'], cam_mat['R'],
                                   cam_mat['T'], self.shape[0], self.shape[1])
            for ii in range(len(poses)):
                kp3 = misc_utils.lift_keypoints_in_3d(cam, poses[ii])
                if ((-W / 2. - margin) <= kp3[1, 0] <= (W / 2. + margin)
                        and (-H / 2. - margin) <= kp3[1, 2] <= (H / 2. + margin)):
                    keep3.append(ii)

            poses = [poses[ii] for ii in keep3]

            self.poses[basename] = poses
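
These snippets all lean on an external nms helper that takes an (N, 5) array of [x1, y1, x2, y2, score] rows plus an IoU threshold and returns the indices of the boxes to keep. As a point of reference, a minimal NumPy sketch of greedy IoU-based NMS matching that assumed interface:

import numpy as np

def nms(dets, thresh):
    """Greedy NMS over [x1, y1, x2, y2, score] rows; returns kept indices."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the kept box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop everything that overlaps the kept box above the threshold
        order = order[1:][iou <= thresh]
    return keep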
Example #2
    def refine_detectron(self, basename, score_thresh=0.9, nms_thresh=0.5, min_height=0.0, min_area=200):

        data = self.detectron[basename]
        boxes, segms, keyps, classes = data['boxes'], data['segms'], data['keyps'], data['classes']

        # keep boxes above the score threshold whose class is 1 (person)
        valid = (boxes[:, 4] > score_thresh) * np.array([j == 1 for j in classes])
        valid = valid.nonzero()[0]
        boxes = boxes[valid, :]
        segms = [segms[i] for i in valid]
        classes = [classes[i] for i in valid]

        cam_mat = self.calib[basename]
        cam = cam_utils.Camera(basename, cam_mat['A'], cam_mat['R'], cam_mat['T'], self.shape[0], self.shape[1])

        keep, __ = misc_utils.putting_objects_in_perspective(cam, boxes, min_height=min_height)
        boxes = boxes[keep, :]
        segms = [segms[i] for i in keep]
        classes = [classes[i] for i in keep]

        valid_nms = nms(boxes.astype(np.float32), nms_thresh)
        boxes = boxes[valid_nms, :]
        segms = [segms[i] for i in valid_nms]
        classes = [classes[i] for i in valid_nms]

        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        valid_area = (areas > min_area).nonzero()[0]
        boxes = boxes[valid_area, :]
        segms = [segms[i] for i in valid_area]
        classes = [classes[i] for i in valid_area]

        # note: keyps are returned unfiltered, unlike boxes/segms/classes
        return boxes, segms, keyps, classes
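
A hedged usage sketch for the method above (the instance name db and the frame basename are assumptions for illustration, not names from this repo):

# Hypothetical call on a dataset object exposing refine_detectron
boxes, segms, keyps, classes = db.refine_detectron(
    'frame_0001', score_thresh=0.9, nms_thresh=0.5, min_area=200)
# boxes is (N, 5): person detections that survived the score, perspective,
# NMS and minimum-area filters, in that order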
Example #3
    def get_ball_from_detectron(self, thresh=0.0, nms_thresh=0.5):
        for i, basename in enumerate(tqdm(self.frame_basenames)):
            data = self.detectron[basename]
            boxes, segms, keyps, classes = data['boxes'], data['segms'], data['keyps'], data['classes']
            # class 33 is 'sports ball' in Detectron's contiguous COCO class ids
            valid = (boxes[:, 4] > thresh) * np.array([j == 33 for j in classes])
            boxes = boxes[valid, :]

            valid_nms = nms(boxes.astype(np.float32), nms_thresh)
            boxes = boxes[valid_nms, :]

            self.ball[basename] = boxes
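
After get_ball_from_detectron runs, each frame maps to an (N, 5) array of surviving ball candidates; picking the single best one is then a one-liner (the frame name is assumed for illustration):

ball_boxes = self.ball['frame_0001']  # (N, 5) candidates for that frame
if ball_boxes.shape[0] > 0:
    best = ball_boxes[ball_boxes[:, 4].argmax()]  # [x1, y1, x2, y2, score]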
Example #4
    def refine_poses(self,
                     keypoint_thresh=10,
                     score_thresh=0.5,
                     neck_thresh=0.59):

        for i, basename in enumerate(tqdm(self.frame_basenames)):
            poses = self.poses[basename]

            # remove poses with too few valid keypoints, a low total score,
            # or a weak neck detection
            keep = []
            for ii in range(len(poses)):
                keypoints = poses[ii]
                valid = (keypoints[:, 2] > 0.).nonzero()[0]
                score = np.sum(keypoints[valid, 2])

                if (len(valid) > keypoint_thresh and score > score_thresh
                        and keypoints[1, 2] > neck_thresh):
                    keep.append(ii)

            poses = [poses[ii] for ii in keep]

            root_part = 1  # index of the neck keypoint
            root_box = []
            for ii in range(len(poses)):
                root_tmp = poses[ii][root_part, :]
                valid_keypoints = (poses[ii][:, 2] > 0).nonzero()
                root_box.append([
                    root_tmp[0] - 10, root_tmp[1] - 10, root_tmp[0] + 10,
                    root_tmp[1] + 10,
                    np.sum(poses[ii][valid_keypoints, 2])
                ])
            root_box = np.array(root_box)

            # Perform Neck NMS

            if len(root_box.shape) == 1:
                root_box = root_box[None, :]
                keep2 = [0]
            else:
                keep2 = nms(root_box.astype(np.float32), 0.1)

            # guard against the empty-pose case, as in the variant above
            poses = [poses[ii] for ii in keep2 if ii < len(poses)]
            self.poses[basename] = poses
Example #5
    def forward(self, arguments, device=None, outputs_to_retain=None):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # use potentially different number of proposals for training vs evaluation
        if len(outputs_to_retain) == 0:
            # print("EVAL")
            pre_nms_topN = cfg["TEST"].RPN_PRE_NMS_TOP_N
            post_nms_topN = cfg["TEST"].RPN_POST_NMS_TOP_N
            nms_thresh = cfg["TEST"].RPN_NMS_THRESH
            min_size = cfg["TEST"].RPN_MIN_SIZE
        else:
            pre_nms_topN = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
            post_nms_topN = cfg["TRAIN"].RPN_POST_NMS_TOP_N
            nms_thresh = cfg["TRAIN"].RPN_NMS_THRESH
            min_size = cfg["TRAIN"].RPN_MIN_SIZE

        bottom = arguments
        assert bottom[0].shape[0] == 1, \
            'Only single item batches are supported'

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0][:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1]
        im_info = bottom[2][0]

        if DEBUG:
            # im_info = (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
            # e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scaled im_size: ({}, {})'.format(im_info[2], im_info[3]))
            print('original im_size: ({}, {})'.format(im_info[4], im_info[5]))

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print('score map size: {}'.format(scores.shape))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info)

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale. Original size = im_info[4:6], scaled size = im_info[2:4])
        cntk_image_scale = im_info[2] / im_info[4]
        keep = _filter_boxes(proposals, min_size * cntk_image_scale)
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # pad with zeros if too few rois were found
        num_found_proposals = proposals.shape[0]
        if num_found_proposals < post_nms_topN:
            if DEBUG:
                print("Only {} proposals generated in ProposalLayer".format(
                    num_found_proposals))
            proposals_padded = np.zeros(
                ((post_nms_topN, ) + proposals.shape[1:]), dtype=np.float32)
            proposals_padded[:num_found_proposals, :] = proposals
            proposals = proposals_padded

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        # for CNTK: add batch axis to output shape
        proposals.shape = (1, ) + proposals.shape

        return None, proposals
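
The forward pass above calls a _filter_boxes helper that is not shown. In py-faster-rcnn and its ports it is a short width/height test; a sketch consistent with how it is called here:

def _filter_boxes(boxes, min_size):
    """Keep boxes whose width and height are both at least min_size pixels."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep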
Example #6
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0][:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1]
        im_info = bottom[2][0, :]

        #if DEBUG:
        #    print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        #    print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        #if DEBUG:
        #    print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))

        return blob
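
The anchor-enumeration comments in these forward passes describe a broadcast of (1, A, 4) base anchors against K shifts reshaped to (K, 1, 4). A tiny self-contained shape check with toy values makes the trick concrete:

import numpy as np

A, K = 9, 6                                  # toy sizes: 9 anchors, 6 cells
base_anchors = np.zeros((A, 4))              # stand-in for self._anchors
shifts = np.arange(K * 4).reshape(K, 4)      # stand-in for the shift grid

anchors = base_anchors.reshape((1, A, 4)) + \
          shifts.reshape((1, K, 4)).transpose((1, 0, 2))   # (K, A, 4)
anchors = anchors.reshape((K * A, 4))        # K*A shifted anchors
print(anchors.shape)                         # (54, 4)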
Example #7
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]

        pre_nms_topN = self.cf.rpn_pre_nms_top_n
        post_nms_topN = self.cf.rpn_post_nms_top_n
        nms_thresh = self.cf.rpn_nms_thresh

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)

        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # 3. remove predicted boxes with either height or width < threshold
            #    (note: this variant skips the min_size filtering step)
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # 4. sort all (proposal, score) pairs by score from highest to lowest
            # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
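
bbox_transform_inv decodes (dx, dy, dw, dh) deltas back into corner boxes. The classic py-faster-rcnn NumPy version, sketched here in the single-box-per-row form these examples need (the real helper also handles multi-class delta columns):

def bbox_transform_inv(boxes, deltas):
    """Decode (dx, dy, dw, dh) deltas against anchor boxes [x1, y1, x2, y2]."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    # shift the center and rescale the size
    pred_ctr_x = deltas[:, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, 1] * heights + ctr_y
    pred_w = np.exp(deltas[:, 2]) * widths
    pred_h = np.exp(deltas[:, 3]) * heights

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes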
Example #8
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size      = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0][:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1]
        im_info = bottom[2][0, :]

        #if DEBUG:
        #    print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        #    print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        #if DEBUG:
        #    print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

        return blob
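
Its counterpart clip_boxes clamps every corner to the image extent; again a reference sketch in single-box-per-row form, matching the clip_boxes(proposals, im_info[:2]) call above where im_info[:2] is (height, width):

def clip_boxes(boxes, im_shape):
    """Clamp [x1, y1, x2, y2] rows to a (height, width) image."""
    boxes[:, 0] = np.maximum(np.minimum(boxes[:, 0], im_shape[1] - 1), 0)  # x1
    boxes[:, 1] = np.maximum(np.minimum(boxes[:, 1], im_shape[0] - 1), 0)  # y1
    boxes[:, 2] = np.maximum(np.minimum(boxes[:, 2], im_shape[1] - 1), 0)  # x2
    boxes[:, 3] = np.maximum(np.minimum(boxes[:, 3], im_shape[0] - 1), 0)  # y2
    return boxes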
Example #9
    def forward(self, arguments, device=None, outputs_to_retain=None):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # use potentially different number of proposals for training vs evaluation
        if len(outputs_to_retain) == 0:
            # print("EVAL")
            pre_nms_topN = cfg["TEST"].RPN_PRE_NMS_TOP_N
            post_nms_topN = cfg["TEST"].RPN_POST_NMS_TOP_N
            nms_thresh = cfg["TEST"].RPN_NMS_THRESH
            min_size = cfg["TEST"].RPN_MIN_SIZE
        else:
            pre_nms_topN = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
            post_nms_topN = cfg["TRAIN"].RPN_POST_NMS_TOP_N
            nms_thresh = cfg["TRAIN"].RPN_NMS_THRESH
            min_size = cfg["TRAIN"].RPN_MIN_SIZE

        bottom = arguments
        assert bottom[0].shape[0] == 1, \
            'Only single item batches are supported'

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0][:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1]
        im_info = bottom[2][0]

        if DEBUG:
            # im_info = (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
            # e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
            print ('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print ('scaled im_size: ({}, {})'.format(im_info[2], im_info[3]))
            print ('original im_size: ({}, {})'.format(im_info[4], im_info[5]))

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print ('score map size: {}'.format(scores.shape))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info)

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale. Original size = im_info[4:6], scaled size = im_info[2:4])
        cntk_image_scale = im_info[2] / im_info[4]
        keep = _filter_boxes(proposals, min_size * cntk_image_scale)
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # pad with zeros if too few rois were found
        num_found_proposals = proposals.shape[0]
        if num_found_proposals < post_nms_topN:
            if DEBUG:
                print("Only {} proposals generated in ProposalLayer".format(num_found_proposals))
            proposals_padded = np.zeros(((post_nms_topN,) + proposals.shape[1:]), dtype=np.float32)
            proposals_padded[:num_found_proposals, :] = proposals
            proposals = proposals_padded

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        # for CNTK: add batch axis to output shape
        proposals.shape = (1,) + proposals.shape

        return None, proposals
Example #10
def proposal_layer(inputs, anchors, thresh=0.5, args=None):
    """Receives anchor scores and selects a subset to pass as proposals
    to the second stage. Filtering is done based on anchor scores and
    non-max suppression to remove overlaps. It also applies bounding
    box refinement deltas to anchors.

    Inputs:
        rpn_probs: [batch, anchors*height*width(fg prob)]
        rpn_bbox_deltas: [batch, anchors*height*width, 4]
        # gt_kps: [batch, num_keypoints, height, width]
        anchors: [batch, anchors*height*width, 4, (x1, y1, x2, y2)]

    Returns:
        Proposals in normalized coordinates [num_rois, 4 (x1, y1, x2, y2)]
        box_inds [num_rois]
    """

    # Box scores (the fg probability per anchor)
    scores = inputs[0]
    deltas = inputs[1]
    boxes_out = []
    boxes_ind = []
    # per-GPU batch size: the global batch is split across available devices
    gpu_count = torch.cuda.device_count()
    bs = args.batch // gpu_count
    for i in range(bs):
        pos_ix = torch.nonzero(scores[i] > thresh)
        if pos_ix.dim() > 1:
            pos_ix = pos_ix.squeeze(1)
        if pos_ix.size(0) == 0:
            # no anchors above threshold for this image. The original wrapped
            # this check in a bare try/except whose handler caught the very
            # exception raised below, so the bs == 1 error never propagated;
            # the try/except is removed here to let it raise for real.
            if bs == 1:
                raise Exception('no roi in this img')
            continue

        # Gather scores, deltas and anchors at the positive indices
        scores_i = torch.index_select(scores[i], 0, pos_ix)
        deltas_i = torch.index_select(deltas[i], 0, pos_ix)
        anchors = anchors.to(deltas.device)
        anchors_i = torch.index_select(anchors[i], 0, pos_ix)

        # With only a handful of positive anchors, the usual pre-NMS top-k
        # trimming step is unnecessary and skipped here.

        # Apply deltas to anchors to get refined anchors.
        # [batch, N, (y1, x1, y2, x2)]
        boxes = apply_box_deltas(anchors_i, deltas_i)

        # Clip to image boundaries. [batch, N, (y1, x1, y2, x2)]
        size = args.img_size  # int(config['train_datasets']['search_size'])
        height, width = size, size
        window = np.array([0, 0, height, width]).astype(np.float32)
        boxes = clip_boxes(boxes, window)

        # Filter out small boxes
        # According to Xinlei Chen's paper, this reduces detection accuracy
        # for small objects, so we're skipping it.

        # Non-max suppression, then keep only the single top proposal
        nms_threshold = args.nms_threshold
        keep = nms(
            torch.cat((boxes, scores_i.unsqueeze(1)), 1).data, nms_threshold)
        if keep.size(0) > 1:
            keep = keep[:1]
        boxes = boxes[keep, :]
        boxes_out.append(boxes)
        boxes_ind.extend([i] * keep.size(0))

    boxes = torch.cat(boxes_out, 0)
    boxes_ind = torch.Tensor(boxes_ind).cuda()
    # Normalize coordinates to the range 0 to 1 (height/width were set inside
    # the loop above and are constant, since args.img_size is square).
    norm = Variable(torch.from_numpy(np.array([height, width, height,
                                               width])).float(),
                    requires_grad=False)
    norm = norm.cuda()
    normalized_boxes = boxes / norm

    return normalized_boxes, boxes_ind, True
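
apply_box_deltas is not shown in this listing. Given the (y1, x1, y2, x2) comments above, a Mask R-CNN-style PyTorch sketch of what it plausibly does (an assumption about its interface, not this repo's code):

import torch

def apply_box_deltas(boxes, deltas):
    """Refine [y1, x1, y2, x2] boxes with [dy, dx, log(dh), log(dw)] deltas."""
    height = boxes[:, 2] - boxes[:, 0]
    width = boxes[:, 3] - boxes[:, 1]
    center_y = boxes[:, 0] + 0.5 * height
    center_x = boxes[:, 1] + 0.5 * width
    # shift the center, rescale the size
    center_y = center_y + deltas[:, 0] * height
    center_x = center_x + deltas[:, 1] * width
    height = height * torch.exp(deltas[:, 2])
    width = width * torch.exp(deltas[:, 3])
    y1 = center_y - 0.5 * height
    x1 = center_x - 0.5 * width
    return torch.stack([y1, x1, y1 + height, x1 + width], dim=1)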
Example #11
def proposal_layer_bak(inputs, anchors, thresh=0.5, args=None):
    """Receives anchor scores and selects a subset to pass as proposals
    to the second stage. Filtering is done based on anchor scores and
    non-max suppression to remove overlaps. It also applies bounding
    box refinement deltas to anchors.

    Inputs:
        rpn_probs: [batch, anchors*height*width(fg prob)]
        rpn_bbox_deltas: [batch, anchors*height*width, 4]
        anchors: [batch*anchors*height*width, 4, (x1, y1, x2, y2)]

    Returns:
        Proposals in normalized coordinates [num_rois, 4, (y1, x1, y2, x2)]
    """

    # Currently only supports batch size 1
    scores = inputs[0]
    deltas = inputs[1]
    # use squeeze(1) rather than a bare squeeze(): squeeze() collapses the
    # (1, 1) nonzero result to a 0-d tensor when exactly one anchor passes,
    # and the size(0) check below then raises
    pos_ix = torch.nonzero(scores > thresh).squeeze(1)

    if pos_ix.size(0) == 0:
        # print('no positive ix')
        return None, False
        # print('positive ix')

    # Box deltas [batch, num_rois, 4]
    # boxes = torch.from_numpy(anchors).float().cuda().detach()
    # boxes = boxes.expand(bs, -1, -1, -1, -1)
    # boxes = boxes.transpose(0, 4).contiguous().view(-1, 4)
    assert anchors.size(0) == deltas.size(0)
    scores = torch.index_select(scores, 0, pos_ix)  # scores = scores[pos_ix]
    deltas = torch.index_select(deltas, 0, pos_ix)
    anchors = anchors.to(deltas.device)
    anchors = torch.index_select(anchors, 0, pos_ix)

    # With only a handful of positive anchors, the usual pre-NMS top-k
    # trimming step is unnecessary and skipped here.

    # Apply deltas to anchors to get refined anchors.
    # [batch, N, (y1, x1, y2, x2)]
    boxes = apply_box_deltas(anchors, deltas)

    # Clip to image boundaries. [batch, N, (y1, x1, y2, x2)]
    size = args.img_size  # int(config['train_datasets']['search_size'])
    height, width = size, size
    window = np.array([0, 0, height, width]).astype(np.float32)
    boxes = clip_boxes(boxes, window)

    # Filter out small boxes
    # According to Xinlei Chen's paper, this reduces detection accuracy
    # for small objects, so we're skipping it.

    # Non-max suppression
    nms_threshold = args.nms_threshold
    keep = nms(torch.cat((boxes, scores.unsqueeze(1)), 1).data, nms_threshold)
    boxes = boxes[keep, :]
    # note: the original computed `select_bs = keep // total_anchors` here, but
    # total_anchors is undefined in this scope and select_bs was never used,
    # so the line is dropped

    # Normalize dimensions to range of 0 to 1.
    norm = Variable(torch.from_numpy(np.array([height, width, height,
                                               width])).float(),
                    requires_grad=False)
    norm = norm.cuda()
    normalized_boxes = boxes / norm

    return normalized_boxes, True
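
As a closing note, current PyTorch code would not hand-roll the concatenate-then-nms pattern used in the last two examples; torchvision ships an operator with the same semantics:

import torch
from torchvision.ops import nms

boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                      [0.5, 0.5, 10.5, 10.5],
                      [50.0, 50.0, 60.0, 60.0]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, iou_threshold=0.7)  # tensor([0, 2]): box 1 suppressed
proposals = boxes[keep]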