Example #1
    def fast_rcnn_net(self, features, is_training):
        """
        base the feature to compute the finial bbox and scores
        :param features:(batch_size, num_proposal, 7, 7, channels)
        :return:
        fast_rcnn_encode_boxes: (batch_size, num_proposal, num_classes*4)
        fast_rcnn_scores:(batch_size, num_proposal, num_classes)
        """
        def batch_slice_fast_rcnn_net(features, config, is_training):

            with tf.variable_scope('fast_rcnn_net', reuse=tf.AUTO_REUSE):
                with slim.arg_scope(
                    [slim.fully_connected],
                        activation_fn=None,
                        weights_initializer=tf.glorot_uniform_initializer(),
                        weights_regularizer=slim.l2_regularizer(
                            config.WEIGHT_DECAY)):

                    batch_norm_params = {
                        'is_training': is_training,
                        'decay': 0.997,
                        'epsilon': 1e-5,
                        'scale': True,
                        'trainable': True,
                        'updates_collections': tf.GraphKeys.UPDATE_OPS,
                    }
                    with slim.arg_scope(
                        [slim.conv2d],
                            stride=1,
                            padding="VALID",
                            activation_fn=tf.nn.relu,
                            weights_initializer=tf.glorot_uniform_initializer(),
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(
                                config.WEIGHT_DECAY)):
                        with slim.arg_scope([slim.batch_norm],
                                            **batch_norm_params):

                            net = slim.conv2d(inputs=features,
                                              num_outputs=1024,
                                              kernel_size=[config.ROI_SIZE,
                                                           config.ROI_SIZE],
                                              scope="fc_1")

                            net = slim.conv2d(inputs=net,
                                              num_outputs=1024,
                                              kernel_size=[1, 1],
                                              scope="fc_2")

                    net = tf.squeeze(net, axis=[1, 2])
                    fast_rcnn_scores = slim.fully_connected(net,
                                                            config.NUM_CLASS,
                                                            scope='classifier')

                    fast_rcnn_encode_boxes = slim.fully_connected(
                        net, config.NUM_CLASS * 4, scope='regressor')

                    fast_rcnn_encode_boxes = tf.reshape(
                        fast_rcnn_encode_boxes, [-1, config.NUM_CLASS, 4])

                return fast_rcnn_encode_boxes, fast_rcnn_scores

        fast_rcnn_encode_boxes, fast_rcnn_scores = boxes_utils.batch_slice(
            [features],
            lambda x: batch_slice_fast_rcnn_net(x, self.config, is_training),
            self.config.PER_GPU_IMAGE)

        return fast_rcnn_encode_boxes, fast_rcnn_scores
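
The boxes_utils.batch_slice helper used above is not part of this listing. As a rough sketch, it likely mirrors the Mask R-CNN utility of the same name: split every input along the batch axis, run the graph function once per image, and restack the results. The signature below is an assumption, not the repo's actual code.

import tensorflow as tf

def batch_slice(inputs, graph_fn, batch_size):
    # Sketch only: apply graph_fn to each image's slice of the inputs,
    # then stack the per-image outputs back along axis 0.
    if not isinstance(inputs, list):
        inputs = [inputs]
    outputs = []
    for i in range(batch_size):
        inputs_slice = [x[i] for x in inputs]
        output_slice = graph_fn(*inputs_slice)
        if not isinstance(output_slice, (tuple, list)):
            output_slice = [output_slice]
        outputs.append(output_slice)
    # Transpose from per-image tuples to per-output lists before stacking.
    outputs = list(zip(*outputs))
    result = [tf.stack(o, axis=0) for o in outputs]
    if len(result) == 1:
        result = result[0]
    return result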
Example #2
def model_fn(features, labels, mode, params):

    # ***********************************************************************************************
    # *                                        Backbone Net                                         *
    # ***********************************************************************************************
    net_config = params["net_config"]
    if mode == tf.estimator.ModeKeys.TRAIN:
        IS_TRAINING = True
    else:
        IS_TRAINING = False

    origin_image_batch = features["image"]
    image_batch = origin_image_batch - tf.convert_to_tensor(
        net_config.PIXEL_MEANS, dtype=tf.float32)
    image_window = features["image_window"]
    # note: is_training=True here means batch norm runs in training mode, which is important
    _, share_net = get_network_byname(net_name='resnet_v1_50',
                                      inputs=image_batch,
                                      num_classes=None,
                                      is_training=True,
                                      global_pool=True,
                                      output_stride=None,
                                      spatial_squeeze=True)
    # ***********************************************************************************************
    # *                                             FPN                                             *
    # ***********************************************************************************************
    feature_pyramid = build_fpn.build_feature_pyramid(share_net, net_config)
    # ***********************************************************************************************
    # *                                             RPN                                             *
    # ***********************************************************************************************
    gtboxes_and_label_batch = labels.get("gt_box_labels")
    rpn = build_rpn.RPN(feature_pyramid=feature_pyramid,
                        image_window=image_window,
                        config=net_config)

    # rpn_proposals_scores: (batch_size, rpn_proposals_num), e.g. 2000 per image during training
    rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals(IS_TRAINING)
    rpn_location_loss, rpn_classification_loss = rpn.rpn_losses(
        labels["minibatch_indices"], labels["minibatch_encode_gtboxes"],
        labels["minibatch_objects_one_hot"])

    rpn_total_loss = rpn_classification_loss + rpn_location_loss

    # ***********************************************************************************************
    # *                                       Fast RCNN Head                                        *
    # ***********************************************************************************************

    fpn_fast_rcnn_head = build_head.FPNHead(
        feature_pyramid=feature_pyramid,
        rpn_proposals_boxes=rpn_proposals_boxes,
        origin_image=origin_image_batch,
        gtboxes_and_label=gtboxes_and_label_batch,
        config=net_config,
        is_training=False,
        image_window=image_window)

    detections = fpn_fast_rcnn_head.head_detection()
    if net_config.DEBUG:
        print_tensors(rpn_proposals_scores[0, :50], "scores")
        print_tensors(rpn_proposals_boxes[0, :50, :], "bbox")
        rpn_proposals_vision = draw_boxes_with_scores(
            origin_image_batch[0, :, :, :], rpn_proposals_boxes[0, :50, :],
            rpn_proposals_scores[0, :50])
        head_vision = draw_boxes_with_categories_and_scores(
            origin_image_batch[0, :, :, :], detections[0, :, :4],
            detections[0, :, 4], detections[0, :, 5], net_config.LABEL_TO_NAME)
        tf.summary.image("rpn_proposals_vision", rpn_proposals_vision)
        tf.summary.image("head_vision", head_vision)

    head_location_loss, head_classification_loss = fpn_fast_rcnn_head.head_loss()
    head_total_loss = head_location_loss + head_classification_loss

    # train
    with tf.name_scope("regularization_losses"):
        regularization_list = [
            tf.nn.l2_loss(w.read_value()) * net_config.WEIGHT_DECAY /
            tf.cast(tf.size(w.read_value()), tf.float32)
            for w in tf.trainable_variables()
            if 'gamma' not in w.name and 'beta' not in w.name
        ]
        regularization_loss = tf.add_n(regularization_list)

    total_loss = regularization_loss + head_total_loss + rpn_total_loss
    # guard: a NaN loss is replaced with 0 so one bad step does not poison the weights
    total_loss = tf.cond(tf.is_nan(total_loss), lambda: 0.0,
                         lambda: total_loss)
    print_tensors(head_total_loss, "head_loss")
    print_tensors(rpn_total_loss, "rpn_loss")
    global_step = tf.train.get_or_create_global_step()
    tf.train.init_from_checkpoint(
        net_config.CHECKPOINT_DIR,
        {net_config.BACKBONE_NET + "/": net_config.BACKBONE_NET + "/"})
    with tf.name_scope("optimizer"):
        lr = tf.train.piecewise_constant(
            global_step,
            boundaries=[np.int64(net_config.BOUNDARY[0])],
            values=[net_config.LEARNING_RATE, net_config.LEARNING_RATE / 10])
        optimizer = tf.train.MomentumOptimizer(lr,
                                               momentum=net_config.MOMENTUM)
        optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies([tf.group(*update_ops)]):
            grads = optimizer.compute_gradients(total_loss)
            # clip gradients
            grads = tf.contrib.training.clip_gradient_norms(
                grads, net_config.CLIP_GRADIENT_NORM)
            train_op = optimizer.apply_gradients(grads, global_step)

    # ***********************************************************************************************
    # *                                           Summary                                           *
    # ***********************************************************************************************
    # rpn loss and image
    tf.summary.scalar('rpn_location_loss',
                      rpn_location_loss,
                      family="rpn_loss")
    tf.summary.scalar('rpn_classification_loss',
                      rpn_classification_loss,
                      family="rpn_loss")
    tf.summary.scalar('rpn_total_loss', rpn_total_loss, family="rpn_loss")

    tf.summary.scalar('head_location_loss',
                      head_location_loss,
                      family="head_loss")
    tf.summary.scalar('head_classification_loss',
                      head_classification_loss,
                      family="head_loss")
    tf.summary.scalar('head_total_loss', head_total_loss, family="head_loss")
    tf.summary.scalar("regularization_loss", regularization_loss)
    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('learning_rate', lr)

    meta_hook = MetadataHook(save_steps=net_config.SAVE_EVERY_N_STEP *
                             net_config.EPOCH // 2,
                             output_dir=net_config.MODLE_DIR)
    summary_hook = tf.train.SummarySaverHook(
        save_steps=net_config.SAVE_EVERY_N_STEP,
        output_dir=net_config.MODLE_DIR,
        summary_op=tf.summary.merge_all())
    hooks = [summary_hook]
    if net_config.COMPUTE_TIME:
        hooks.append(meta_hook)
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          train_op=train_op,
                                          training_hooks=hooks)

    # ***********************************************************************************************
    # *                                            EVAL                                             *
    # ***********************************************************************************************
    metric_ap_dict = batch_slice(
        [features["gt_box_labels"][:, :, :4],
         features["gt_box_labels"][:, :, 4],
         detections[:, :, :4], detections[:, :, 4], detections[:, :, 5]],
        lambda x, y, z, u, v: compute_metric_ap(x, y, z, u, v, net_config),
        net_config.PER_GPU_IMAGE)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          eval_metric_ops=metric_ap_dict)
    def build_frcnn_target(self):
        '''
        During training we need to know, for each reference box, its label and
        matched gtbox for the second stage:
        iou >= 0.5 is object
        iou < 0.5 is background
        this function needs batch_slice
        :return:
        minibatch_reference_proboxes: (batch_size, config.FAST_RCNN_MINIBATCH_SIZE, 4) [y1, x1, y2, x2]
        minibatch_encode_gtboxes: (batch_size, config.FAST_RCNN_MINIBATCH_SIZE, 4) [dy, dx, log(dh), log(dw)]
        object_mask: (batch_size, config.FAST_RCNN_MINIBATCH_SIZE) 1 indicates object, 0 indicates background
        gt_class_ids: (batch_size, config.FAST_RCNN_MINIBATCH_SIZE) class indices, 0 for background
        '''
        def batch_slice_build_target(gtboxes_and_label, rpn_proposals_boxes, config):

            with tf.variable_scope('build_faster_rcnn_targets'):

                with tf.variable_scope('fast_rcnn_find_positive_negative_samples'):
                    gtboxes = tf.cast(
                        tf.reshape(gtboxes_and_label[:, :-1], [-1, 4]), tf.float32)
                    gt_class_ids = tf.cast(
                        tf.reshape(gtboxes_and_label[:, -1], [-1, ]), tf.int32)
                    gtboxes, non_zeros = boxes_utils.trim_zeros_graph(gtboxes, name="trim_gt_box")  # [M, 4]
                    gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros)
                    rpn_proposals_boxes, _ = boxes_utils.trim_zeros_graph(rpn_proposals_boxes,
                                                                          name="trim_rpn_proposal_train")

                    ious = boxes_utils.iou_calculate(rpn_proposals_boxes, gtboxes)  # [N, M]
                    matchs = tf.cast(tf.argmax(ious, axis=1), tf.int32)  # [N, ]
                    max_iou_each_row = tf.reduce_max(ious, axis=1)
                    positives = tf.cast(tf.greater_equal(max_iou_each_row, config.FAST_RCNN_IOU_POSITIVE_THRESHOLD), tf.int32)

                    reference_boxes_mattached_gtboxes = tf.gather(gtboxes, matchs)  # [N, 4]
                    gt_class_ids = tf.gather(gt_class_ids, matchs)  # [N, ]
                    object_mask = tf.cast(positives, tf.float32)  # [N, ]
                    # background boxes get class id 0, so no class-specific gradient is computed for them
                    gt_class_ids = gt_class_ids * positives

                with tf.variable_scope('fast_rcnn_minibatch'):
                    # choose the positive indices
                    positive_indices = tf.reshape(tf.where(tf.not_equal(object_mask, 0.)), [-1])
                    num_of_positives = tf.minimum(tf.shape(positive_indices)[0],
                                                  tf.cast(config.FAST_RCNN_MINIBATCH_SIZE*config.FAST_RCNN_POSITIVE_RATE,
                                                          tf.int32))
                    positive_indices = tf.random_shuffle(positive_indices)
                    positive_indices = tf.slice(positive_indices, begin=[0], size=[num_of_positives])
                    # choose the negative indices, keeping the
                    # positive:negative proportion strictly at 1:3
                    negative_indices = tf.reshape(tf.where(tf.equal(object_mask, 0.)), [-1])
                    num_of_negatives = tf.cast(int(1. / config.FAST_RCNN_POSITIVE_RATE) * num_of_positives, tf.int32)\
                                       - num_of_positives

                    num_of_negatives = tf.minimum(tf.shape(negative_indices)[0], num_of_negatives)
                    negative_indices = tf.random_shuffle(negative_indices)
                    negative_indices = tf.slice(negative_indices, begin=[0], size=[num_of_negatives])

                    minibatch_indices = tf.concat([positive_indices, negative_indices], axis=0)
                    minibatch_reference_gtboxes = tf.gather(reference_boxes_mattached_gtboxes,
                                                            minibatch_indices)
                    minibatch_reference_proboxes = tf.gather(rpn_proposals_boxes, minibatch_indices)
                    # encode gtboxes
                    minibatch_encode_gtboxes = \
                        encode_and_decode.encode_boxes(
                            unencode_boxes=minibatch_reference_gtboxes,
                            reference_boxes=minibatch_reference_proboxes,
                            dev_factors=config.BBOX_STD_DEV)
                    object_mask = tf.gather(object_mask, minibatch_indices)
                    gt_class_ids = tf.gather(gt_class_ids, minibatch_indices)

                    # padding if necessary
                    gap = tf.cast(config.FAST_RCNN_MINIBATCH_SIZE - (num_of_positives + num_of_negatives), dtype=tf.int32)
                    bbox_padding = tf.zeros((gap, 4))
                    minibatch_reference_proboxes = tf.concat([minibatch_reference_proboxes, bbox_padding], axis=0)
                    minibatch_encode_gtboxes = tf.concat([minibatch_encode_gtboxes, bbox_padding], axis=0)
                    object_mask = tf.pad(object_mask, [(0, gap)])
                    gt_class_ids = tf.pad(gt_class_ids, [(0, gap)])

                return minibatch_reference_proboxes, minibatch_encode_gtboxes, object_mask, gt_class_ids

        minibatch_reference_proboxes, minibatch_encode_gtboxes, object_mask, gt_class_ids = \
                boxes_utils.batch_slice([self.gtboxes_and_label, self.rpn_proposals_boxes],
                                        lambda x, y: batch_slice_build_target(x, y, self.config),
                                        self.config.PER_GPU_IMAGE)
        if DEBUG:
            gt_vision = draw_boxes_with_categories(self.origin_image[0],
                                                   self.gtboxes_and_label[0, :, :4],
                                                   self.gtboxes_and_label[0, :, 4])
            tf.summary.image("gt_vision", gt_vision)

            draw_bbox_train = draw_boxes_with_categories(self.origin_image[0],
                                                         minibatch_reference_proboxes[0],
                                                         gt_class_ids[0])
            tf.summary.image("positive_proposal", draw_bbox_train)

        return minibatch_reference_proboxes, minibatch_encode_gtboxes, object_mask, gt_class_ids
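
The [dy, dx, log(dh), log(dw)] targets in the docstring come from encode_and_decode.encode_boxes, which is not part of this listing. Below is a minimal sketch of the standard R-CNN parameterization for [y1, x1, y2, x2] boxes, with the division by dev_factors implied by the BBOX_STD_DEV argument; this is an assumption about the actual helper, not its verified source.

import tensorflow as tf

def encode_boxes(unencode_boxes, reference_boxes, dev_factors):
    # Sketch of the usual R-CNN box encoding; the repo's helper may differ.
    y1, x1, y2, x2 = tf.unstack(unencode_boxes, axis=1)
    ry1, rx1, ry2, rx2 = tf.unstack(reference_boxes, axis=1)
    h, w = y2 - y1, x2 - x1
    rh, rw = ry2 - ry1, rx2 - rx1
    cy, cx = y1 + 0.5 * h, x1 + 0.5 * w
    rcy, rcx = ry1 + 0.5 * rh, rx1 + 0.5 * rw
    dy, dx = (cy - rcy) / rh, (cx - rcx) / rw
    dh, dw = tf.log(h / rh), tf.log(w / rw)
    deltas = tf.stack([dy, dx, dh, dw], axis=1)
    return deltas / tf.constant(dev_factors, dtype=tf.float32)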
    def rpn_proposals(self, is_training):
        """
        :param is_training:
        :return:
        rpn_proposals_boxes: (batch_size, config.MAX_PROPOSAL_SIZE, 4)(y1, x1, y2, x2)
        """
        anchors = make_anchor.generate_pyramid_anchors(self.config)
        if is_training:
            rpn_proposals_num = self.config.MAX_PROPOSAL_NUM_TRAINING
        else:
            rpn_proposals_num = self.config.MAX_PROPOSAL_NUM_INFERENCE

        def batch_slice_rpn_proposals(rpn_encode_boxes, rpn_scores, anchors,
                                      config, rpn_proposals_num):

            with tf.variable_scope('rpn_proposals'):
                rpn_softmax_scores = slim.softmax(rpn_scores)
                # the second column is the object score
                rpn_object_score = rpn_softmax_scores[:, 1]
                if config.RPN_TOP_K_NMS:
                    top_k_indices = tf.nn.top_k(rpn_object_score,
                                                k=config.RPN_TOP_K_NMS).indices
                    rpn_object_score = tf.gather(rpn_object_score,
                                                 top_k_indices)
                    rpn_encode_boxes = tf.gather(rpn_encode_boxes,
                                                 top_k_indices)
                    anchors = tf.gather(anchors, top_k_indices)

                rpn_decode_boxes = encode_and_decode.decode_boxes(
                    encode_boxes=rpn_encode_boxes,
                    reference_boxes=anchors,
                    dev_factors=config.RPN_BBOX_STD_DEV)

                valid_indices = boxes_utils.non_maximal_suppression(
                    boxes=rpn_decode_boxes,
                    scores=rpn_object_score,
                    max_output_size=rpn_proposals_num,
                    iou_threshold=config.RPN_NMS_IOU_THRESHOLD)
                rpn_decode_boxes = tf.gather(rpn_decode_boxes, valid_indices)
                rpn_object_score = tf.gather(rpn_object_score, valid_indices)
                # clip proposals to image boundaries (out-of-bounds coordinates are replaced with the image boundary)
                rpn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                    rpn_decode_boxes,
                    [0, 0, config.TARGET_SIDE - 1, config.TARGET_SIDE - 1])
                # Pad if needed
                padding = tf.maximum(
                    rpn_proposals_num - tf.shape(rpn_decode_boxes)[0], 0)
                # note: zero rows are concatenated instead of using tf.pad here
                zeros_padding = tf.zeros((padding, 4), dtype=tf.float32)
                rpn_proposals_boxes = tf.concat(
                    [rpn_decode_boxes, zeros_padding], axis=0)
                rpn_object_score = tf.pad(rpn_object_score, [(0, padding)])

                return rpn_proposals_boxes, rpn_object_score

        rpn_proposals_boxes, rpn_object_scores = \
            boxes_utils.batch_slice([self.rpn_encode_boxes, self.rpn_scores],
                                    lambda x, y: batch_slice_rpn_proposals(x, y, anchors,
                                                                              self.config, rpn_proposals_num),
                                    self.config.PER_GPU_IMAGE)

        return rpn_proposals_boxes, rpn_object_scores
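
encode_and_decode.decode_boxes, used above to turn RPN regressions back into corner coordinates, is the inverse transform. A matching sketch, under the same assumptions as the encoding sketch earlier:

import tensorflow as tf

def decode_boxes(encode_boxes, reference_boxes, dev_factors):
    # Sketch: undo the [dy, dx, log(dh), log(dw)] encoding to get corners.
    deltas = encode_boxes * tf.constant(dev_factors, dtype=tf.float32)
    dy, dx, dh, dw = tf.unstack(deltas, axis=1)
    ry1, rx1, ry2, rx2 = tf.unstack(reference_boxes, axis=1)
    rh, rw = ry2 - ry1, rx2 - rx1
    rcy, rcx = ry1 + 0.5 * rh, rx1 + 0.5 * rw
    cy, cx = dy * rh + rcy, dx * rw + rcx
    h, w = tf.exp(dh) * rh, tf.exp(dw) * rw
    return tf.stack([cy - 0.5 * h, cx - 0.5 * w,
                     cy + 0.5 * h, cx + 0.5 * w], axis=1)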
    def fast_rcnn_proposals(self, rpn_proposal_bbox, encode_boxes, categories, scores, image_window):
        """
        padding zeros to keep alignments
        :return:
        detection_boxes_scores_labels:(batch_size, config.MAX_DETECTION_INSTANCE, 6)
        """

        def batch_slice_rcnn_proposals(rpn_proposal_bbox,
                                       encode_boxes,
                                       categories,
                                       scores,
                                       image_window,
                                       config):
            """
            mutilclass NMS
            :param rpn_proposal_bbox: (N, 4)
            :param encode_boxes: (N, 4)
            :param categories:(N, )
            :param scores: (N, )
            :param image_window:(y1, x1, y2, x2) the boundary of image
            :return:
            detection_boxes_scores_labels : (-1, 6)[y1, x1, y2, x2, scores, labels]
            """
            with tf.variable_scope('fast_rcnn_proposals'):
                # trim zero-padded proposals
                rpn_proposal_bbox, non_zeros = boxes_utils.trim_zeros_graph(rpn_proposal_bbox,
                                                                            name="trim_proposals_detection")
                encode_boxes = tf.boolean_mask(encode_boxes, non_zeros)
                categories = tf.boolean_mask(categories, non_zeros)
                scores = tf.boolean_mask(scores, non_zeros)
                fast_rcnn_decode_boxes = encode_and_decode.decode_boxes(encode_boxes=encode_boxes,
                                                                        reference_boxes=rpn_proposal_bbox,
                                                                        dev_factors=config.BBOX_STD_DEV)
                fast_rcnn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(fast_rcnn_decode_boxes,
                                                                                  image_window)

                # remove the background
                keep = tf.cast(tf.where(categories > 0)[:, 0], tf.int32)
                if DEBUG:
                    print_categories = tf.gather(categories, keep)
                    print_scores = tf.gather(scores, keep)
                    num_item = tf.minimum(tf.shape(print_scores)[0], 50)
                    print_scores_vision, print_index = tf.nn.top_k(print_scores, k=num_item)
                    print_categories_vision = tf.gather(print_categories, print_index)
                    print_tensors(print_categories_vision, "categories")
                    print_tensors(print_scores_vision, "scores")
                    mean_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
                # Filter out low confidence boxes
                if config.FINAL_SCORE_THRESHOLD:
                    conf_keep = tf.cast(tf.where(scores >= config.FINAL_SCORE_THRESHOLD)[:, 0], tf.int32)
                    keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                                    tf.expand_dims(conf_keep, 0))
                    keep = tf.sparse_tensor_to_dense(keep)[0]

                pre_nms_class_ids = tf.gather(categories, keep)
                pre_nms_scores = tf.gather(scores, keep)
                pre_nms_rois = tf.gather(fast_rcnn_decode_boxes, keep)
                unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

                def nms_keep_map(class_id):
                    """Apply Non-Maximum Suppression on ROIs of the given class."""
                    # Indices of ROIs of the given class
                    ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
                    # Apply NMS
                    class_keep = tf.image.non_max_suppression(
                        tf.gather(pre_nms_rois, ixs),
                        tf.gather(pre_nms_scores, ixs),
                        max_output_size=config.DETECTION_MAX_INSTANCES,
                        iou_threshold=config.FAST_RCNN_NMS_IOU_THRESHOLD)
                    # Map class-local indices back to the pre-NMS keep indices
                    class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
                    # Pad with -1 so returned tensors have the same shape
                    gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0]
                    class_keep = tf.pad(class_keep, [(0, gap)],
                                        mode='CONSTANT', constant_values=-1)
                    # Set shape so map_fn() can infer result shape
                    class_keep.set_shape([config.DETECTION_MAX_INSTANCES])
                    return class_keep
                # 2. Map over class IDs
                nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids,
                                     dtype=tf.int32)
                # 3. Merge results into one list, and remove -1 padding
                nms_keep = tf.reshape(nms_keep, [-1])
                nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
                # 4. Compute intersection between keep and nms_keep
                keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                                tf.expand_dims(nms_keep, 0))
                keep = tf.sparse_tensor_to_dense(keep)[0]
                # Keep top detections
                roi_count = config.DETECTION_MAX_INSTANCES
                class_scores_keep = tf.gather(scores, keep)
                num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
                top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
                keep = tf.gather(keep, top_ids)

                # Arrange output as [N, (y1, x1, y2, x2, class_id, score)],
                # with coordinates in image (pixel) space
                detections = tf.concat([
                    tf.gather(fast_rcnn_decode_boxes, keep),
                    tf.to_float(tf.gather(categories, keep))[..., tf.newaxis],
                    tf.gather(scores, keep)[..., tf.newaxis]
                ], axis=1)

                # Pad with zeros if detections < DETECTION_MAX_INSTANCES
                gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
                detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")

                return detections

        detections = boxes_utils.batch_slice(
            [rpn_proposal_bbox, encode_boxes, categories, scores, image_window],
            lambda x, y, z, u, v: batch_slice_rcnn_proposals(x, y, z, u, v, self.config),
            self.config.PER_GPU_IMAGE)
        return detections
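
boxes_utils.trim_zeros_graph, used throughout to strip zero padding before per-image work, likewise appears to follow the Mask R-CNN utility of the same name. A minimal sketch (an assumption about this repo's version):

import tensorflow as tf

def trim_zeros_graph(boxes, name=None):
    # Sketch: rows whose coordinates are all zero are padding; drop them and
    # return the boolean mask so aligned tensors can be filtered the same way.
    non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool)
    boxes = tf.boolean_mask(boxes, non_zeros, name=name)
    return boxes, non_zeros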