コード例 #1
0
    def inference(self, mode, inputs):
        is_training = mode == 'TRAIN'

        ###decode your inputs
        [image, im_info, gt_boxes] = inputs

        image.set_shape([None, None, None, 3])
        im_info.set_shape([None, cfg.nr_info_dim])
        if mode == 'TRAIN':
            gt_boxes.set_shape([None, None, 5])
        ##end of decode

        num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios)
        bottleneck = resnet_v1.bottleneck

        blocks = [
            resnet_utils.Block('block1', bottleneck,
                               [(256, 64, 1, 1)] * 2 + [(256, 64, 1, 1)]),
            resnet_utils.Block('block2', bottleneck,
                               [(512, 128, 2, 1)] + [(512, 128, 1, 1)] * 3),
            resnet_utils.Block('block3', bottleneck,
                               [(1024, 256, 2, 1)] + [(1024, 256, 1, 1)] * 22),
            resnet_utils.Block('block4', bottleneck,
                               [(2048, 512, 1, 2)] + [(2048, 512, 1, 2)] * 2)
        ]

        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            with tf.variable_scope('resnet_v1_101', 'resnet_v1_101'):
                net = resnet_utils.conv2d_same(
                    image, 64, 7, stride=2, scope='conv1')
                net = slim.max_pool2d(
                    net, [3, 3], stride=2, padding='SAME', scope='pool1')
            net, _ = resnet_v1.resnet_v1(
                net, blocks[0:1], global_pool=False, include_root_block=False,
                scope='resnet_v1_101')

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv3, _ = resnet_v1.resnet_v1(
                net, blocks[1:2], global_pool=False, include_root_block=False,
                scope='resnet_v1_101')

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(
                net_conv3, blocks[2:3], global_pool=False,
                include_root_block=False, scope='resnet_v1_101')

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv5, _ = resnet_v1.resnet_v1(
                net_conv4, blocks[-1:], global_pool=False,
                include_root_block=False, scope='resnet_v1_101')

        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)

        with tf.variable_scope(
                'resnet_v1_101', 'resnet_v1_101',
                regularizer=tf.contrib.layers.l2_regularizer(
                    cfg.weight_decay)):
            # rpn
            rpn = slim.conv2d(
                net_conv4, 512, [3, 3], trainable=is_training,
                weights_initializer=initializer, activation_fn=nn_ops.relu,
                scope="rpn_conv/3x3")
            rpn_cls_score = slim.conv2d(
                rpn, num_anchors * 2, [1, 1], trainable=is_training,
                weights_initializer=initializer, padding='VALID',
                activation_fn=None, scope='rpn_cls_score')
            rpn_bbox_pred = slim.conv2d(
                rpn, num_anchors * 4, [1, 1], trainable=is_training,
                weights_initializer=initializer, padding='VALID',
                activation_fn=None, scope='rpn_bbox_pred')

            # generate anchor
            height = tf.cast(tf.shape(rpn)[1], tf.float32)
            width = tf.cast(tf.shape(rpn)[2], tf.float32)
            anchors = generate_anchors_opr(
                height, width, cfg.stride[0], cfg.anchor_scales,
                cfg.anchor_ratios)
            # change it so that the score has 2 as its channel size
            rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = tf.nn.softmax(rpn_cls_prob, name='rpn_cls_prob')
            rpn_cls_prob = tf.reshape(rpn_cls_prob, tf.shape(rpn_cls_score))

            rois, roi_scores = proposal_opr(
                rpn_cls_prob, rpn_bbox_pred, im_info, mode, cfg.stride,
                anchors, num_anchors, is_tfchannel=True, is_tfnms=False)

            if is_training:
                with tf.variable_scope('anchor') as scope:
                    rpn_labels, rpn_bbox_targets = \
                        tf.py_func(
                            anchor_target_layer,
                            [gt_boxes, im_info, cfg.stride, anchors,
                             num_anchors],
                            [tf.float32, tf.float32])
                    rpn_labels = tf.to_int32(rpn_labels, name="to_int32")

                with tf.control_dependencies([rpn_labels]):
                    with tf.variable_scope('rpn_rois') as scope:
                        rois, labels, bbox_targets = \
                            tf.py_func(
                                proposal_target_layer,
                                [rois, gt_boxes, im_info],
                                [tf.float32, tf.float32, tf.float32])
                        labels = tf.to_int32(labels, name="to_int32")


        with tf.variable_scope(
                'resnet_v1_101', 'resnet_v1_101',
                regularizer=tf.contrib.layers.l2_regularizer(
                    cfg.weight_decay)):

            conv_new_1 = slim.conv2d(
                net_conv5, 1024, [1, 1], trainable=is_training,
                weights_initializer=initializer, activation_fn=nn_ops.relu,
                scope="conv_new_1")

            rfcn_cls = slim.conv2d(
                conv_new_1, 7 * 7 * cfg.num_classes, [1, 1],
                trainable=is_training, weights_initializer=initializer,
                activation_fn=None, scope="rfcn_cls")
            rfcn_bbox = slim.conv2d(
                conv_new_1, 7 * 7 * 4, [1, 1], trainable=is_training,
                weights_initializer=initializer,
                activation_fn=None, scope="rfcn_bbox")

            [psroipooled_cls_rois, _] = psroi_pooling_op.psroi_pool(
                rfcn_cls, rois, group_size=7, spatial_scale=1.0 / 16.0)
            [psroipooled_loc_rois, _] = psroi_pooling_op.psroi_pool(
                rfcn_bbox, rois, group_size=7, spatial_scale=1.0 / 16.0)

            cls_score = tf.reduce_mean(psroipooled_cls_rois, axis=[1, 2])
            bbox_pred = tf.reduce_mean(psroipooled_loc_rois, axis=[1, 2])
            cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob")
            # cls_prob = tf.nn.softmax(cls_score, name="cls_prob")
            bbox_pred = tf.tile(bbox_pred, [1, cfg.num_classes])

        if not is_training:
            stds = np.tile(
                np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (cfg.num_classes))
            means = np.tile(
                np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (cfg.num_classes))
            bbox_pred *= stds
            bbox_pred += means

            ##############add prediction#####################
            tf.add_to_collection("rpn_cls_score", rpn_cls_score)
            tf.add_to_collection("rpn_cls_prob", rpn_cls_prob)
            tf.add_to_collection("rpn_bbox_pred", rpn_bbox_pred)
            tf.add_to_collection("cls_score", cls_score)
            tf.add_to_collection("cls_prob", cls_prob)
            tf.add_to_collection("bbox_pred", bbox_pred)
            tf.add_to_collection("rois", rois)

        else:
            #--------------------  rpn loss ---------------------------------#
            from detection_opr.utils import loss_opr_without_box_weight
            rpn_loss_box = loss_opr_without_box_weight.smooth_l1_loss_rpn(
                tf.reshape(rpn_bbox_pred, [-1, 4]),
                tf.reshape(rpn_bbox_targets, [-1, 4]),
                tf.reshape(rpn_labels, [-1]), sigma=cfg.simga_rpn)

            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_label = tf.reshape(rpn_labels, [-1])
            rpn_select = tf.where(tf.not_equal(rpn_label, -1))
            rpn_cls_score = tf.reshape(
                tf.gather(rpn_cls_score, rpn_select), [-1, 2])
            rpn_label = tf.reshape(tf.gather(rpn_label, rpn_select), [-1])
            rpn_cross_entropy = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=rpn_cls_score, labels=rpn_label))

            #-------------------- rcnn loss  --------------------------------#
            label = tf.reshape(labels, [-1])
            cross_entropy, loss_box = loss_opr_without_box_weight.sum_ohem_loss(
                tf.reshape(cls_score, [-1, cfg.num_classes]), label,
                bbox_pred, bbox_targets, cfg.TRAIN.nr_ohem_sampling,
                cfg.num_classes)
            loss_box *= 2

            #--------------------add to colloection ------------------------#
            tf.add_to_collection('loss_cls', cross_entropy)
            tf.add_to_collection('loss_box', loss_box)
            tf.add_to_collection('rpn_loss_cls', rpn_cross_entropy)
            tf.add_to_collection('rpn_loss_box', rpn_loss_box)
            loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
            tf.add_to_collection('losses', loss)
            return loss
コード例 #2
0
    def inference(self, mode, inputs):
        is_training = mode == 'TRAIN'

        ###decode your inputs
        [image, im_info, gt_boxes] = inputs

        image.set_shape([None, None, None, 3])
        im_info.set_shape([None, cfg.nr_info_dim])
        if mode == 'TRAIN':
            gt_boxes.set_shape([None, None, 5])
        ##end of decode

        num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios)
        bottleneck = resnet_v1.bottleneck

        blocks = [
            resnet_utils.Block('block1', bottleneck,
                               [(256, 64, 1, 1)] * 2 + [(256, 64, 1, 1)]),
            resnet_utils.Block('block2', bottleneck,
                               [(512, 128, 2, 1)] + [(512, 128, 1, 1)] * 3),
            resnet_utils.Block('block3', bottleneck,
                               [(1024, 256, 2, 1)] + [(1024, 256, 1, 1)] * 22),
            resnet_utils.Block('block4', bottleneck,
                               [(2048, 512, 1, 2)] + [(2048, 512, 1, 2)] * 2)
        ]

        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            with tf.variable_scope('resnet_v1_101', 'resnet_v1_101'):
                net = resnet_utils.conv2d_same(image,
                                               64,
                                               7,
                                               stride=2,
                                               scope='conv1')
                net = slim.max_pool2d(net, [3, 3],
                                      stride=2,
                                      padding='SAME',
                                      scope='pool1')
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[0:1],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope='resnet_v1_101')

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv3, _ = resnet_v1.resnet_v1(net,
                                               blocks[1:2],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope='resnet_v1_101')

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(net_conv3,
                                               blocks[2:3],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope='resnet_v1_101')

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv5, _ = resnet_v1.resnet_v1(net_conv4,
                                               blocks[-1:],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope='resnet_v1_101')

        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)

        with tf.variable_scope('resnet_v1_101',
                               'resnet_v1_101',
                               regularizer=tf.contrib.layers.l2_regularizer(
                                   cfg.weight_decay)):
            # rpn
            rpn = slim.conv2d(net_conv4,
                              512, [3, 3],
                              trainable=is_training,
                              weights_initializer=initializer,
                              activation_fn=nn_ops.relu,
                              scope="rpn_conv/3x3")
            rpn_cls_score = slim.conv2d(rpn,
                                        num_anchors * 2, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            rpn_bbox_pred = slim.conv2d(rpn,
                                        num_anchors * 4, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_bbox_pred')

            # generate anchor
            height = tf.cast(tf.shape(rpn)[1], tf.float32)
            width = tf.cast(tf.shape(rpn)[2], tf.float32)
            anchors = generate_anchors_opr(height, width, cfg.stride[0],
                                           cfg.anchor_scales,
                                           cfg.anchor_ratios)
            # change it so that the score has 2 as its channel size
            rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = tf.nn.softmax(rpn_cls_prob, name='rpn_cls_prob')
            rpn_cls_prob = tf.reshape(rpn_cls_prob, tf.shape(rpn_cls_score))

            rois, roi_scores = proposal_opr(rpn_cls_prob,
                                            rpn_bbox_pred,
                                            im_info,
                                            mode,
                                            cfg.stride,
                                            anchors,
                                            num_anchors,
                                            is_tfchannel=True,
                                            is_tfnms=False)

            if is_training:
                with tf.variable_scope('anchor') as scope:
                    rpn_labels, rpn_bbox_targets = \
                        tf.py_func(
                            anchor_target_layer,
                            [gt_boxes, im_info, cfg.stride, anchors,
                             num_anchors],
                            [tf.float32, tf.float32])
                    rpn_labels = tf.to_int32(rpn_labels, name="to_int32")

                with tf.control_dependencies([rpn_labels]):
                    with tf.variable_scope('rpn_rois') as scope:
                        rois, labels, bbox_targets = \
                            tf.py_func(
                                proposal_target_layer,
                                [rois, gt_boxes, im_info],
                                [tf.float32, tf.float32, tf.float32])
                        labels = tf.to_int32(labels, name="to_int32")

        with tf.variable_scope('resnet_v1_101',
                               'resnet_v1_101',
                               regularizer=tf.contrib.layers.l2_regularizer(
                                   cfg.weight_decay)):

            conv_new_1 = slim.conv2d(net_conv5,
                                     1024, [1, 1],
                                     trainable=is_training,
                                     weights_initializer=initializer,
                                     activation_fn=nn_ops.relu,
                                     scope="conv_new_1")

            rfcn_cls = slim.conv2d(conv_new_1,
                                   7 * 7 * cfg.num_classes, [1, 1],
                                   trainable=is_training,
                                   weights_initializer=initializer,
                                   activation_fn=None,
                                   scope="rfcn_cls")
            rfcn_bbox = slim.conv2d(conv_new_1,
                                    7 * 7 * 4, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    activation_fn=None,
                                    scope="rfcn_bbox")

            [psroipooled_cls_rois,
             _] = psroi_pooling_op.psroi_pool(rfcn_cls,
                                              rois,
                                              group_size=7,
                                              spatial_scale=1.0 / 16.0)
            [psroipooled_loc_rois,
             _] = psroi_pooling_op.psroi_pool(rfcn_bbox,
                                              rois,
                                              group_size=7,
                                              spatial_scale=1.0 / 16.0)

            cls_score = tf.reduce_mean(psroipooled_cls_rois, axis=[1, 2])
            bbox_pred = tf.reduce_mean(psroipooled_loc_rois, axis=[1, 2])
            cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob")
            # cls_prob = tf.nn.softmax(cls_score, name="cls_prob")
            bbox_pred = tf.tile(bbox_pred, [1, cfg.num_classes])

        if not is_training:
            stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS),
                           (cfg.num_classes))
            means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
                            (cfg.num_classes))
            bbox_pred *= stds
            bbox_pred += means

            ##############add prediction#####################
            tf.add_to_collection("rpn_cls_score", rpn_cls_score)
            tf.add_to_collection("rpn_cls_prob", rpn_cls_prob)
            tf.add_to_collection("rpn_bbox_pred", rpn_bbox_pred)
            tf.add_to_collection("cls_score", cls_score)
            tf.add_to_collection("cls_prob", cls_prob)
            tf.add_to_collection("bbox_pred", bbox_pred)
            tf.add_to_collection("rois", rois)

        else:
            #--------------------  rpn loss ---------------------------------#
            from detection_opr.utils import loss_opr_without_box_weight
            rpn_loss_box = loss_opr_without_box_weight.smooth_l1_loss_rpn(
                tf.reshape(rpn_bbox_pred, [-1, 4]),
                tf.reshape(rpn_bbox_targets, [-1, 4]),
                tf.reshape(rpn_labels, [-1]),
                sigma=cfg.simga_rpn)

            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_label = tf.reshape(rpn_labels, [-1])
            rpn_select = tf.where(tf.not_equal(rpn_label, -1))
            rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, rpn_select),
                                       [-1, 2])
            rpn_label = tf.reshape(tf.gather(rpn_label, rpn_select), [-1])
            rpn_cross_entropy = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=rpn_cls_score, labels=rpn_label))

            #-------------------- rcnn loss  --------------------------------#
            label = tf.reshape(labels, [-1])
            cross_entropy, loss_box = loss_opr_without_box_weight.sum_ohem_loss(
                tf.reshape(cls_score, [-1, cfg.num_classes]), label, bbox_pred,
                bbox_targets, cfg.TRAIN.nr_ohem_sampling, cfg.num_classes)
            loss_box *= 2

            #--------------------add to colloection ------------------------#
            tf.add_to_collection('loss_cls', cross_entropy)
            tf.add_to_collection('loss_box', loss_box)
            tf.add_to_collection('rpn_loss_cls', rpn_cross_entropy)
            tf.add_to_collection('rpn_loss_box', rpn_loss_box)
            loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
            tf.add_to_collection('losses', loss)
            return loss