Example #1

# Imports assumed by this example (an assumption -- module paths for the
# project-local helpers depend on the repository layout):
import os

import tensorflow as tf
from scipy.misc import imread, imsave

# Also referenced below but not shown here: common_preprocessing,
# anchor_manipulator, xception_body, eval_helper, draw_toolbox, the FLAGS
# definition, and the ps_roi_align / pool_method pooling utilities.

def main(_):
    with tf.Graph().as_default():
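        # Inputs: one RGB image of arbitrary size as uint8, plus its original
        # (height, width), used later to filter boxes against the true shape.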
        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features = common_preprocessing.light_head_preprocess_for_test(
            image_input, [FLAGS.train_image_size] * 2,
            data_format=('NCHW'
                         if FLAGS.data_format == 'channels_first' else 'NHWC'))

        features = tf.expand_dims(features, axis=0)

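        # RPN anchors on a single 30x30 feature map with stride 16:
        # scales 0.1-0.8 and aspect ratios 1, 2 and 0.5.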
        anchor_creator = anchor_manipulator.AnchorCreator(
            [FLAGS.train_image_size] * 2,
            layers_shapes=[(30, 30)],
            anchor_scales=[[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]],
            extra_anchor_scales=[[0.1]],
            anchor_ratios=[[1., 2., .5]],
            layer_steps=[16])

        all_anchors, num_anchors_list = anchor_creator.get_all_anchors()

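        # The matching thresholds are None: at inference time the encoder is
        # only used to decode regression offsets, never to assign targets.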
        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            all_anchors,
            num_classes=FLAGS.num_classes,
            allowed_borders=None,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[1., 1., 1., 1.])

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            rpn_feat_map, backbone_feat = xception_body.XceptionBody(
                features,
                FLAGS.num_classes,
                is_training=False,
                data_format=FLAGS.data_format)
            #rpn_feat_map = tf.Print(rpn_feat_map,[tf.shape(rpn_feat_map), rpn_feat_map,backbone_feat])
            rpn_cls_score, rpn_bbox_pred = xception_body.get_rpn(
                rpn_feat_map, num_anchors_list[0], False, FLAGS.data_format,
                'rpn_head')

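            # Light-Head R-CNN's "large separable convolution": a thin feature
            # map with 10 * 7 * 7 channels, consumed below by PS-RoI align.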
            large_sep_feature = xception_body.large_sep_kernel(
                backbone_feat, 256, 10 * 7 * 7, False, FLAGS.data_format,
                'large_sep_feature')

            if FLAGS.data_format == 'channels_first':
                rpn_cls_score = tf.transpose(rpn_cls_score, [0, 2, 3, 1])
                rpn_bbox_pred = tf.transpose(rpn_bbox_pred, [0, 2, 3, 1])

            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_object_score = tf.nn.softmax(rpn_cls_score)[:, -1]

            rpn_object_score = tf.reshape(rpn_object_score, [1, -1])
            rpn_location_pred = tf.reshape(rpn_bbox_pred, [1, -1, 4])

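            # Decode the RPN regression offsets (identity prior scaling) back
            # into absolute box coordinates.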
            rpn_bboxes_pred = anchor_encoder_decoder.decode_all_anchors(
                [rpn_location_pred], squeeze_inner=True)[0]

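            # RPN proposal stage: objectness top-k, minimum-size filtering and
            # NMS, controlled by the rpn_* flags.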
            proposals_bboxes = xception_body.get_proposals(
                rpn_object_score, rpn_bboxes_pred, None,
                FLAGS.rpn_pre_nms_top_n, FLAGS.rpn_post_nms_top_n,
                FLAGS.rpn_nms_thres, FLAGS.rpn_min_size, False,
                FLAGS.data_format)

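            # R-CNN head: classify and regress each proposal via PS-RoI align
            # pooling (7x7 grid) over the thin feature map.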
            cls_score, bboxes_reg = xception_body.get_head(
                large_sep_feature,
                lambda input_, bboxes_, grid_width_, grid_height_: ps_roi_align(
                    input_, bboxes_, grid_width_, grid_height_, pool_method),
                7, 7, None, proposals_bboxes, FLAGS.num_classes, False, False,
                0, FLAGS.data_format, 'final_head')

            head_bboxes_pred = anchor_encoder_decoder.ext_decode_rois(
                proposals_bboxes,
                bboxes_reg,
                head_prior_scaling=[1., 1., 1., 1.])

            head_cls_score = tf.reshape(cls_score, [-1, FLAGS.num_classes])
            head_cls_score = tf.nn.softmax(head_cls_score)
            head_bboxes_pred = tf.reshape(head_bboxes_pred, [-1, 4])

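            # Post-processing (score thresholding, clipping, sorting and NMS)
            # is pinned to the CPU.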
            with tf.device('/device:CPU:0'):
                selected_scores, selected_bboxes = eval_helper.tf_bboxes_select(
                    [head_cls_score], [head_bboxes_pred],
                    FLAGS.select_threshold,
                    FLAGS.num_classes,
                    scope='xdet_v2_select')

                selected_bboxes = eval_helper.bboxes_clip(
                    tf.constant([0., 0., 1., 1.]), selected_bboxes)
                selected_scores, selected_bboxes = eval_helper.filter_boxes(
                    selected_scores,
                    selected_bboxes,
                    0.03,
                    shape_input, [FLAGS.train_image_size] * 2,
                    keep_top_k=FLAGS.nms_topk * 2)

                # Resize bboxes to original image shape.
                selected_bboxes = eval_helper.bboxes_resize(
                    tf.constant([0., 0., 1., 1.]), selected_bboxes)

                selected_scores, selected_bboxes = eval_helper.bboxes_sort(
                    selected_scores, selected_bboxes, top_k=FLAGS.nms_topk * 2)

                # Apply NMS algorithm.
                selected_scores, selected_bboxes = eval_helper.bboxes_nms_batch(
                    selected_scores,
                    selected_bboxes,
                    nms_threshold=FLAGS.nms_threshold,
                    keep_top_k=FLAGS.nms_topk)

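                # Flatten the per-class dicts into single label/score/bbox
                # tensors for drawing.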
                labels_list = []
                for k, v in selected_scores.items():
                    labels_list.append(tf.ones_like(v, tf.int32) * k)
                all_labels = tf.concat(labels_list, axis=0)
                all_scores = tf.concat(list(selected_scores.values()), axis=0)
                all_bboxes = tf.concat(list(selected_bboxes.values()), axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, FLAGS.checkpoint_path)

            np_image = imread('./demo/test.jpg')
            labels_, scores_, bboxes_ = sess.run(
                [all_labels, all_scores, all_bboxes],
                feed_dict={
                    image_input: np_image,
                    shape_input: np_image.shape[:-1]
                })

            img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                          labels_,
                                                          scores_,
                                                          bboxes_,
                                                          thickness=2)
            imsave(os.path.join(FLAGS.debug_dir, 'test_out.jpg'), img_to_draw)
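
if __name__ == '__main__':
    # Assumed entry point (not shown in the original snippet): TF1 scripts of
    # this shape define FLAGS via tf.app.flags and dispatch to main(_) here.
    tf.app.run()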

Example #2

def lighr_head_model_fn(features, labels, mode, params):
    """Model_fn for the Light-Head R-CNN detector (training), for the Estimator."""
    num_anchors_list = labels['num_anchors_list']
    num_feature_layers = len(num_anchors_list)

    shape = labels['targets'][-1]
    glabels = labels['targets'][:num_feature_layers][0]
    gtargets = labels['targets'][num_feature_layers:2 * num_feature_layers][0]
    gscores = labels['targets'][2 * num_feature_layers:3 *
                                num_feature_layers][0]

    #features = tf.ones([4,480,480,3]) * 0.5
    with tf.variable_scope(params['model_scope'],
                           default_name=None,
                           values=[features],
                           reuse=tf.AUTO_REUSE):
        rpn_feat_map, backbone_feat = xception_body.XceptionBody(
            features,
            params['num_classes'],
            is_training=(mode == tf.estimator.ModeKeys.TRAIN),
            data_format=params['data_format'])
        #rpn_feat_map = tf.Print(rpn_feat_map,[tf.shape(rpn_feat_map), rpn_feat_map,backbone_feat])
        rpn_cls_score, rpn_bbox_pred = xception_body.get_rpn(
            rpn_feat_map, num_anchors_list[0],
            (mode == tf.estimator.ModeKeys.TRAIN), params['data_format'],
            'rpn_head')

        large_sep_feature = xception_body.large_sep_kernel(
            backbone_feat, 256, 10 * 7 * 7,
            (mode == tf.estimator.ModeKeys.TRAIN), params['data_format'],
            'large_sep_feature')

        if params['data_format'] == 'channels_first':
            rpn_cls_score = tf.transpose(rpn_cls_score, [0, 2, 3, 1])
            rpn_bbox_pred = tf.transpose(rpn_bbox_pred, [0, 2, 3, 1])

        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_object_score = tf.nn.softmax(rpn_cls_score)[:, -1]

        #with tf.device('/cpu:0'):
        rpn_object_score = tf.reshape(rpn_object_score,
                                      [params['batch_size'], -1])
        rpn_location_pred = tf.reshape(rpn_bbox_pred,
                                       [params['batch_size'], -1, 4])

        #rpn_location_pred = tf.Print(rpn_location_pred,[tf.shape(rpn_location_pred), rpn_location_pred])

        rpn_bboxes_pred = labels['rpn_decode_fn'](rpn_location_pred)

        #rpn_bboxes_pred = tf.Print(rpn_bboxes_pred,[tf.shape(rpn_bboxes_pred), rpn_bboxes_pred])
        # rpn loss here
        cls_pred = tf.reshape(rpn_cls_score, [-1, 2])
        location_pred = tf.reshape(rpn_bbox_pred, [-1, 4])
        glabels = tf.reshape(glabels, [-1])
        gscores = tf.reshape(gscores, [-1])
        gtargets = tf.reshape(gtargets, [-1, 4])

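        # Target number of foreground anchors in the RPN minibatch:
        # round(batch_size * rpn_anchors_per_image * rpn_fg_ratio).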
        expected_num_fg_rois = tf.cast(
            tf.round(
                tf.cast(params['batch_size'] * params['rpn_anchors_per_image'],
                        tf.float32) * params['rpn_fg_ratio']), tf.int32)

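        # Subsample anchors into a fixed-size RPN minibatch: keep at most the
        # expected number of positives, fill the rest with negatives, and
        # repeat indices if there are still too few.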
        def select_samples(cls_pred, location_pred, glabels, gscores,
                           gtargets):
            def upsampel_impl(now_count, need_count):
                # pad to need_count by repeating indices (sample with replacement)
                left_count = need_count - now_count
                select_indices = tf.random_shuffle(
                    tf.range(now_count))[:tf.floormod(left_count, now_count)]
                select_indices = tf.concat(
                    [tf.tile(tf.range(now_count),
                             [tf.floor_div(left_count, now_count) + 1]),
                     select_indices], axis=0)

                return select_indices

            def downsample_impl(now_count, need_count):
                # sample without replacement
                select_indices = tf.random_shuffle(
                    tf.range(now_count))[:need_count]
                return select_indices

            positive_mask = glabels > 0
            positive_indices = tf.squeeze(tf.where(positive_mask), axis=-1)
            n_positives = tf.shape(positive_indices)[0]
            # either downsample or take all
            fg_select_indices = tf.cond(
                n_positives < expected_num_fg_rois, lambda: positive_indices,
                lambda: tf.gather(
                    positive_indices,
                    downsample_impl(n_positives, expected_num_fg_rois)))
            # the number of RoIs kept as positives is now min(n_positives, expected_num_fg_rois)

            # alternative negative masks considered:
            #   tf.logical_and(tf.logical_and(tf.logical_not(tf.logical_or(positive_mask, glabels < 0)), gscores < params['rpn_neg_threshold']), gscores > 0.)
            #   tf.logical_and(tf.equal(glabels, 0), gscores > 0.)
            negtive_mask = tf.equal(glabels, 0)
            negtive_indices = tf.squeeze(tf.where(negtive_mask), axis=-1)
            n_negtives = tf.shape(negtive_indices)[0]

            expected_num_bg_rois = (
                params['batch_size'] * params['rpn_anchors_per_image'] -
                tf.minimum(n_positives, expected_num_fg_rois))
            # either downsample or take all
            bg_select_indices = tf.cond(
                n_negtives < expected_num_bg_rois, lambda: negtive_indices,
                lambda: tf.gather(
                    negtive_indices,
                    downsample_impl(n_negtives, expected_num_bg_rois)))
            # the number of RoIs kept as negatives is now min(n_negtives, expected_num_bg_rois)

            keep_indices = tf.concat([fg_select_indices, bg_select_indices],
                                     axis=0)
            n_keeps = tf.shape(keep_indices)[0]
            # n_keeps is at most batch_size * rpn_anchors_per_image; if it
            # falls short, upsample by repeating indices.
            final_keep_indices = tf.cond(
                n_keeps < params['batch_size'] * params['rpn_anchors_per_image'],
                lambda: tf.gather(
                    keep_indices,
                    upsampel_impl(n_keeps,
                                  params['batch_size'] *
                                  params['rpn_anchors_per_image'])),
                lambda: keep_indices)

            keep_labels = tf.cast(
                tf.gather(tf.clip_by_value(glabels, 0, params['num_classes']),
                          final_keep_indices) > 0, tf.int64)
            return (tf.gather(cls_pred, final_keep_indices),
                    tf.gather(location_pred, final_keep_indices), keep_labels,
                    tf.gather(gscores, final_keep_indices),
                    tf.gather(gtargets, final_keep_indices))

        cls_pred, location_pred, glabels, gscores, gtargets = select_samples(
            cls_pred, location_pred, glabels, gscores, gtargets)

        # Calculate loss, which includes softmax cross entropy and L2 regularization.
        rpn_cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            labels=glabels, logits=cls_pred)

        # Create a tensor named cross_entropy for logging purposes.
        rpn_cross_entropy = tf.identity(rpn_cross_entropy,
                                        name='rpn_cross_entropy_loss')
        tf.summary.scalar('rpn_cross_entropy_loss', rpn_cross_entropy)

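        # The smooth-L1 location loss is computed on positive anchors only;
        # stop_gradient keeps the selection mask out of the backward pass.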
        total_positive_mask = (glabels > 0)
        gtargets = tf.boolean_mask(gtargets,
                                   tf.stop_gradient(total_positive_mask))
        location_pred = tf.boolean_mask(location_pred,
                                        tf.stop_gradient(total_positive_mask))
        #gtargets = tf.Print(gtargets, [gtargets], message='gtargets:', summarize=100)

        rpn_l1_distance = modified_smooth_l1(location_pred, gtargets, sigma=1.)
        rpn_loc_loss = tf.reduce_mean(tf.reduce_sum(
            rpn_l1_distance, axis=-1)) / params['rpn_fg_ratio']
        rpn_loc_loss = tf.identity(rpn_loc_loss, name='rpn_location_loss')
        tf.summary.scalar('rpn_location_loss', rpn_loc_loss)
        tf.losses.add_loss(rpn_loc_loss)

        rpn_loss = tf.identity(rpn_loc_loss + rpn_cross_entropy,
                               name='rpn_loss')
        tf.summary.scalar('rpn_loss', rpn_loss)
        #print(rpn_loc_loss)

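        # During training, get_proposals also matches the RoIs to ground truth
        # via rpn_encode_fn, returning regression targets, labels and scores.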
        proposals_bboxes, proposals_targets, proposals_labels, proposals_scores = xception_body.get_proposals(
            rpn_object_score, rpn_bboxes_pred, labels['rpn_encode_fn'],
            params['rpn_pre_nms_top_n'], params['rpn_post_nms_top_n'],
            params['rpn_nms_thres'], params['rpn_min_size'],
            (mode == tf.estimator.ModeKeys.TRAIN), params['data_format'])

        #proposals_targets = tf.Print(proposals_targets, [proposals_targets], message='proposals_targets0:')
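        # Head loss: softmax cross entropy on the RoI labels plus smooth-L1 on
        # the positive RoIs' regression targets; under OHEM, select_indices
        # restricts both to the hardest RoIs.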
        def head_loss_func(cls_score, bboxes_reg, select_indices,
                           proposals_targets, proposals_labels):
            if select_indices is not None:
                proposals_targets = tf.gather(proposals_targets,
                                              select_indices,
                                              axis=1)
                proposals_labels = tf.gather(proposals_labels,
                                             select_indices,
                                             axis=1)
            # Calculate loss, which includes softmax cross entropy and L2 regularization.
            head_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=proposals_labels, logits=cls_score)

            total_positive_mask = tf.cast((proposals_labels > 0), tf.float32)
            # proposals_targets = tf.boolean_mask(proposals_targets, tf.stop_gradient(total_positive_mask))
            # bboxes_reg = tf.boolean_mask(bboxes_reg, tf.stop_gradient(total_positive_mask))
            head_loc_loss = modified_smooth_l1(bboxes_reg,
                                               proposals_targets,
                                               sigma=1.)
            head_loc_loss = tf.reduce_sum(head_loc_loss,
                                          axis=-1) * total_positive_mask
            # Log the head losses once per step (with OHEM, only after the
            # hard examples have been selected).
            if select_indices is not None or not params['using_ohem']:
                head_cross_entropy_loss = tf.reduce_mean(head_cross_entropy)
                head_cross_entropy_loss = tf.identity(
                    head_cross_entropy_loss, name='head_cross_entropy_loss')
                tf.summary.scalar('head_cross_entropy_loss',
                                  head_cross_entropy_loss)

                head_location_loss = tf.reduce_mean(
                    head_loc_loss) / params['fg_ratio']
                head_location_loss = tf.identity(head_location_loss,
                                                 name='head_location_loss')
                tf.summary.scalar('head_location_loss', head_location_loss)

            return head_cross_entropy + head_loc_loss / params['fg_ratio']

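        # get_head applies PS-RoI align per proposal and feeds the raw head
        # outputs to head_loss_func; with OHEM enabled it keeps only the
        # ohem_roi_one_image hardest RoIs per image for the loss.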
        head_loss = xception_body.get_head(
            large_sep_feature,
            lambda input_, bboxes_, grid_width_, grid_height_: ps_roi_align(
                input_, bboxes_, grid_width_, grid_height_, pool_method), 7,
            7, lambda cls, bbox, indices: head_loss_func(
                cls, bbox, indices, proposals_targets, proposals_labels),
            proposals_bboxes, params['num_classes'],
            (mode == tf.estimator.ModeKeys.TRAIN), params['using_ohem'],
            params['ohem_roi_one_image'], params['data_format'], 'final_head')

        # Create a tensor named cross_entropy for logging purposes.
        head_loss = tf.identity(head_loss, name='head_loss')
        tf.summary.scalar('head_loss', head_loss)

        tf.losses.add_loss(head_loss)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=None)

    # Add weight decay to the loss. We exclude the batch norm variables because
    # doing so leads to a small improvement in accuracy.
    loss = rpn_cross_entropy + rpn_loc_loss + head_loss + (
        params['weight_decay'] * tf.add_n([
            tf.nn.l2_loss(v) for v in tf.trainable_variables()
            if ('batch_normalization' not in v.name) and ('_bn' not in v.name)
        ]))
    total_loss = tf.identity(loss, name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()

        lr_values = [
            params['learning_rate'] * decay
            for decay in params['lr_decay_factors']
        ]
        learning_rate = tf.train.piecewise_constant(
            tf.cast(global_step, tf.int32),
            [int(_) for _ in params['decay_boundaries']], lr_values)
        truncated_learning_rate = tf.maximum(
            learning_rate,
            tf.constant(params['end_learning_rate'],
                        dtype=learning_rate.dtype))
        # Create a tensor named learning_rate for logging purposes.
        tf.identity(truncated_learning_rate, name='learning_rate')
        tf.summary.scalar('learning_rate', truncated_learning_rate)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=truncated_learning_rate, momentum=params['momentum'])

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)
    else:
        train_op = None

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=None,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=None,
        scaffold=tf.train.Scaffold(
            init_fn=train_helper.get_init_fn_for_scaffold(FLAGS)))

Example #3

def lighr_head_model_fn(features, labels, mode, params):
    """Model_fn for the Light-Head R-CNN detector (evaluation), for the Estimator."""
    num_anchors_list = labels['num_anchors_list']
    num_feature_layers = len(num_anchors_list)

    shape = labels['targets'][-1]
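    # Eval-only tensors ride along at the tail of labels['targets'].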
    if mode != tf.estimator.ModeKeys.TRAIN:
        org_image = labels['targets'][-2]
        isdifficult = labels['targets'][-3]
        bbox_img = labels['targets'][-4]
        gbboxes_raw = labels['targets'][-5]
        glabels_raw = labels['targets'][-6]

    glabels = labels['targets'][:num_feature_layers][0]
    gtargets = labels['targets'][num_feature_layers:2 * num_feature_layers][0]
    gscores = labels['targets'][2 * num_feature_layers:3 *
                                num_feature_layers][0]

    #features = tf.ones([4,480,480,3]) * 0.5
    with tf.variable_scope(params['model_scope'],
                           default_name=None,
                           values=[features],
                           reuse=tf.AUTO_REUSE):
        rpn_feat_map, backbone_feat = xception_body.XceptionBody(
            features,
            params['num_classes'],
            is_training=(mode == tf.estimator.ModeKeys.TRAIN),
            data_format=params['data_format'])
        #rpn_feat_map = tf.Print(rpn_feat_map,[tf.shape(rpn_feat_map), rpn_feat_map,backbone_feat])
        rpn_cls_score, rpn_bbox_pred = xception_body.get_rpn(
            rpn_feat_map, num_anchors_list[0],
            (mode == tf.estimator.ModeKeys.TRAIN), params['data_format'],
            'rpn_head')

        large_sep_feature = xception_body.large_sep_kernel(
            backbone_feat, 256, 10 * 7 * 7,
            (mode == tf.estimator.ModeKeys.TRAIN), params['data_format'],
            'large_sep_feature')

        if params['data_format'] == 'channels_first':
            rpn_cls_score = tf.transpose(rpn_cls_score, [0, 2, 3, 1])
            rpn_bbox_pred = tf.transpose(rpn_bbox_pred, [0, 2, 3, 1])

        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_object_score = tf.nn.softmax(rpn_cls_score)[:, -1]

        rpn_object_score = tf.reshape(rpn_object_score, [1, -1])
        rpn_location_pred = tf.reshape(rpn_bbox_pred, [1, -1, 4])

        rpn_bboxes_pred = labels['rpn_decode_fn'](rpn_location_pred)

        proposals_bboxes = xception_body.get_proposals(
            rpn_object_score, rpn_bboxes_pred, None,
            params['rpn_pre_nms_top_n'], params['rpn_post_nms_top_n'],
            params['rpn_nms_thres'], params['rpn_min_size'],
            (mode == tf.estimator.ModeKeys.TRAIN), params['data_format'])
        #proposals_targets = tf.Print(proposals_targets, [proposals_targets], message='proposals_targets0:')

        cls_score, bboxes_reg = xception_body.get_head(
            large_sep_feature,
            lambda input_, bboxes_, grid_width_, grid_height_: ps_roi_align(
                input_, bboxes_, grid_width_, grid_height_, pool_method), 7, 7,
            None, proposals_bboxes, params['num_classes'],
            (mode == tf.estimator.ModeKeys.TRAIN), False, 0,
            params['data_format'], 'final_head')

        head_bboxes_pred = labels['head_decode_fn'](proposals_bboxes,
                                                    bboxes_reg)

        head_cls_score = tf.reshape(cls_score, [-1, params['num_classes']])
        head_cls_score = tf.nn.softmax(head_cls_score)
        head_bboxes_pred = tf.reshape(head_bboxes_pred, [-1, 4])

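        # The eval batch size is 1, so strip the leading batch dimension.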
        shape = tf.squeeze(shape, axis=0)
        glabels = tf.squeeze(glabels, axis=0)
        gtargets = tf.squeeze(gtargets, axis=0)
        gscores = tf.squeeze(gscores, axis=0)
        if mode != tf.estimator.ModeKeys.TRAIN:
            org_image = tf.squeeze(org_image, axis=0)
            isdifficult = tf.squeeze(isdifficult, axis=0)
            gbboxes_raw = tf.squeeze(gbboxes_raw, axis=0)
            glabels_raw = tf.squeeze(glabels_raw, axis=0)
            bbox_img = tf.squeeze(bbox_img, axis=0)

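        # Compute detection metrics against the raw ground truth and save an
        # annotated image as a side effect.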
        eval_ops, save_image_op = bboxes_eval(org_image, shape, bbox_img,
                                              cls_score, head_bboxes_pred,
                                              glabels_raw, gbboxes_raw,
                                              isdifficult,
                                              params['num_classes'])
        _ = tf.identity(save_image_op, name='save_image_with_bboxes_op')

    with tf.control_dependencies([save_image_op]):
        weight_decay_loss = params['weight_decay'] * tf.add_n([
            tf.nn.l2_loss(v) for v in tf.trainable_variables()
            if 'batch_normalization' not in v.name
        ])

    predictions = {
        'classes': tf.argmax(head_cls_score, axis=-1),
        'probabilities': tf.reduce_max(head_cls_score, axis=-1),
        'bboxes_predict': head_bboxes_pred,
        'saved_image_index': save_image_op
    }

    summary_hook = tf.train.SummarySaverHook(
        save_secs=FLAGS.save_summary_steps,
        output_dir=FLAGS.model_dir,
        summary_op=tf.summary.merge_all())

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          evaluation_hooks=[summary_hook],
                                          loss=weight_decay_loss,
                                          eval_metric_ops=eval_ops)
    else:
        raise ValueError('This script only supports EVAL mode!')