Code example #1
# Assumed imports (TF 1.x). The module paths follow the SSD.TensorFlow
# project layout these snippets come from and may differ in your checkout;
# save_image_with_bbox is a debug helper defined elsewhere in the same file.
import tensorflow as tf
from tensorflow.contrib import slim
from preprocessing import ssd_preprocessing
from utility import anchor_manipulator


def slim_get_split(file_pattern='{}_????'):
    # Features in Pascal VOC TFRecords.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/channels': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape': slim.tfexample_decoder.Tensor('image/shape'),
        'object/bbox': slim.tfexample_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/difficult': slim.tfexample_decoder.Tensor('image/object/bbox/difficult'),
        'object/truncated': slim.tfexample_decoder.Tensor('image/object/bbox/truncated'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)

    dataset = slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=tf.TFRecordReader,
        decoder=decoder,
        num_samples=100,
        items_to_descriptions=None,
        num_classes=21,
        labels_to_names=None)

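    # The slim provider reads TFRecords through input queues, so the op
    # returned by this function only runs once queue runners are started
    # (see the driver sketch after this snippet).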
    with tf.name_scope('dataset_data_provider'):
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=2,
            common_queue_capacity=32,
            common_queue_min=8,
            shuffle=True,
            num_epochs=1)

    [org_image, shape, glabels_raw, gbboxes_raw, isdifficult] = provider.get(['image', 'shape',
                                                                              'object/label',
                                                                              'object/bbox',
                                                                              'object/difficult'])
    image, glabels, gbboxes = ssd_preprocessing.preprocess_image(org_image, glabels_raw, gbboxes_raw, [300, 300],
                                                                 is_training=True, data_format='channels_last',
                                                                 output_rgb=True)

    anchor_creator = anchor_manipulator.AnchorCreator([300] * 2,
                                                      layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                                                                     (1, 1)],
                                                      anchor_scales=[(0.1,), (0.2,), (0.375,), (0.55,), (0.725,),
                                                                     (0.9,)],
                                                      extra_anchor_scales=[(0.1414,), (0.2739,), (0.4541,), (0.6315,),
                                                                           (0.8078,), (0.9836,)],
                                                      anchor_ratios=[(2., .5), (2., 3., .5, 0.3333),
                                                                     (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
                                                                     (2., .5), (2., .5)],
                                                      layer_steps=[8, 16, 32, 64, 100, 300])

    all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

    num_anchors_per_layer = []
    for ind in range(len(all_anchors)):
        num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])

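    # Assumed semantics (standard SSD matching): anchors whose IoU with a
    # ground-truth box reaches positive_threshold become positives, and
    # prior_scaling holds the usual box-encoding variances.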
    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders=[1.0] * 6,
                                                              positive_threshold=0.5,
                                                              ignore_threshold=0.5,
                                                              prior_scaling=[0.1, 0.1, 0.2, 0.2])

    gt_targets, gt_labels, gt_scores = anchor_encoder_decoder.encode_all_anchors(glabels, gbboxes, all_anchors,
                                                                                 all_num_anchors_depth,
                                                                                 all_num_anchors_spatial, True)

    # _all_anchors is a private attribute cached by encode_all_anchors above
    anchors = anchor_encoder_decoder._all_anchors
    # split the flat per-anchor tensors back into per-layer pieces
    gt_targets, gt_labels, gt_scores, anchors = tf.split(gt_targets, num_anchors_per_layer, axis=0), \
                                                tf.split(gt_labels, num_anchors_per_layer, axis=0), \
                                                tf.split(gt_scores, num_anchors_per_layer, axis=0), \
                                                [tf.split(anchor, num_anchors_per_layer, axis=0) for anchor in anchors]

    save_image_op = tf.py_func(save_image_with_bbox,
                               [ssd_preprocessing.unwhiten_image(image),
                                tf.clip_by_value(tf.concat(gt_labels, axis=0), 0, tf.int64.max),
                                tf.concat(gt_scores, axis=0),
                                tf.concat(gt_targets, axis=0)],
                               tf.int64, stateful=True)
    return save_image_op
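
Since slim's DatasetDataProvider is queue-based, the save_image_op returned
above only yields output inside a session with queue runners running. A
minimal driver sketch, assuming a placeholder TFRecord pattern and step count:

# Hypothetical driver for code example #1 (TF 1.x); the pattern and the
# number of steps are placeholders, not part of the original snippet.
import tensorflow as tf

save_image_op = slim_get_split('./dataset/tfrecords/train_????')

with tf.Session() as sess:
    # num_epochs creates a local variable, so initialize locals as well.
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for _ in range(10):
            sess.run(save_image_op)  # saves one annotated image per run
    finally:
        coord.request_stop()
        coord.join(threads)
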
Code example #2
File: eval_ssd.py  Project: Liu-Yicheng/SSD
# Assumed imports (TF 1.x), following the same project layout as above;
# save_image_with_bbox is again a local debug helper.
import tensorflow as tf
from net import ssd_net
from preprocessing import ssd_preprocessing
from utility import anchor_manipulator
from utility import bbox_util


def ssd_model_fn(features, labels, mode, params):
    """model_fn for SSD to be used with our Estimator."""
    filename = features['filename']
    filename = tf.identity(filename, name='filename')
    shape = features['shape']
    output_shape = features['output_shape']
    features = features['image']

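    # Thresholds are None here: prediction only decodes boxes, so no
    # anchor/ground-truth matching is performed (assumption based on the
    # PREDICT-only branch below).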
    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(positive_threshold=None, ignore_threshold=None, prior_scaling=[0.1, 0.1, 0.2, 0.2])
    all_anchor_scales = [(30.,), (60.,), (112.5,), (165.,), (217.5,), (270.,)]
    all_extra_scales = [(42.43,), (82.17,), (136.23,), (189.45,), (242.34,), (295.08,)]
    all_anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)]
    #all_anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., .5), (2., .5)]

    with tf.variable_scope(params['model_scope'], default_name=None, values=[features], reuse=tf.AUTO_REUSE):
        backbone = ssd_net.VGG16Backbone(params['data_format'])
        # forward features
        feature_layers = backbone.forward(features, training=(mode == tf.estimator.ModeKeys.TRAIN))
        # generate anchors according to the feature map size
        with tf.device('/cpu:0'):
            if params['data_format'] == 'channels_first':
                all_layer_shapes = [tf.shape(feat)[2:] for feat in feature_layers]
            else:
                all_layer_shapes = [tf.shape(feat)[1:3] for feat in feature_layers]
            all_layer_strides = [8, 16, 32, 64, 100, 300]
            total_layers = len(all_layer_shapes)
            anchors_height = list()
            anchors_width = list()
            anchors_depth = list()
            for ind in range(total_layers):
                _anchors_height, _anchors_width, _anchor_depth = anchor_encoder_decoder.get_anchors_width_height(all_anchor_scales[ind], all_extra_scales[ind], all_anchor_ratios[ind], name='get_anchors_width_height{}'.format(ind))
                anchors_height.append(_anchors_height)
                anchors_width.append(_anchors_width)
                anchors_depth.append(_anchor_depth)
            anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, _ = anchor_encoder_decoder.get_all_anchors(tf.squeeze(output_shape, axis=0),
                                                                            anchors_height, anchors_width, anchors_depth,
                                                                            [0.5] * total_layers, all_layer_shapes, all_layer_strides,
                                                                            [0.] * total_layers, [False] * total_layers)
        # generate predictions based on anchors
        location_pred, cls_pred = ssd_net.multibox_head(feature_layers, params['num_classes'], anchors_depth, data_format=params['data_format'])
        if params['data_format'] == 'channels_first':
            cls_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred]
            location_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred]

        cls_pred = [tf.reshape(pred, [tf.shape(features)[0], -1, params['num_classes']]) for pred in cls_pred]
        location_pred = [tf.reshape(pred, [tf.shape(features)[0], -1, 4]) for pred in location_pred]

        cls_pred = tf.concat(cls_pred, axis=1)
        location_pred = tf.concat(location_pred, axis=1)

        cls_pred = tf.reshape(cls_pred, [-1, params['num_classes']])
        location_pred = tf.reshape(location_pred, [-1, 4])
    # decode predictions
    with tf.device('/cpu:0'):
        bboxes_pred = anchor_encoder_decoder.decode_anchors(location_pred, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax)
        selected_bboxes, selected_scores = bbox_util.parse_by_class(tf.squeeze(output_shape, axis=0), cls_pred, bboxes_pred,
                                                        params['num_classes'], params['select_threshold'], params['min_size'],
                                                        params['keep_topk'], params['nms_topk'], params['nms_threshold'])

    labels_list = []
    scores_list = []
    bboxes_list = []
    for k, v in selected_scores.items():
        labels_list.append(tf.ones_like(v, tf.int32) * k)
        scores_list.append(v)
        bboxes_list.append(selected_bboxes[k])
    all_labels = tf.concat(labels_list, axis=0)
    all_scores = tf.concat(scores_list, axis=0)
    all_bboxes = tf.concat(bboxes_list, axis=0)
    save_image_op = tf.py_func(save_image_with_bbox,
                        [ssd_preprocessing.unwhiten_image(tf.squeeze(features, axis=0), output_rgb=False),
                        all_labels * tf.to_int32(all_scores > 0.3),
                        all_scores,
                        all_bboxes],
                        tf.int64, stateful=True)
    tf.identity(save_image_op, name='save_image_op')
    predictions = {'filename': filename, 'shape': shape, 'output_shape': output_shape}
    for class_ind in range(1, params['num_classes']):
        predictions['scores_{}'.format(class_ind)] = tf.expand_dims(selected_scores[class_ind], axis=0)
        predictions['bboxes_{}'.format(class_ind)] = tf.expand_dims(selected_bboxes[class_ind], axis=0)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
              mode=mode,
              predictions=predictions,
              prediction_hooks=None, loss=None, train_op=None)
    else:
        raise ValueError('This script only supports "PREDICT" mode!')
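
Since ssd_model_fn follows the standard tf.estimator model_fn contract and
only implements PREDICT, wiring it up could look like the sketch below. All
params values, the model_dir, and the stub input_fn are assumptions; only
the dictionary keys are dictated by what ssd_model_fn reads.

# Hypothetical wiring for code example #2 (TF 1.x).
import tensorflow as tf

def input_fn():
    # Stub input: a single black 300x300 image. Replace with a real
    # evaluation pipeline producing the same feature keys.
    return ({'image': tf.zeros([1, 300, 300, 3], tf.float32),
             'filename': tf.constant(['fake.jpg']),
             'shape': tf.constant([[300, 300, 3]]),
             'output_shape': tf.constant([[300, 300]])}, None)

detector = tf.estimator.Estimator(
    model_fn=ssd_model_fn,
    model_dir='./logs',  # assumed checkpoint directory
    params={
        'model_scope': 'ssd300',
        'data_format': 'channels_last',
        'num_classes': 21,        # Pascal VOC: 20 classes + background
        'select_threshold': 0.2,
        'min_size': 0.03,
        'keep_topk': 200,
        'nms_topk': 20,
        'nms_threshold': 0.45,
    })

for pred in detector.predict(input_fn=input_fn):
    print(pred['filename'], pred['shape'])
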
Code example #3
# Assumed imports: the same as code example #1 (tf, slim, ssd_preprocessing,
# anchor_manipulator, plus the local save_image_with_bbox helper).
def slim_get_split(file_pattern='{}_????'):
    # Features in Pascal VOC TFRecords.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string,
                                           default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/channels': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image':
        slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape':
        slim.tfexample_decoder.Tensor('image/shape'),
        'object/bbox':
        slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                           'image/object/bbox/'),
        'object/label':
        slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/difficult':
        slim.tfexample_decoder.Tensor('image/object/bbox/difficult'),
        'object/truncated':
        slim.tfexample_decoder.Tensor('image/object/bbox/truncated'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    dataset = slim.dataset.Dataset(data_sources=file_pattern,
                                   reader=tf.TFRecordReader,
                                   decoder=decoder,
                                   num_samples=100,
                                   items_to_descriptions=None,
                                   num_classes=21,
                                   labels_to_names=None)

    with tf.name_scope('dataset_data_provider'):
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=2,
            common_queue_capacity=32,
            common_queue_min=8,
            shuffle=True,
            num_epochs=1)

    [org_image, shape, glabels_raw, gbboxes_raw, isdifficult] = provider.get(
        ['image', 'shape', 'object/label', 'object/bbox', 'object/difficult'])
    image, glabels, gbboxes = ssd_preprocessing.preprocess_image(
        org_image,
        glabels_raw,
        gbboxes_raw, [300, 300],
        is_training=True,
        data_format='channels_last',
        output_rgb=True)

    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
        positive_threshold=0.5,
        ignore_threshold=0.5,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])

    all_anchor_scales = [(30.,), (60.,), (112.5,), (165.,), (217.5,), (270.,)]
    all_extra_scales = [(42.43,), (82.17,), (136.23,), (189.45,), (242.34,),
                        (295.08,)]
    all_anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
                         (2., 3., .5, 0.3333), (2., .5), (2., .5)]
    all_layer_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
    all_layer_strides = [8, 16, 32, 64, 100, 300]
    total_layers = len(all_layer_shapes)
    anchors_height = list()
    anchors_width = list()
    anchors_depth = list()
    for ind in range(total_layers):
        _anchors_height, _anchors_width, _anchor_depth = anchor_encoder_decoder.get_anchors_width_height(
            all_anchor_scales[ind], all_extra_scales[ind],
            all_anchor_ratios[ind])
        anchors_height.append(_anchors_height)
        anchors_width.append(_anchors_width)
        anchors_depth.append(_anchor_depth)
    anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, inside_mask = anchor_encoder_decoder.get_all_anchors(
        [300] * 2, anchors_height, anchors_width, anchors_depth,
        [0.5] * total_layers, all_layer_shapes, all_layer_strides,
        [300.] * total_layers, [False] * total_layers)

    gt_targets, gt_labels, gt_scores = anchor_encoder_decoder.encode_anchors(
        glabels, gbboxes, anchors_ymin, anchors_xmin, anchors_ymax,
        anchors_xmax, inside_mask, True)

    num_anchors_per_layer = list()
    for ind, layer_shape in enumerate(all_layer_shapes):
        _, _num_anchors_per_layer = anchor_encoder_decoder.get_anchors_count(
            anchors_depth[ind], layer_shape)
        num_anchors_per_layer.append(_num_anchors_per_layer)

    # split by layers
    all_anchors = tf.stack(
        [anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax], axis=-1)

    gt_targets, gt_labels, gt_scores, anchors = tf.split(gt_targets, num_anchors_per_layer, axis=0),\
                                                tf.split(gt_labels, num_anchors_per_layer, axis=0),\
                                                tf.split(gt_scores, num_anchors_per_layer, axis=0),\
                                                tf.split(all_anchors, num_anchors_per_layer, axis=0)

    save_image_op = tf.py_func(save_image_with_bbox,
                               [ssd_preprocessing.unwhiten_image(image),
                                tf.clip_by_value(tf.concat(gt_labels, axis=0),
                                                 0, tf.int64.max),
                                tf.concat(gt_scores, axis=0),
                                tf.concat(gt_targets, axis=0)],
                               tf.int64, stateful=True)
    return save_image_op
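
A detail worth noting across the three snippets: each "extra" anchor scale is
the geometric mean sqrt(s_k * s_{k+1}) of two consecutive scales, the SSD
paper's rule for the additional aspect-ratio-1 box, and the pixel scales in
examples #2 and #3 are the normalized scales of example #1 times the 300 px
input size. A quick check; the trailing 1.075 value for s_7 is inferred from
0.9836 = sqrt(0.9 * 1.075) and is not in the original:

# Sanity check relating the scale tables of examples #1 and #3.
import math

norm_scales = [0.1, 0.2, 0.375, 0.55, 0.725, 0.9, 1.075]  # 1.075 assumed
extras = [math.sqrt(a * b) for a, b in zip(norm_scales, norm_scales[1:])]
print([round(e, 4) for e in extras])
# [0.1414, 0.2739, 0.4541, 0.6315, 0.8078, 0.9836]  -> example #1's extras
print([round(300 * s, 2) for s in norm_scales[:-1]])
# [30.0, 60.0, 112.5, 165.0, 217.5, 270.0]          -> example #3's scales
print([round(300 * e, 2) for e in extras])
# [42.43, 82.16, 136.24, 189.44, 242.33, 295.08]    -> example #3's extras,
# up to rounding (the snippet lists 82.17, 136.23, 189.45, 242.34)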