Example #1
    def input_fn():
        with tf.name_scope('post_forward'):
            out_shape = [FLAGS.train_image_size] * 2
            anchor_creator = anchor_manipulator.AnchorCreator(
                out_shape,
                layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                               (1, 1)],
                anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ),
                               (0.725, ), (0.9, )],
                extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                                     (0.6315, ), (0.8078, ), (0.9836, )],
                anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333),
                               (1., 2., 3., .5, 0.3333),
                               (1., 2., 3., .5, 0.3333), (1., 2., .5),
                               (1., 2., .5)],
                layer_steps=[8, 16, 32, 64, 100, 300])
            all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

            num_anchors_per_layer = []
            for ind in range(len(all_anchors)):
                num_anchors_per_layer.append(all_num_anchors_depth[ind] *
                                             all_num_anchors_spatial[ind])

            anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
                allowed_borders=[1.0] * 6,
                positive_threshold=FLAGS.match_threshold,
                ignore_threshold=FLAGS.neg_threshold,
                prior_scaling=[0.1, 0.1, 0.2, 0.2])

            # global global_anchor_info
            # global_anchor_info = {
            #     'decode_fn': lambda pred: anchor_encoder_decoder.decode_all_anchors(pred, num_anchors_per_layer),
            #     'num_anchors_per_layer': num_anchors_per_layer,
            #     'all_num_anchors_depth': all_num_anchors_depth,
            #     'encode_fn': lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(glabels_, gbboxes_, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)}

            image_preprocessing_fn = lambda image_, labels_, bboxes_: ssd_preprocessing.preprocess_image(
                image_,
                labels_,
                bboxes_,
                out_shape,
                is_training=is_training,
                data_format=FLAGS.data_format,
                output_rgb=False)


            # `training_files`, `data_mapping_fn`, `is_training` and
            # `batch_size` are expected to come from the enclosing scope.
            dataset = tf.data.TFRecordDataset(training_files)
            dataset = dataset.map(lambda x: data_mapping_fn(
                x, is_training, image_preprocessing_fn))
            dataset = dataset.repeat()  # repeat the input indefinitely
            dataset = dataset.batch(batch_size)  # set the batch size

            # Returning the dataset directly lets the Estimator create and
            # initialize the iterator itself.
            return dataset
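
Every example on this page repeats the same anchor bookkeeping, so it helps to see where `num_anchors_per_layer` comes from. Below is a minimal pure-Python sketch (no TensorFlow), under the assumption, consistent with the published SSD300 layout of 4/6/6/6/4/4 boxes per location, that each (scale, ratio) pair yields one box and each extra scale adds one more 1:1 box:

layers_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
anchor_scales = [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)]
extra_anchor_scales = [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)]
anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                 (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)]

# Depth assumption: one box per (scale, ratio) pair plus one per extra scale.
all_num_anchors_depth = [len(s) * len(r) + len(e) for s, e, r in
                         zip(anchor_scales, extra_anchor_scales, anchor_ratios)]
all_num_anchors_spatial = [h * w for h, w in layers_shapes]
num_anchors_per_layer = [d * s for d, s in
                         zip(all_num_anchors_depth, all_num_anchors_spatial)]

print(all_num_anchors_depth)       # [4, 6, 6, 6, 4, 4]
print(num_anchors_per_layer)       # [5776, 2166, 600, 150, 36, 4]
print(sum(num_anchors_per_layer))  # 8732, the classic SSD300 default-box count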
Example #2
 def image_preprocessing_fn(image_, labels_, bboxes_):
     return ssd_preprocessing.preprocess_image(image_,
                                               labels_,
                                               bboxes_,
                                               out_shape,
                                               is_training=False,
                                               data_format=DATA_FORMAT,
                                               output_rgb=False)
Example #3
 def image_preprocessing_fn(image_, labels_, bboxes_):
     return ssd_preprocessing.preprocess_image(
         image_,
         labels_,
         bboxes_,
         out_shape,
         is_training=is_training,
         data_format=FLAGS.data_format,
         output_rgb=False)
Example #4
def input_fn(dataset_pattern='val-*', batch_size=1, data_location=None):
    out_shape = [SSD_VGG16_IMAGE_SIZE] * 2
    anchor_creator = anchor_manipulator.AnchorCreator(
        out_shape,
        layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
        anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ), (0.725, ),
                       (0.9, )],
        extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ), (0.6315, ),
                             (0.8078, ), (0.9836, )],
        anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333),
                       (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                       (1., 2., .5), (1., 2., .5)],
        layer_steps=[8, 16, 32, 64, 100, 300])
    all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

    num_anchors_per_layer = []
    for ind in range(len(all_anchors)):
        num_anchors_per_layer.append(all_num_anchors_depth[ind] *
                                     all_num_anchors_spatial[ind])

    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
        allowed_borders=[1.0] * 6,
        positive_threshold=MATCH_THRESHOLD,
        ignore_threshold=NEG_THRESHOLD,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])

    image_preprocessing_fn = lambda image_, labels_, bboxes_: ssd_preprocessing.preprocess_image(
        image_,
        labels_,
        bboxes_,
        out_shape,
        is_training=False,
        data_format=DATA_FORMAT,
        output_rgb=False)
    anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(
        glabels_, gbboxes_, all_anchors, all_num_anchors_depth,
        all_num_anchors_spatial)

    image, filename, shape, loc_targets, cls_targets, match_scores = \
        dataset_common.slim_get_batch(NUM_CLASSES,
                                      batch_size,
                                      'val',
                                      os.path.join(
                                          data_location,
                                          dataset_pattern),
                                      NUM_READERS,
                                      NUM_PREPROCESSING_THREADS,
                                      image_preprocessing_fn,
                                      anchor_encoder_fn,
                                      num_epochs=1,
                                      is_training=False)
    return image, filename, shape
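
The `extra_anchor_scales` values are not arbitrary: each is, up to rounding, the geometric mean of two consecutive `anchor_scales`, following the s'_k = sqrt(s_k * s_{k+1}) rule from the SSD paper. A quick check in plain Python, assuming the sequence is extrapolated with 1.075 past the last layer:

import math

anchor_scales = [0.1, 0.2, 0.375, 0.55, 0.725, 0.9]
# 1.075 extrapolates one step past the last scale; this value is an
# assumption, chosen because it reproduces the 0.9836 used above.
next_scales = anchor_scales[1:] + [1.075]

extra = [round(math.sqrt(a * b), 4) for a, b in zip(anchor_scales, next_scales)]
print(extra)  # [0.1414, 0.2739, 0.4541, 0.6315, 0.8078, 0.9836]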
Example #5
    def input_fn():
        out_shape = [FLAGS.train_image_size] * 2
        ssd300_anchor_params = {
            'layers_shapes': [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
            'anchor_scales': [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)],
            'extra_anchor_scales': [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)],
            'anchor_ratios': [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
            'layer_steps': [8, 16, 32, 64, 100, 300]}
        ssd512_anchor_params = {
            'layers_shapes': [(64, 64), (32, 32), (16, 16), (8, 8), (4, 4), (2, 2), (1, 1)],
            'anchor_scales': [(0.07,), (0.15,), (0.3,), (0.45,), (0.6,), (0.75,), (0.9,)],
            'extra_anchor_scales': [(0.1025,), (0.2121,), (0.3674,), (0.5196,), (0.6708,), (0.8216,), (0.9721,)],
            'anchor_ratios': [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
            'layer_steps': [8, 16, 32, 64, 128, 256, 512]}
        if FLAGS.train_image_size == 512:
            net_params = ssd512_anchor_params
            print('using ssd512 model')
        else:
            net_params = ssd300_anchor_params
            print('using ssd300 model')
        anchor_creator = anchor_manipulator.AnchorCreator(out_shape, **net_params)
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

        num_anchors_per_layer = []
        for ind in range(len(all_anchors)):
            num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders = [1.0] * len(net_params['layer_steps']),
                                                            positive_threshold = FLAGS.match_threshold,
                                                            ignore_threshold = FLAGS.neg_threshold,
                                                            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        image_preprocessing_fn = lambda image_, labels_, bboxes_ : ssd_preprocessing.preprocess_image(image_, labels_, bboxes_, out_shape, is_training=is_training, data_format=FLAGS.data_format, output_rgb=False)
        anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(glabels_, gbboxes_, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)

        image, filename, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(FLAGS.num_classes,
                                                                                batch_size,
                                                                                ('train' if is_training else 'val'),
                                                                                os.path.join(FLAGS.data_dir, dataset_pattern),
                                                                                FLAGS.num_readers,
                                                                                FLAGS.num_preprocessing_threads,
                                                                                image_preprocessing_fn,
                                                                                anchor_encoder_fn,
                                                                                num_epochs=FLAGS.train_epochs,
                                                                                is_training=is_training)
        global global_anchor_info
        global_anchor_info = {'decode_fn': lambda pred : anchor_encoder_decoder.decode_all_anchors(pred, num_anchors_per_layer),
                            'num_anchors_per_layer': num_anchors_per_layer,
                            'all_num_anchors_depth': all_num_anchors_depth }

        return {'image': image, 'filename': filename, 'shape': shape, 'loc_targets': loc_targets, 'cls_targets': cls_targets, 'match_scores': match_scores}, None
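
Since the two dicts differ only in grid layout, a short sketch (under the same depth assumption as in the note after Example #1) shows what each configuration produces: 8732 anchors for SSD300 and 24564 for SSD512.

def total_anchors(params):
    # Depth assumption as before: one box per (scale, ratio) pair plus one
    # per extra scale.
    depths = [len(s) * len(r) + len(e)
              for s, e, r in zip(params['anchor_scales'],
                                 params['extra_anchor_scales'],
                                 params['anchor_ratios'])]
    return sum(d * h * w for d, (h, w) in zip(depths, params['layers_shapes']))

# Reusing the dicts from the example above:
# total_anchors(ssd300_anchor_params) -> 8732
# total_anchors(ssd512_anchor_params) -> 24564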
Example #6
    def input_fn():
        assert batch_size == 1, 'Only batch size 1 is supported during evaluation.'
        target_shape = [FLAGS.train_image_size] * 2
        image_preprocessing_fn = lambda image_, labels_, bboxes_ : ssd_preprocessing.preprocess_image(image_, labels_, bboxes_, target_shape, is_training=is_training, data_format=FLAGS.data_format, output_rgb=False)

        image, filename, shape, output_shape = dataset_common.slim_get_batch(FLAGS.num_classes,
                                                                            batch_size,
                                                                            ('train' if is_training else 'val'),
                                                                            os.path.join(FLAGS.data_dir, dataset_pattern),
                                                                            FLAGS.num_readers,
                                                                            FLAGS.num_preprocessing_threads,
                                                                            image_preprocessing_fn,
                                                                            None,
                                                                            num_epochs=1,
                                                                            is_training=is_training)

        return {'image': image, 'filename': filename, 'shape': shape, 'output_shape': output_shape}, None
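
This `input_fn` returns the (features, labels) pair that the tf.estimator API expects. A hypothetical wiring sketch, with a trivial stand-in `model_fn` that merely forwards its features (not the repo's real model):

import tensorflow as tf

def passthrough_model_fn(features, labels, mode):
    # Stand-in model_fn for illustration only: predictions are the raw inputs.
    return tf.estimator.EstimatorSpec(mode=mode, predictions=features)

estimator = tf.estimator.Estimator(model_fn=passthrough_model_fn,
                                   model_dir='/tmp/ssd_eval')
# predict() drives input_fn once per evaluation run; batch_size must be 1
# here, matching the assert above.
for pred in estimator.predict(input_fn=input_fn):
    print(pred['filename'], pred['shape'])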
Example #7
    def input_fn():
        out_shape = [FLAGS.train_image_size] * 2
        anchor_creator = anchor_manipulator.AnchorCreator(out_shape,
                                                    layers_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
                                                    anchor_scales = [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)],
                                                    extra_anchor_scales = [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)],
                                                    anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
                                                    layer_steps = [8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()
        # all_anchors: [[(38x38x1), (38x38x1), (4x1), (4x1)], [(19x19x1), (19x19x1), (4x1), (4x1)], ...] -> records the position and size information for all anchors, one entry per feature layer

        num_anchors_per_layer = []
        for ind in range(len(all_anchors)):
            num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders = [1.0] * 6,
                                                            positive_threshold = FLAGS.match_threshold,
                                                            ignore_threshold = FLAGS.neg_threshold,
                                                            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        image_preprocessing_fn = lambda image_, labels_, bboxes_ : ssd_preprocessing.preprocess_image(image_, labels_, bboxes_, out_shape, is_training=is_training, data_format=FLAGS.data_format, output_rgb=False)
        
        anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(glabels_, gbboxes_, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)
        
        anchor_decoder_fn = lambda pred : anchor_encoder_decoder.decode_all_anchors(pred, num_anchors_per_layer)

        image, _, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(FLAGS.num_classes,
                                                                                batch_size,
                                                                                ('train' if is_training else 'val'),
                                                                                os.path.join(FLAGS.data_dir, dataset_pattern),
                                                                                FLAGS.num_readers,
                                                                                FLAGS.num_preprocessing_threads,
                                                                                image_preprocessing_fn,
                                                                                anchor_encoder_fn,
                                                                                num_epochs=FLAGS.train_epochs,
                                                                                is_training=is_training)
        global global_anchor_info
        global_anchor_info = {'decode_fn': anchor_decoder_fn,
                            'num_anchors_per_layer': num_anchors_per_layer,
                            'all_num_anchors_depth': all_num_anchors_depth }

        return image, {'shape': shape, 'loc_targets': loc_targets, 'cls_targets': cls_targets, 'match_scores': match_scores}
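
The `prior_scaling=[0.1, 0.1, 0.2, 0.2]` list holds the (cy, cx, h, w) variances of the standard SSD/Faster R-CNN box parameterization. A pure-Python round trip, assuming `AnchorEncoder` follows that convention (the repo's exact code may differ in details):

import math

prior_scaling = [0.1, 0.1, 0.2, 0.2]  # (cy, cx, h, w) variances

def encode(box, anchor):
    # box and anchor are (cy, cx, h, w); standard SSD offset encoding.
    by, bx, bh, bw = box
    ay, ax, ah, aw = anchor
    return [(by - ay) / ah / prior_scaling[0],
            (bx - ax) / aw / prior_scaling[1],
            math.log(bh / ah) / prior_scaling[2],
            math.log(bw / aw) / prior_scaling[3]]

def decode(target, anchor):
    ty, tx, th, tw = target
    ay, ax, ah, aw = anchor
    return [ty * prior_scaling[0] * ah + ay,
            tx * prior_scaling[1] * aw + ax,
            math.exp(th * prior_scaling[2]) * ah,
            math.exp(tw * prior_scaling[3]) * aw]

anchor = (0.5, 0.5, 0.2, 0.2)
box = (0.52, 0.47, 0.25, 0.18)
# Decoding the encoded box recovers it exactly.
assert all(abs(a - b) < 1e-9
           for a, b in zip(decode(encode(box, anchor), anchor), box))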
Example #8
def slim_get_split(file_pattern='{}_????'):
    # Features in Pascal VOC TFRecords.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/channels': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape': slim.tfexample_decoder.Tensor('image/shape'),
        'object/bbox': slim.tfexample_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/difficult': slim.tfexample_decoder.Tensor('image/object/bbox/difficult'),
        'object/truncated': slim.tfexample_decoder.Tensor('image/object/bbox/truncated'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)

    dataset = slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=tf.TFRecordReader,
        decoder=decoder,
        num_samples=100,
        items_to_descriptions=None,
        num_classes=21,
        labels_to_names=None)

    with tf.name_scope('dataset_data_provider'):
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=2,
            common_queue_capacity=32,
            common_queue_min=8,
            shuffle=True,
            num_epochs=1)

    [org_image, shape, glabels_raw, gbboxes_raw, isdifficult] = provider.get(['image', 'shape',
                                                                              'object/label',
                                                                              'object/bbox',
                                                                              'object/difficult'])
    image, glabels, gbboxes = ssd_preprocessing.preprocess_image(org_image, glabels_raw, gbboxes_raw, [300, 300],
                                                                 is_training=True, data_format='channels_last',
                                                                 output_rgb=True)

    anchor_creator = anchor_manipulator.AnchorCreator([300] * 2,
                                                      layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                                                                     (1, 1)],
                                                      anchor_scales=[(0.1,), (0.2,), (0.375,), (0.55,), (0.725,),
                                                                     (0.9,)],
                                                      extra_anchor_scales=[(0.1414,), (0.2739,), (0.4541,), (0.6315,),
                                                                           (0.8078,), (0.9836,)],
                                                      anchor_ratios=[(2., .5), (2., 3., .5, 0.3333),
                                                                     (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
                                                                     (2., .5), (2., .5)],
                                                      layer_steps=[8, 16, 32, 64, 100, 300])

    all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

    num_anchors_per_layer = []
    for ind in range(len(all_anchors)):
        num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])

    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders=[1.0] * 6,
                                                              positive_threshold=0.5,
                                                              ignore_threshold=0.5,
                                                              prior_scaling=[0.1, 0.1, 0.2, 0.2])

    gt_targets, gt_labels, gt_scores = anchor_encoder_decoder.encode_all_anchors(glabels, gbboxes, all_anchors,
                                                                                 all_num_anchors_depth,
                                                                                 all_num_anchors_spatial, True)

    anchors = anchor_encoder_decoder._all_anchors
    # split by layers
    gt_targets, gt_labels, gt_scores, anchors = tf.split(gt_targets, num_anchors_per_layer, axis=0), \
                                                tf.split(gt_labels, num_anchors_per_layer, axis=0), \
                                                tf.split(gt_scores, num_anchors_per_layer, axis=0), \
                                                [tf.split(anchor, num_anchors_per_layer, axis=0) for anchor in anchors]

    save_image_op = tf.py_func(save_image_with_bbox,
                               [ssd_preprocessing.unwhiten_image(image),
                                tf.clip_by_value(tf.concat(gt_labels, axis=0), 0, tf.int64.max),
                                tf.concat(gt_scores, axis=0),
                                tf.concat(gt_targets, axis=0)],
                               tf.int64, stateful=True)
    return save_image_op
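
The final `tf.split` calls just carve the flat per-anchor tensors back into per-layer chunks. The same bookkeeping in plain Python, assuming the usual SSD300 per-layer counts:

num_anchors_per_layer = [5776, 2166, 600, 150, 36, 4]  # SSD300 layer counts

def split_by_layer(flat, sizes):
    # Mirror of tf.split(flat, sizes, axis=0) for a plain list.
    out, start = [], 0
    for size in sizes:
        out.append(flat[start:start + size])
        start += size
    return out

flat_labels = list(range(sum(num_anchors_per_layer)))  # stand-in for gt_labels
per_layer = split_by_layer(flat_labels, num_anchors_per_layer)
print([len(chunk) for chunk in per_layer])  # [5776, 2166, 600, 150, 36, 4]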
Example #9
def slim_get_split(file_pattern='{}_????'):
    # Features in Pascal VOC TFRecords.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string,
                                           default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/channels': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image':
        slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape':
        slim.tfexample_decoder.Tensor('image/shape'),
        'object/bbox':
        slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                           'image/object/bbox/'),
        'object/label':
        slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/difficult':
        slim.tfexample_decoder.Tensor('image/object/bbox/difficult'),
        'object/truncated':
        slim.tfexample_decoder.Tensor('image/object/bbox/truncated'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    dataset = slim.dataset.Dataset(data_sources=file_pattern,
                                   reader=tf.TFRecordReader,
                                   decoder=decoder,
                                   num_samples=100,
                                   items_to_descriptions=None,
                                   num_classes=21,
                                   labels_to_names=None)

    with tf.name_scope('dataset_data_provider'):
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=2,
            common_queue_capacity=32,
            common_queue_min=8,
            shuffle=True,
            num_epochs=1)

    [org_image, shape, glabels_raw, gbboxes_raw, isdifficult] = provider.get(
        ['image', 'shape', 'object/label', 'object/bbox', 'object/difficult'])
    image, glabels, gbboxes = ssd_preprocessing.preprocess_image(
        org_image,
        glabels_raw,
        gbboxes_raw, [300, 300],
        is_training=True,
        data_format='channels_last',
        output_rgb=True)

    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
        positive_threshold=0.5,
        ignore_threshold=0.5,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])

    all_anchor_scales = [(30., ), (60., ), (112.5, ), (165., ), (217.5, ),
                         (270., )]
    all_extra_scales = [(42.43, ), (82.17, ), (136.23, ), (189.45, ),
                        (242.34, ), (295.08, )]
    all_anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
                         (2., 3., .5, 0.3333), (2., .5), (2., .5)]
    all_layer_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
    all_layer_strides = [8, 16, 32, 64, 100, 300]
    total_layers = len(all_layer_shapes)
    anchors_height = list()
    anchors_width = list()
    anchors_depth = list()
    for ind in range(total_layers):
        _anchors_height, _anchors_width, _anchor_depth = anchor_encoder_decoder.get_anchors_width_height(
            all_anchor_scales[ind], all_extra_scales[ind],
            all_anchor_ratios[ind])
        anchors_height.append(_anchors_height)
        anchors_width.append(_anchors_width)
        anchors_depth.append(_anchor_depth)
    anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, inside_mask = anchor_encoder_decoder.get_all_anchors(
        [300] * 2, anchors_height, anchors_width, anchors_depth,
        [0.5] * total_layers, all_layer_shapes, all_layer_strides,
        [300.] * total_layers, [False] * total_layers)

    gt_targets, gt_labels, gt_scores = anchor_encoder_decoder.encode_anchors(
        glabels, gbboxes, anchors_ymin, anchors_xmin, anchors_ymax,
        anchors_xmax, inside_mask, True)

    num_anchors_per_layer = list()
    for ind, layer_shape in enumerate(all_layer_shapes):
        _, _num_anchors_per_layer = anchor_encoder_decoder.get_anchors_count(
            anchors_depth[ind], layer_shape)
        num_anchors_per_layer.append(_num_anchors_per_layer)

    # split by layers
    all_anchors = tf.stack(
        [anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax], axis=-1)

    gt_targets, gt_labels, gt_scores, anchors = tf.split(gt_targets, num_anchors_per_layer, axis=0),\
                                                tf.split(gt_labels, num_anchors_per_layer, axis=0),\
                                                tf.split(gt_scores, num_anchors_per_layer, axis=0),\
                                                tf.split(all_anchors, num_anchors_per_layer, axis=0)

    save_image_op = tf.py_func(save_image_with_bbox, [
        ssd_preprocessing.unwhiten_image(image),
        tf.clip_by_value(tf.concat(gt_labels, axis=0), 0, tf.int64.max),
        tf.concat(gt_scores, axis=0),
        tf.concat(gt_targets, axis=0)
    ],
                               tf.int64,
                               stateful=True)
    return save_image_op
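
Note that this variant of the API takes absolute pixel scales where the earlier examples used fractions of the input size; the two are related by a factor of 300:

image_size = 300.
fractional = [0.1, 0.2, 0.375, 0.55, 0.725, 0.9]
absolute = [s * image_size for s in fractional]
print(absolute)  # [30.0, 60.0, 112.5, 165.0, 217.5, 270.0]

# The extra scales follow the same rule, e.g. 0.1414 * 300 = 42.42 ~ 42.43;
# the small differences come from rounding the fractions first.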
Example #10
    def input_fn():
        # train_image_size = 300 -> target_shape = [300, 300]
        target_shape = [FLAGS.train_image_size] * 2

        # match_threshold: 0.5
        # neg_threshold: 0.5
        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            positive_threshold=FLAGS.match_threshold,
            ignore_threshold=FLAGS.neg_threshold,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        all_anchor_scales = [(30., ), (60., ), (112.5, ), (165., ), (217.5, ),
                             (270., )]
        all_extra_scales = [(42.43, ), (82.17, ), (136.23, ), (189.45, ),
                            (242.34, ), (295.08, )]
        all_anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333),
                             (1., 2., 3., .5, 0.3333),
                             (1., 2., 3., .5, 0.3333), (1., 2., .5),
                             (1., 2., .5)]
        all_layer_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                            (1, 1)]
        all_layer_strides = [8, 16, 32, 64, 100, 300]
        total_layers = len(all_layer_shapes)
        anchors_height = list()
        anchors_width = list()
        anchors_depth = list()
        for ind in range(total_layers):
            # If this layer has n default prior boxes, anchors_height holds the height of each box and _anchor_depth is n.
            _anchors_height, _anchors_width, _anchor_depth = anchor_encoder_decoder.get_anchors_width_height(
                all_anchor_scales[ind],
                all_extra_scales[ind],
                all_anchor_ratios[ind],
                name='get_anchors_width_height{}'.format(ind))
            anchors_height.append(_anchors_height)
            anchors_width.append(_anchors_width)
            anchors_depth.append(_anchor_depth)
        # anchors_ymin: [38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4]
        anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, inside_mask = anchor_encoder_decoder.get_all_anchors(
            target_shape, anchors_height, anchors_width, anchors_depth,
            [0.5] * total_layers, all_layer_shapes, all_layer_strides,
            [FLAGS.train_image_size * 1.] * total_layers,
            [False] * total_layers)

        num_anchors_per_layer = list()
        #all_layer_shapes [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
        for ind, layer_shape in enumerate(all_layer_shapes):
            # num_anchors_per_layer = layer_shape[0] * layer_shape[1] * anchors_depth[ind]
            _, _num_anchors_per_layer = anchor_encoder_decoder.get_anchors_count(
                anchors_depth[ind],
                layer_shape,
                name='get_anchor_count{}'.format(ind))
            num_anchors_per_layer.append(_num_anchors_per_layer)
        # num_anchors_per_layer: [38*38*4, 19*19*6, 10*10*6, 5*5*6, 3*3*4, 1*1*4]
        image_preprocessing_fn = lambda image_, labels_, bboxes_: ssd_preprocessing.preprocess_image(
            image_,
            labels_,
            bboxes_,
            target_shape,
            is_training=is_training,
            data_format=FLAGS.data_format,
            output_rgb=False)
        anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_anchors(
            glabels_, gbboxes_, anchors_ymin, anchors_xmin, anchors_ymax,
            anchors_xmax, inside_mask)

        image, _, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(
            FLAGS.num_classes,
            batch_size, ('train' if is_training else 'val'),
            os.path.join(FLAGS.data_dir, dataset_pattern),
            FLAGS.num_readers,
            FLAGS.num_preprocessing_threads,
            image_preprocessing_fn,
            anchor_encoder_fn,
            num_epochs=FLAGS.train_epochs,
            is_training=is_training)
        global global_anchor_info
        global_anchor_info = {
            'decode_fn':
            lambda pred: anchor_encoder_decoder.batch_decode_anchors(
                pred, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax),
            'num_anchors_per_layer':
            num_anchors_per_layer,
            'all_num_anchors_depth':
            anchors_depth
        }

        return image, {
            'shape': shape,
            'loc_targets': loc_targets,
            'cls_targets': cls_targets,
            'match_scores': match_scores
        }
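
Finally, `get_anchors_width_height` turns each (scale, ratio) pair into a box size. A sketch of the plausible computation, following the SSD paper's w = s*sqrt(r), h = s/sqrt(r) rule (an assumption for illustration, not the repo's verbatim code):

import math

def anchors_width_height(scales, extra_scales, ratios):
    # Assumed behavior: one box per (scale, ratio) pair, plus one 1:1 box
    # per extra scale; returns heights, widths, and the anchor depth.
    heights, widths = [], []
    for s in scales:
        for r in ratios:
            heights.append(s / math.sqrt(r))
            widths.append(s * math.sqrt(r))
    for s in extra_scales:
        heights.append(s)
        widths.append(s)
    return heights, widths, len(heights)

h, w, depth = anchors_width_height((30.,), (42.43,), (1., 2., .5))
print(depth)  # 4 boxes per location on the 38x38 layer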