def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  required_flags = [
      'input_box_annotations_csv', 'input_images_directory', 'input_label_map',
      'output_tf_record_path_prefix'
  ]
  for flag_name in required_flags:
    if not getattr(FLAGS, flag_name):
      raise ValueError('Flag --{} is required'.format(flag_name))

  label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
  all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv)
  if FLAGS.input_image_label_annotations_csv:
    all_label_annotations = pd.read_csv(FLAGS.input_image_label_annotations_csv)
    all_label_annotations.rename(
        columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
  else:
    all_label_annotations = None
  all_images = tf.gfile.Glob(
      os.path.join(FLAGS.input_images_directory, '*.jpg'))
  all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images]
  all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
  all_annotations = pd.concat(
      [all_box_annotations, all_image_ids, all_label_annotations])

  tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

  with contextlib2.ExitStack() as tf_record_close_stack:
    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
        FLAGS.num_shards)

    for counter, image_data in enumerate(all_annotations.groupby('ImageID')):
      tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000,
                             counter)

      image_id, image_annotations = image_data
      # In OID image file names are formed by appending ".jpg" to the image ID.
      image_path = os.path.join(FLAGS.input_images_directory, image_id + '.jpg')
      with tf.gfile.Open(image_path, 'rb') as image_file:
        encoded_image = image_file.read()

      tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
          image_annotations, label_map, encoded_image)
      if tf_example:
        shard_idx = int(image_id, 16) % FLAGS.num_shards
        output_tfrecords[shard_idx].write(tf_example.SerializeToString())
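# A minimal sketch of the deterministic sharding used above. Open Images IDs
# are hexadecimal strings, so interpreting an ID as a base-16 integer and
# taking it modulo the shard count assigns every image a stable shard. The ID
# below is hypothetical.
def shard_for_image_id(image_id, num_shards):
  return int(image_id, 16) % num_shards

print(shard_for_image_id('000026e7ee790996', num_shards=10))  # a value in 0..9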
def main(_):
  data_dir = FLAGS.data_dir
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  logging.info('Reading from Pet dataset.')
  image_dir = os.path.join(data_dir, 'images')
  annotations_dir = os.path.join(data_dir, 'annotations')
  examples_path = os.path.join(annotations_dir, 'trainval.txt')
  examples_list = dataset_util.read_examples_list(examples_path)

  # Test images are not included in the downloaded data set, so we shall perform
  # our own split.
  random.seed(42)
  random.shuffle(examples_list)
  num_examples = len(examples_list)
  num_train = int(0.7 * num_examples)
  train_examples = examples_list[:num_train]
  val_examples = examples_list[num_train:]
  logging.info('%d training and %d validation examples.',
               len(train_examples), len(val_examples))

  train_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_train.record')
  val_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_val.record')
  if not FLAGS.faces_only:
    train_output_path = os.path.join(FLAGS.output_dir,
                                     'pets_fullbody_with_masks_train.record')
    val_output_path = os.path.join(FLAGS.output_dir,
                                   'pets_fullbody_with_masks_val.record')
  create_tf_record(
      train_output_path,
      FLAGS.num_shards,
      label_map_dict,
      annotations_dir,
      image_dir,
      train_examples,
      faces_only=FLAGS.faces_only,
      mask_type=FLAGS.mask_type)
  create_tf_record(
      val_output_path,
      FLAGS.num_shards,
      label_map_dict,
      annotations_dir,
      image_dir,
      val_examples,
      faces_only=FLAGS.faces_only,
      mask_type=FLAGS.mask_type)
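# A self-contained sketch of the reproducible 70/30 split performed above:
# seeding the RNG before shuffling makes the train/val partition identical
# across runs. The example IDs are hypothetical.
import random

examples = ['Abyssinian_1', 'Abyssinian_2', 'beagle_1', 'beagle_2', 'pug_1']
random.seed(42)
random.shuffle(examples)
num_train = int(0.7 * len(examples))
train, val = examples[:num_train], examples[num_train:]
print(len(train), len(val))  # 3 2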
    def test_get_label_map_dict(self):
        label_map_string = """
      item {
        id:2
        name:'cat'
      }
      item {
        id:1
        name:'dog'
      }
    """
        label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
        with tf.gfile.Open(label_map_path, 'wb') as f:
            f.write(label_map_string)

        label_map_dict = label_map_util.get_label_map_dict(label_map_path)
        self.assertEqual(label_map_dict['dog'], 1)
        self.assertEqual(label_map_dict['cat'], 2)
    def test_get_label_map_dict_with_fill_in_gaps_and_background(self):
        label_map_string = """
      item {
        id:3
        name:'cat'
      }
      item {
        id:1
        name:'dog'
      }
    """
        label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
        with tf.gfile.Open(label_map_path, 'wb') as f:
            f.write(label_map_string)

        label_map_dict = label_map_util.get_label_map_dict(
            label_map_path, fill_in_gaps_and_background=True)

        self.assertEqual(label_map_dict['background'], 0)
        self.assertEqual(label_map_dict['dog'], 1)
        self.assertEqual(label_map_dict['class_2'], 2)
        self.assertEqual(label_map_dict['cat'], 3)
        self.assertEqual(len(label_map_dict), max(label_map_dict.values()) + 1)
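# A pure-Python sketch of what `fill_in_gaps_and_background=True` is expected
# to produce, judging from the assertions above: index 0 becomes 'background',
# missing IDs between 1 and the max ID get placeholder 'class_<id>' names, and
# the resulting dict is dense. This mirrors the test, not the library internals.
def fill_gaps(label_map_dict):
  dense = {'background': 0}
  dense.update(label_map_dict)
  for idx in range(1, max(label_map_dict.values()) + 1):
    if idx not in dense.values():
      dense['class_%d' % idx] = idx
  return dense

print(fill_gaps({'dog': 1, 'cat': 3}))
# {'background': 0, 'dog': 1, 'cat': 3, 'class_2': 2}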
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in: {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in: {}'.format(YEARS))

    data_dir = FLAGS.data_dir
    years = ['VOC2007', 'VOC2012']
    if FLAGS.year != 'merged':
        years = [FLAGS.year]

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     'aeroplane_' + FLAGS.set + '.txt')
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, FLAGS.data_dir,
                                            label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())

    writer.close()
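# A minimal sketch of the XML-to-dict conversion the loop above relies on,
# applied to a toy PASCAL VOC annotation. This re-implements the recursive idea
# behind dataset_util.recursive_parse_xml_to_dict for illustration only;
# repeated <object> tags are collected into a list.
from lxml import etree

def parse_xml(xml):
  if not len(xml):
    return {xml.tag: xml.text}
  result = {}
  for child in xml:
    child_result = parse_xml(child)[child.tag]
    if child.tag != 'object':
      result[child.tag] = child_result
    else:
      result.setdefault('object', []).append(child_result)
  return {xml.tag: result}

xml_str = ('<annotation><filename>img1.jpg</filename>'
           '<object><name>dog</name></object></annotation>')
data = parse_xml(etree.fromstring(xml_str))['annotation']
print(data['filename'], data['object'][0]['name'])  # img1.jpg dog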
def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
                               label_map_path, validation_set_size):
    """Convert the KITTI detection dataset to TFRecords.

  Args:
    data_dir: The full path to the unzipped folder containing the unzipped data
      from data_object_image_2.zip and data_object_label_2.zip.
      Folder structure is assumed to be: data_dir/training/label_2 (annotations)
      and data_dir/data_object_image_2/training/image_2 (images).
    output_path: The path to which TFRecord files will be written. The TFRecord
      with the training set will be located at: <output_path>_train.tfrecord
      And the TFRecord with the validation set will be located at:
      <output_path>_val.tfrecord
    classes_to_use: List of strings naming the classes for which data should be
      converted. Use the same names as presented in the KITTI README file.
      Adding the dontcare class will remove all other bounding boxes that
      overlap with areas marked as dontcare regions.
    label_map_path: Path to the label map proto.
    validation_set_size: How many images should be left as the validation set.
      (First `validation_set_size` examples are selected to be in the
      validation set).
  """
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)
    train_count = 0
    val_count = 0

    annotation_dir = os.path.join(data_dir, 'training', 'label_2')

    image_dir = os.path.join(data_dir, 'data_object_image_2', 'training',
                             'image_2')

    train_writer = tf.python_io.TFRecordWriter('%s_train.tfrecord' %
                                               output_path)
    val_writer = tf.python_io.TFRecordWriter('%s_val.tfrecord' % output_path)

    images = sorted(tf.gfile.ListDirectory(image_dir))
    for img_name in images:
        img_num = int(img_name.split('.')[0])
        is_validation_img = img_num < validation_set_size
        img_anno = read_annotation_file(
            os.path.join(annotation_dir,
                         str(img_num).zfill(6) + '.txt'))

        image_path = os.path.join(image_dir, img_name)

        # Filter all bounding boxes of this frame that are of a legal class, and
        # don't overlap with a dontcare region.
        # TODO(talremez) filter out targets that are truncated or heavily occluded.
        annotation_for_image = filter_annotations(img_anno, classes_to_use)

        example = prepare_example(image_path, annotation_for_image,
                                  label_map_dict)
        if is_validation_img:
            val_writer.write(example.SerializeToString())
            val_count += 1
        else:
            train_writer.write(example.SerializeToString())
            train_count += 1

    train_writer.close()
    val_writer.close()
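# A small sketch of the index-based split used above: KITTI frames are named
# with zero-padded numeric IDs, so comparing the parsed integer against
# `validation_set_size` sends the first N frames to validation and the rest to
# training. The file names below are hypothetical.
def is_validation_img(img_name, validation_set_size):
  return int(img_name.split('.')[0]) < validation_set_size

print(is_validation_img('000042.png', 500))  # True
print(is_validation_img('007000.png', 500))  # False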
    def __init__(self,
                 load_instance_masks=False,
                 instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
                 label_map_proto_file=None,
                 use_display_name=False,
                 dct_method='',
                 num_keypoints=0,
                 num_additional_channels=0):
        """Constructor sets keys_to_features and items_to_handlers.

    Args:
      load_instance_masks: whether or not to load and handle instance masks.
      instance_mask_type: type of instance masks. Options are provided in
        input_reader.proto. This is only used if `load_instance_masks` is True.
      label_map_proto_file: a file path to an
        object_detection.protos.StringIntLabelMap proto. If provided, then the
        mapped IDs of 'image/object/class/text' will take precedence over the
        existing 'image/object/class/label' ID.  Also, if provided, it is
        assumed that 'image/object/class/text' will be in the data.
      use_display_name: whether or not to use the `display_name` for label
        mapping (instead of `name`).  Only used if label_map_proto_file is
        provided.
      dct_method: An optional string, empty by default. It only takes
        effect when the image format is jpeg, and specifies a hint about the
        algorithm used for jpeg decompression. Currently valid values
        are ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for
        example, if the jpeg library does not have that specific option.
      num_keypoints: the number of keypoints per object.
      num_additional_channels: how many additional channels to use.

    Raises:
      ValueError: If `instance_mask_type` option is not one of
        input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL_MASKS, or
        input_reader_pb2.PNG_MASKS.
    """
        self.keys_to_features = {
            'image/encoded': tf.FixedLenFeature((),
                                                tf.string,
                                                default_value=''),
            'image/format': tf.FixedLenFeature((),
                                               tf.string,
                                               default_value='jpeg'),
            'image/filename': tf.FixedLenFeature((),
                                                 tf.string,
                                                 default_value=''),
            'image/key/sha256': tf.FixedLenFeature((),
                                                   tf.string,
                                                   default_value=''),
            'image/source_id': tf.FixedLenFeature((),
                                                  tf.string,
                                                  default_value=''),
            'image/height': tf.FixedLenFeature((), tf.int64, default_value=1),
            'image/width': tf.FixedLenFeature((), tf.int64, default_value=1),
            # Image-level labels.
            'image/class/text': tf.VarLenFeature(tf.string),
            'image/class/label': tf.VarLenFeature(tf.int64),
            # Object boxes and classes.
            'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
            'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
            'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
            'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
            'image/object/class/label': tf.VarLenFeature(tf.int64),
            'image/object/class/text': tf.VarLenFeature(tf.string),
            'image/object/area': tf.VarLenFeature(tf.float32),
            'image/object/is_crowd': tf.VarLenFeature(tf.int64),
            'image/object/difficult': tf.VarLenFeature(tf.int64),
            'image/object/group_of': tf.VarLenFeature(tf.int64),
            'image/object/weight': tf.VarLenFeature(tf.float32),
        }
        # We are checking `dct_method` instead of passing it directly in order to
        # ensure TF version 1.6 compatibility.
        if dct_method:
            image = slim_example_decoder.Image(image_key='image/encoded',
                                               format_key='image/format',
                                               channels=3,
                                               dct_method=dct_method)
            additional_channel_image = slim_example_decoder.Image(
                image_key='image/additional_channels/encoded',
                format_key='image/format',
                channels=1,
                repeated=True,
                dct_method=dct_method)
        else:
            image = slim_example_decoder.Image(image_key='image/encoded',
                                               format_key='image/format',
                                               channels=3)
            additional_channel_image = slim_example_decoder.Image(
                image_key='image/additional_channels/encoded',
                format_key='image/format',
                channels=1,
                repeated=True)
        self.items_to_handlers = {
            fields.InputDataFields.image:
            image,
            fields.InputDataFields.source_id:
            (slim_example_decoder.Tensor('image/source_id')),
            fields.InputDataFields.key:
            (slim_example_decoder.Tensor('image/key/sha256')),
            fields.InputDataFields.filename:
            (slim_example_decoder.Tensor('image/filename')),
            # Object boxes and classes.
            fields.InputDataFields.groundtruth_boxes:
            (slim_example_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                              'image/object/bbox/')),
            fields.InputDataFields.groundtruth_area:
            slim_example_decoder.Tensor('image/object/area'),
            fields.InputDataFields.groundtruth_is_crowd:
            (slim_example_decoder.Tensor('image/object/is_crowd')),
            fields.InputDataFields.groundtruth_difficult:
            (slim_example_decoder.Tensor('image/object/difficult')),
            fields.InputDataFields.groundtruth_group_of:
            (slim_example_decoder.Tensor('image/object/group_of')),
            fields.InputDataFields.groundtruth_weights:
            (slim_example_decoder.Tensor('image/object/weight')),
        }
        if num_additional_channels > 0:
            self.keys_to_features[
                'image/additional_channels/encoded'] = tf.FixedLenFeature(
                    (num_additional_channels, ), tf.string)
            self.items_to_handlers[
                fields.InputDataFields.
                image_additional_channels] = additional_channel_image
        self._num_keypoints = num_keypoints
        if num_keypoints > 0:
            self.keys_to_features['image/object/keypoint/x'] = (
                tf.VarLenFeature(tf.float32))
            self.keys_to_features['image/object/keypoint/y'] = (
                tf.VarLenFeature(tf.float32))
            self.items_to_handlers[
                fields.InputDataFields.groundtruth_keypoints] = (
                    slim_example_decoder.ItemHandlerCallback(
                        ['image/object/keypoint/y', 'image/object/keypoint/x'],
                        self._reshape_keypoints))
        if load_instance_masks:
            if instance_mask_type in (input_reader_pb2.DEFAULT,
                                      input_reader_pb2.NUMERICAL_MASKS):
                self.keys_to_features['image/object/mask'] = (tf.VarLenFeature(
                    tf.float32))
                self.items_to_handlers[
                    fields.InputDataFields.groundtruth_instance_masks] = (
                        slim_example_decoder.ItemHandlerCallback([
                            'image/object/mask', 'image/height', 'image/width'
                        ], self._reshape_instance_masks))
            elif instance_mask_type == input_reader_pb2.PNG_MASKS:
                self.keys_to_features['image/object/mask'] = tf.VarLenFeature(
                    tf.string)
                self.items_to_handlers[
                    fields.InputDataFields.groundtruth_instance_masks] = (
                        slim_example_decoder.ItemHandlerCallback([
                            'image/object/mask', 'image/height', 'image/width'
                        ], self._decode_png_instance_masks))
            else:
                raise ValueError(
                    'Did not recognize the `instance_mask_type` option.')
        if label_map_proto_file:
            label_map = label_map_util.get_label_map_dict(
                label_map_proto_file, use_display_name)
            # We use a default_value of -1, but we expect all labels to be contained
            # in the label map.
            table = tf.contrib.lookup.HashTable(
                initializer=tf.contrib.lookup.KeyValueTensorInitializer(
                    keys=tf.constant(list(label_map.keys())),
                    values=tf.constant(list(label_map.values()),
                                       dtype=tf.int64)),
                default_value=-1)
            # If the label_map_proto is provided, try to use it in conjunction with
            # the class text, and fall back to a materialized ID.
            # TODO(lzc): note that here we are using the BackupHandler defined in
            # this file (a fork of slim_example_decoder.BackupHandler). Switch
            # back to slim_example_decoder.BackupHandler once tf 1.5 becomes more
            # popular.
            label_handler = BackupHandler(
                LookupTensor('image/object/class/text',
                             table,
                             default_value=''),
                slim_example_decoder.Tensor('image/object/class/label'))
            image_label_handler = BackupHandler(
                LookupTensor(fields.TfExampleFields.image_class_text,
                             table,
                             default_value=''),
                slim_example_decoder.Tensor(
                    fields.TfExampleFields.image_class_label))
        else:
            label_handler = slim_example_decoder.Tensor(
                'image/object/class/label')
            image_label_handler = slim_example_decoder.Tensor(
                fields.TfExampleFields.image_class_label)
        self.items_to_handlers[
            fields.InputDataFields.groundtruth_classes] = label_handler
        self.items_to_handlers[fields.InputDataFields.
                               groundtruth_image_classes] = image_label_handler
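# A pure-Python sketch of the fallback wiring above: when class text is present
# in the example, it is mapped through the label-map lookup table (unmapped
# text yields the table's default_value of -1); when no text is present, the
# decoder falls back to the materialized numeric labels. Values are
# hypothetical, and this mirrors the BackupHandler idea, not its TF internals.
def decode_classes(class_texts, class_labels, label_map):
  if class_texts:
    return [label_map.get(text, -1) for text in class_texts]
  return class_labels

label_map = {'dog': 1, 'cat': 2}
print(decode_classes(['cat', 'dog'], [], label_map))  # [2, 1]
print(decode_classes([], [7, 8], label_map))          # [7, 8]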