def __init__(self,
               tensor_key,
               label_map_proto_file,
               shape_keys=None,
               shape=None,
               default_value=''):
    """Initializes the LookupTensor handler.

    Simply calls a vocabulary (most often, a label mapping) lookup.

    Args:
      tensor_key: the name of the `TFExample` feature to read the tensor from.
      label_map_proto_file: File path to a text format LabelMapProto message
        mapping class text to id.
      shape_keys: Optional name or list of names of the TF-Example feature in
        which the tensor shape is stored. If a list, then each corresponds to
        one dimension of the shape.
      shape: Optional output shape of the `Tensor`. If provided, the `Tensor` is
        reshaped accordingly.
      default_value: The value used when the `tensor_key` is not found in a
        particular `TFExample`.

    Raises:
      ValueError: if both `shape_keys` and `shape` are specified.
    """
    name_to_id = label_map_util.get_label_map_dict(
        label_map_proto_file, use_display_name=False)
    # We use a default_value of -1, but we expect all labels to be contained
    # in the label map.
    name_to_id_table = tf.contrib.lookup.HashTable(
        initializer=tf.contrib.lookup.KeyValueTensorInitializer(
            keys=tf.constant(list(name_to_id.keys())),
            values=tf.constant(list(name_to_id.values()), dtype=tf.int64)),
        default_value=-1)
    display_name_to_id = label_map_util.get_label_map_dict(
        label_map_proto_file, use_display_name=True)
    # We use a default_value of -1, but we expect all labels to be contained
    # in the label map.
    display_name_to_id_table = tf.contrib.lookup.HashTable(
        initializer=tf.contrib.lookup.KeyValueTensorInitializer(
            keys=tf.constant(list(display_name_to_id.keys())),
            values=tf.constant(
                list(display_name_to_id.values()), dtype=tf.int64)),
        default_value=-1)

    self._name_to_id_table = name_to_id_table
    self._display_name_to_id_table = display_name_to_id_table
    super(_ClassTensorHandler, self).__init__(tensor_key, shape_keys, shape,
                                              default_value)
def main(_):
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Chengdu dataset.')
    # image_dir = os.path.join(data_dir, 'images')
    # annotations_dir = os.path.join(data_dir, 'annotations')  # json
    examples_path = os.path.join(data_dir, 'masks.txt')
    examples_list = dataset_util.read_examples_list(examples_path)

    # Test images are not included in the downloaded data set, so we shall perform
    # our own split.
    random.seed(42)
    random.shuffle(examples_list)
    num_examples = len(examples_list)
    num_train = int(0.7 * num_examples)
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    train_output_path = os.path.join(FLAGS.output_dir, 'mask_train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'mask_val.record')

    create_tf_record(
        train_output_path,  # output tfrecord
        label_map_dict,  # label
        train_examples,
        data_dir)
    create_tf_record(
        val_output_path,
        label_map_dict,
        val_examples,
        data_dir)
def main(_):
  data_dir = FLAGS.data_dir
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  logging.info('Reading from Pet dataset.')
  image_dir = os.path.join(data_dir, 'images')
  annotations_dir = os.path.join(data_dir, 'annotations')
  examples_path = os.path.join(annotations_dir, 'trainval.txt')
  examples_list = dataset_util.read_examples_list(examples_path)

  # Test images are not included in the downloaded data set, so we shall perform
  # our own split.
  random.seed(42)
  random.shuffle(examples_list)
  num_examples = len(examples_list)
  num_train = int(0.7 * num_examples)
  train_examples = examples_list[:num_train]
  val_examples = examples_list[num_train:]
  logging.info('%d training and %d validation examples.',
               len(train_examples), len(val_examples))

  train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
  val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
  if not FLAGS.faces_only:
    train_output_path = os.path.join(FLAGS.output_dir,
                                     'pet_train_with_masks.record')
    val_output_path = os.path.join(FLAGS.output_dir,
                                   'pet_val_with_masks.record')
  create_tf_record(train_output_path, label_map_dict, annotations_dir,
                   image_dir, train_examples, faces_only=FLAGS.faces_only)
  create_tf_record(val_output_path, label_map_dict, annotations_dir,
                   image_dir, val_examples, faces_only=FLAGS.faces_only)
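For reference, dataset_util.read_examples_list keeps only the first whitespace-separated token of each line, so the split above operates on bare example ids. A behavior sketch (hypothetical helper name; the real implementation lives in object_detection's dataset_util):

def read_examples_list_sketch(path):
    # Each line of trainval.txt looks like "Abyssinian_100 1 1 1"; only the
    # first token (the example id) is kept.
    with open(path) as f:
        return [line.strip().split(' ')[0] for line in f if line.strip()]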
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))

  data_dir = FLAGS.data_dir
  years = ['VOC2007', 'VOC2012']
  if FLAGS.year != 'merged':
    years = [FLAGS.year]

  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  for year in years:
    logging.info('Reading from PASCAL %s dataset.', year)
    examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                 'aeroplane_' + FLAGS.set + '.txt')
    annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      path = os.path.join(annotations_dir, example + '.xml')
      with tf.gfile.GFile(path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

      tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
      writer.write(tf_example.SerializeToString())

  writer.close()
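A sketch of the parsed structure that dict_to_tf_example consumes, assuming a typical PASCAL VOC annotation (values are illustrative; recursive_parse_xml_to_dict keeps everything as strings and collects repeated tags such as 'object' into lists):

data = {
    'folder': 'VOC2012',
    'filename': '2007_000027.jpg',
    'size': {'width': '486', 'height': '500', 'depth': '3'},
    'object': [{'name': 'aeroplane', 'difficult': '0',
                'bndbox': {'xmin': '174', 'ymin': '101',
                           'xmax': '349', 'ymax': '351'}}],
}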
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  required_flags = [
      'input_box_annotations_csv', 'input_images_directory', 'input_label_map',
      'output_tf_record_path_prefix'
  ]
  for flag_name in required_flags:
    if not getattr(FLAGS, flag_name):
      raise ValueError('Flag --{} is required'.format(flag_name))

  label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
  all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv)
  if FLAGS.input_image_label_annotations_csv:
    all_label_annotations = pd.read_csv(FLAGS.input_image_label_annotations_csv)
    all_label_annotations.rename(
        columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
  else:
    all_label_annotations = None
  all_images = tf.gfile.Glob(
      os.path.join(FLAGS.input_images_directory, '*.jpg'))
  all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images]
  all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
  all_annotations = pd.concat(
      [all_box_annotations, all_image_ids, all_label_annotations])

  tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

  with contextlib2.ExitStack() as tf_record_close_stack:
    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
        FLAGS.num_shards)

    for counter, image_data in enumerate(all_annotations.groupby('ImageID')):
      tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000,
                             counter)

      image_id, image_annotations = image_data
      # In OID image file names are formed by appending ".jpg" to the image ID.
      image_path = os.path.join(FLAGS.input_images_directory, image_id + '.jpg')
      with tf.gfile.Open(image_path) as image_file:
        encoded_image = image_file.read()

      tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
          image_annotations, label_map, encoded_image)
      if tf_example:
        shard_idx = int(image_id, 16) % FLAGS.num_shards
        output_tfrecords[shard_idx].write(tf_example.SerializeToString())
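The shard index above is derived from the hexadecimal image id, so repeated runs place a given image in the same shard. A worked example with a made-up id:

image_id = '000595fe6fee6369'  # hypothetical OID-style id
num_shards = 10
shard_idx = int(image_id, 16) % num_shards
print(shard_idx)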
def process_images(image_files, output_path):
    print('# Started ' + output_path)
    annotations_dir = FLAGS.annotations_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    writer = tf.python_io.TFRecordWriter(output_path)
    for idx, image_file in enumerate(image_files):
        image_path = os.path.join(FLAGS.images_dir, image_file)
        print(idx, image_path)
        annotation_path = os.path.join(annotations_dir, os.path.splitext(image_file)[0] + '.xml')
        with tf.gfile.GFile(annotation_path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example = dict_to_tf_example(data, image_path, FLAGS.masks_dir, label_map_dict)
        writer.write(tf_example.SerializeToString())
    writer.close()
 def test_get_label_map_dict_from_proto(self):
   label_map_string = """
     item {
       id:2
       name:'cat'
     }
     item {
       id:1
       name:'dog'
     }
   """
   label_map_proto = text_format.Parse(
       label_map_string, string_int_label_map_pb2.StringIntLabelMap())
   label_map_dict = label_map_util.get_label_map_dict(label_map_proto)
   self.assertEqual(label_map_dict['dog'], 1)
   self.assertEqual(label_map_dict['cat'], 2)
def convert_lisa_to_tfrecords(data_dir, output_path, label_map_path,
                              validation_set_size):
    """
    Convert the LISA detection dataset to TFRecords.
    :param data_dir: directory with the name "signDatabasePublicFramesOnly"
    :param output_path: suggest ./data
    :param label_map_path: full path to the label_map
    :param validation_set_size: default of 500 with flag settings
    :return: N/A
    """
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)
    train_count = 0
    val_count = 0

    annotations_dir = os.path.join(data_dir, 'allAnnotations.csv')
    train_writer = tf.python_io.TFRecordWriter(
        os.path.join(output_path, 'LISA_train.tfrecord'))
    val_writer = tf.python_io.TFRecordWriter(
        os.path.join(output_path, 'LISA_val.tfrecord'))

    # parse annotation csv file
    with open(annotations_dir) as csvFile:
        data_reader = csv.reader(csvFile, delimiter=';')
        next(data_reader)  # skip the header row
        parsed_annotations = []
        for row in data_reader:
            parsed_annotations.append([row])

    random.seed(49)
    random.shuffle(parsed_annotations)

    for img_num, parsed_annotation in enumerate(parsed_annotations):
        is_validation_img = img_num < validation_set_size
        image_path = os.path.join(data_dir, parsed_annotation[0][0])
        example = prepare_example(image_path, parsed_annotation[0],
                                  label_map_dict)
        if is_validation_img:
            val_writer.write(example.SerializeToString())
            val_count += 1
        else:
            train_writer.write(example.SerializeToString())
            train_count += 1

    train_writer.close()
    val_writer.close()
    print("trained with %s images and validated with %s images" %
          (train_count, val_count))
def main(unused_argv):

    data_dir = FLAGS.data_dir
    if FLAGS.labels_dir is None:
        FLAGS.labels_dir = os.path.join(data_dir, 'labels')
    labels_map = label_map_util.get_label_map_dict(FLAGS.labels_map_path)
    label_files_train = os.listdir(FLAGS.labels_dir)
    label_files_train = [
        os.path.join(FLAGS.labels_dir, file_name)
        for file_name in label_files_train
    ]
    output_path_train = FLAGS.output_path
    split_train_test = FLAGS.split_train_test

    print('Total samples: {}'.format(len(label_files_train)))

    label_files_eval = None
    output_path_eval = None
    if split_train_test:
        label_files_train, label_files_eval = train_test_split(
            label_files_train, test_size=split_train_test, shuffle=True)
        dir_path = os.path.dirname(output_path_train)

        if len(dir_path) and not os.path.isdir(dir_path):
            os.makedirs(dir_path)

        file_name_split = os.path.splitext(os.path.basename(output_path_train))

        if file_name_split[1] == '':
            file_name_split = (file_name_split[0], '.record')

        output_path_train = os.path.join(
            dir_path, 'train_{}{}'.format(file_name_split[0],
                                          file_name_split[1]))
        output_path_eval = os.path.join(
            dir_path, 'test_{}{}'.format(file_name_split[0],
                                         file_name_split[1]))

    create_tf_record(label_files_train, data_dir, labels_map,
                     output_path_train)
    print('TF record file for training created with {} samples: {}'.format(
        len(label_files_train), output_path_train))

    if label_files_eval:
        create_tf_record(label_files_eval, data_dir, labels_map,
                         output_path_eval)
        print(
            'TF record file for validation created with {} samples: {}'.format(
                len(label_files_eval), output_path_eval))
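train_test_split here is presumably sklearn.model_selection.train_test_split; with a fractional test_size it holds out that share of the label files. A small usage sketch:

from sklearn.model_selection import train_test_split

files = ['a.xml', 'b.xml', 'c.xml', 'd.xml', 'e.xml']
train, evaluation = train_test_split(files, test_size=0.2, shuffle=True)
print(len(train), len(evaluation))  # 4 1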
def main(_):
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    with tf.gfile.GFile(
            os.path.join(FLAGS.result_base, FLAGS.pipeline_config_path),
            'r') as f:
        text_format.Merge(f.read(), pipeline_config)
    text_format.Merge(FLAGS.config_override, pipeline_config)
    if FLAGS.input_shape:
        input_shape = [
            int(dim) if dim != '-1' else None
            for dim in FLAGS.input_shape.split(',')
        ]
    else:
        input_shape = None

    if os.path.exists(FLAGS.model_dir) and os.path.isdir(FLAGS.model_dir):
        shutil.rmtree(FLAGS.model_dir)

    if not FLAGS.trained_checkpoint_prefix:
        path = os.path.join(FLAGS.result_base, FLAGS.trained_checkpoint_path)
        regex = re.compile(r"model\.ckpt-([0-9]+)\.index")
        numbers = [
            int(regex.search(f).group(1)) for f in os.listdir(path)
            if regex.search(f)
        ]
        if not numbers:
            print('No checkpoint found!')
            exit()
        trained_checkpoint_prefix = os.path.join(
            path, 'model.ckpt-{}'.format(max(numbers)))
    else:
        trained_checkpoint_prefix = FLAGS.trained_checkpoint_prefix

    exporter.export_inference_graph(
        FLAGS.input_type,
        pipeline_config,
        trained_checkpoint_prefix,
        FLAGS.model_dir,
        input_shape=input_shape,
        write_inference_graph=FLAGS.write_inference_graph)

    label_map = get_label_map_dict(
        os.path.join(FLAGS.result_base, FLAGS.label_map_path))
    label_array = [k for k in sorted(label_map, key=label_map.get)]
    with open(os.path.join(FLAGS.model_dir, FLAGS.output_label_path),
              'w') as f:
        json.dump(label_array, f)
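The regex above selects the latest checkpoint by step number. A worked example with hypothetical directory contents:

import re

files = ['model.ckpt-1000.index', 'model.ckpt-2500.index', 'checkpoint']
regex = re.compile(r"model\.ckpt-([0-9]+)\.index")
steps = [int(regex.search(f).group(1)) for f in files if regex.search(f)]
print('model.ckpt-{}'.format(max(steps)))  # model.ckpt-2500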
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))

    data_dir = FLAGS.data_dir

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    

    logging.info('Reading from PASCAL dataset.')
    
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    if FLAGS.set=='trainval':
        examples_path_val = os.path.join(data_dir,'ImageSets', 'Main',
                                 'anomaly_val.txt')
        examples_path_train = os.path.join(data_dir,'ImageSets', 'Main',
                                 'anomaly_train.txt')
        examples_list_train = dataset_util.read_examples_list(examples_path_train)
        examples_list_val = dataset_util.read_examples_list(examples_path_val)
        examples_list = examples_list_train + examples_list_val
    else:
        examples_path = os.path.join(data_dir,'ImageSets', 'Main',
                                 'anomaly_' + FLAGS.set + '.txt')

        examples_list = dataset_util.read_examples_list(examples_path)
    
    # shuffle files
    shuffle(examples_list)
    for idx, example in enumerate(examples_list):
        
        if idx % 100 == 0:
            tf.logging.info('On image %d of %d', idx, len(examples_list))
        path = os.path.join(annotations_dir, example + '.xml')
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())
    
    writer.close()
    tf.logging.info('TFrecord created!')
def main(_):
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Pet dataset.')
    image_dir = os.path.join(data_dir, 'images')
    annotations_dir = os.path.join(data_dir, 'annotations')

    # Generate trainval.txt from the image directory contents.
    list_trainval = os.listdir(image_dir)
    file_name = [item.strip().split('.')[0] for item in list_trainval]
    temp_dir = os.path.abspath(os.path.join(annotations_dir, 'trainval.txt'))
    with open(temp_dir, 'w') as f:
        for item in file_name:
            f.write(item + '\n')

    examples_path = os.path.join(annotations_dir, 'trainval.txt')
    examples_list = dataset_util.read_examples_list(examples_path)

    # Test images are not included in the downloaded data set, so we shall perform
    # our own split.
    random.seed(42)
    random.shuffle(examples_list)
    num_examples = len(examples_list)
    num_train = int(0.7 * num_examples)
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
    # if FLAGS.faces_only:
    #   train_output_path = os.path.join(FLAGS.output_dir,
    #                                    'pet_train_with_masks.record')
    #   val_output_path = os.path.join(FLAGS.output_dir,
    #                                  'pet_val_with_masks.record')
    create_tf_record(train_output_path, label_map_dict, annotations_dir,
                     image_dir, train_examples)
    create_tf_record(val_output_path, label_map_dict, annotations_dir,
                     image_dir, val_examples)
def main(_):
    LOG_FILE = FLAGS.log_path
    if LOG_FILE is not None:
        with open(LOG_FILE, 'w') as log:
            log.write("IMG_PATH\n")
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    DATASET_PATH = os.path.normpath(FLAGS.data_dir)
    OUTPUT_PATH = os.path.normpath(FLAGS.output_path)
    if '%TYPE%' in DATASET_PATH:
        PATHS = [(DATASET_PATH.replace('%TYPE%', datasetType),
                  OUTPUT_PATH.replace('%TYPE%', datasetType))
                 for datasetType in ['train', 'val']]
    else:
        PATHS = [(DATASET_PATH, FLAGS.output_path)]

    for datasetPath, outputPath in PATHS:
        logging.info(f'Using {datasetPath}')
        start = time()
        imageDirList = os.listdir(datasetPath)
        nbImage = len(imageDirList)
        record_dir = os.path.dirname(outputPath)
        if not os.path.exists(record_dir):
            os.makedirs(record_dir, exist_ok=True)
        num_shards = max(
            1, nbImage // IMG_PER_SHARD +
            (0 if nbImage % IMG_PER_SHARD < IMG_PER_SHARD * 0.2 else 1))
        if FLAGS.no_shard:
            num_shards = 1
        with contextlib2.ExitStack() as tf_record_close_stack:
            output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
                tf_record_close_stack, outputPath, num_shards)
            for idx, imageDir in enumerate(imageDirList):
                if LOG_FILE is not None:
                    with open(LOG_FILE, 'a') as log:
                        log.write(f"{imageDir}\n")
                if idx % 50 == 0:
                    logging.info(f'On image {idx} of {len(imageDirList)}')

                IMAGE_DIR_PATH = os.path.join(datasetPath, imageDir)
                data = getImageData(str(IMAGE_DIR_PATH), label_map_dict)
                tf_example = data2TFExample(data)
                output_tfrecords[idx % num_shards].write(
                    tf_example.SerializeToString())
        total_time = time() - start
        m = int(total_time) // 60
        s = int(total_time) % 60
        print(f"{m:02d}:{s:02d}", flush=True)
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    required_flags = [
        'input_annotations_csv', 'input_images_directory', 'input_label_map',
        'output_tf_record_path_prefix'
    ]
    for flag_name in required_flags:
        if not getattr(FLAGS, flag_name):
            raise ValueError('Flag --{} is required'.format(flag_name))

    label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
    all_annotations = pd.read_csv(FLAGS.input_annotations_csv)
    all_images = tf.gfile.Glob(
        os.path.join(FLAGS.input_images_directory, '*.jpg'))
    all_image_ids = [
        os.path.splitext(os.path.basename(v))[0] for v in all_images
    ]
    all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
    all_annotations = pd.concat([all_annotations, all_image_ids])

    tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = oid_tfrecord_creation.open_sharded_output_tfrecords(
            tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
            FLAGS.num_shards)

        for counter, image_data in enumerate(
                all_annotations.groupby('ImageID')):
            tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...',
                                   1000, counter)

            image_id, image_annotations = image_data
            # In OID image file names are formed by appending ".jpg" to the image ID.
            image_path = os.path.join(FLAGS.input_images_directory,
                                      str(image_id) + '.jpg')
            with tf.gfile.Open(image_path) as image_file:
                encoded_image = image_file.read()

            tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
                image_annotations, label_map, encoded_image)
            if tf_example:
                shard_idx = int(image_id, 16) % FLAGS.num_shards
                output_tfrecords[shard_idx].write(
                    tf_example.SerializeToString())
def main(_):
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Pet dataset.')
    image_dir = os.path.join(data_dir,
                             'images')  #H:\Dataset\Oxford-IIITPet\images
    annotations_dir = os.path.join(
        data_dir, 'annotations')  #H:\Dataset\Oxford-IIITPet\annotations
    examples_path = os.path.join(
        annotations_dir, 'trainval.txt')  #training and validation samples
    examples_list = dataset_util.read_examples_list(examples_path)

    # Test images are not included in the downloaded data set, so we shall perform
    # our own split.
    random.seed(42)
    random.shuffle(examples_list)
    num_examples = len(examples_list)
    num_train = int(0.7 * num_examples)  #number of examples for training
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
    if not FLAGS.faces_only:
        train_output_path = os.path.join(FLAGS.output_dir,
                                         'pet_train_with_masks.record')
        val_output_path = os.path.join(FLAGS.output_dir,
                                       'pet_val_with_masks.record')
    create_tf_record(
        train_output_path,  # e.g. H:\Dataset\Oxford-IIITPet\TFRecords
        label_map_dict,
        annotations_dir,  # e.g. H:\Dataset\Oxford-IIITPet\annotations
        image_dir,  # e.g. H:\Dataset\Oxford-IIITPet\images
        train_examples,  # training examples, data type: list
        faces_only=FLAGS.faces_only,
        mask_type=FLAGS.mask_type)  # png
    create_tf_record(
        val_output_path,
        label_map_dict,
        annotations_dir,
        image_dir,
        val_examples,  #validation examples, data type: list
        faces_only=FLAGS.faces_only,
        mask_type=FLAGS.mask_type)
def main(_):
    logging.info("Starting TF Record conversor ...")
    logging.info("Reading dataset from: {}".format(FLAGS.labeled_path))
    logging.info("Output TF Record in: {}".format(FLAGS.output_path))
    logging.info("Using label map file: {}".format(FLAGS.output_path))

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    images, xmls = find_labeled_images(FLAGS.labeled_path)

    for image, label_img_xml in zip(images, xmls):
        tf_example = create_tf_example(image, label_img_xml, label_map_dict)
        writer.write(tf_example.SerializeToString())

    writer.close()
def main(_):
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    class_impl = json.loads(FLAGS.class_implications.replace(
        "'", '"')) if FLAGS.class_implications else None
    class_prio = json.loads(FLAGS.class_priorities.replace(
        "'", '"')) if FLAGS.class_priorities else DEFAULT_CLASS_PRIORITIES
    assert isinstance(class_prio, list)

    def has_class_arg(target_class):
        return target_class is not None and target_class in class_prio

    if class_impl is None and has_class_arg(
            FLAGS.base_class) and has_class_arg(FLAGS.target_class):
        class_impl = derive_implications(class_prio, FLAGS.base_class,
                                         FLAGS.target_class)
        pprint(class_impl)

    image_dir = '/home/gabi/Desktop/AllDATA/DogData/val/JPEGImages'
    annotations_dir = '/home/gabi/Desktop/AllDATA/DogData/val/Annotations'
    output_path = '/home/gabi/Desktop/AllDATA/DogData/val'
    logging.info('Reading from dataset: ' + annotations_dir)
    examples_list = os.listdir(annotations_dir)

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    stats['impl_classes_replaced'] = 0
    stats['impl_images_replaced'] = 0

    for idx, example in enumerate(examples_list):
        if example.endswith('.xml'):
            if idx % 50 == 0:
                print('On image %d of %d' % (idx, len(examples_list)))

            path = os.path.join(annotations_dir, example)
            with tf.io.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, image_dir, label_map_dict,
                                            class_impl)

            writer.write(tf_example.SerializeToString())

    writer.close()
    if class_impl is not None:
        print("Replaced {} classes in {} images with implied classes".format(
            stats['impl_classes_replaced'], stats['impl_images_replaced']))
def main(_):
  data_dir = FLAGS.data_dir
  mask_name = FLAGS.mask_name
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  logging.info('Reading from LFW dataset.')
  image_dir = os.path.join(data_dir, 'images')
  # annotations_dir = os.path.join(data_dir, 'annotations') ADDED
  # examples_path = os.path.join(annotations_dir, 'trainval.txt') ADDED
  mask_dir = os.path.join(data_dir, mask_name)
  examples_list = [img_path[:-8]+".jpg" for img_path in os.listdir(mask_dir)]

  # Test images are not included in the downloaded data set, so we shall perform
  # our own split.
  random.seed(42)
  random.shuffle(examples_list)
  num_examples = len(examples_list)
  num_train = int(0.7 * num_examples)
  train_examples = examples_list[:num_train]
  val_examples = examples_list[num_train:]
  logging.info('%d training and %d validation examples.',
               len(train_examples), len(val_examples))

  # Fallback output names (assumed) so the paths are defined even when
  # faces_only is set.
  train_output_path = os.path.join(FLAGS.output_dir, 'pictures_train.record')
  val_output_path = os.path.join(FLAGS.output_dir, 'pictures_val.record')
  if not FLAGS.faces_only:
    train_output_path = os.path.join(FLAGS.output_dir,
                                     'pictures_with_masks_train.record')
    val_output_path = os.path.join(FLAGS.output_dir,
                                   'pictures_with_masks_val.record')
  create_tf_record(
      train_output_path,
      FLAGS.num_shards,
      label_map_dict,
      image_dir,
      mask_name,
      train_examples,  # list of image names selected for training
      faces_only=FLAGS.faces_only,
      mask_type=FLAGS.mask_type)
  create_tf_record(
      val_output_path,
      FLAGS.num_shards,
      label_map_dict,
      image_dir, #path to images
      mask_name,
      val_examples,  # list of image names selected for validation
      faces_only=FLAGS.faces_only,
      mask_type=FLAGS.mask_type)
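The examples_list construction above assumes every mask file name carries an eight-character suffix after the image stem. A hypothetical illustration (the actual naming scheme depends on the dataset):

img_path = 'Aaron_Eckhart_0001mask.png'  # hypothetical: 'mask.png' is 8 chars
print(img_path[:-8] + '.jpg')            # Aaron_Eckhart_0001.jpg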
def main(_):
    args = process_command_line()

    if args.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))

    output = os.path.join(args.data_dir, args.output_path)

    # touch the file if it doesn't already exist
    if not os.path.exists(output):
        with open(output, 'a'):
            os.utime(output)

    writer = tf.python_io.TFRecordWriter(output)
    label_map_dict = label_map_util.get_label_map_dict(
        os.path.join(args.data_dir, args.label_map_path))
    print('Reading from %s dataset.' % args.collection)
    examples_path = os.path.join(args.data_dir, args.collection,
                                 args.set + '.txt')
    annotations_dir = os.path.join(args.data_dir, args.collection,
                                   args.annotations_dir)

    with open(examples_path) as fid:
        lines = fid.readlines()
        examples_list = [line.strip() for line in lines]

    ttl_objs = 0
    for idx, example in enumerate(examples_list):
        if idx % 10 == 0:
            logging.info('Processing image %d of %d', idx, len(examples_list))
        file = os.path.join(annotations_dir, example)
        with open(file, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example, num_objs = dict_to_tf_example(data, args.data_dir,
                                                  label_map_dict, args.labels,
                                                  conf.PNG_DIR)
        if tf_example:
            ttl_objs += num_objs
            writer.write(tf_example.SerializeToString())
        else:
            logging.warning('No objects found in {0}'.format(example))

    writer.close()
    print('Done. Found {0} examples in {1} set'.format(ttl_objs, args.set))
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))

    data_dir = FLAGS.data_dir
    years = ['VOC2007', 'VOC2012']
    if FLAGS.year != 'merged':
        years = [FLAGS.year]

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     'Airplane_' + FLAGS.set + '.txt')
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')

            try:
                with tf.gfile.GFile(path, 'r') as fid:
                    xml_str = fid.read()
                xml = etree.fromstring(xml_str)
                data = dataset_util.recursive_parse_xml_to_dict(
                    xml)['annotation']

                tf_example = dict_to_tf_example(
                    data, FLAGS.data_dir, label_map_dict,
                    FLAGS.ignore_difficult_instances)

                writer.write(tf_example.SerializeToString())
            except Exception as e:
                print("error at: ", path, e)

    writer.close()
    def _create_tf_data(self, annotation_file, ratio=0.7):
        category = []
        obj_list = []

        valid_obj_anno = 0
        with open(annotation_file) as f:
            annotation_data = json.load(f)
            img_num = len(annotation_data)
            for i in range(img_num):
                if 'boundbox' in annotation_data[i]:
                    obj_num = len(annotation_data[i]['boundbox'])
                    valid_obj_anno += obj_num
                    for j in range(obj_num):
                        obj_idx_local = j
                        img_idx = i
                        obj_list.append((img_idx,  obj_idx_local))
                        label = annotation_data[i]['boundbox'][j]['label']
                        if label not in category:
                            category.append(label)
        if valid_obj_anno == 0:
            return valid_obj_anno
        label_map_file = os.path.join(self.local_path, 'label_map.pbtxt')
        with open(label_map_file, mode='w') as f:
            offset = 1
            for idx in range(len(category)):
                f.write('item { \n  id: %d\n  name: \'%s\'\n}\n\n' % (idx + offset, category[idx]))

        random.seed(42)
        random.shuffle(obj_list)
        num_train = int(ratio * len(obj_list))
        train_examples = obj_list[:num_train]
        val_examples = obj_list[num_train:]
        label_map_dict = label_map_util.get_label_map_dict(label_map_file)
        train_output_path = os.path.join(self.local_path, 'train.record')
        val_output_path = os.path.join(self.local_path, 'val.record')

        self.create_tf_record(
            train_output_path,
            annotation_data,
            label_map_dict,
            train_examples)
        self.create_tf_record(
            val_output_path,
            annotation_data,
            label_map_dict,
            val_examples)
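For reference, the label map written above produces text-format entries like the following, assuming the categories found were ['stop_sign', 'speed_limit'] (hypothetical labels):

item {
  id: 1
  name: 'stop_sign'
}

item {
  id: 2
  name: 'speed_limit'
}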
def main(_):
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    # logging.info('Reading from Pet dataset.')
    image_dir = os.path.join(data_dir, 'images')
    annotations_dir = os.path.join(data_dir, 'annotations')

    output_path = os.path.join(FLAGS.output_dir, 'train.record')

    create_tf_record(output_path,
                     FLAGS.num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     faces_only=FLAGS.faces_only,
                     mask_type=FLAGS.mask_type)
 def get_detection_graph_and_category_index(self, model_name,
                                            path_to_the_model_database):
     detection_graph = tf.Graph()
     with detection_graph.as_default():
         od_graph_def = tf.GraphDef()
         with tf.gfile.GFile(self.path_to_the_inference_graph, 'rb') as fid:
             serialized_graph = fid.read()
             od_graph_def.ParseFromString(serialized_graph)
             tf.import_graph_def(od_graph_def, name='')
     label_map = label_map_util.load_labelmap(self.path_to_label_map_file)
     self.num_classes = label_map_util.get_max_label_map_index(label_map)
     categories = label_map_util.convert_label_map_to_categories(
         label_map, max_num_classes=self.num_classes, use_display_name=True)
     category_index = label_map_util.create_category_index(categories)
     label_map_dict = label_map_util.get_label_map_dict(
         self.path_to_label_map_file, use_display_name=True)
     return detection_graph, category_index, label_map_dict
    def __init__(self, config: dict):
        self._dataset = PascalVOCDataset()
        # check training parameters
        assert "model" in config and isinstance(config["model"],
                                                str), "`model` parameter is required, and must be an string"
        # check images folder
        assert "images_folder" in config and isinstance(config["images_folder"],
                                                        str), "`images folder` parameter is required, and must be an string"
        self._images_folder = Path(config["images_folder"])
        assert self._images_folder.exists(), "images folder not found"

        # reading config
        self._model_name = config["model"]
        self._masks_folder = Path(config.get("masks_folder", self._images_folder))
        self._xml_folder = Path(config.get("xml_folder", self._images_folder))

        # pre-trained model paths
        self._checkpoint_model_folder: Path = Path()
        self._checkpoint_model_pipeline_file: Path = Path()

        # check output folder
        if "output_folder" in config:
            assert isinstance(config["output_folder"], str), "`output_folder` must be an string"
            self._out_folder = Path(config["output_folder"]).joinpath(self._model_name)
        else:
            self._out_folder = Path(os.getcwd()).joinpath(os.path.sep.join(["models", self._model_name]))
        self._out_folder.mkdir(exist_ok=True, parents=True)

        # model attributes
        self._pipeline = None
        self._labels_map = None
        # reading label map
        labels_map = config.get("labels_map", None)
        if labels_map:
            if isinstance(labels_map, dict):
                self._labels_map = {k.title(): v for k,v in labels_map.items()}
            elif isinstance(labels_map, str) and os.path.isfile(labels_map):
                self._labels_map = get_label_map_dict(labels_map)
            else:
                raise Exception("`labels map` parameter must be a dictionary or a file")

        # new model paths
        self._pipeline_file = self._out_folder.joinpath("pipeline.config")
        self._labels_map_file = self._out_folder.joinpath("label_map.pbtxt")
        self._val_record_file = self._out_folder.joinpath("val.record")
        self._train_record_file = self._out_folder.joinpath("train.record")
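A hypothetical config dict accepted by this constructor (keys taken from the checks above; note that dictionary label keys are normalized with str.title()):

config = {
    "model": "ssd_mobilenet_v2",         # assumed model name
    "images_folder": "/data/images",
    "labels_map": {"dog": 1, "cat": 2},  # stored as {'Dog': 1, 'Cat': 2}
    "output_folder": "/tmp/models_out",
}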
def main(_):
  data_dir = FLAGS.data_dir
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  logging.info('Reading from Pet dataset.')
  image_dir = os.path.join(data_dir, 'images')
  annotations_dir = os.path.join(data_dir, 'annotations')
  examples_path = os.path.join(annotations_dir, 'trainval.txt')
  examples_list = dataset_util.read_examples_list(examples_path)

  # Test images are not included in the downloaded data set, so we shall perform
  # our own split.
  random.seed(42)
  random.shuffle(examples_list)
  num_examples = len(examples_list)
  num_train = int(0.7 * num_examples)
  train_examples = examples_list[:num_train]
  val_examples = examples_list[num_train:]
  logging.info('%d training and %d validation examples.',
               len(train_examples), len(val_examples))

  train_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_train.record')
  val_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_val.record')
  if not FLAGS.faces_only:
    train_output_path = os.path.join(FLAGS.output_dir,
                                     'pets_fullbody_with_masks_train.record')
    val_output_path = os.path.join(FLAGS.output_dir,
                                   'pets_fullbody_with_masks_val.record')
  create_tf_record(
      train_output_path,
      FLAGS.num_shards,
      label_map_dict,
      annotations_dir,
      image_dir,
      train_examples,
      faces_only=FLAGS.faces_only,
      mask_type=FLAGS.mask_type)
  create_tf_record(
      val_output_path,
      FLAGS.num_shards,
      label_map_dict,
      annotations_dir,
      image_dir,
      val_examples,
      faces_only=FLAGS.faces_only,
      mask_type=FLAGS.mask_type)
def main(_):
    data_dir = FLAGS.data_dir
    train_output_path = FLAGS.output_dir
    image_dir = os.path.join(data_dir, FLAGS.image_dir)
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from dataset.')
    examples_list = [
        os.path.splitext(el)[0] for el in os.listdir(image_dir)
        if el.endswith('.jpg')
    ]

    create_tf_record(train_output_path, FLAGS.num_shards, label_map_dict,
                     annotations_dir, image_dir, examples_list)
 def __init__(self, args):
     self.args = args
     #self.FULL_LABEL_CLASSES=args.FULL_LABEL_CLASSES
     self.threshold = args.threshold
     
     tf.keras.backend.clear_session()
     self.detect_fn = tf.saved_model.load(args.modelbasefolder)
     
     label_map_path=args.labelmappath #'./models/research/object_detection/data/mscoco_label_map.pbtxt'
     label_map = label_map_util.load_labelmap(label_map_path)
     categories = label_map_util.convert_label_map_to_categories(
         label_map,
         max_num_classes=label_map_util.get_max_label_map_index(label_map),
         use_display_name=True)
     self.category_index = label_map_util.create_category_index(categories)
     label_map_dict = label_map_util.get_label_map_dict(label_map, use_display_name=True)
     self.FULL_LABEL_CLASSES=list(label_map_dict.keys())
def main(_):
    images_dir = FLAGS.images_dir
    image_files = dataset_util.read_examples_list(FLAGS.image_list_path)
    annotations_dir = os.path.join(images_dir, FLAGS.annotations_dir)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    for idx, image_file in enumerate(image_files):
        print(idx, image_file)
        image_file_split = image_file.split('/')
        annotation_path = os.path.join(annotations_dir, os.path.splitext(image_file_split[-1])[0] + '.xml')
        with tf.gfile.GFile(annotation_path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example = dict_to_tf_example(data, image_file, annotations_dir, label_map_dict, FLAGS.include_masks, FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())
    writer.close()
def main(_):
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading dataset.')
    image_dir = os.path.join(data_dir, 'images')
    annotations_dir = os.path.join(data_dir, 'annotation_masks')

    examples_list = os.listdir(annotations_dir)
    # examples_list = examples_list[:100]

    # Test images are not included in the downloaded data set, so we shall perform
    # our own split.
    random.seed(42)
    random.shuffle(examples_list)
    num_examples = len(examples_list)
    num_train = int(0.7 * num_examples)
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    train_name = 'output_train_tf.record'
    val_name = 'output_val_tf.record'
    train_output_path = os.path.join(FLAGS.output_dir, train_name)
    val_output_path = os.path.join(FLAGS.output_dir, val_name)

    reset_counters()
    create_tf_record(train_output_path,
                     FLAGS.num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     train_examples,
                     mask_type=FLAGS.mask_type)
    print_counters("train")
    reset_counters()
    create_tf_record(val_output_path,
                     FLAGS.num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     val_examples,
                     mask_type=FLAGS.mask_type)
    print_counters("eval")
def main(_):

    label_map_dict = label_map_util.get_label_map_dict(
        FLAGS.label_map_path)  # label map --> FLAGS.label_map
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    dataset_list = FLAGS.data_dir.split(',')
    for dataset in dataset_list:
        if dataset.split('.')[-1] == r'yaml':
            ## FOR YAML
            examples_list = get_imgs_from_yaml(dataset)
            for example in examples_list:
                tf_example = create_tf_record(
                    example,
                    label_map_dict,
                    is_yaml=True,
                    ignore_difficult_instances=FLAGS.ignore_difficult_instances)
                writer.write(tf_example.SerializeToString())
        else:
            ## FOR XML
            annotations_dir = os.path.join(dataset, FLAGS.annotations_dir)
            examples_list = [
                os.path.splitext(name)[0] for name in os.listdir(dataset)
                if os.path.isfile(os.path.join(dataset, name))
            ]
            for example in examples_list:
                path = os.path.join(annotations_dir, example + '.xml')
                with tf.gfile.GFile(path, 'r') as fid:
                    xml_str = fid.read()
                xml = etree.fromstring(xml_str)
                data = dataset_util.recursive_parse_xml_to_dict(
                    xml)['annotation']
                # convert the path to the current file directory
                data['path'] = os.path.join(os.path.abspath(dataset),
                                            os.path.basename(data['path']))

                data['path'] = create_jpg_imgs(data['path'])

                tf_example = create_tf_record(
                    data,
                    label_map_dict,
                    ignore_difficult_instances=FLAGS.ignore_difficult_instances)
                writer.write(tf_example.SerializeToString())

    writer.close()
  def __init__(self,
               tensor_key,
               label_map_proto_file,
               shape_keys=None,
               shape=None,
               default_value=''):
    """Initializes the LookupTensor handler.

    Simply calls a vocabulary (most often, a label mapping) lookup.

    Args:
      tensor_key: the name of the `TFExample` feature to read the tensor from.
      label_map_proto_file: File path to a text format LabelMapProto message
        mapping class text to id.
      shape_keys: Optional name or list of names of the TF-Example feature in
        which the tensor shape is stored. If a list, then each corresponds to
        one dimension of the shape.
      shape: Optional output shape of the `Tensor`. If provided, the `Tensor` is
        reshaped accordingly.
      default_value: The value used when the `tensor_key` is not found in a
        particular `TFExample`.

    Raises:
      ValueError: if both `shape_keys` and `shape` are specified.
    """
    name_to_id = label_map_util.get_label_map_dict(
        label_map_proto_file, use_display_name=False)
    # We use a default_value of -1, but we expect all labels to be contained
    # in the label map.
    try:
      # Dynamically try to load the tf v2 lookup, falling back to contrib
      lookup = tf.compat.v2.lookup
      hash_table_class = tf.compat.v2.lookup.StaticHashTable
    except AttributeError:
      lookup = contrib_lookup
      hash_table_class = contrib_lookup.HashTable
    name_to_id_table = hash_table_class(
        initializer=lookup.KeyValueTensorInitializer(
            keys=tf.constant(list(name_to_id.keys())),
            values=tf.constant(list(name_to_id.values()), dtype=tf.int64)),
        default_value=-1)

    self._name_to_id_table = name_to_id_table
    super(_ClassTensorHandler, self).__init__(tensor_key, shape_keys, shape,
                                              default_value)
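Under TF 2.x the same lookup can be exercised eagerly; a minimal sketch of the branch taken when tf.compat.v2.lookup is available:

import tensorflow as tf

table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant(['dog', 'cat']),
        values=tf.constant([1, 2], dtype=tf.int64)),
    default_value=-1)
print(table.lookup(tf.constant(['cat', 'bird'])).numpy())  # [ 2 -1]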
def main(_):
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Mappy Annotation dataset.')
    image_dir = os.path.join(data_dir, 'Images')
    annotations_dir = os.path.join(data_dir, 'Annotations')
    imageSets_dir = os.path.join(data_dir, 'ImageSets')
    train_examples_path = os.path.join(imageSets_dir, 'train.txt')
    train_examples = dataset_util.read_examples_list(train_examples_path)
    val_examples_path = os.path.join(imageSets_dir, 'test.txt')
    val_examples = dataset_util.read_examples_list(val_examples_path)

    # The dataset ships its own train/test split via the ImageSets files, so
    # the random 70/30 split used in the other converters is not needed here.
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    train_output_path = os.path.join(FLAGS.output_dir,
                                     'mappy_blur_train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'mappy_blur_val.record')

    create_tf_record(train_output_path,
                     FLAGS.num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     train_examples,
                     faces_only=FLAGS.faces_only,
                     mask_type=FLAGS.mask_type)
    create_tf_record(val_output_path,
                     FLAGS.num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     val_examples,
                     faces_only=FLAGS.faces_only,
                     mask_type=FLAGS.mask_type)
def main(_):
    data_dir = FLAGS.data_dir

    if not data_dir:
        logging.error('Must provide a data directory')
        return

    output_path = FLAGS.output_path

    if not output_path:
        logging.error('Must provide an output path')
        return

    label_map_path = FLAGS.label_map_path

    if not label_map_path:
        logging.error('Must provide a label map path')
        return

    writer = tf.python_io.TFRecordWriter(output_path)

    label_map_dict = label_map_util.get_label_map_dict(label_map_path)

    logging.info('Reading from data directory.')

    data_dir_jpg_query = os.path.join(data_dir, '*.jpg')

    for idx, image_path in enumerate(glob.glob(data_dir_jpg_query)):
        if idx % 20 == 0:
            logging.info('On image %d (%s)', idx, image_path)

        annotation_path = os.path.splitext(image_path)[0] + '.xml'

        with tf.gfile.GFile(annotation_path, 'r') as fid:
            xml_str = fid.read()

        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(data, image_path, label_map_dict,
                                        FLAGS.ignore_difficult_instances)

        writer.write(tf_example.SerializeToString())

    writer.close()
  def test_get_label_map_dict(self):
    label_map_string = """
      item {
        id:2
        name:'cat'
      }
      item {
        id:1
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as f:
      f.write(label_map_string)

    label_map_dict = label_map_util.get_label_map_dict(label_map_path)
    self.assertEqual(label_map_dict['dog'], 1)
    self.assertEqual(label_map_dict['cat'], 2)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir')
    parser.add_argument('--output_dir')
    args = parser.parse_args()

    print(args.data_dir)
    label_map_path = os.path.join(args.data_dir, 'tf_label_map.pbtxt')
    class_dict = label_map_util.get_label_map_dict(label_map_path)

    text = u""
    for i, name in enumerate(class_dict):
        print i, name
        txt = u"""{0} {1}""".format(i, name)
        text = text + txt
    output_path = os.path.join(args.output_dir, 'tf_labels.txt')
    with open(output_path, 'w') as f:
        f.write(text)
def main(_):
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from dataset.')
    train_path = os.path.join(data_dir, 'train.txt')
    train_examples = dataset_util.read_examples_list(train_path)

    val_path = os.path.join(data_dir, 'eval.txt')
    val_examples = dataset_util.read_examples_list(val_path)

    logging.info('%d training and %d validation examples.', len(train_examples), len(val_examples))

    train_output_path = os.path.join(FLAGS.output_dir, 'tf_train_all.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'tf_val_all.record')

    create_tf_record(train_output_path, label_map_dict, train_examples)
    create_tf_record(val_output_path, label_map_dict, val_examples)
  def test_get_label_map_dict_with_fill_in_gaps_and_background(self):
    label_map_string = """
      item {
        id:3
        name:'cat'
      }
      item {
        id:1
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as f:
      f.write(label_map_string)

    label_map_dict = label_map_util.get_label_map_dict(
        label_map_path, fill_in_gaps_and_background=True)

    self.assertEqual(label_map_dict['background'], 0)
    self.assertEqual(label_map_dict['dog'], 1)
    self.assertEqual(label_map_dict['class_2'], 2)
    self.assertEqual(label_map_dict['cat'], 3)
    self.assertEqual(len(label_map_dict), max(label_map_dict.values()) + 1)
def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
                               label_map_path, validation_set_size):
  """Convert the KITTI detection dataset to TFRecords.

  Args:
    data_dir: The full path to the unzipped folder containing the unzipped data
      from data_object_image_2 and data_object_label_2.zip.
      Folder structure is assumed to be: data_dir/training/label_2 (annotations)
      and data_dir/data_object_image_2/training/image_2 (images).
    output_path: The path to which TFRecord files will be written. The TFRecord
      with the training set will be located at: <output_path>_train.tfrecord
      And the TFRecord with the validation set will be located at:
      <output_path>_val.tfrecord
    classes_to_use: List of strings naming the classes for which data should be
      converted. Use the same names as presented in the KITTI README file.
      Adding the dontcare class will remove all other bounding boxes that
      overlap with areas marked as dontcare regions.
    label_map_path: Path to the label map proto.
    validation_set_size: How many images should be left as the validation set.
      (The first `validation_set_size` examples are selected for the
      validation set.)
  """
  label_map_dict = label_map_util.get_label_map_dict(label_map_path)
  train_count = 0
  val_count = 0

  annotation_dir = os.path.join(data_dir,
                                'training',
                                'label_2')

  image_dir = os.path.join(data_dir,
                           'data_object_image_2',
                           'training',
                           'image_2')

  train_writer = tf.python_io.TFRecordWriter('%s_train.tfrecord' %
                                             output_path)
  val_writer = tf.python_io.TFRecordWriter('%s_val.tfrecord' %
                                           output_path)

  images = sorted(tf.gfile.ListDirectory(image_dir))
  for img_name in images:
    img_num = int(img_name.split('.')[0])
    is_validation_img = img_num < validation_set_size
    img_anno = read_annotation_file(os.path.join(annotation_dir,
                                                 str(img_num).zfill(6)+'.txt'))

    image_path = os.path.join(image_dir, img_name)

    # Filter all bounding boxes of this frame that are of a legal class, and
    # don't overlap with a dontcare region.
    # TODO(talremez) filter out targets that are truncated or heavily occluded.
    annotation_for_image = filter_annotations(img_anno, classes_to_use)

    example = prepare_example(image_path, annotation_for_image, label_map_dict)
    if is_validation_img:
      val_writer.write(example.SerializeToString())
      val_count += 1
    else:
      train_writer.write(example.SerializeToString())
      train_count += 1

  train_writer.close()
  val_writer.close()
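
A hedged usage sketch for convert_kitti_to_tfrecords; every path below is a
placeholder and the class names simply follow KITTI README conventions. The
call is guarded so the module can be imported without touching the data.

if __name__ == '__main__':
  # Hypothetical invocation; all paths are placeholders, not from the source.
  convert_kitti_to_tfrecords(
      data_dir='/data/kitti',
      output_path='/data/kitti/records/kitti',
      classes_to_use=['car', 'pedestrian', 'dontcare'],
      label_map_path='/data/kitti/kitti_label_map.pbtxt',
      validation_set_size=500)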
  def __init__(self,
               load_instance_masks=False,
               instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
               label_map_proto_file=None,
               use_display_name=False,
               dct_method=''):
    """Constructor sets keys_to_features and items_to_handlers.

    Args:
      load_instance_masks: whether or not to load and handle instance masks.
      instance_mask_type: type of instance masks. Options are provided in
        input_reader.proto. This is only used if `load_instance_masks` is True.
      label_map_proto_file: a file path to a
        object_detection.protos.StringIntLabelMap proto. If provided, then the
        mapped IDs of 'image/object/class/text' will take precedence over the
        existing 'image/object/class/label' ID.  Also, if provided, it is
        assumed that 'image/object/class/text' will be in the data.
      use_display_name: whether or not to use the `display_name` for label
        mapping (instead of `name`).  Only used if label_map_proto_file is
        provided.
      dct_method: An optional string. Defaults to the empty string. It only
        takes effect when the image format is jpeg, and specifies a hint about
        the algorithm used for jpeg decompression. Currently valid values
        are ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for
        example, if the jpeg library does not have that specific option.

    Raises:
      ValueError: If `instance_mask_type` option is not one of
        input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL_MASKS, or
        input_reader_pb2.PNG_MASKS.
    """
    self.keys_to_features = {
        'image/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
            tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/filename':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/key/sha256':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/source_id':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/height':
            tf.FixedLenFeature((), tf.int64, 1),
        'image/width':
            tf.FixedLenFeature((), tf.int64, 1),
        # Object boxes and classes.
        'image/object/bbox/xmin':
            tf.VarLenFeature(tf.float32),
        'image/object/bbox/xmax':
            tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymin':
            tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymax':
            tf.VarLenFeature(tf.float32),
        'image/object/class/label':
            tf.VarLenFeature(tf.int64),
        'image/object/class/text':
            tf.VarLenFeature(tf.string),
        'image/object/area':
            tf.VarLenFeature(tf.float32),
        'image/object/is_crowd':
            tf.VarLenFeature(tf.int64),
        'image/object/difficult':
            tf.VarLenFeature(tf.int64),
        'image/object/group_of':
            tf.VarLenFeature(tf.int64),
        'image/object/weight':
            tf.VarLenFeature(tf.float32),
    }
    if dct_method:
      image = slim_example_decoder.Image(
          image_key='image/encoded',
          format_key='image/format',
          channels=3,
          dct_method=dct_method)
    else:
      image = slim_example_decoder.Image(
          image_key='image/encoded', format_key='image/format', channels=3)
    self.items_to_handlers = {
        fields.InputDataFields.image:
            image,
        fields.InputDataFields.source_id: (
            slim_example_decoder.Tensor('image/source_id')),
        fields.InputDataFields.key: (
            slim_example_decoder.Tensor('image/key/sha256')),
        fields.InputDataFields.filename: (
            slim_example_decoder.Tensor('image/filename')),
        # Object boxes and classes.
        fields.InputDataFields.groundtruth_boxes: (
            slim_example_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                             'image/object/bbox/')),
        fields.InputDataFields.groundtruth_area:
            slim_example_decoder.Tensor('image/object/area'),
        fields.InputDataFields.groundtruth_is_crowd: (
            slim_example_decoder.Tensor('image/object/is_crowd')),
        fields.InputDataFields.groundtruth_difficult: (
            slim_example_decoder.Tensor('image/object/difficult')),
        fields.InputDataFields.groundtruth_group_of: (
            slim_example_decoder.Tensor('image/object/group_of')),
        fields.InputDataFields.groundtruth_weights: (
            slim_example_decoder.Tensor('image/object/weight')),
    }
    if load_instance_masks:
      if instance_mask_type in (input_reader_pb2.DEFAULT,
                                input_reader_pb2.NUMERICAL_MASKS):
        self.keys_to_features['image/object/mask'] = (
            tf.VarLenFeature(tf.float32))
        self.items_to_handlers[
            fields.InputDataFields.groundtruth_instance_masks] = (
                slim_example_decoder.ItemHandlerCallback(
                    ['image/object/mask', 'image/height', 'image/width'],
                    self._reshape_instance_masks))
      elif instance_mask_type == input_reader_pb2.PNG_MASKS:
        self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.string)
        self.items_to_handlers[
            fields.InputDataFields.groundtruth_instance_masks] = (
                slim_example_decoder.ItemHandlerCallback(
                    ['image/object/mask', 'image/height', 'image/width'],
                    self._decode_png_instance_masks))
      else:
        raise ValueError('Did not recognize the `instance_mask_type` option.')
    if label_map_proto_file:
      label_map = label_map_util.get_label_map_dict(label_map_proto_file,
                                                    use_display_name)
      # We use a default_value of -1, but we expect all labels to be contained
      # in the label map.
      table = tf.contrib.lookup.HashTable(
          initializer=tf.contrib.lookup.KeyValueTensorInitializer(
              keys=tf.constant(list(label_map.keys())),
              values=tf.constant(list(label_map.values()), dtype=tf.int64)),
          default_value=-1)
      # If the label_map_proto is provided, try to use it in conjunction with
      # the class text, and fall back to a materialized ID.
      # TODO(lzc): note that here we are using the BackupHandler defined in
      # this file (which is a fork of slim_example_decoder.BackupHandler). We
      # need to switch back to slim_example_decoder.BackupHandler once tf 1.5
      # becomes more popular.
      label_handler = BackupHandler(
          slim_example_decoder.LookupTensor(
              'image/object/class/text', table, default_value=''),
          slim_example_decoder.Tensor('image/object/class/label'))
    else:
      label_handler = slim_example_decoder.Tensor('image/object/class/label')
    self.items_to_handlers[
        fields.InputDataFields.groundtruth_classes] = label_handler
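
A hedged usage sketch for the decoder configured above. It assumes this
__init__ belongs to the TF Object Detection API's TfExampleDecoder, whose
decode() method maps a serialized tf.Example string tensor to a dict keyed by
fields.InputDataFields; the class name, record path, and TF1-style session
setup below are illustrative, not taken from the source.

if __name__ == '__main__':
  decoder = TfExampleDecoder(
      load_instance_masks=True,
      instance_mask_type=input_reader_pb2.PNG_MASKS,
      label_map_proto_file='data/label_map.pbtxt')  # hypothetical path
  serialized = tf.data.TFRecordDataset(
      'data/train.record').make_one_shot_iterator().get_next()
  tensor_dict = decoder.decode(serialized)
  with tf.Session() as sess:
    # The class-text lookup tables built in __init__ must be initialized.
    sess.run(tf.tables_initializer())
    boxes = sess.run(tensor_dict[fields.InputDataFields.groundtruth_boxes])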