def create_tfrecords(config_set, config_folders):
    """Convert one dataset split of the iCub World dataset into sharded TFRecords.

    Args:
      config_set: path to a JSON file with the set parameters
        (``file_list``, ``set_name``, ``num_shards``, ``num_images``,
        ``classes_dict``).
      config_folders: path to a JSON file describing the project folder
        layout (keys ``"sets"`` and ``"<set_name>_images"``,
        ``"<set_name>_labels"``, ``"<set_name>_tfrecords"``).
    """
    # Parameters for the creation of tfrecords.
    params = json2dict(config_set)

    # Folder structure of the project.
    folders = json2dict(config_folders)

    # Path to the list file of this set.
    # NOTE(review): plain concatenation assumes folder values end with a
    # separator — TODO confirm against the folders JSON.
    list_path = folders["sets"] + params["file_list"]

    # Paths to the image / annotation / output folders of this set.
    images_folder = folders[params["set_name"] + "_images"]
    labels_folder = folders[params["set_name"] + "_labels"]
    out_folder = folders[params["set_name"] + "_tfrecords"]

    if not tf.io.gfile.exists(out_folder):
        tf.io.gfile.makedirs(out_folder)
    logging.info('Writing to output directory: %s', out_folder)

    # One writer per shard; examples are distributed round-robin below.
    writers = [
        tf.io.TFRecordWriter(out_folder + '/icubw-%05d-of-%05d.tfrecord' %
                             (i, params["num_shards"]))
        for i in range(params["num_shards"])
    ]

    examples_list = tfrecord_util.read_examples_list(list_path)

    logging.info('Reading from iCub World dataset.')
    try:
        for idx, example in enumerate(examples_list):
            # num_images == 0/None means "convert everything".
            if params["num_images"] and idx >= params["num_images"]:
                break
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(labels_folder, example + '.xml')
            with tf.io.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = tfrecord_util.recursive_parse_xml_to_dict(xml)['annotation']

            # BUG FIX: os.path.join was called with a single pre-concatenated
            # argument (images_folder + example + ".jpg"), which skips
            # separator handling; pass the components separately, matching
            # the annotation path construction above.
            img_path = os.path.join(images_folder, example + ".jpg")

            if os.path.isfile(img_path):
                tf_example = dict_to_tf_example(data, img_path,
                                                params["classes_dict"])
                writers[idx % params["num_shards"]].write(
                    tf_example.SerializeToString())
            else:
                # Log (rather than print) annotations whose image is missing.
                logging.warning('Image file not found, skipping: %s', img_path)
    finally:
        # Always close the shard writers, even if conversion fails midway.
        for writer in writers:
            writer.close()
# ---- Beispiel #2 (example separator from the code-sharing site; vote count: 0) ----
def main(_):
    """Convert PASCAL VOC annotations to sharded TFRecords and a COCO-style JSON."""
    # Validate the command-line flags before doing any work.
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))
    if not FLAGS.output_path:
        raise ValueError('output_path cannot be empty.')

    data_dir = FLAGS.data_dir
    # 'merged' means both VOC releases; otherwise just the requested year.
    if FLAGS.year == 'merged':
        years = ['VOC2007', 'VOC2012']
    else:
        years = [FLAGS.year]

    output_dir = os.path.dirname(FLAGS.output_path)
    if not tf.io.gfile.exists(output_dir):
        tf.io.gfile.makedirs(output_dir)
    logging.info('Writing to output directory: %s', output_dir)

    # One TFRecord writer per shard; examples go to shards round-robin.
    writers = []
    for shard in range(FLAGS.num_shards):
        shard_path = FLAGS.output_path + '-%05d-of-%05d.tfrecord' % (
            shard, FLAGS.num_shards)
        writers.append(tf.io.TFRecordWriter(shard_path))

    # Class-name -> id mapping: user-supplied JSON, or the PASCAL default.
    if FLAGS.label_map_json_path:
        with tf.io.gfile.GFile(FLAGS.label_map_json_path, 'rb') as f:
            label_map_dict = json.load(f)
    else:
        label_map_dict = pascal_label_map_dict

    # COCO-style annotation container, dumped to JSON at the very end.
    ann_json_dict = {
        'images': [],
        'type': 'instances',
        'annotations': [],
        'categories': []
    }
    for year in years:
        # Any per-class image-set file lists every example; the second key
        # of the label map is used to pick one such file.
        example_class = list(label_map_dict.keys())[1]
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     example_class + '_' + FLAGS.set + '.txt')
        examples_list = tfrecord_util.read_examples_list(examples_path)
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)

        # Register every class as a COCO category (once per year).
        for class_name, class_id in label_map_dict.items():
            ann_json_dict['categories'].append({
                'supercategory': 'none',
                'id': class_id,
                'name': class_name,
            })

        logging.info('Reading from PASCAL %s dataset.', year)
        for idx, example in enumerate(examples_list):
            # num_images == 0/None means "convert everything".
            if FLAGS.num_images and idx >= FLAGS.num_images:
                break
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            xml_path = os.path.join(annotations_dir, example + '.xml')
            with tf.io.gfile.GFile(xml_path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = tfrecord_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data,
                                            FLAGS.data_dir,
                                            label_map_dict,
                                            FLAGS.ignore_difficult_instances,
                                            ann_json_dict=ann_json_dict)
            writers[idx % FLAGS.num_shards].write(
                tf_example.SerializeToString())

    for writer in writers:
        writer.close()

    # Write the accumulated COCO-style annotations next to the TFRecords.
    json_file_path = os.path.join(
        os.path.dirname(FLAGS.output_path),
        'json_' + os.path.basename(FLAGS.output_path) + '.json')
    with tf.io.gfile.GFile(json_file_path, 'w') as f:
        json.dump(ann_json_dict, f)
# ---- Beispiel #3 (example separator from the code-sharing site; vote count: 0) ----
def main(_):
    """Convert a VOC-style dataset into sharded TFRecords and a COCO-style JSON.

    Unlike the ImageSets-based variants in this file, examples here are
    discovered by globbing the annotations directory directly.
    """
    if FLAGS.set not in SETS:
        raise ValueError("set must be in : {}".format(SETS))
    if not FLAGS.output_path:
        raise ValueError("output_path cannot be empty.")

    data_dir = FLAGS.data_dir

    output_dir = os.path.dirname(FLAGS.output_path)
    if not tf.io.gfile.exists(output_dir):
        tf.io.gfile.makedirs(output_dir)
    logging.info("Writing to output directory: %s", output_dir)

    # CONSISTENCY FIX: tf.python_io.TFRecordWriter is the removed TF1 alias;
    # the rest of this file uses the tf.io namespace.
    # One writer per shard; examples go to shards round-robin.
    writers = [
        tf.io.TFRecordWriter(
            FLAGS.output_path + "-%05d-of-%05d.tfrecord" % (i, FLAGS.num_shards)
        )
        for i in range(FLAGS.num_shards)
    ]

    # Class-name -> id mapping: user-supplied JSON, or the PASCAL default.
    if FLAGS.label_map_json_path:
        with tf.io.gfile.GFile(FLAGS.label_map_json_path, "rb") as f:
            label_map_dict = json.load(f)
    else:
        label_map_dict = pascal_label_map_dict
    # Image sub-directory inside data_dir (defaults to VOC's layout).
    if FLAGS.image_dir:
        image_dir = FLAGS.image_dir
    else:
        image_dir = "JPEGImages"

    # COCO-style annotation container, dumped to JSON at the very end.
    ann_json_dict = {
        "images": [],
        "type": "instances",
        "annotations": [],
        "categories": [],
    }
    # Examples are the annotation files found in the annotations folder.
    # (Replaces dead code: an unused `example_class` and a commented-out
    # ImageSets-based listing.)
    examples_list = glob.glob(os.path.join(data_dir, FLAGS.annotations_dir, "*"))

    # Register every class as a COCO category.
    for class_name, class_id in label_map_dict.items():
        cls = {"supercategory": "none", "id": class_id, "name": class_name}
        ann_json_dict["categories"].append(cls)

    for idx, example in enumerate(examples_list):
        # num_images == 0/None means "convert everything".
        if FLAGS.num_images and idx >= FLAGS.num_images:
            break
        if idx % 100 == 0:
            logging.info("On image %d of %d", idx, len(examples_list))
        # `example` is already the full path to the annotation XML.
        # CONSISTENCY FIX: tf.gfile.GFile is the removed TF1 alias; use
        # tf.io.gfile like the rest of this file.
        with tf.io.gfile.GFile(example, "r") as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = tfrecord_util.recursive_parse_xml_to_dict(xml)["annotation"]

        tf_example = dict_to_tf_example(
            data,
            FLAGS.data_dir,
            label_map_dict,
            FLAGS.ignore_difficult_instances,
            image_dir,
            ann_json_dict=ann_json_dict,
        )
        writers[idx % FLAGS.num_shards].write(tf_example.SerializeToString())

    for writer in writers:
        writer.close()

    # Write the accumulated COCO-style annotations next to the TFRecords.
    json_file_path = os.path.join(
        os.path.dirname(FLAGS.output_path),
        "json_" + os.path.basename(FLAGS.output_path) + ".json",
    )
    with tf.io.gfile.GFile(json_file_path, "w") as f:
        json.dump(ann_json_dict, f)
# ---- Beispiel #4 (example separator from the code-sharing site; vote count: 0) ----
def main(_):
  """Convert a locally-listed dataset into sharded TFRecords and a COCO-style
  JSON annotation file.

  NOTE(review): the example list and annotation paths below are hard-coded
  to one developer's machine; parameterize them via flags before reusing.
  """
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))
  if not FLAGS.output_path:
    raise ValueError('output_path cannot be empty.')

  data_dir = FLAGS.data_dir
  # 'merged' means both VOC releases; otherwise just the requested year.
  years = ['VOC2007', 'VOC2012']
  if FLAGS.year != 'merged':
    years = [FLAGS.year]

  logging.info('writing to output path: %s', FLAGS.output_path)
  # CONSISTENCY FIX: tf.python_io.TFRecordWriter is the removed TF1 alias;
  # the rest of this file uses the tf.io namespace.
  writers = [
      tf.io.TFRecordWriter(
          FLAGS.output_path + '-%05d-of-%05d.tfrecord' % (i, FLAGS.num_shards))
      for i in range(FLAGS.num_shards)
  ]

  # Class-name -> id mapping: user-supplied JSON, or the PASCAL default.
  if FLAGS.label_map_json_path:
    with tf.io.gfile.GFile(FLAGS.label_map_json_path, 'rb') as f:
      label_map_dict = json.load(f)
  else:
    label_map_dict = pascal_label_map_dict

  # BUG FIX: ann_json_dict used to be re-created inside the year loop while
  # being dumped only once after it, so with --year=merged only the last
  # year's annotations survived.  Build it once up front (matching the other
  # converter in this file) and register categories a single time.
  ann_json_dict = {
      'images': [],
      'type': 'instances',
      'annotations': [],
      'categories': []
  }
  for class_name, class_id in label_map_dict.items():
    cls = {'supercategory': 'none', 'id': class_id, 'name': class_name}
    ann_json_dict['categories'].append(cls)

  for year in years:
    logging.info('Reading from PASCAL %s dataset.', year)
    # Hard-coded example list; one image name per line.
    with open("/home/dams/Documents/val.txt") as f:
      examples_list = f.readlines()
    examples_list = [x.strip() for x in examples_list]
    examples_list = ["/media/dams/Windows/Users/Dam\'s/Pictures/Datura/labels/xml/" + x for x in examples_list]
    # Listed names are .png images; the annotations carry an .xml suffix.
    examples_list = [x.replace('png', 'xml') for x in examples_list]
    for idx, example in enumerate(examples_list):
      # num_images == 0/None means "convert everything".
      if FLAGS.num_images and idx >= FLAGS.num_images:
        break
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      # CONSISTENCY FIX: tf.gfile.GFile is the removed TF1 alias; use
      # tf.io.gfile like the rest of this file.
      with tf.io.gfile.GFile(example, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = tfrecord_util.recursive_parse_xml_to_dict(xml)['annotation']

      tf_example = dict_to_tf_example(data, example, label_map_dict,
                                      FLAGS.ignore_difficult_instances,
                                      ann_json_dict=ann_json_dict)
      writers[idx % FLAGS.num_shards].write(tf_example.SerializeToString())

  for writer in writers:
    writer.close()

  # Write the accumulated COCO-style annotations next to the TFRecords.
  json_file_path = os.path.join(
      os.path.dirname(FLAGS.output_path),
      'json_' + os.path.basename(FLAGS.output_path) + '.json')
  with tf.io.gfile.GFile(json_file_path, 'w') as f:
    json.dump(ann_json_dict, f)