def create_tfrecords(config_set, config_folders):
  # parameters for the creation of tfrecords
  params = json2dict(config_set)
  # folder structure of the project
  folders = json2dict(config_folders)
  # path to the list file of this set
  list_path = folders["sets"] + params["file_list"]
  # path to the image folder of this set
  images_folder = folders[params["set_name"] + "_images"]
  # path to the annotation folder of this set
  labels_folder = folders[params["set_name"] + "_labels"]
  # path of the output folder
  out_folder = folders[params["set_name"] + "_tfrecords"]

  if not tf.io.gfile.exists(out_folder):
    tf.io.gfile.makedirs(out_folder)
  logging.info('Writing to output directory: %s', out_folder)

  writers = [
      tf.io.TFRecordWriter(out_folder + '/icubw-%05d-of-%05d.tfrecord' %
                           (i, params["num_shards"]))
      for i in range(params["num_shards"])
  ]

  examples_list = tfrecord_util.read_examples_list(list_path)

  logging.info('Reading from iCub World dataset.')
  for idx, example in enumerate(examples_list):
    if params["num_images"] and idx >= params["num_images"]:
      break
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples_list))
    path = os.path.join(labels_folder, example + '.xml')
    with tf.io.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = tfrecord_util.recursive_parse_xml_to_dict(xml)['annotation']

    img_path = os.path.join(images_folder, example + ".jpg")
    if os.path.isfile(img_path):
      tf_example = dict_to_tf_example(data, img_path, params["classes_dict"])
      writers[idx % params["num_shards"]].write(tf_example.SerializeToString())
    else:
      # image listed in the set file but missing on disk
      logging.warning('Skipping missing image: %s', img_path)

  for writer in writers:
    writer.close()
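# Usage sketch (not part of the original script): the JSON keys below are the
# ones create_tfrecords() actually reads; the file names, folder layout and
# class ids are illustrative assumptions.
#
# config_set.json:
#   {"set_name": "train", "file_list": "train.txt", "num_shards": 10,
#    "num_images": 0, "classes_dict": {"mug": 1, "book": 2}}
#
# config_folders.json:
#   {"sets": "data/sets/",
#    "train_images": "data/train/images/",
#    "train_labels": "data/train/labels/",
#    "train_tfrecords": "data/train/tfrecords/"}
#
# create_tfrecords("config_set.json", "config_folders.json")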
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))
  if not FLAGS.output_path:
    raise ValueError('output_path cannot be empty.')

  data_dir = FLAGS.data_dir
  years = ['VOC2007', 'VOC2012']
  if FLAGS.year != 'merged':
    years = [FLAGS.year]

  output_dir = os.path.dirname(FLAGS.output_path)
  if not tf.io.gfile.exists(output_dir):
    tf.io.gfile.makedirs(output_dir)
  logging.info('Writing to output directory: %s', output_dir)

  writers = [
      tf.io.TFRecordWriter(FLAGS.output_path + '-%05d-of-%05d.tfrecord' %
                           (i, FLAGS.num_shards))
      for i in range(FLAGS.num_shards)
  ]

  if FLAGS.label_map_json_path:
    with tf.io.gfile.GFile(FLAGS.label_map_json_path, 'rb') as f:
      label_map_dict = json.load(f)
  else:
    label_map_dict = pascal_label_map_dict

  ann_json_dict = {
      'images': [],
      'type': 'instances',
      'annotations': [],
      'categories': []
  }

  for year in years:
    example_class = list(label_map_dict.keys())[1]
    examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                 example_class + '_' + FLAGS.set + '.txt')
    examples_list = tfrecord_util.read_examples_list(examples_path)

    annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)

    for class_name, class_id in label_map_dict.items():
      cls = {'supercategory': 'none', 'id': class_id, 'name': class_name}
      ann_json_dict['categories'].append(cls)

    logging.info('Reading from PASCAL %s dataset.', year)
    for idx, example in enumerate(examples_list):
      if FLAGS.num_images and idx >= FLAGS.num_images:
        break
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      path = os.path.join(annotations_dir, example + '.xml')
      with tf.io.gfile.GFile(path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = tfrecord_util.recursive_parse_xml_to_dict(xml)['annotation']
      tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances,
                                      ann_json_dict=ann_json_dict)
      writers[idx % FLAGS.num_shards].write(tf_example.SerializeToString())

  for writer in writers:
    writer.close()

  json_file_path = os.path.join(
      os.path.dirname(FLAGS.output_path),
      'json_' + os.path.basename(FLAGS.output_path) + '.json')
  with tf.io.gfile.GFile(json_file_path, 'w') as f:
    json.dump(ann_json_dict, f)
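# A hedged sketch of the flag definitions and entry point this main() relies
# on. The flag names are exactly the FLAGS.* attributes read above; the use of
# absl, the defaults and the help strings are assumptions for illustration.
from absl import app, flags

flags.DEFINE_string('data_dir', '', 'Root directory of the raw PASCAL VOC dataset.')
flags.DEFINE_string('set', 'train', 'Which split to convert (must be in SETS).')
flags.DEFINE_string('year', 'VOC2007', 'Challenge year, or "merged" for 2007+2012.')
flags.DEFINE_string('annotations_dir', 'Annotations', 'Annotation folder inside each year directory.')
flags.DEFINE_string('output_path', '', 'Prefix used for the output TFRecord shards.')
flags.DEFINE_string('label_map_json_path', None, 'Optional JSON label map; falls back to pascal_label_map_dict.')
flags.DEFINE_integer('num_shards', 100, 'Number of TFRecord shards to write.')
flags.DEFINE_integer('num_images', 0, 'Maximum number of images to convert; 0 means no limit.')
flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to skip objects marked as difficult.')
FLAGS = flags.FLAGS

if __name__ == '__main__':
  app.run(main)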
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError("set must be in : {}".format(SETS))
    if not FLAGS.output_path:
        raise ValueError("output_path cannot be empty.")

    data_dir = FLAGS.data_dir

    output_dir = os.path.dirname(FLAGS.output_path)
    if not tf.io.gfile.exists(output_dir):
        tf.io.gfile.makedirs(output_dir)
    logging.info("Writing to output directory: %s", output_dir)

    writers = [
        tf.io.TFRecordWriter(
            FLAGS.output_path + "-%05d-of-%05d.tfrecord" % (i, FLAGS.num_shards)
        )
        for i in range(FLAGS.num_shards)
    ]

    if FLAGS.label_map_json_path:
        with tf.io.gfile.GFile(FLAGS.label_map_json_path, "rb") as f:
            label_map_dict = json.load(f)
    else:
        label_map_dict = pascal_label_map_dict

    if FLAGS.image_dir:
        image_dir = FLAGS.image_dir
    else:
        image_dir = "JPEGImages"

    ann_json_dict = {
        "images": [],
        "type": "instances",
        "annotations": [],
        "categories": [],
    }

    # Original split-file lookup, kept for reference but unused in this variant:
    # example_class = list(label_map_dict.keys())[1]
    # examples_path = os.path.join(data_dir, 'ImageSets', 'Main',
    #                              example_class + '_' + FLAGS.set + '.txt')
    # examples_list = tfrecord_util.read_examples_list(examples_path)
    #
    # Instead, every annotation file found in the annotations directory is used.
    examples_list = glob.glob(os.path.join(data_dir, FLAGS.annotations_dir, "*"))
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)

    for class_name, class_id in label_map_dict.items():
        cls = {"supercategory": "none", "id": class_id, "name": class_name}
        ann_json_dict["categories"].append(cls)

    for idx, example in enumerate(examples_list):
        if FLAGS.num_images and idx >= FLAGS.num_images:
            break
        if idx % 100 == 0:
            logging.info("On image %d of %d", idx, len(examples_list))
        # Each glob entry is already the full path to an XML annotation.
        # path = os.path.join(annotations_dir, example + '.xml')
        path = example
        with tf.io.gfile.GFile(path, "r") as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = tfrecord_util.recursive_parse_xml_to_dict(xml)["annotation"]
        tf_example = dict_to_tf_example(
            data,
            FLAGS.data_dir,
            label_map_dict,
            FLAGS.ignore_difficult_instances,
            image_dir,
            ann_json_dict=ann_json_dict,
        )
        writers[idx % FLAGS.num_shards].write(tf_example.SerializeToString())

    for writer in writers:
        writer.close()

    json_file_path = os.path.join(
        os.path.dirname(FLAGS.output_path),
        "json_" + os.path.basename(FLAGS.output_path) + ".json",
    )
    with tf.io.gfile.GFile(json_file_path, "w") as f:
        json.dump(ann_json_dict, f)
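# Example invocation for the glob-based variant above (the script name and
# paths are illustrative assumptions; the flag names are the FLAGS.* attributes
# the function reads). Because it globs data_dir/annotations_dir/*, no
# ImageSets split file is needed; images are looked up under image_dir.
#
#   python create_tfrecord.py \
#     --data_dir=/data/my_voc_style_dataset \
#     --annotations_dir=Annotations \
#     --image_dir=JPEGImages \
#     --set=train \
#     --output_path=/data/tfrecords/train \
#     --num_shards=32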
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))
  if not FLAGS.output_path:
    raise ValueError('output_path cannot be empty.')

  data_dir = FLAGS.data_dir
  years = ['VOC2007', 'VOC2012']
  if FLAGS.year != 'merged':
    years = [FLAGS.year]

  logging.info('writing to output path: %s', FLAGS.output_path)
  writers = [
      tf.io.TFRecordWriter(FLAGS.output_path + '-%05d-of-%05d.tfrecord' %
                           (i, FLAGS.num_shards))
      for i in range(FLAGS.num_shards)
  ]

  if FLAGS.label_map_json_path:
    with tf.io.gfile.GFile(FLAGS.label_map_json_path, 'rb') as f:
      label_map_dict = json.load(f)
  else:
    label_map_dict = pascal_label_map_dict

  for year in years:
    ann_json_dict = {
        'images': [],
        'type': 'instances',
        'annotations': [],
        'categories': []
    }
    for class_name, class_id in label_map_dict.items():
      cls = {'supercategory': 'none', 'id': class_id, 'name': class_name}
      ann_json_dict['categories'].append(cls)

    logging.info('Reading from PASCAL %s dataset.', year)
    # examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
    #                              'aeroplane_' + FLAGS.set + '.txt')
    # annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
    # examples_list = tfrecord_util.read_examples_list(examples_path)

    # Build the example list from a plain text file of image names and rewrite
    # each entry into the path of its XML annotation.
    with open("/home/dams/Documents/val.txt") as f:
      examples_list = f.readlines()
    examples_list = [x.strip() for x in examples_list]
    examples_list = [
        "/media/dams/Windows/Users/Dam\'s/Pictures/Datura/labels/xml/" + x
        for x in examples_list
    ]
    # examples_list = [x.replace('/home/dgameiro', '/data/DB_2020') for x in examples_list]
    # examples_list = [x.replace('images', 'labels/xml') for x in examples_list]
    examples_list = [x.replace('png', 'xml') for x in examples_list]

    for idx, example in enumerate(examples_list):
      if FLAGS.num_images and idx >= FLAGS.num_images:
        break
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      # path = os.path.join(annotations_dir, example + '.xml')
      # with tf.gfile.GFile(path, 'r') as fid:
      with tf.io.gfile.GFile(example, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = tfrecord_util.recursive_parse_xml_to_dict(xml)['annotation']
      tf_example = dict_to_tf_example(data, example, label_map_dict,
                                      FLAGS.ignore_difficult_instances,
                                      ann_json_dict=ann_json_dict)
      writers[idx % FLAGS.num_shards].write(tf_example.SerializeToString())

  for writer in writers:
    writer.close()

  json_file_path = os.path.join(
      os.path.dirname(FLAGS.output_path),
      'json_' + os.path.basename(FLAGS.output_path) + '.json')
  with tf.io.gfile.GFile(json_file_path, 'w') as f:
    json.dump(ann_json_dict, f)
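# Sketch of the path rewriting performed above, under the assumption that
# val.txt lists image file names such as "datura_0001.png" (the file name is
# hypothetical). Each name is prefixed with the XML label directory and its
# extension swapped, e.g.:
#
#   "datura_0001.png"
#     -> "/media/dams/Windows/Users/Dam's/Pictures/Datura/labels/xml/datura_0001.png"
#     -> "/media/dams/Windows/Users/Dam's/Pictures/Datura/labels/xml/datura_0001.xml"
#
# Note that str.replace('png', 'xml') also rewrites any other 'png' substring
# occurring in the path; replacing '.png' with '.xml' would be the stricter form.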