def main(_):
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  image_dir = FLAGS.image_dir
  annotations_dir = FLAGS.annotations_dir
  logging.info('Reading from dataset: ' + annotations_dir)
  examples_list = os.listdir(annotations_dir)
  for idx, example in enumerate(examples_list):
    if example.endswith('.xml'):
      if idx % 50 == 0:
        print('On image %d of %d' % (idx, len(examples_list)))
      path = os.path.join(annotations_dir, example)
      with tf.gfile.GFile(path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
      tf_example = dict_to_tf_example(data, image_dir, label_map_dict)
      writer.write(tf_example.SerializeToString())
  writer.close()
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('Set arg must be in {}'.format(SETS))
  set_name = FLAGS.set
  data_dir = FLAGS.data_dir
  # Validate data directory
  if not data_dir:
    raise ValueError('Did not specify required arg "data_dir"')
  elif not os.path.isdir(data_dir):
    raise ValueError(
        'data_dir arg does not appear to be a real directory, please check it.')
  output_path_root = FLAGS.output_path
  # Infer the full output path unless it looks like an absolute path
  if not output_path_root.startswith('/'):
    # Then it's a relative path like we were aiming for
    output_path_root = os.path.join(data_dir, output_path_root)
  if not os.path.isdir(output_path_root):
    raise ValueError(
        'Could not validate that target output path exists, please check arg.')
  tf_filename = set_name + '.tfrecord'
  output_path = os.path.join(output_path_root, tf_filename)
  writer = tf.python_io.TFRecordWriter(output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  samples_path_root = os.path.join(data_dir, 'ImageSets', 'Main')
  if not os.path.isdir(samples_path_root):
    raise ValueError(
        'Expected ImageSets/Main to be in samples path, did not find.')
  # Get the names of .txt files in there and infer the one we need.
  samples_path = None
  for rootpath, dirnames, filenames in os.walk(samples_path_root):
    for filename in filenames:
      if filename.endswith('.txt'):
        # In the worst case we'll try whatever ends in .txt there,
        # but preferably the filename has the set name in it.
        samples_path = os.path.join(rootpath, filename)
        if set_name in filename:
          break
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
  # Should not need to validate samples_list since we derived it from
  # existing .txt files.
  samples_list = dataset_util.read_samples_list(samples_path)
  for idx, sample in enumerate(samples_list):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(samples_list))
    path = os.path.join(annotations_dir, sample + '.xml')
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_record = dict_to_tf_record(data, FLAGS.data_dir, label_map_dict,
                                  FLAGS.ignore_difficult_instances)
    writer.write(tf_record.SerializeToString())
  writer.close()
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  data_dir = FLAGS.data_dir
  os.makedirs(os.path.dirname(FLAGS.output_path), exist_ok=True)
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  examples_path = os.path.join(data_dir, FLAGS.set + '.txt')
  annotations_dir = os.path.join(
      os.path.split(data_dir)[0], FLAGS.annotations_dir)
  examples_list = dataset_util.read_examples_list(examples_path)
  for idx, example in tqdm(
      enumerate(examples_list),
      desc="Parsing annotations from {0} set into TF-Example".format(FLAGS.set),
      total=len(examples_list)):
    path = os.path.join(annotations_dir, example + '.xml')
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, FLAGS.set,
                                    label_map_dict,
                                    FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())
  writer.close()
def main(_):
  data_dir = FLAGS.data_dir
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  annotations_path = os.path.join(data_dir, "annotations")
  idx = 0
  tf_idx = 0
  file_list = os.listdir(annotations_path)
  print(annotations_path)
  # file_list = shuffle(file_list)
  # Write at most FLAGS.samples_per_file examples per output shard.
  while idx < len(file_list):
    tf_filename = get_output_filename(FLAGS.output_path, tf_idx)
    writer = tf.python_io.TFRecordWriter(tf_filename)
    j = 0
    while idx < len(file_list) and j < int(FLAGS.samples_per_file):
      fname = file_list[idx]
      path = os.path.join(annotations_path, fname[0:-4] + ".xml")
      with tf.gfile.GFile(path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
      tf_example = dict_to_tf_example(data, data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
      writer.write(tf_example.SerializeToString())
      idx += 1
      j += 1
    tf_idx += 1
    writer.close()
def main(_):
  data_dir = FLAGS.data_dir
  output_dir = FLAGS.output_dir
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  images_dir = os.path.join(data_dir, 'images')
  if not os.path.exists(images_dir):
    raise RuntimeError('`data_dir` must contain an `images` image directory')
  annotations_dir = os.path.join(data_dir, 'annotations')
  if not os.path.exists(annotations_dir):
    raise RuntimeError(
        '`data_dir` must contain an `annotations` directory of annotation XMLs')
  for s in SETS:
    writer = tf.python_io.TFRecordWriter(
        os.path.join(output_dir, '{}.record'.format(s)))
    examples_path = os.path.join(data_dir, 'set', '{}.txt'.format(s))
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
      path = os.path.join(annotations_dir, example + '.xml')
      with tf.gfile.GFile(path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
      tf_example = dict_to_tf_example(data, images_dir, label_map_dict)
      writer.write(tf_example.SerializeToString())
    print('[INFO] `{}.record` saved successfully to `{}`'.format(s, output_dir))
    writer.close()
def generate_tfrecord(data_dir, tf_recod_path):
  data_dir = os.path.abspath(data_dir)
  output_path = os.path.abspath(tf_recod_path)
  ignore_difficult_instances = False
  writer = tf.python_io.TFRecordWriter(output_path)
  label_map_dict = {'0': 0, '1': 1}
  from glob import glob
  xml_files = glob(os.path.join(data_dir, '*.xml'))
  img_files = glob(os.path.join(data_dir, '*.jpg')) + \
      glob(os.path.join(data_dir, '*.png'))
  get_bld_filename = lambda path: os.path.splitext(os.path.basename(path))[0]
  get_bld_set = lambda filenames: set(map(get_bld_filename, filenames))
  xml_4_check = get_bld_set(xml_files)
  img_4_check = get_bld_set(img_files)
  diff = xml_4_check.symmetric_difference(img_4_check)
  if len(diff) != 0:
    for item in diff:
      print(item, 'xml or img file lost')
    raise ValueError('please check the data over and over again')
  # Sort both lists by base filename so zip() pairs each XML with its image.
  xml_files = sorted(xml_files, key=get_bld_filename)
  img_files = sorted(img_files, key=get_bld_filename)
  for idx, (xml_path, img_path) in enumerate(zip(xml_files, img_files)):
    if idx % 100 == 0:
      print('On image {} of {}'.format(idx, len(xml_files)))
    with tf.gfile.GFile(xml_path, 'r') as fid:
      xml_str = fid.read().encode('utf8')
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    data['folder'] = data_dir
    data['filename'] = img_path
    tf_example = dict_to_tf_example(data, data_dir, label_map_dict,
                                    ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())
  writer.close()
def create_tf_record(output_filename, num_shards, label_map_dict,
                     annotations_dir, image_dir, examples):
  with contextlib2.ExitStack() as tf_record_close_stack:
    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, output_filename, num_shards)
    for idx, example in enumerate(examples):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples))
      xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
      if not os.path.exists(xml_path):
        logging.warning('Could not find %s, ignoring example.', xml_path)
        continue
      with tf.gfile.GFile(xml_path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
      try:
        tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
        if tf_example:
          shard_idx = idx % num_shards
          output_tfrecords[shard_idx].write(tf_example.SerializeToString())
      except ValueError:
        logging.warning('Invalid example: %s, ignoring.', xml_path)
def main(_):
  data_dir = FLAGS.data_dir
  output_dir = os.path.dirname(FLAGS.output_path)
  if not os.path.exists(output_dir):
    os.mkdir(output_dir)
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  logging.info('Reading dataset.')
  examples_path = os.path.join(data_dir, 'example', 'cooking_example.txt')
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
  examples_list = dataset_util.read_examples_list(examples_path)
  for idx, example in enumerate(examples_list):
    print('example', example)
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples_list))
    path = os.path.join(annotations_dir, example + '.xml')
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(
        data,
        FLAGS.data_dir,
        label_map_dict,
        example,
        FLAGS.ignore_difficult_instances,
    )
    writer.write(tf_example.SerializeToString())
  writer.close()
def main(_):
  images_dir = FLAGS.images_dir
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  os.chdir(images_dir)
  file_types = ('*.jpg', '*.jpeg')
  image_files = []
  for file_type in file_types:
    image_files.extend(glob.glob(file_type))
  annotations_dir = os.path.join(images_dir, FLAGS.annotations_dir)
  for idx, image_file in enumerate(image_files):
    print(idx, image_file)
    annotation_path = os.path.join(
        annotations_dir, os.path.splitext(image_file)[0] + '.xml')
    if not os.path.exists(annotation_path):
      print('Could not find {}, ignoring example.'.format(annotation_path))
      continue
    with tf.gfile.GFile(annotation_path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, annotations_dir, FLAGS.images_dir,
                                    label_map_dict, FLAGS.include_masks,
                                    FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())
  writer.close()
def fun(path, imgpath):
  img = cv.imread(imgpath)
  line = open(path).read()
  xml = etree.fromstring(line)
  data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
  image_list = []
  box_list = []
  for obj in data['object']:
    box_list.append([
        int(obj['bndbox']['ymin']),
        int(obj['bndbox']['ymax']),
        int(obj['bndbox']['xmin']),
        int(obj['bndbox']['xmax'])
    ])
  mark = False
  # NOTE: the iterable name in the source was unclear; iterating over the
  # annotated boxes (box_list) is assumed here. Crops that overlap an
  # annotated box (per listIou) are written to DIR_yc, the rest are
  # subsampled into DIR_zc.
  for i, box in enumerate(box_list):
    _path = os.path.basename(imgpath).split('.jpg')[0] + "%02d" % i + '.jpg'
    if listIou(box_list, box):
      _path = os.path.join(DIR_yc, _path)
      cv.imwrite(_path, img[box[0]:box[1], box[2]:box[3]])
    else:
      if random.randint(0, 100) < 60:
        continue
      _path = os.path.join(DIR_zc, _path)
      cv.imwrite(_path, img[box[0]:box[1], box[2]:box[3]])
def create_train_records_from_xml_files(label_dirs, xml_filenames_list,
                                        output_dir, shard_num=4):
  etrees = [
      read_etree_from_path(os.path.join(label_dir, filename))
      for label_dir, filenames in zip(label_dirs, xml_filenames_list)
      for filename in filenames
  ]
  datas = [
      dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
      for xml in etrees
  ]
  datas = [data for data in datas if data.get('object', None)]
  trainset_dir = output_dir
  evalset_dir = _join_path(output_dir, 'eval')
  print("Detected {} valid xml files".format(len(datas)))
  shard_size = len(datas) // shard_num
  processes = []
  for shard_index in range(shard_num):
    start_i = shard_index * shard_size
    end_i = (shard_index + 1) * shard_size \
        if shard_index < shard_num - 1 else len(datas)
    process = PipelineProcess(datas[start_i:end_i],
                              trainset_dir=trainset_dir,
                              evalset_dir=evalset_dir)
    processes.append(process)
    process.start()
  for process in processes:
    process.join()
    print('Pipeline #{} joined.'.format(process.pid))
  print("done.")
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  data_dir = FLAGS.data_dir
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  print(FLAGS.label_map_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  # for year in years:
  logging.info('Reading from QUIZ dataset.')
  examples_path = os.path.join(data_dir, 'split',
                               'computer_' + FLAGS.set + '.txt')
  print(examples_path)
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
  examples_list = dataset_util.read_examples_list(examples_path)
  for idx, example in enumerate(examples_list):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples_list))
    path = os.path.join(annotations_dir, example + '.xml')
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                    FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())
  writer.close()
def main(_):
  # if FLAGS.set not in SETS:
  #   raise ValueError('set must be in : {}'.format(SETS))
  # if FLAGS.year not in YEARS:
  #   raise ValueError('year must be in : {}'.format(YEARS))
  data_dir = FLAGS.data_dir
  # years = ['VOC2007', 'VOC2012']
  # if FLAGS.year != 'merged':
  #   years = [FLAGS.year]
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  # for year in years:
  #   logging.info('Reading from PASCAL %s dataset.', year)
  #   examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
  #                                'aeroplane_' + FLAGS.set + '.txt')
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
  # examples_list = dataset_util.read_examples_list(examples_path)
  # Dataset filenames, and shuffling.
  path = os.path.join(annotations_dir)
  examples_list = sorted(os.listdir(path))
  if (True):
    random.seed(42)
    random.shuffle(examples_list)
  dataset_len = len(examples_list)
  idx = 0
  # for idx, example in enumerate(examples_list):
  type = 'test'
  tf_filename = _get_output_filename(FLAGS.output_path, name='', idx=0,
                                     type=type)
  writer = tf.python_io.TFRecordWriter(tf_filename)
  write_train_flag = False
  while idx < dataset_len:
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples_list))
    if ((idx / dataset_len) * 100 > FLAGS.test_percentage
        and not write_train_flag):
      # Switch from the test writer to the train writer once the requested
      # test percentage has been written; close the test writer first.
      writer.close()
      type = 'train'
      tf_filename = _get_output_filename(FLAGS.output_path, name='', idx=0,
                                         type=type)
      writer = tf.python_io.TFRecordWriter(tf_filename)
      write_train_flag = True
    example = examples_list[idx]
    path = os.path.join(annotations_dir, example)
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                    FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())
    idx += 1
  writer.close()
def create_tf_record(output_filename, label_map_dict, annotations_dir,
                     image_dir, examples):
  """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
  """
  writer = tf.python_io.TFRecordWriter(output_filename)
  for idx, example in enumerate(examples):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples))
    path = os.path.join(annotations_dir, 'xmls', example + '.xml')
    if not os.path.exists(path):
      logging.warning('Could not find %s, ignoring example.', path)
      continue
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
    writer.write(tf_example.SerializeToString())
  writer.close()
def create_tf_record(output_filename, label_map_dict, annotation_dir,
                     image_dir, examples, mask_type='png'):
  writer = tf.python_io.TFRecordWriter(output_filename)
  for idx, example in enumerate(examples):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples))
    xml_path = annotation_dir + 'xmls/' + example + '.xml'
    mask_dir = annotation_dir + 'masks/'
    if not os.path.exists(xml_path):
      logging.warning('Could not find %s, ignoring example.', xml_path)
      continue
    with tf.gfile.GFile(xml_path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    try:
      # Pass the mask_type argument through instead of hard-coding 'png'.
      tf_example = dict_to_tf_example(data, mask_dir, label_map_dict,
                                      image_dir, mask_type=mask_type)
      writer.write(tf_example.SerializeToString())
    except ValueError:
      logging.warning('Invalid example: %s, ignoring.', xml_path)
  writer.close()
def create_tf_record(
    output_filename,
    #num_shards,
    label_map_dict,
    annotations_dir,
    image_dir,
    examples,
    #faces_only=True,
    #mask_type='png'
):
  """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    num_shards: Number of shards for output file.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
    faces_only: If True, generates bounding boxes for pet faces. Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.
  """
  with contextlib2.ExitStack() as tf_record_close_stack:
    writer = tf.python_io.TFRecordWriter(output_filename)
    #output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
    #    tf_record_close_stack, output_filename, num_shards)
    for idx, example in enumerate(examples):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples))
      xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
      #mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png')
      if not os.path.exists(xml_path):
        logging.warning('Could not find %s, ignoring example.', xml_path)
        continue
      with tf.gfile.GFile(xml_path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
      try:
        tf_example = dict_to_tf_example(
            data,
            #mask_path,
            label_map_dict,
            image_dir,
            #faces_only=faces_only,
            #mask_type=mask_type
        )
        writer.write(tf_example.SerializeToString())
        #if tf_example:
        #  shard_idx = idx % num_shards
        #  output_tfrecords[shard_idx].write(tf_example.SerializeToString())
      except ValueError:
        logging.warning('Invalid example: %s, ignoring.', xml_path)
    writer.close()
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))
  data_dir = FLAGS.data_dir
  years = ['VOC2007', 'VOC2012']
  if FLAGS.year != 'merged':
    years = [FLAGS.year]
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  for year in years:
    logging.info('Reading from PASCAL %s dataset.', year)
    examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                 'aeroplane_' + FLAGS.set + '.txt')
    annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      path = os.path.join(annotations_dir, example + '.xml')
      with tf.gfile.GFile(path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
      tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
      writer.write(tf_example.SerializeToString())
  writer.close()
def create_tf_record(output_filename, label_map_dict, annotations_dir,
                     image_dir, examples):
  """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
  """
  writer = tf.python_io.TFRecordWriter(output_filename)
  for idx, example in enumerate(examples):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples))
    xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
    if not os.path.exists(xml_path):
      logging.warning('Could not find %s, ignoring example.', xml_path)
      continue
    with tf.gfile.GFile(xml_path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    try:
      tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
      writer.write(tf_example.SerializeToString())
    except ValueError:
      logging.warning('Invalid example: %s, ignoring.', xml_path)
  writer.close()
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  writer = tf_io.TFRecordWriter(FLAGS.output_path)
  logging.info('Reading from all_drones dataset!')
  files_list_file = os.path.join(FLAGS.data_dir, FLAGS.set + '.txt')
  examples_list = dataset_util.read_examples_list(files_list_file)
  for idx, example in enumerate(examples_list):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples_list))
      logging.info('Save the tfrecord file to %s!' % FLAGS.output_path)
    path = os.path.join(FLAGS.annotations_dir, example + '.xml')
    with tf_gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.raw_data_dir,
                                    FLAGS.label_map_path,
                                    FLAGS.ignore_difficult_instances,
                                    is_debug=FLAGS.is_debug)
    writer.write(tf_example.SerializeToString())
  writer.close()
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  data_dir = FLAGS.data_dir
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  logging.info('Reading from CVHCI dataset.')
  # examples_path = os.path.join(data_dir, 'ImageSets', 'aeroplane_' + FLAGS.set + '.txt')  <-- could be important!!!
  examples_path = os.path.join(data_dir, 'ImageSets', FLAGS.set + '.txt')
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
  examples_list = dataset_util.read_examples_list(examples_path)
  for idx, example in enumerate(examples_list):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples_list))
    path = os.path.join(annotations_dir, example + '.xml')
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                    FLAGS.image_dir, path)
    writer.write(tf_example.SerializeToString())
  writer.close()
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  data_dir = FLAGS.data_dir
  label_map_dict = label_map_util.get_label_map_dict(
      '/home/zju/models/research/object_detection/Det_datasets/pascal_label_map.pbtxt'
  )
  txt_path = os.path.join('Det_datasets', 'train_list' + '.txt')
  examples_list = read_examples_list(txt_path)
  for idx, example in enumerate(examples_list):
    path = os.path.join('Det_datasets', example)
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    print(data)
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                    FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())
  writer.close()
  print('************************')
  print('FINISHED')
  print('************************')
def main(_):
  data_dir = FLAGS.data_dir
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  logging.info('Reading from UMDfaces dataset.')
  examples_path = os.path.join(data_dir, 'ImageSets', FLAGS.set + '.txt')
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
  examples_list = dataset_util.read_examples_list(examples_path)
  for idx, example in enumerate(examples_list):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples_list))
    if idx % 1000 == 0:
      print("Working on #{}/{} image".format(idx, len(examples_list)))
    path = os.path.join(annotations_dir, example + '.xml')
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                    FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())
  writer.close()
def main(_):
  data_dir = FLAGS.data_dir
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  logging.info('Reading from {} dataset at {}...'.format(FLAGS.set, data_dir))
  annotations_dir = os.path.join(data_dir, FLAGS.set, 'annotations')
  idx = 0
  # for idx, example in enumerate(examples_list):
  for xml_file in os.listdir(annotations_dir):
    if idx % 10 == 0:
      logging.info('On image %d', idx)
    xml_path = os.path.join(annotations_dir, xml_file)
    with tf.gfile.GFile(xml_path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                    FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())
    idx += 1
  writer.close()
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  data_dir = FLAGS.data_dir
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
  # Keep only .xml annotation files and strip the extension.
  examples_list = [
      el[0:-4] for el in os.listdir(annotations_dir) if el[-3:] == 'xml'
  ]
  for idx, example in enumerate(examples_list):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples_list))
    path = os.path.join(annotations_dir, example + '.xml')
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                    FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())
  writer.close()
def main(_):
  # data directory
  data_dir = FLAGS.data_dir
  # for output
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  # label map dictionary
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  # path to text file containing the list of jpeg file names (excluding .jpeg)
  examples_path = os.path.join(data_dir, 'filename_list.txt')
  examples_list = dataset_util.read_examples_list(examples_path)
  # path to folder containing annotations
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
  for example in examples_list:
    # path to xml file
    path = os.path.join(annotations_dir, example + '.xml')
    # read xml file
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    # convert to tf.Example format and write to output file
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                    FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())
  writer.close()
def main(_):
  data_dir = FLAGS.data_dir
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  logging.info('Reading from dataset.')
  images_dir = os.path.join(data_dir, FLAGS.images_dir)
  images_path = os.listdir(images_dir)
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
  print(annotations_dir)
  examples_list = [os.path.splitext(x)[0] for x in images_path]
  for idx, example in enumerate(examples_list):
    if idx % 10 == 0:
      logging.info('On image %d of %d', idx, len(examples_list))
    path = os.path.join(annotations_dir, example.split('.')[0] + '.xml')
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, FLAGS.images_dir,
                                    label_map_dict,
                                    FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())
  writer.close()
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  data_dir = FLAGS.data_dir
  train_writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  validation_writer = tf.python_io.TFRecordWriter(
      FLAGS.output_path + ".validation")
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  logging.info('Reading from dataset.')
  annotations_dir = data_dir
  files = glob.glob(os.path.join(data_dir, "*.xml"))
  random.shuffle(files)
  train_files = files[:int(len(files) * 0.8)]
  validation_files = files[int(len(files) * 0.8):]
  for writer, files in [(train_writer, train_files),
                        (validation_writer, validation_files)]:
    for idx, xml_file in enumerate(files):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(files))
      path = xml_file
      with tf.gfile.GFile(path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
      tf_example = dict_to_tf_example(data, path, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
      writer.write(tf_example.SerializeToString())
    # Close each writer once its split has been written.
    writer.close()
def main(_):
  data_dir = FLAGS.data_dir
  years = ['VOC2007', 'VOC2012']
  if FLAGS.year != 'merged':
    years = [FLAGS.year]
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  for year in years:
    logging.info('Reading from PASCAL %s dataset', year)
    examples_path = FLAGS.set
    annotations_dir = FLAGS.annotations_dir
    print(examples_path)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      path = os.path.join(annotations_dir, example + '.xml')
      with tf.gfile.GFile(path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
      tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
      writer.write(tf_example.SerializeToString())
  writer.close()
def main(_):
  args = process_command_line()
  if args.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  output = os.path.join(args.data_dir, args.output_path)
  # touch the file if it doesn't already exist
  if not os.path.exists(output):
    with open(output, 'a'):
      os.utime(output)
  writer = tf.python_io.TFRecordWriter(output)
  label_map_dict = label_map_util.get_label_map_dict(
      os.path.join(args.data_dir, args.label_map_path))
  # Per-label object counters, keyed by label name.
  label_count = {}
  for label_name in label_map_dict:
    label_count[label_name] = 0
  for c in args.collection:
    print('Reading from {0} dataset.'.format(c))
    examples_path = os.path.join(args.data_dir, c, args.set + '.txt')
    png_dir = '{0}/{1}/PNGImages'.format(args.data_dir, c)
    annotations_dir = '{0}/{1}/Annotations'.format(args.data_dir, c)
    with open(examples_path) as fid:
      lines = fid.readlines()
    examples_list = [line.strip() for line in lines]
    for idx, example in enumerate(examples_list):
      if idx % 50 == 0:
        print('Processing image {0} of {1}'.format(idx, len(examples_list)))
      file = os.path.join(annotations_dir, example)
      with open(file, 'r') as fid:
        xml_str = fid.read()
      try:
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example, count = dict_to_tf_example(data, args.data_dir,
                                               label_map_dict, args.labels,
                                               png_dir)
        if tf_example:
          for key, item in count.items():
            label_count[key] += item
          writer.write(tf_example.SerializeToString())
        else:
          print('No objects found in {0}'.format(example))
      except Exception as ex:
        print(ex)
        continue
  writer.close()
  ttl_objs = 0
  for key, item in label_count.items():
    print('{0} {1}'.format(key, item))
    ttl_objs += item
  print('Done. Found {0} examples in {1} set'.format(ttl_objs, args.set))
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  data_dir = FLAGS.data_dir
  output_file = os.path.join(FLAGS.output_dir,
                             'helmet_' + FLAGS.set + '.record')
  writer = tf.python_io.TFRecordWriter(output_file)
  label_map_file = os.path.join(data_dir, FLAGS.label_map_file)
  label_map_dict = label_map_util.get_label_map_dict(label_map_file)
  examples_path = os.path.join(data_dir, 'ImageSets', 'Main',
                               'helmet_' + FLAGS.set + '.txt')
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
  examples_list = dataset_util.read_examples_list(examples_path)
  for idx, example in enumerate(examples_list):
    if idx % 5 == 0:
      print('On image %d of %d' % (idx, len(examples_list)))
    path = os.path.join(annotations_dir, example + '.xml')
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                    FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())
  writer.close()
def create_tf_record(output_filename,
                     num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples,
                     faces_only=True,
                     mask_type='png'):
  """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    num_shards: Number of shards for output file.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
    faces_only: If True, generates bounding boxes for pet faces. Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.
  """
  with contextlib2.ExitStack() as tf_record_close_stack:
    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, output_filename, num_shards)
    for idx, example in enumerate(examples):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples))
      xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
      mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png')
      if not os.path.exists(xml_path):
        logging.warning('Could not find %s, ignoring example.', xml_path)
        continue
      with tf.gfile.GFile(xml_path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
      try:
        tf_example = dict_to_tf_example(
            data,
            mask_path,
            label_map_dict,
            image_dir,
            faces_only=faces_only,
            mask_type=mask_type)
        if tf_example:
          shard_idx = idx % num_shards
          output_tfrecords[shard_idx].write(tf_example.SerializeToString())
      except ValueError:
        logging.warning('Invalid example: %s, ignoring.', xml_path)
def create_tf_record(output_filename, label_map_dict, annotations_dir,
                     image_dir, examples):
  """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
  """
  writer = tf.python_io.TFRecordWriter(output_filename)
  for idx, example in enumerate(examples):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples))
    xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
    if not os.path.exists(xml_path):
      logging.warning('Could not find %s, ignoring example.', xml_path)
      continue
    with tf.gfile.GFile(xml_path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    try:
      tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
      writer.write(tf_example.SerializeToString())
    except ValueError:
      logging.warning('Invalid example: %s, ignoring.', xml_path)
  writer.close()