def load_pascal(data_dir, set_name, is_detect=True, is_seg=False):
    # `set_name` avoids shadowing the built-in `set`.
    assert is_detect != is_seg
    annotations_dir = os.path.join(data_dir, "Annotations")
    image_dir = os.path.join(data_dir, "JPEGImages")
    pascal_ann_dict = dict()
    if is_detect:
        examples_path = os.path.join(data_dir, 'ImageSets', 'Main',
                                     set_name + '.txt')
    if is_seg:
        examples_path = os.path.join(data_dir, 'ImageSets', 'Segmentation',
                                     set_name + '.txt')
    examples_list = dataset_util.read_examples_list(examples_path)
    for example in examples_list:
        path = os.path.join(annotations_dir, example + '.xml')
        with open(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        data["img_path"] = os.path.join(image_dir, data['filename'])
        pascal_ann_dict[data['filename']] = data
    return pascal_ann_dict
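# Usage sketch for load_pascal (illustrative only; the VOCdevkit path below is
# a placeholder, not something this file defines): load the detection train
# split and inspect one parsed record.
def _demo_load_pascal():
    annotations = load_pascal('/path/to/VOCdevkit/VOC2012', 'train')
    for filename, data in annotations.items():
        print(filename, data['img_path'], len(data.get('object', [])))
        break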
def load_anno_sample(image_id, annotations_dir):
    # `annotations_dir` was previously read from an undefined name; it is now
    # an explicit parameter so the function is self-contained.
    anno_file = os.path.join(annotations_dir, image_id + '.xml')
    with open(anno_file, 'r') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    anno_data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    return anno_data
def main(_):
    data_dir = FLAGS.data_dir
    annotations_dir = os.path.join(data_dir, 'labels')
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    img_path = os.path.join(data_dir, 'img')
    examples_list = glob.glob(img_path + '/*.jpg')
    # examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
        # Strip directory and extension portably instead of splitting on '/'.
        example = os.path.splitext(os.path.basename(example))[0]
        path = os.path.join(annotations_dir, example + '.xml')
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example = dict_to_tf_example(
            data=data,
            dataset_directory=data_dir,
            label_map_dict=label_map_dict,
            ignore_difficult_instances=FLAGS.ignore_difficult_instances,
            image_subdirectory='img')
        writer.write(tf_example.SerializeToString())
    writer.close()
def main(_):
    # if FLAGS.set not in SETS:
    #     raise ValueError('set must be in : {}'.format(SETS))
    # if FLAGS.year not in YEARS:
    #     raise ValueError('year must be in : {}'.format(YEARS))
    data_dir = 'E:/computerscience/my projects/humanoid/VOCdevkit'
    years = ['VOC2012']
    # if FLAGS.year != 'merged':
    #     years = [FLAGS.year]
    writer = tf.python_io.TFRecordWriter('pascal_train.record')
    label_map_dict = label_map_util.get_label_map_dict(
        'data/pascal_label_map.pbtxt')
    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     'aeroplane_' + 'train' + '.txt')
        annotations_dir = os.path.join(data_dir, year, 'Annotations')
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, data_dir, label_map_dict)
            print(tf_example)
            writer.write(tf_example.SerializeToString())
            break  # Debug run: only the first example is written.
    writer.close()
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    data_dir = FLAGS.data_dir
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    print('output_path is: {}'.format(FLAGS.output_path))
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    logging.info('Reading from VID dataset.')
    examples_path = os.path.join(data_dir, 'ImageSets', 'VID', 'list',
                                 FLAGS.set + '_list.txt')
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir, 'VID',
                                   FLAGS.set)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
        path = os.path.join(annotations_dir, example)
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                        FLAGS.set)
        writer.write(tf_example.SerializeToString())
    writer.close()
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))
    data_dir = FLAGS.data_dir
    years = ['VOC2007', 'VOC2012']
    if FLAGS.year != 'merged':
        years = [FLAGS.year]
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     'aeroplane_' + FLAGS.set + '.txt')
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, FLAGS.data_dir,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())
    writer.close()
def _create_tf_record_from_coco_annotations(fs, output_path, num_shards=100):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)
        label_map_dict = {"1": "1", "2": "2", "3": "others"}
        for idx, example in enumerate(fs):
            shard_idx = idx % num_shards
            image_path = example[0]
            annotation_file = example[1]
            if shard_idx == 0:
                logging.info('On image %d', idx)
            assert os.path.basename(image_path).replace(".jpg", "") == \
                os.path.basename(annotation_file).replace(".xml", "")
            with tf.gfile.GFile(annotation_file, 'r') as fid:
                xml_str = fid.read()
            try:
                xml = etree.fromstring(xml_str)
            except Exception as ex:
                # Some files carry an XML declaration with an encoding, which
                # lxml rejects for unicode strings; retry without the first line.
                print(ex)
                xml_str = "\n".join(xml_str.split("\n")[1:])
                xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, image_path, label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            output_tfrecords[shard_idx].write(tf_example.SerializeToString())
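# Sketch of how the (image_path, annotation_file) pairs consumed by
# _create_tf_record_from_coco_annotations might be assembled. The directory
# layout and paths here are assumptions for illustration.
def _demo_build_pairs_and_shard():
    import glob
    image_paths = sorted(glob.glob('/data/images/*.jpg'))            # hypothetical
    annotation_paths = sorted(glob.glob('/data/annotations/*.xml'))  # hypothetical
    fs = list(zip(image_paths, annotation_paths))
    _create_tf_record_from_coco_annotations(fs, '/data/out/train.record',
                                            num_shards=10)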
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    data_dir = FLAGS.data_dir
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    logging.info('Reading dataset.')
    examples_path = '/home/wangshiyao/Documents/data/imagenet/gen_list/combine_train_list.txt'
    annotations_dir = '/home/wangshiyao/Documents/data/imagenet/'
    examples_list = dataset_util.read_examples_list(examples_path)
    num_label = [0] * 31
    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
            print(idx, num_label)
        path = os.path.join(annotations_dir, example)
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example = dict_to_tf_example(data, example, FLAGS.data_dir,
                                        label_map_dict, FLAGS.set, num_label)
        # Write disabled: this pass only accumulates per-class counts in
        # num_label via dict_to_tf_example.
        # writer.write(tf_example.SerializeToString())
    writer.close()
def create_record_file(data_dir, output_file, year, split_name):
    years = ['VOC2007', 'VOC2012']
    if year != 'merged':
        years = [year]
    # Create tf.Record writer
    writer = tf.python_io.TFRecordWriter(output_file)
    for year in years:
        print('Creating TFRecord file from PASCAL {} {} dataset'.format(
            year, split_name))
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     split_name + '.txt')
        annotations_dir = os.path.join(data_dir, year, 'Annotations')
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx in trange(0, len(examples_list)):
            example = examples_list[idx]
            # Find and parse annotation xml file
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            # Create tf.Example and add to tf.Record
            tf_example = _dict_to_tf_example(data, data_dir)
            writer.write(tf_example.SerializeToString())
    writer.close()
    print('Saved tf Record to {}\n'.format(output_file))
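# Illustrative call for create_record_file: write the merged VOC2007+VOC2012
# trainval split into a single record file. The paths are placeholders.
def _demo_create_record_file():
    create_record_file('/path/to/VOCdevkit', 'pascal_trainval.record',
                       year='merged', split_name='trainval')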
def load_pascal_single(anno_path, image_dir):
    with open(anno_path, 'r') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    data["img_path"] = os.path.join(image_dir, data['filename'])
    return data
def create_tf_record(output_filename, label_map_dict, annotations_dir, image_dir, examples): """Creates a TFRecord file from examples. Args: output_filename: Path to where output file is saved. label_map_dict: The label map dictionary. annotations_dir: Directory where annotation files are stored. image_dir: Directory where image files are stored. examples: Examples to parse and save to tf record. """ writer = tf.python_io.TFRecordWriter(output_filename) for idx, example in enumerate(examples): if idx % 100 == 0: logging.info('On image %d of %d', idx, len(examples)) path = os.path.join(annotations_dir, 'xmls', example + '.xml') if not os.path.exists(path): logging.warning('Could not find %s, ignoring example.', path) continue with tf.gfile.GFile(path, 'r') as fid: xml_str = fid.read() xml = etree.fromstring(xml_str) data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation'] tf_example = dict_to_tf_example(data, label_map_dict, image_dir) writer.write(tf_example.SerializeToString()) writer.close()
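# Sketch of a typical caller for create_tf_record: shuffle the example ids and
# split them 70/30 into train and val records. The ratio, seed, and output
# filenames are assumptions for illustration.
def _demo_train_val_split(examples, label_map_dict, annotations_dir, image_dir):
    import random
    random.seed(42)
    random.shuffle(examples)
    num_train = int(0.7 * len(examples))
    create_tf_record('train.record', label_map_dict, annotations_dir,
                     image_dir, examples[:num_train])
    create_tf_record('val.record', label_map_dict, annotations_dir,
                     image_dir, examples[num_train:])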
def _load_anno_sample(anno_path):
    '''
    Load a single annotation.
    :param anno_path: path to a PASCAL VOC format annotation file
    :return: the parsed annotation dict
    '''
    with open(anno_path, 'r') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    anno_data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    return anno_data
def statistic():
    annotations_dir = "/Users/hy/Documents/coco/Annotations/"
    all_xml_fs = [
        os.path.join(annotations_dir, f)
        for f in sorted(os.listdir(annotations_dir))
    ]
    all_xml_fs = [f for f in all_xml_fs if f.endswith(".xml")]
    names = []
    pose = []
    truncated = []
    difficult = []
    width = []
    height = []
    depth = []
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for annotation_file in all_xml_fs:
        with tf.gfile.GFile(annotation_file, 'r') as fid:
            xml_str = fid.read()
        try:
            xml = etree.fromstring(xml_str)
        except Exception as ex:
            print(ex)
            xml_str = "\n".join(xml_str.split("\n")[1:])
            xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        names.extend([obj["name"] for obj in data['object']])
        pose.extend([obj["pose"] for obj in data['object']])
        truncated.extend([obj["truncated"] for obj in data['object']])
        difficult.extend([obj["difficult"] for obj in data['object']])
        width.append(data["size"]["width"])
        height.append(data["size"]["height"])
        depth.append(data["size"]["depth"])
        xmin.append(min([float(obj['bndbox']['xmin']) for obj in data['object']]))
        ymin.append(min([float(obj['bndbox']['ymin']) for obj in data['object']]))
        xmax.append(max([float(obj['bndbox']['xmax']) for obj in data['object']]))
        ymax.append(max([float(obj['bndbox']['ymax']) for obj in data['object']]))
    print(set(names))
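# Possible follow-up to statistic(): summarize the collected per-object fields
# with a Counter instead of printing only the unique names. A sketch; it
# assumes lists shaped like those built above.
def _summarize_counts(names, difficult):
    from collections import Counter
    print('per-class counts:', Counter(names))
    print('difficult flags:', Counter(difficult))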
def create_tf_record(output_filename, label_map_dict, annotations_dir,
                     image_dir, examples, faces_only=True, mask_type='png'):
    """Creates a TFRecord file from examples.

    Args:
        output_filename: Path to where output file is saved.
        label_map_dict: The label map dictionary.
        annotations_dir: Directory where annotation files are stored.
        image_dir: Directory where image files are stored.
        examples: Examples to parse and save to tf record.
        faces_only: If True, generates bounding boxes for pet faces. Otherwise
            generates bounding boxes (as well as segmentations for full pet
            bodies).
        mask_type: 'numerical' or 'png'. 'png' is recommended because it leads
            to smaller file sizes.
    """
    writer = tf.python_io.TFRecordWriter(output_filename)
    for idx, example in enumerate(examples):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples))
        xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
        mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png')
        if not os.path.exists(xml_path):
            logging.warning('Could not find %s, ignoring example.', xml_path)
            continue
        with tf.gfile.GFile(xml_path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        try:
            tf_example = dict_to_tf_example(
                data,
                mask_path,
                label_map_dict,
                image_dir,
                faces_only=faces_only,
                mask_type=mask_type)
            writer.write(tf_example.SerializeToString())
        except ValueError:
            logging.warning('Invalid example: %s, ignoring.', xml_path)
    writer.close()
def _load_bboxes_names(anno_path):
    with open(anno_path, 'rb') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    anno_data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    bboxes = []
    names = []
    if "object" in anno_data:
        for obj in anno_data["object"]:
            xmin = int(obj["bndbox"]["xmin"])
            ymin = int(obj["bndbox"]["ymin"])
            xmax = int(obj["bndbox"]["xmax"])
            ymax = int(obj["bndbox"]["ymax"])
            bboxes.append([xmin, ymin, xmax, ymax])
            names.append(obj["name"])
    return bboxes, names
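# Usage sketch for _load_bboxes_names; the annotation path is a placeholder.
def _demo_load_bboxes_names():
    bboxes, names = _load_bboxes_names('/path/to/Annotations/000001.xml')
    for (xmin, ymin, xmax, ymax), name in zip(bboxes, names):
        print('{}: ({}, {}) -> ({}, {})'.format(name, xmin, ymin, xmax, ymax))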
def main(_):
    print(FLAGS.data_dir)
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    # if FLAGS.year not in YEARS:
    #     raise ValueError('year must be in : {}'.format(YEARS))
    data_dir = FLAGS.data_dir
    # years = ['VOC2007', 'VOC2012']
    # if FLAGS.year != 'merged':
    years = [FLAGS.year]
    ACTIONSET = ['tfrecord', 'imageset']
    if FLAGS.action not in ACTIONSET:
        raise ValueError('action must be in : {}'.format(ACTIONSET))
    if FLAGS.action == 'tfrecord':
        pass
    elif FLAGS.action == 'imageset':
        gen_image_set(FLAGS.data_dir, FLAGS.year, FLAGS.imageset)
        return
    writer = tf.io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     FLAGS.imageset + '_' + FLAGS.set + '.txt')
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.io.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str.encode('utf-8'))
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())
    writer.close()
def main(_):
    logging.info('Prepare to process samples in {}'.format(FLAGS.data_dir))
    data_dir = FLAGS.data_dir
    years = list(map(lambda x: x.strip(), str(FLAGS.year).split(',')))
    label_map_file = FLAGS.label_map_path
    if not os.path.exists(label_map_file):
        label_map_file = os.path.join(data_dir, 'label_map.pbtxt')
        if not os.path.exists(label_map_file):
            raise FileNotFoundError('label map file does not exist.')
    label_map_dict = label_map_util.get_label_map_dict(label_map_file)
    output_path = FLAGS.output_path
    if not output_path:
        output_path = os.path.basename(
            os.path.dirname(data_dir + os.sep)) + '.tfrecord'
    logging.info('Prepare to write samples to {}'.format(output_path))
    writer = tf.io.TFRecordWriter(output_path)
    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = gen_image_set(FLAGS.data_dir, year)
        examples_list = dataset_util.read_examples_list(examples_path)
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.io.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str.encode('utf-8'))
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, FLAGS.data_dir, year,
                                            label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())
    writer.close()
def gen_shard(examples_list, annotations_dir, out_filename, root_dir, _set):
    writer = tf.python_io.TFRecordWriter(out_filename)
    for indx, example in enumerate(examples_list):
        # sample frames
        xml_pattern = os.path.join(annotations_dir, example + '/*.xml')
        xml_files = sorted(glob.glob(xml_pattern))
        samples = sample_frames(xml_files)
        for sample in samples:
            dicts = []
            for xml_file in sample:
                # process each single xml
                with tf.gfile.GFile(xml_file, 'r') as fid:
                    xml_str = fid.read()
                xml = etree.fromstring(xml_str)
                dic = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
                dicts.append(dic)
            tf_example = dicts_to_tf_example(dicts, root_dir, _set)
            writer.write(tf_example.SerializeToString())
    writer.close()
    return
def create_tf_record(output_filename, label_map_dict, annotations_dir, image_dir, examples): """Creates a TFRecord file from examples. Args: output_filename: Path to where output file is saved. label_map_dict: The label map dictionary. annotations_dir: Directory where annotation files are stored. image_dir: Directory where image files are stored. examples: Examples to parse and save to tf record. """ writer = tf.python_io.TFRecordWriter(output_filename) for idx, example in enumerate(examples): if idx % 100 == 0: logging.info('On image %d of %d', idx, len(examples)) path = os.path.join(annotations_dir, 'xmls', example + '.xml') #setting the path to load the exact xml if not os.path.exists(path): logging.warning('Could not find %s, ignoring example.', path) continue with tf.gfile.GFile(path, 'r') as fid: #open the xml file as a gfile xml_str = fid.read() xml = etree.fromstring(xml_str) #parse the xml string data = dataset_util.recursive_parse_xml_to_dict(xml)[ 'annotation'] #annotation data as dict tf_example = dict_to_tf_example( data, label_map_dict, image_dir ) #This outputs the ecoded image pixels , heights width , and ground truth cordinates writer.write(tf_example.SerializeToString() ) #writing this to a tensorflow record mode writer.close()
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    data_dir = FLAGS.data_dir
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    print('Reading from PASCAL dataset.')
    examples_path = os.path.join(data_dir, 'ImageSets', 'Main',
                                 FLAGS.set + '.txt')
    if FLAGS.include_segment_class or FLAGS.include_segment_object:
        examples_path = os.path.join(data_dir, 'ImageSets', 'Segmentation',
                                     FLAGS.set + '.txt')
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
        path = os.path.join(annotations_dir, example + '.xml')
        mask_filename = None
        if FLAGS.include_segment_class or FLAGS.include_segment_object:
            mask_filename = example + ".png"
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example = dict_to_tf_example(
            data, FLAGS.data_dir, label_map_dict,
            FLAGS.ignore_difficult_instances,
            mask_filename=mask_filename,
            include_segment_class=FLAGS.include_segment_class,
            include_segment_object=FLAGS.include_segment_object)
        writer.write(tf_example.SerializeToString())
    writer.close()
def main(_):
    data_dir = FLAGS.data_dir
    name = FLAGS.name
    data_folder = os.path.join(data_dir, name)
    annotations_dir = os.path.join(data_dir, name, FLAGS.annotations_dir)
    all_annotations = get_all_annotations(annotations_dir)
    train_set = os.path.join(data_folder, "ImageSets", "Main") + os.sep + 'train.txt'
    val_set = os.path.join(data_folder, "ImageSets", "Main") + os.sep + 'val.txt'
    if not os.path.exists(train_set):
        logging.info("Train set not found, generating 80% split from all data.")
        write_annotations(all_annotations[:int(len(all_annotations) * 0.8)],
                          data_folder, 'train')
    if not os.path.exists(val_set):
        logging.info("Validation set not found, generating 20% split from all data.")
        write_annotations(all_annotations[int(len(all_annotations) * 0.8):],
                          data_folder, 'val')
    label_map_path = FLAGS.label_map_path
    if not os.path.exists(label_map_path):
        logging.info("%s not found, trying %s", label_map_path, data_folder)
        label_map_path = os.path.join(data_folder, FLAGS.label_map_path)
        if not os.path.exists(label_map_path):
            logging.info("%s not found, failed!", label_map_path)
            return
    output_path = FLAGS.output_path
    if not output_path:
        out_name = os.path.basename(data_folder)
        if not out_name:
            out_name = os.path.basename(data_dir)
        output_path = os.path.basename(out_name) + '.tfrecord'
    logging.info("Using label map path: %s.", label_map_path)
    logging.info("Using annotations dir: %s.", annotations_dir)
    logging.info("Using output path: %s.", output_path)
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)
    for set_name, image_set_path in zip(('train', 'val'), (train_set, val_set)):
        logging.info("Generating data set %s in %s.", set_name, image_set_path)
        examples_list = dataset_util.read_examples_list(image_set_path)
        writer = tf.io.TFRecordWriter(
            os.path.splitext(output_path)[0] + '_' + set_name +
            os.path.splitext(output_path)[1])
        step = max(len(examples_list) // 10 // 100 * 100, 10)
        for idx, example in enumerate(examples_list):
            if idx % step == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            if not Path(path).exists():
                logging.error(
                    'Annotation xml %s does not exist; press Enter to '
                    'continue, or q to quit.', path)
                key = input()
                if key == 'q':
                    break
                else:
                    continue
            with tf.io.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str.encode('utf-8'))
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, data_folder, label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            if tf_example:
                writer.write(tf_example.SerializeToString())
        writer.close()
def _load_anno_sample(anno_path):
    with open(anno_path, 'r') as fid:
        xml_str = fid.read()
    # Parse into a separate name instead of reusing xml_str for the tree.
    xml = etree.fromstring(xml_str)
    anno_data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    return anno_data
def main(_):
    logging.info('Prepare to process samples in {}'.format(FLAGS.data_dir))
    data_dir = FLAGS.data_dir
    years = list(map(lambda x: x.strip(), str(FLAGS.year).split(',')))
    label_map_file = FLAGS.label_map_path
    if not os.path.exists(label_map_file):
        label_map_file = os.path.join(data_dir, 'label_map.pbtxt')
        if not os.path.exists(label_map_file):
            raise FileNotFoundError('label map file does not exist.')
    label_map_dict = label_map_util.get_label_map_dict(label_map_file)
    # output path
    output_path = FLAGS.output_path
    if not output_path:
        output_path = '.'  # os.path.basename(os.path.dirname(data_dir + os.sep)) + '.tfrecord'
    logging.info('Prepare to write samples to {}'.format(output_path))
    # Default split ratio is 6:2:2 for train/valid/test.
    sample_name = os.path.basename(os.path.dirname(data_dir + os.sep))
    output_train = output_path + os.sep + sample_name + '_train.tfrecord'
    output_valid = output_path + os.sep + sample_name + '_valid.tfrecord'
    output_test = output_path + os.sep + sample_name + '_test.tfrecord'
    writers = {
        output_train: tf.io.TFRecordWriter(output_train),
        output_valid: tf.io.TFRecordWriter(output_valid),
        output_test: tf.io.TFRecordWriter(output_test),
    }
    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = gen_image_set(FLAGS.data_dir, year)
        examples_list = dataset_util.read_examples_list(examples_path)
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.io.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str.encode('utf-8'))
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, FLAGS.data_dir, year,
                                            label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            # Route each example: 1-60 train, 61-80 valid, 81-100 test.
            random_val = random.randint(1, 100)
            writer = writers[output_train]
            if 60 < random_val <= 80:
                writer = writers[output_valid]
            elif random_val > 80:
                writer = writers[output_test]
            writer.write(tf_example.SerializeToString())
    for writer in writers.values():
        writer.close()
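# Sanity-check sketch for the random 6:2:2 split above: count the records in
# each output file after writing. Assumes a TF runtime that provides
# tf.data.TFRecordDataset, consistent with the tf.io writers used above.
def _count_records(path):
    return sum(1 for _ in tf.data.TFRecordDataset(path))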
def xxx(fs):
    # Debug visualization: draw the class-2 boxes of each annotation onto its
    # image and display it.
    for idx, example in enumerate(fs):
        print("idx", idx)
        if idx < 10:  # skip the first ten examples
            continue
        image_path = example[0]
        annotation_file = example[1]
        print("image_path", image_path)
        print("annotation_file", annotation_file)
        assert os.path.basename(image_path).replace(".jpg", "") == \
            os.path.basename(annotation_file).replace(".xml", "")
        with tf.gfile.GFile(annotation_file, 'r') as fid:
            xml_str = fid.read()
        try:
            xml = etree.fromstring(xml_str)
        except Exception as ex:
            print(ex)
            xml_str = "\n".join(xml_str.split("\n")[1:])
            xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        full_path = image_path
        with tf.gfile.GFile(full_path, 'rb') as fid:
            encoded_jpg = fid.read()
        encoded_jpg_io = io.BytesIO(encoded_jpg)
        image = PIL.Image.open(encoded_jpg_io)
        width = int(data['size']['width'])
        height = int(data['size']['height'])
        print("width", width)
        print("height", height)
        xmin = []
        ymin = []
        xmax = []
        ymax = []
        if 'object' in data:
            for obj in data['object']:
                if int(obj['name']) != 2:
                    continue
                obj_xmin = float(obj['bndbox']['xmin'])
                obj_ymin = float(obj['bndbox']['ymin'])
                obj_xmax = float(obj['bndbox']['xmax'])
                obj_ymax = float(obj['bndbox']['ymax'])
                # Coordinate sanity checks disabled:
                # assert width > obj_xmin > 0
                # assert height > obj_ymin > 0
                # assert width > obj_xmax > 0
                # assert height > obj_ymax > 0
                # assert obj_xmin < obj_xmax
                # assert obj_ymin < obj_ymax
                # Normalize coordinates to [0, 1].
                xmin.append(obj_xmin / width)
                ymin.append(obj_ymin / height)
                xmax.append(obj_xmax / width)
                ymax.append(obj_ymax / height)
        bboxes = np.array([ymin, xmin, ymax, xmax]).transpose([1, 0])
        image = image.convert("RGB")
        draw_bounding_boxes_on_image(image, bboxes, color="red", thickness=4)
        image.show()