def main(_):
    """Converts the Oxford-IIIT Pet dataset into train/val TFRecords.

    Reads image ids from annotations/trainval.txt, shuffles with a fixed
    seed, splits 70/30, and writes one record file per split.
    """
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Pet dataset.')
    image_dir = os.path.join(data_dir, 'images')
    annotations_dir = os.path.join(data_dir, 'annotations')
    examples_path = os.path.join(annotations_dir, 'trainval.txt')
    examples_list = dataset_util.read_examples_list(examples_path)

    # Test images are not included in the downloaded data set, so we shall
    # perform our own split. The fixed seed keeps the split reproducible.
    random.seed(42)
    random.shuffle(examples_list)
    num_examples = len(examples_list)
    num_train = int(0.7 * num_examples)
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
    # BUG FIX: the '*_with_masks' filenames correspond to the full-body/mask
    # mode, i.e. faces_only == False (consistent with the other converters in
    # this file). The original used `if FLAGS.faces_only:`, inverting the
    # naming so mask records were produced under faces-only names and vice
    # versa.
    if not FLAGS.faces_only:
        train_output_path = os.path.join(FLAGS.output_dir,
                                         'pet_train_with_masks.record')
        val_output_path = os.path.join(FLAGS.output_dir,
                                       'pet_val_with_masks.record')
    create_tf_record(train_output_path, label_map_dict, annotations_dir,
                     image_dir, train_examples, faces_only=FLAGS.faces_only)
    create_tf_record(val_output_path, label_map_dict, annotations_dir,
                     image_dir, val_examples, faces_only=FLAGS.faces_only)
def main(_):
    """Builds mask train/val TFRecords from the chengdu dataset listing."""
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from chengdu dataset.')
    # The dataset ships a single masks.txt listing; image/annotation dirs are
    # resolved downstream from data_dir.
    examples_path = os.path.join(data_dir, 'masks.txt')
    examples_list = dataset_util.read_examples_list(examples_path)

    # Test images are not included in the downloaded data set, so we shall
    # perform our own split; the fixed seed keeps it reproducible.
    random.seed(42)
    random.shuffle(examples_list)
    split_idx = int(0.7 * len(examples_list))
    train_examples = examples_list[:split_idx]
    val_examples = examples_list[split_idx:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    train_output_path = os.path.join(FLAGS.output_dir, 'mask_train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'mask_pet_val.record')
    create_tf_record(train_output_path, label_map_dict, train_examples, data_dir)
    create_tf_record(val_output_path, label_map_dict, val_examples, data_dir)
def main(_):
    """Converts PASCAL VOC (2007/2012 or merged) into a single TFRecord."""
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))

    data_dir = FLAGS.data_dir
    years = ['VOC2007', 'VOC2012'] if FLAGS.year == 'merged' else [FLAGS.year]
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        # The per-class aeroplane_<set>.txt lists every image id in the set.
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     'aeroplane_' + FLAGS.set + '.txt')
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            xml_path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(xml_path, 'r') as fid:
                xml_str = fid.read()
            data = dataset_util.recursive_parse_xml_to_dict(
                etree.fromstring(xml_str))['annotation']
            tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())
    writer.close()
def test_read_examples_list(self):
    """read_examples_list keeps only the first whitespace-separated token."""
    contents = """example1 1\nexample2 2"""
    txt_path = os.path.join(self.get_temp_dir(), 'examples.txt')
    with tf.gfile.Open(txt_path, 'wb') as out:
        out.write(contents)
    self.assertListEqual(['example1', 'example2'],
                         dataset_util.read_examples_list(txt_path))
def extract_examples_list(dataset, categories, data_dir):
    """Returns the deduplicated, non-empty example ids across category files.

    NOTE: `dataset` is unused here but kept for interface compatibility.
    Order of the result is unspecified (set semantics), matching the
    original behavior.
    """
    collected = []
    for category in categories:
        listing = os.path.join(data_dir, 'ImageSets', 'Main',
                               str(category) + '_' + FLAGS.set + '.txt')
        collected.extend(dataset_util.read_examples_list(listing))
    return list({example for example in collected if example})
def test_read_examples_list(self):
    """Checks example ids are parsed from an 'id label' per-line file."""
    raw_lines = """example1 1\nexample2 2"""
    listing_file = os.path.join(self.get_temp_dir(), 'examples.txt')
    with tf.gfile.Open(listing_file, 'wb') as handle:
        handle.write(raw_lines)
    parsed = dataset_util.read_examples_list(listing_file)
    self.assertListEqual(['example1', 'example2'], parsed)
def main(_):
    """Converts VOC-style datasets into one TFRecord with label remapping.

    Supports either explicit (annotations_dir, images_dir) pairs from a file
    or the standard per-year VOC layout, and a special set name 'all' that
    pairs every .xml with a matching .jpg/.png on disk.

    NOTE(review): relies on module globals `custom_label_map` and
    `label_count` defined elsewhere in the file — TODO confirm they exist.
    """
    print('FLAGS.custom_label_map =', FLAGS.custom_label_map)
    print('FLAGS.skip_category =', FLAGS.skip_category)
    # Prints the module-level mapping before applying flag overrides.
    print('FLAGS.custom_label_map =', custom_label_map)
    if FLAGS.custom_label_map:
        # Flag format: "old1:new1,old2:new2" — merged into the global map.
        custom_label_map.update(dict(label_map.split(':') for label_map in FLAGS.custom_label_map.split(',')))
    # if FLAGS.set not in SETS:
    #   raise ValueError('set must be in : {}'.format(SETS))
    # if FLAGS.year not in YEARS:
    #   raise ValueError('year must be in : {}'.format(YEARS))
    data_dir = FLAGS.data_dir
    years = ['VOC2007', 'VOC2012']
    if FLAGS.year != 'merged':
        years = [FLAGS.year]
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    # Build (annotations_dir, images_dir, year) triples to process.
    anno_image_dir_pairs = []
    if FLAGS.annotation_image_dirs_file:
        # Each line: "<annotations_dir> <images_dir>".
        # NOTE(review): the literal string 'year' is appended as the third
        # element — presumably a placeholder since `year` is only used to
        # build paths in the non-'all' branch; verify against intended usage.
        with open(FLAGS.annotation_image_dirs_file) as f:
            for line in f:
                anno_image_dir_pairs.append(line.split() + ['year'])
    else:
        for year in years:
            annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
            images_dir = os.path.join(data_dir, year, FLAGS.images_dir)
            anno_image_dir_pairs.append((annotations_dir, images_dir, year))
    for annotations_dir, images_dir, year in anno_image_dir_pairs:
        if FLAGS.set == 'all':
            # Use every annotation that has a matching image; report orphans.
            annotation_set = [f[:-4] for f in os.listdir(annotations_dir) if f.endswith('.xml')]
            image_set = [f[:-4] for f in os.listdir(images_dir) if f.endswith('.jpg') or f.endswith('.png')]
            miss_images = set(annotation_set) - set(image_set)
            print('miss images:', len(miss_images), miss_images)
            examples_list = sorted(list(set(annotation_set) & set(image_set)))
        else:
            examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main', FLAGS.set + '.txt')
            examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            # dict_to_tf_example may return None (e.g. skipped/empty images);
            # only non-None examples are written.
            tf_example = dict_to_tf_example(data, images_dir, label_map_dict, FLAGS.ignore_difficult_instances, FLAGS.keep_empty_image)
            if tf_example is not None:
                writer.write(tf_example.SerializeToString())
    writer.close()
    # Final per-label tally, tab-separated (header row then counts).
    print('\t'.join(sorted(label_count.keys())))
    print('\t'.join(map(str, (label_count[k] for k in sorted(label_count.keys())))))
def main(_):
    """Builds Mappy blur train/val TFRecords from predefined ImageSets splits."""
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Mappy Annotation dataset.')
    image_dir = os.path.join(data_dir, 'Images')
    annotations_dir = os.path.join(data_dir, 'Annotations')
    imageSets_dir = os.path.join(data_dir, 'ImageSets')

    # The split comes from the dataset's own train.txt / test.txt, so no
    # random shuffle/split is performed here.
    train_examples = dataset_util.read_examples_list(
        os.path.join(imageSets_dir, 'train.txt'))
    val_examples = dataset_util.read_examples_list(
        os.path.join(imageSets_dir, 'test.txt'))
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    train_output_path = os.path.join(FLAGS.output_dir, 'mappy_blur_train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'mappy_blur_val.record')
    for output_path, examples in ((train_output_path, train_examples),
                                  (val_output_path, val_examples)):
        create_tf_record(output_path, FLAGS.num_shards, label_map_dict,
                         annotations_dir, image_dir, examples,
                         faces_only=FLAGS.faces_only,
                         mask_type=FLAGS.mask_type)
def main(_):
    """Creates train/val TFRecords from pre-split train.txt / eval.txt lists."""
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from dataset.')
    train_examples = dataset_util.read_examples_list(
        os.path.join(data_dir, 'train.txt'))
    val_examples = dataset_util.read_examples_list(
        os.path.join(data_dir, 'eval.txt'))
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    create_tf_record(os.path.join(FLAGS.output_dir, 'tf_train_all.record'),
                     label_map_dict, train_examples)
    create_tf_record(os.path.join(FLAGS.output_dir, 'tf_val_all.record'),
                     label_map_dict, val_examples)
def main(_):
    """Converts a flat (non-yearly) VOC-style dataset into one TFRecord.

    Unlike the stock PASCAL script there is no per-year subdirectory and no
    year validation; image ids come from ImageSets/Main/<set>.txt directly
    under data_dir.
    """
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    data_dir = FLAGS.data_dir
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    # CLEANUP: the original carried three large triple-quoted blocks of
    # "commented-out" code (year validation, year list, the per-year loop).
    # Those were expression statements — evaluated, allocated, and discarded
    # at every call — duplicating the live code below. Removed as dead code.
    examples_path = os.path.join(data_dir, 'ImageSets', 'Main',
                                 FLAGS.set + '.txt')
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
        path = os.path.join(annotations_dir, example + '.xml')
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())
    writer.close()
def main(_):
    """Converts VOC2012 train/val splits (hard-coded local paths) to TFRecords."""
    label_map_dict = label_map_util.get_label_map_dict(
        '/home/nitin/Anand/My_codes/Faster-RCNN tf/pascal_label_map.pbtxt')
    logging.info('Reading from pascal_voc dataset.')
    image_dir = '/home/nitin/Anand/Datasets/pascal_voc/VOCdevkit/VOC2012/JPEGImages'
    annotations_dir = '/home/nitin/Anand/Datasets/pascal_voc/VOCdevkit/VOC2012/Annotations'
    # FIX: the original wrapped these absolute paths in
    # os.path.join(annotations_dir, ...). With an absolute second argument,
    # os.path.join discards the first, so the join was a misleading no-op —
    # use the paths directly.
    train_examples = '/home/nitin/Anand/Datasets/pascal_voc/VOCdevkit/VOC2012/ImageSets/Main/train.txt'
    valid_examples = '/home/nitin/Anand/Datasets/pascal_voc/VOCdevkit/VOC2012/ImageSets/Main/val.txt'
    train_list = dataset_util.read_examples_list(train_examples)
    valid_list = dataset_util.read_examples_list(valid_examples)
    logging.info('%d training and %d validation examples.',
                 len(train_list), len(valid_list))

    train_output_path = 'train.record'
    val_output_path = 'val.record'
    create_tf_record(train_output_path, label_map_dict, annotations_dir,
                     image_dir, train_list)
    create_tf_record(val_output_path, label_map_dict, annotations_dir,
                     image_dir, valid_list)
def main(_):
    """Builds COCO-format train/val TFRecords from a single annotations JSON.

    Train/val membership is driven by two external image-list files; ids are
    resolved against the JSON's 'images' section before sharded writing.
    """
    # Ensure the output directory exists before opening writers.
    if not tf.gfile.IsDirectory(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)
    train_images = dataset_util.read_examples_list(FLAGS.train_txt_path)
    val_images = dataset_util.read_examples_list(FLAGS.val_txt_path)
    annotations_file = FLAGS.annotations_file
    train_output_path = os.path.join(FLAGS.output_dir, 'train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'val.record')
    # ExitStack collects the sharded record writers opened inside
    # _create_tf_record_from_coco_annotations so they all close together.
    with contextlib2.ExitStack() as tf_record_close_stack, tf.gfile.GFile(
        annotations_file, 'r') as fid:
        groundtruth_data = json.load(fid)
        category_index = label_map_util.create_category_index(
            groundtruth_data['categories'])
        images = groundtruth_data['images']
        # Map listed filenames back to COCO image ids for each split.
        image_filename_list = get_image_filename_list(images)
        train_images_id_list = get_images_id_list(image_filename_list, train_images)
        val_images_id_list = get_images_id_list(image_filename_list, val_images)
        # Per-split annotation lookup keyed by image id.
        train_annotations_index = get_annotations_indx(groundtruth_data,
                                                       train_images_id_list)
        val_annotations_index = get_annotations_indx(groundtruth_data,
                                                     val_images_id_list)
        _create_tf_record_from_coco_annotations(tf_record_close_stack,
                                                category_index, images,
                                                train_images_id_list,
                                                train_images,
                                                train_annotations_index,
                                                train_output_path,
                                                FLAGS.include_masks,
                                                num_shards=1)
        _create_tf_record_from_coco_annotations(tf_record_close_stack,
                                                category_index, images,
                                                val_images_id_list, val_images,
                                                val_annotations_index,
                                                val_output_path,
                                                FLAGS.include_masks,
                                                num_shards=1)
    print('# Finished.')
def main(_):
    """Regenerates trainval.txt from the images directory, then builds records.

    The dataset ships without trainval.txt, so it is rebuilt here: one line
    per image filename (extension stripped), then split 70/30 into train/val.
    """
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Pet dataset.')
    image_dir = os.path.join(data_dir, 'images')
    annotations_dir = os.path.join(data_dir, 'annotations')

    # Rebuild the missing trainval.txt from the image listing: filename
    # without its extension, one per line.
    file_name = [item.strip().split('.')[0] for item in os.listdir(image_dir)]
    temp_dir = os.path.abspath(os.path.join(annotations_dir, 'trainval.txt'))
    with open(temp_dir, 'w') as f:
        for item in file_name:
            f.write(item + '\n')

    examples_path = os.path.join(annotations_dir, 'trainval.txt')
    examples_list = dataset_util.read_examples_list(examples_path)

    # Test images are not included in the downloaded data set, so we shall
    # perform our own 70/30 split; the fixed seed makes it reproducible.
    random.seed(42)
    random.shuffle(examples_list)
    num_train = int(0.7 * len(examples_list))
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
    create_tf_record(train_output_path, label_map_dict, annotations_dir,
                     image_dir, train_examples)
    create_tf_record(val_output_path, label_map_dict, annotations_dir,
                     image_dir, val_examples)
def main(_):
    """Builds train/val TFRecords from fixed relative dataset paths."""
    label_map_dict = label_map_util.get_label_map_dict('annotations/label_map.pbtxt')

    logging.info('Reading from Pet dataset.')
    image_dir = 'images'
    annotations_dir = 'annotations'
    train_examples = dataset_util.read_examples_list(
        os.path.join(annotations_dir, 'train.txt'))
    val_examples = dataset_util.read_examples_list(
        os.path.join(annotations_dir, 'test.txt'))
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    create_tf_record('train.record', label_map_dict, annotations_dir,
                     image_dir, train_examples)
    create_tf_record('val.record', label_map_dict, annotations_dir,
                     image_dir, val_examples)
def main(_):
    """Converts the dataset to TFRecords, generating trainval.txt if absent."""
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Pet dataset.')
    image_dir = os.path.join(data_dir, 'images')
    annotations_dir = os.path.join(data_dir, 'annotations')
    examples_path = os.path.join(annotations_dir, 'trainval.txt')
    xmls_path = os.path.join(annotations_dir, 'xmls')

    # If trainval.txt is missing, derive it from the xml annotation filenames
    # and persist it for future runs.
    if os.path.exists(examples_path):
        examples_list = dataset_util.read_examples_list(examples_path)
    else:
        examples_list = [stem for stem, ext in
                         (os.path.splitext(x) for x in os.listdir(xmls_path))
                         if ext == '.xml']
        with open(examples_path, 'w') as f:
            f.write('\n'.join(examples_list))

    # Test images are not shipped with the dataset; make our own seeded
    # 70/30 split.
    random.seed(42)
    random.shuffle(examples_list)
    num_train = int(0.7 * len(examples_list))
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
    create_tf_record(train_output_path, label_map_dict, annotations_dir,
                     image_dir, train_examples, faces_only=FLAGS.faces_only,
                     mask_type=FLAGS.mask_type)
    create_tf_record(val_output_path, label_map_dict, annotations_dir,
                     image_dir, val_examples, faces_only=FLAGS.faces_only,
                     mask_type=FLAGS.mask_type)
def main(_):
    """Converts a flat VOC-style dataset, normalizing boxes and metadata."""
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    data_dir = FLAGS.data_dir
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from dataset.')
    examples_path = os.path.join(data_dir, 'ImageSets', 'Main',
                                 'imageset_' + FLAGS.set + '.txt')
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
        xml_path = os.path.join(annotations_dir, example + '.xml')
        with tf.gfile.GFile(xml_path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        print(data)
        # Fill in metadata the annotations lack and point at the .png image.
        data['folder'] = ''
        data['filename'] = example + '.png'
        for obj in data['object']:
            obj['pose'] = 'front'
            obj['difficult'] = 1
            obj['truncated'] = 1
            # Repair boxes whose min/max coordinates were annotated swapped.
            bndbox = obj['bndbox']
            if int(bndbox['xmin']) > int(bndbox['xmax']):
                bndbox['xmin'], bndbox['xmax'] = bndbox['xmax'], bndbox['xmin']
            if int(bndbox['ymin']) > int(bndbox['ymax']):
                bndbox['ymin'], bndbox['ymax'] = bndbox['ymax'], bndbox['ymin']
        tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())
    writer.close()
def main(_):
    """Builds pet faces (or full-body mask) TFRecords with a 70/30 split."""
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Pet dataset.')
    image_dir = os.path.join(data_dir, 'images')
    annotations_dir = os.path.join(data_dir, 'annotations')
    examples_path = os.path.join(annotations_dir, 'trainval.txt')
    examples_list = dataset_util.read_examples_list(examples_path)

    # Test images are not included in the download, so split ourselves; the
    # fixed seed keeps it reproducible.
    random.seed(42)
    random.shuffle(examples_list)
    split = int(0.7 * len(examples_list))
    train_examples = examples_list[:split]
    val_examples = examples_list[split:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    if FLAGS.faces_only:
        train_output_path = os.path.join(FLAGS.output_dir,
                                         'pet_faces_train.record')
        val_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_val.record')
    else:
        train_output_path = os.path.join(
            FLAGS.output_dir, 'pets_fullbody_with_masks_train.record')
        val_output_path = os.path.join(FLAGS.output_dir,
                                       'pets_fullbody_with_masks_val.record')
    create_tf_record(train_output_path, FLAGS.num_shards, label_map_dict,
                     annotations_dir, image_dir, train_examples,
                     faces_only=FLAGS.faces_only)
    create_tf_record(val_output_path, FLAGS.num_shards, label_map_dict,
                     annotations_dir, image_dir, val_examples,
                     faces_only=FLAGS.faces_only)
def main(_):
    """Converts a PASCAL VOC set into FLAGS.NUM_SHARDS sharded TFRecords.

    Examples are written in order; the writer rolls over to the next shard
    file every num_per_shard examples.
    """
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))
    data_dir = FLAGS.data_dir

    output_filenames = []
    for shard_id in range(FLAGS.NUM_SHARDS):
        output_filenames.append(
            _get_dataset_filename(FLAGS.output_path, FLAGS.set, shard_id,
                                  FLAGS.NUM_SHARDS))
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from PASCAL %s dataset.', FLAGS.year)
    examples_path = os.path.join(data_dir, FLAGS.year, 'ImageSets', 'Main',
                                 FLAGS.set + '.txt')
    annotations_dir = os.path.join(data_dir, FLAGS.year, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)

    num_per_shard = int(math.ceil(len(examples_list) / float(FLAGS.NUM_SHARDS)))
    shard_id = 0
    end_ndx = min(num_per_shard, len(examples_list))
    writer = tf.python_io.TFRecordWriter(output_filenames[shard_id])
    for idx, example in enumerate(examples_list):
        # BUG FIX: the original rolled shards on `idx > end_ndx`, which put
        # one extra example in each shard, and the guard
        # `shard_id < FLAGS.NUM_SHARDS` let shard_id reach NUM_SHARDS —
        # an IndexError on output_filenames. Roll on `idx >= end_ndx` and
        # cap at the last shard.
        if idx >= end_ndx and shard_id < FLAGS.NUM_SHARDS - 1:
            writer.close()
            shard_id += 1
            end_ndx = min((shard_id + 1) * num_per_shard, len(examples_list))
            writer = tf.python_io.TFRecordWriter(output_filenames[shard_id])
        path = os.path.join(annotations_dir, example + '.xml')
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())
    writer.close()
def main(_):
    """Writes PASCAL VOC examples into fixed-size TFRecord shards per year."""
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))
    data_dir = FLAGS.data_dir
    years = ['VOC2007', 'VOC2012'] if FLAGS.year == 'merged' else [FLAGS.year]
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     'aeroplane_' + FLAGS.set + '.txt')
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)
        # Shard size is user-controlled; the number of shards follows from it.
        num_per_shard = int(FLAGS.images_per_share)
        num_shards = int(math.ceil(len(examples_list) / num_per_shard))
        for shard_id in range(num_shards):
            output_filename = _get_dataset_filename(
                FLAGS.output_path, FLAGS.set, year, shard_id, num_shards)
            start_ndx = shard_id * num_per_shard
            end_ndx = min(start_ndx + num_per_shard, len(examples_list))
            with tf.python_io.TFRecordWriter(output_filename) as writer:
                for i in range(start_ndx, end_ndx):
                    sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                        i + 1, len(examples_list), shard_id))
                    sys.stdout.flush()
                    xml_path = os.path.join(annotations_dir,
                                            examples_list[i] + '.xml')
                    with tf.gfile.GFile(xml_path, 'r') as fid:
                        xml_str = fid.read()
                    data = dataset_util.recursive_parse_xml_to_dict(
                        etree.fromstring(xml_str))['annotation']
                    tf_example = dict_to_tf_example(
                        data, FLAGS.data_dir, label_map_dict,
                        FLAGS.ignore_difficult_instances)
                    writer.write(tf_example.SerializeToString())
def main(_):
    """Builds mengniu train/val TFRecords with a 90/10 random split."""
    image_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    logging.basicConfig(filename='mengniu.log', level=logging.WARNING)
    logging.info('Reading from mengniu dataset.')

    annotations_dir = FLAGS.anno_dir
    examples_list = dataset_util.read_examples_list(
        os.path.join(annotations_dir, 'trainval.txt'))

    # Test images are not included in the downloaded data set, so we shall
    # perform our own split (seeded for reproducibility).
    random.seed(42)
    random.shuffle(examples_list)
    split = int(0.9 * len(examples_list))
    train_examples = examples_list[:split]
    val_examples = examples_list[split:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    if FLAGS.faces_only:
        train_output_path = os.path.join(FLAGS.output_dir,
                                         'mengniu_train.record')
        val_output_path = os.path.join(FLAGS.output_dir, 'mengniu_val.record')
    else:
        train_output_path = os.path.join(FLAGS.output_dir,
                                         'mengniu_masks_train.record')
        val_output_path = os.path.join(FLAGS.output_dir,
                                       'mengniu_masks_val.record')
    create_tf_record(train_output_path, FLAGS.num_shards, label_map_dict,
                     annotations_dir, image_dir, train_examples,
                     faces_only=FLAGS.faces_only, mask_type=FLAGS.mask_type,
                     use_multiprocess=FLAGS.multi)
    create_tf_record(val_output_path, FLAGS.num_shards, label_map_dict,
                     annotations_dir, image_dir, val_examples,
                     faces_only=FLAGS.faces_only, mask_type=FLAGS.mask_type,
                     use_multiprocess=FLAGS.multi)
def main(_):
    """Converts one or more named VOC-style datasets into a single TFRecord.

    FLAGS.data_name is a ';'-separated list of dataset directory names under
    FLAGS.data_dir; FLAGS.set names the ImageSets/Main split file to use.
    """
    # BUG FIX: ''.split(';') returns [''], so the original
    # `len(data_names) == 0` check could never fire and an empty --data_name
    # slipped through as a dataset named ''. Filter out empty entries first
    # so the validation actually works.
    data_names = [name for name in FLAGS.data_name.split(';') if name]
    if not data_names:
        raise ValueError('.data name must be setted')
    print('.data names: ' + str(data_names))
    data_set = FLAGS.set
    if FLAGS.set == '':
        raise ValueError('set must be setted')
    data_dir = FLAGS.data_dir
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    for data_name in data_names:
        logging.info('Reading from PASCAL %s dataset.', data_name)
        examples_path = os.path.join(data_dir, data_name, 'ImageSets', 'Main',
                                     data_set + '.txt')
        print('Examples path: ', examples_path)
        annotations_dir = os.path.join(data_dir, data_name,
                                       FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)
        print(examples_list)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            print(path)
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, FLAGS.data_dir,
                                            label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())
    writer.close()
def main(_):
    """Writes a TFRecord for each image listed in FLAGS.image_list_path."""
    images_dir = FLAGS.images_dir
    image_files = dataset_util.read_examples_list(FLAGS.image_list_path)
    annotations_dir = os.path.join(images_dir, FLAGS.annotations_dir)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    for idx, image_file in enumerate(image_files):
        print(idx, image_file)
        # The annotation xml shares the image's basename (extension swapped).
        basename = image_file.split('/')[-1]
        annotation_path = os.path.join(
            annotations_dir, os.path.splitext(basename)[0] + '.xml')
        with tf.gfile.GFile(annotation_path, 'r') as fid:
            xml_str = fid.read()
        data = dataset_util.recursive_parse_xml_to_dict(
            etree.fromstring(xml_str))['annotation']
        tf_example = dict_to_tf_example(data, image_file, annotations_dir,
                                        label_map_dict, FLAGS.include_masks,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())
    writer.close()
def main(_):
    """Builds crobot train/val TFRecords (optionally with masks)."""
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from crobot dataset.')
    image_dir = os.path.join(data_dir, 'images')
    annotations_dir = os.path.join(data_dir, 'annotations')
    examples_list = dataset_util.read_examples_list(
        os.path.join(data_dir, 'trainval.txt'))

    # Test images are not included in the downloaded data set, so we shall
    # perform our own 70/30 split (seeded for reproducibility).
    random.seed(42)
    random.shuffle(examples_list)
    split = int(0.7 * len(examples_list))
    train_examples = examples_list[:split]
    val_examples = examples_list[split:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    if FLAGS.bbox_only:
        train_output_path = os.path.join(FLAGS.output_dir,
                                         'crobot_train.record')
        val_output_path = os.path.join(FLAGS.output_dir, 'crobot_val.record')
    else:
        train_output_path = os.path.join(FLAGS.output_dir,
                                         'crobot_train_with_masks.record')
        val_output_path = os.path.join(FLAGS.output_dir,
                                       'crobot_val_with_masks.record')
    print('Create train set TFRecord')
    create_tf_record(train_output_path, label_map_dict, annotations_dir,
                     image_dir, train_examples, bbox_only=FLAGS.bbox_only,
                     mask_type=FLAGS.mask_type)
    print('Create val set TFRecord')
    create_tf_record(val_output_path, label_map_dict, annotations_dir,
                     image_dir, val_examples, bbox_only=FLAGS.bbox_only,
                     mask_type=FLAGS.mask_type)
def create_records(data_dir, to_path='data/train.tfrecord'):
    """Writes one TFRecord per annotated example under data_dir.

    Returns a dict mapping example index -> list of object class names, so
    the caller can inspect the label distribution.

    NOTE(review): `label_map_dict` is not defined in this function — it must
    be a module-level global, otherwise this raises NameError; confirm.
    NOTE(review): if dict_to_tf_example raises, the except block drops into
    pdb and then falls through to writer.write with `tf_example` possibly
    unbound (NameError on the first failure) or stale (previous example
    written twice) — flagged, left as-is since the pdb hook is marked TODO.
    """
    annotations_dir, examples_path = get_fun_paths(data_dir)
    writer = tf.python_io.TFRecordWriter(to_path)
    labels = {}
    examples_list = dataset_util.read_examples_list(examples_path)
    # Fail fast on an empty listing rather than writing an empty record file.
    assert len(examples_list) > 0, examples_path
    for i, example in enumerate(examples_list):
        path = os.path.join(annotations_dir, example + '.xml')
        data = xml_to_dict(path)
        # Every example is expected to carry at least one object annotation.
        assert 'object' in data, data['filename']
        labels[i] = [k['name'] for k in data['object']]
        try:
            tf_example = dict_to_tf_example(data, data_dir, label_map_dict)
        except Exception as e: #TODO(SS): remove me
            print(e)
            import pdb; pdb.set_trace()
        writer.write(tf_example.SerializeToString())
    writer.close()
    return labels # to inspect a bit
def main(_):
    """Builds train/val TFRecords (optionally with masks) from annotations."""
    # Single-argument os.path.join is a no-op; use the flag directly.
    annotations_dir = FLAGS.annotations_dir
    print("include masks", FLAGS.include_masks)
    if FLAGS.include_masks:
        train_output_path = os.path.join(FLAGS.output_path,
                                         'train_with_masks.record')
        val_output_path = os.path.join(FLAGS.output_path,
                                       'val_with_masks.record')
    else:
        train_output_path = os.path.join(FLAGS.output_path, 'train.record')
        val_output_path = os.path.join(FLAGS.output_path, 'val.record')
    # BUG FIX: the original opened a TFRecordWriter on train_output_path here
    # and never used or closed it — leaking the file handle and truncating
    # the very file create_tf_record() writes below. Removed.
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    examples_path = os.path.join("tmp/object_detection/annotations",
                                 'trainval.txt')
    examples_list = dataset_util.read_examples_list(examples_path)
    # Test images are not included in the downloaded data set, so we shall
    # perform our own seeded 70/30 split.
    random.seed(42)
    random.shuffle(examples_list)
    num_train = int(0.7 * len(examples_list))
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    create_tf_record(train_output_path, label_map_dict, annotations_dir,
                     FLAGS.images_dir, train_examples, FLAGS.include_masks)
    create_tf_record(val_output_path, label_map_dict, annotations_dir,
                     FLAGS.images_dir, val_examples, FLAGS.include_masks)
def main(_):
    """Convert PASCAL-style VOC2018 annotations into one TFRecord file.

    Validates FLAGS.set / FLAGS.year, then reads each example's XML
    annotation and serializes the resulting tf.Example.
    """
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))
    data_dir = FLAGS.data_dir
    years = ['VOC2018']  # custom dataset; 'VOC2007'/'VOC2012' removed
    if FLAGS.year != 'merged':
        years = [FLAGS.year]
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     FLAGS.set + '.txt')
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            # BUG FIX: removed the per-image debug prints
            # (print('*'*20) / print(etree.tostring(xml))) that flooded
            # stdout and slowed the conversion loop.
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, FLAGS.data_dir,
                                            label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())
        # Log (rather than print) how many examples this split produced.
        logging.info('Processed %d examples for %s.', len(examples_list), year)
    writer.close()
def main(_):
    """Build the trainval list, split it 70/30, and write both TFRecords."""
    data_dir = FLAGS.data_dir
    create_trainval_list(data_dir)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    examples_path = os.path.join(data_dir, 'trainval.txt')
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    # Deterministic shuffle so the train/val split is reproducible.
    random.seed(42)
    random.shuffle(examples_list)
    split_at = int(0.7 * len(examples_list))
    create_tf(examples_list[:split_at], annotations_dir, label_map_dict,
              "train")
    create_tf(examples_list[split_at:], annotations_dir, label_map_dict,
              "val")
def save_tf_record(data_info, indices):
    """Write the selected ImageNet-VID examples to a TFRecord file.

    Args:
      data_info: dict with keys 'data_dir', 'output_path',
        'label_map_path', 'set', and 'annotations_dir'.
      indices: collection of example indices (positions in the example
        list) to include in the output record.
    """
    data_dir = data_info['data_dir']
    output_path = data_info['output_path']
    writer = tf.python_io.TFRecordWriter(output_path)
    label_map_dict = label_map_util.get_label_map_dict(
        data_info['label_map_path'])
    logging.info('Reading from ImageNet-VID dataset.')
    examples_path = os.path.join(data_dir, 'AL', data_info['set'] + '.txt')
    # Annotations always come from train set now (revisit if we include val)
    annotations_dir = os.path.join(data_dir, data_info['annotations_dir'],
                                   'VID', 'train')
    examples_list = dataset_util.read_examples_list(examples_path)
    # PERF: hoist membership into a set -- the original tested
    # `idx in indices` on every iteration, O(len(indices)) per example
    # when `indices` is a list.
    wanted = set(indices)
    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
        if idx not in wanted:
            continue
        # example[-63:-5] strips the '.JPEG' suffix from the fixed-width
        # frame path -- assumes ImageNet-VID's 58-char relative path;
        # TODO confirm against the example-list format.
        example_xml = example[-63:-5] + '.xml'
        path = os.path.join(annotations_dir, example_xml)
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example = dict_to_tf_example(data, data_info['data_dir'],
                                        label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())
    writer.close()
def main(_):
    """Split the example list into train/val sets and write both TFRecords."""
    dataset_root = Path(FLAGS.dataset_dir)
    annotations_dir = dataset_root / ANNOTATION_DIR
    images_dir = dataset_root / IMAGE_DIR
    output_dir = Path(FLAGS.output_dir)
    if not output_dir.exists():
        output_dir.mkdir(parents=True)
    train_output_path = output_dir / f'{FLAGS.output_name}_train.record'
    val_output_path = output_dir / f'{FLAGS.output_name}_val.record'
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    examples = dataset_util.read_examples_list(str(Path(FLAGS.examples_path)))
    train_examples, val_examples = train_test_split(
        examples, test_ratio=FLAGS.val_ratio)
    create_tf_record(train_output_path, annotations_dir, images_dir,
                     label_map_dict, train_examples)
    create_tf_record(val_output_path, annotations_dir, images_dir,
                     label_map_dict, val_examples)
def main(_):
    """Shuffle the sushi trainval list, split 70/30, and write both records."""
    annotations_dir = FLAGS.annotations_dir
    images_dir = FLAGS.images_dir
    train_output_path = os.path.join(FLAGS.output_dir, 'sushi_train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'sushi_val.record')
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    examples = dataset_util.read_examples_list(
        os.path.join(FLAGS.annotations_dir, 'trainval.txt'))
    # Fixed seed keeps the split reproducible across runs.
    random.seed(42)
    random.shuffle(examples)
    cutoff = int(0.7 * len(examples))
    create_tf_record(train_output_path, annotations_dir, images_dir,
                     label_map_dict, examples[:cutoff])
    create_tf_record(val_output_path, annotations_dir, images_dir,
                     label_map_dict, examples[cutoff:])
def main(_, data_dir='/Users/sibylhe/Documents/DR/image_extraction/image/maskrcnn181105/'):
    """Split the minion dataset 75/25 and write masked train/val TFRecords.

    Args:
      _: unused positional argument supplied by the app runner.
      data_dir: dataset root containing 'images/' and 'annotations/'.
        Parameterized (with the original path as default) so the script
        is no longer hard-wired to one machine.
    """
    image_dir = os.path.join(data_dir, 'images/')
    annotation_dir = os.path.join(data_dir, 'annotations/')
    label_map_dict = label_map_util.get_label_map_dict(
        os.path.join(annotation_dir, 'minion_label_map.pbtxt'))
    examples_path = os.path.join(annotation_dir, 'trainval.txt')
    examples_list = dataset_util.read_examples_list(examples_path)
    # BUG FIX: the log message said 'Pet dataset' -- copy-paste leftover
    # from the pet conversion script.
    logging.info('Reading from minion dataset.')
    random.seed(42)  # reproducible split
    random.shuffle(examples_list)
    num_examples = len(examples_list)
    num_train = int(0.75 * num_examples)
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))
    logging.info('Train examples: %s', str(train_examples))
    logging.info('Validation examples: %s', str(val_examples))
    records_dir = os.path.join(data_dir, 'records')
    if not os.path.exists(records_dir):
        os.makedirs(records_dir)
    train_output_path = os.path.join(records_dir, 'minion_train.record')
    val_output_path = os.path.join(records_dir, 'minion_val.record')
    create_tf_record(train_output_path, label_map_dict, annotation_dir,
                     image_dir, train_examples, mask_type='png')
    create_tf_record(val_output_path, label_map_dict, annotation_dir,
                     image_dir, val_examples, mask_type='png')
def generate_cropped_images():
    """Crop labeled objects from each dataset into per-category directories.

    Iterates the rothschild/google/noor datasets, parses each example's
    XML annotation, and delegates the actual cropping to
    `save_cropped_images`.
    """
    datasets = ['rothschild', 'google', 'noor']
    # Category ids 11-48 in four blocks of eight (quadrant/position
    # numbering -- presumably FDI tooth codes; confirm with the label map).
    categories = [
        11, 12, 13, 14, 15, 16, 17, 18, 21, 22, 23, 24, 25, 26, 27, 28,
        31, 32, 33, 34, 35, 36, 37, 38, 41, 42, 43, 44, 45, 46, 47, 48
    ]
    output_directory = FLAGS.set
    # FIX: create the root once -- the original re-created it on every
    # category iteration -- then one subdirectory per category.
    create_directory_if_not_exists(output_directory)
    for category in categories:
        create_directory_if_not_exists(
            os.path.join(output_directory, str(category)))
    for dataset in datasets:
        data_dir = os.path.join('data_indices', dataset)
        # NOTE(review): the example list is keyed off the first category
        # only ('11_<set>.txt'); presumably every per-category list shares
        # the same image ids -- verify.
        examples_path = os.path.join(
            data_dir, 'ImageSets', 'Main',
            str(categories[0]) + '_' + FLAGS.set + '.txt')
        label_map_dict = label_map_util.get_label_map_dict(
            os.path.join(data_dir, 'pascal_label_map.pbtxt'))
        print('label_map_dict', label_map_dict)
        annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)
        examples_list = [x for x in examples_list if x]  # drop blank lines
        print(examples_list)
        for idx, example in enumerate(examples_list):
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            if 'object' not in data.keys():
                print('No label, ignoring ', path)
                continue
            save_cropped_images(data, data_dir, label_map_dict,
                                categories=categories, dataset_name=dataset)
def main(_):
    """Convert every XML annotation in FLAGS.data_dir into one TFRecord.

    Also reports the global fastener tallies (presumably updated inside
    `dict_to_tf_example` -- confirm) after the conversion finishes.
    """
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    global nuts_count, bolts_count, washer_count, lockwasher_count
    data_dir = FLAGS.data_dir
    annotations_dir = data_dir
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    # BUG FIX: the original called dataset_util.read_examples_list on
    # `data_dir` -- a *directory*, not an example-list file -- and never
    # used the result; removed the dead (and likely failing) call.
    file_count = 0
    for fname in os.listdir(annotations_dir):
        if fname.endswith(".xml"):
            path = os.path.join(annotations_dir, fname)
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, FLAGS.data_dir,
                                            label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            # dict_to_tf_example may return a falsy value for skipped
            # files; count only what was actually written.
            if tf_example:
                writer.write(tf_example.SerializeToString())
                file_count += 1
    writer.close()
    print(
        'processed {} files with {} bolts {} nuts {} washers {} lock washers'.
        format(file_count, bolts_count, nuts_count, washer_count,
               lockwasher_count))
def main(_):
    """Serialize every example of the chosen split into one TFRecord file."""
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    data_dir = FLAGS.data_dir
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    # List of example ids for the set (train/val).
    split_subdir = EXAMPLES_PATHS[FLAGS.set]
    examples_path = os.path.join(data_dir, split_subdir, FLAGS.set + '.txt')
    examples_list = dataset_util.read_examples_list(examples_path)
    print(len(examples_list))
    split_data_dir = os.path.join(data_dir, split_subdir)
    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
        tf_example = dict_to_tf_example(
            split_data_dir,
            name=example,
            ignore_difficult_instances=FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())
    writer.close()
def main(_):
    """Convert each listed XML annotation into a serialized tf.Example."""
    data_dir = FLAGS.data_dir
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    for example in dataset_util.read_examples_list(FLAGS.examples_path):
        xml_path = os.path.join(FLAGS.annotations_dir, example + '.xml')
        with tf.gfile.GFile(xml_path, 'r') as fid:
            xml_str = fid.read()
        annotation = dataset_util.recursive_parse_xml_to_dict(
            etree.fromstring(xml_str))['annotation']
        tf_example = dict_to_tf_example(annotation, FLAGS.data_dir,
                                        label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())
    writer.close()