def __init__(self,
             tensor_key,
             label_map_proto_file,
             shape_keys=None,
             shape=None,
             default_value=''):
    """Builds the class-id lookup tables and initializes the base handler.

    Two hash tables are created from the label map file: one keyed by the
    items' names and one keyed by their display names. Both map to int64
    ids and yield -1 for keys that are missing from the label map.

    Args:
      tensor_key: the name of the `TFExample` feature to read the tensor
        from.
      label_map_proto_file: File path to a text format LabelMapProto message
        mapping class text to id.
      shape_keys: Optional name or list of names of the TF-Example feature
        in which the tensor shape is stored. Forwarded to the base class.
      shape: Optional output shape of the `Tensor`. Forwarded to the base
        class.
      default_value: The value used when the `tensor_key` is not found in a
        particular `TFExample`. Forwarded to the base class.

    Raises:
      ValueError: presumably raised by the base-class initializer when both
        `shape_keys` and `shape` are given -- nothing in this method raises
        it directly.
    """

    def _build_id_table(use_display_name):
        # Every label is expected to be present in the label map; -1 only
        # marks lookups that unexpectedly miss.
        text_to_id = label_map_util.get_label_map_dict(
            label_map_proto_file, use_display_name=use_display_name)
        key_tensor = tf.constant(list(text_to_id.keys()))
        id_tensor = tf.constant(list(text_to_id.values()), dtype=tf.int64)
        table_init = tf.contrib.lookup.KeyValueTensorInitializer(
            keys=key_tensor, values=id_tensor)
        return tf.contrib.lookup.HashTable(
            initializer=table_init, default_value=-1)

    by_name = _build_id_table(False)
    by_display_name = _build_id_table(True)

    self._name_to_id_table = by_name
    self._display_name_to_id_table = by_display_name
    super(_ClassTensorHandler, self).__init__(tensor_key, shape_keys, shape,
                                              default_value)
def main(_):
    """Splits the examples listed in `masks.txt` 70/30 and writes two records.

    The example list is shuffled with a fixed seed (42) so the split is
    reproducible for a given list. The training part is written to
    `mask_train.record` and the validation part to `mask_pet_val.record`,
    both inside `FLAGS.output_dir`.
    """
    source_dir = FLAGS.data_dir
    class_ids = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from chengdu dataset.')
    all_examples = dataset_util.read_examples_list(
        os.path.join(source_dir, 'masks.txt'))

    # No separate test list is used here, so the split is made locally.
    random.seed(42)
    random.shuffle(all_examples)
    split_at = int(0.7 * len(all_examples))
    training_part = all_examples[:split_at]
    validation_part = all_examples[split_at:]
    logging.info('%d training and %d validation examples.',
                 len(training_part), len(validation_part))

    jobs = (
        ('mask_train.record', training_part),
        ('mask_pet_val.record', validation_part),
    )
    for record_name, subset in jobs:
        create_tf_record(
            os.path.join(FLAGS.output_dir, record_name),
            class_ids,
            subset,
            source_dir)
def main(_):
    """Creates training and validation TFRecords from `trainval.txt`.

    Examples are read from `<data_dir>/annotations/trainval.txt`, shuffled
    with a fixed seed (42) and split 70/30. The records are written to
    `FLAGS.output_dir`; when `FLAGS.faces_only` is set the `*_with_masks`
    file names are used instead of the plain ones.
    """
    root = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Pet dataset.')
    image_dir = os.path.join(root, 'images')
    annotations_dir = os.path.join(root, 'annotations')
    examples = dataset_util.read_examples_list(
        os.path.join(annotations_dir, 'trainval.txt'))

    # The list does not come with a held-out split, so one is made here.
    random.seed(42)
    random.shuffle(examples)
    cut = int(0.7 * len(examples))
    train_examples, val_examples = examples[:cut], examples[cut:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    if FLAGS.faces_only:
        train_name = 'pet_train_with_masks.record'
        val_name = 'pet_val_with_masks.record'
    else:
        train_name = 'pet_train.record'
        val_name = 'pet_val.record'

    for record_name, subset in ((train_name, train_examples),
                                (val_name, val_examples)):
        create_tf_record(
            os.path.join(FLAGS.output_dir, record_name),
            label_map_dict,
            annotations_dir,
            image_dir,
            subset,
            faces_only=FLAGS.faces_only)
def main(_):
    """Converts PASCAL VOC annotations into a single TFRecord file.

    For every selected year the example ids are read from
    `ImageSets/Main/aeroplane_<set>.txt`, the matching XML annotation is
    parsed, converted with `dict_to_tf_example` and appended to
    `FLAGS.output_path`.

    Raises:
      ValueError: if `FLAGS.set` is not in `SETS` or `FLAGS.year` is not in
        `YEARS`.
    """
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))

    data_dir = FLAGS.data_dir
    years = ['VOC2007', 'VOC2012']
    if FLAGS.year != 'merged':
        years = [FLAGS.year]

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    # Close the writer even when reading or parsing an annotation fails, so
    # the file handle is not leaked.
    try:
        label_map_dict = label_map_util.get_label_map_dict(
            FLAGS.label_map_path)

        for year in years:
            logging.info('Reading from PASCAL %s dataset.', year)
            examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                         'aeroplane_' + FLAGS.set + '.txt')
            annotations_dir = os.path.join(data_dir, year,
                                           FLAGS.annotations_dir)
            examples_list = dataset_util.read_examples_list(examples_path)
            for idx, example in enumerate(examples_list):
                if idx % 100 == 0:
                    logging.info('On image %d of %d', idx,
                                 len(examples_list))
                path = os.path.join(annotations_dir, example + '.xml')
                with tf.gfile.GFile(path, 'r') as fid:
                    xml_str = fid.read()
                xml = etree.fromstring(xml_str)
                data = dataset_util.recursive_parse_xml_to_dict(
                    xml)['annotation']

                tf_example = dict_to_tf_example(
                    data, FLAGS.data_dir, label_map_dict,
                    FLAGS.ignore_difficult_instances)
                writer.write(tf_example.SerializeToString())
    finally:
        writer.close()
def main(_):
    """Writes sharded TFRecords from box (and optional label) annotation CSVs.

    Box annotations, the optional image-level label annotations and the ids
    of all `*.jpg` files found in `FLAGS.input_images_directory` are
    concatenated and grouped by `ImageID`. Each group is converted with
    `tf_example_from_annotations_data_frame`; a non-empty result is written
    to the shard selected by the image id (parsed as hexadecimal) modulo
    `FLAGS.num_shards`.

    Raises:
      ValueError: if one of the required flags is unset.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    required_flags = [
        'input_box_annotations_csv', 'input_images_directory',
        'input_label_map', 'output_tf_record_path_prefix'
    ]
    for flag_name in required_flags:
        if not getattr(FLAGS, flag_name):
            raise ValueError('Flag --{} is required'.format(flag_name))

    label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
    all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv)
    if FLAGS.input_image_label_annotations_csv:
        all_label_annotations = pd.read_csv(
            FLAGS.input_image_label_annotations_csv)
        # Avoid a column-name clash with the box annotations' confidence.
        all_label_annotations.rename(
            columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
    else:
        all_label_annotations = None
    all_images = tf.gfile.Glob(
        os.path.join(FLAGS.input_images_directory, '*.jpg'))
    all_image_ids = [
        os.path.splitext(os.path.basename(v))[0] for v in all_images
    ]
    all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
    all_annotations = pd.concat(
        [all_box_annotations, all_image_ids, all_label_annotations])

    tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = (
            tf_record_creation_util.open_sharded_output_tfrecords(
                tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
                FLAGS.num_shards))

        grouped = all_annotations.groupby('ImageID')
        for counter, (image_id, image_annotations) in enumerate(grouped):
            tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...',
                                   1000, counter)

            # In OID image file names are formed by appending ".jpg" to the
            # image ID.
            image_path = os.path.join(FLAGS.input_images_directory,
                                      image_id + '.jpg')
            # JPEG data is binary: read it as bytes, not as decoded text.
            with tf.gfile.Open(image_path, 'rb') as image_file:
                encoded_image = image_file.read()

            tf_example = (
                oid_tfrecord_creation.tf_example_from_annotations_data_frame(
                    image_annotations, label_map, encoded_image))
            if tf_example:
                shard_idx = int(image_id, 16) % FLAGS.num_shards
                output_tfrecords[shard_idx].write(
                    tf_example.SerializeToString())
def process_images(image_files, output_path):
    """Writes one TF example per image file into a TFRecord.

    For each entry of `image_files` the annotation `<stem>.xml` is read from
    `FLAGS.annotations_dir`, parsed, converted with `dict_to_tf_example`
    (together with the image path and `FLAGS.masks_dir`) and appended to the
    record.

    Args:
      image_files: iterable of image file names, joined onto
        `FLAGS.images_dir`.
      output_path: path of the TFRecord file to create.
    """
    print('# Started ' + output_path)

    annotations_dir = FLAGS.annotations_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    writer = tf.python_io.TFRecordWriter(output_path)
    # Guarantee the writer is closed even if one annotation fails to load
    # or convert.
    try:
        for idx, image_file in enumerate(image_files):
            image_path = os.path.join(FLAGS.images_dir, image_file)
            print(idx, image_path)

            annotation_path = os.path.join(
                annotations_dir, os.path.splitext(image_file)[0] + '.xml')
            with tf.gfile.GFile(annotation_path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, image_path,
                                            FLAGS.masks_dir, label_map_dict)
            writer.write(tf_example.SerializeToString())
    finally:
        writer.close()
def test_get_label_map_dict_from_proto(self):
    """A parsed label map proto must map each item name to its id."""
    proto_text = """ item { id:2 name:'cat' } item { id:1 name:'dog' } """
    parsed_proto = text_format.Parse(
        proto_text, string_int_label_map_pb2.StringIntLabelMap())

    name_to_id = label_map_util.get_label_map_dict(parsed_proto)

    for name, expected_id in (('dog', 1), ('cat', 2)):
        self.assertEqual(name_to_id[name], expected_id)
def convert_lisa_to_tfrecords(data_dir, output_path, label_map_path,
                              validation_set_size):
    """
    Convert the LISA detection dataset to TFRecords.

    The rows of `allAnnotations.csv` (semicolon separated, header skipped)
    are shuffled with a fixed seed (49). The first `validation_set_size`
    rows go to `LISA_val.tfrecord`, the remaining rows to
    `LISA_train.tfrecord`.

    :param data_dir: directory with the name "signDatabasePublicFramesOnly"
    :param output_path: suggest ./data
    :param label_map_path: full path to the label_map
    :param validation_set_size: default of 500 with flag settings
    :return: N/A
    """
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)
    train_count = 0
    val_count = 0

    annotations_csv = os.path.join(data_dir, 'allAnnotations.csv')

    train_writer = tf.python_io.TFRecordWriter(
        os.path.join(output_path, 'LISA_train.tfrecord'))
    # Nested try/finally: both writers get closed even when reading the CSV
    # or preparing an example raises.
    try:
        val_writer = tf.python_io.TFRecordWriter(
            os.path.join(output_path, 'LISA_val.tfrecord'))
        try:
            # parse annotation csv file
            with open(annotations_csv) as csv_file:
                data_reader = csv.reader(csv_file, delimiter=';')
                # Skip the header row; the default keeps an empty file from
                # raising StopIteration.
                next(data_reader, None)
                rows = list(data_reader)

            random.seed(49)
            random.shuffle(rows)

            for img_num, row in enumerate(rows):
                is_validation_img = img_num < validation_set_size
                # Column 0 holds the image path relative to `data_dir`.
                image_path = os.path.join(data_dir, row[0])
                example = prepare_example(image_path, row, label_map_dict)
                if is_validation_img:
                    val_writer.write(example.SerializeToString())
                    val_count += 1
                else:
                    train_writer.write(example.SerializeToString())
                    train_count += 1
        finally:
            val_writer.close()
    finally:
        train_writer.close()

    print("trained with %s images and validated with %s images" %
          (train_count, val_count))
def main(unused_argv):
    """Creates TFRecord file(s) from the label files in `FLAGS.labels_dir`.

    Without `FLAGS.split_train_test` all label files go into a single record
    at `FLAGS.output_path`. With it, the files are split by
    `train_test_split` and written to `train_<name><ext>` and
    `test_<name><ext>` next to the requested output path (`.record` is used
    when the path has no extension).
    """
    data_dir = FLAGS.data_dir
    if FLAGS.labels_dir is None:
        FLAGS.labels_dir = os.path.join(data_dir, 'labels')

    labels_map = label_map_util.get_label_map_dict(FLAGS.labels_map_path)

    label_files_train = [
        os.path.join(FLAGS.labels_dir, file_name)
        for file_name in os.listdir(FLAGS.labels_dir)
    ]
    # Defined up front so the no-split path does not hit unbound names.
    label_files_eval = []
    output_path_eval = None

    output_path_train = FLAGS.output_path
    split_train_test = FLAGS.split_train_test

    print('Total samples: {}'.format(len(label_files_train)))

    # Make sure the output directory exists in both modes.
    dir_path = os.path.dirname(output_path_train)
    if dir_path and not os.path.isdir(dir_path):
        os.makedirs(dir_path)

    if split_train_test:
        label_files_train, label_files_eval = train_test_split(
            label_files_train, test_size=split_train_test, shuffle=True)

        base_name, extension = os.path.splitext(
            os.path.basename(output_path_train))
        if extension == '':
            extension = '.record'

        output_path_train = os.path.join(
            dir_path, 'train_{}{}'.format(base_name, extension))
        output_path_eval = os.path.join(
            dir_path, 'test_{}{}'.format(base_name, extension))

    create_tf_record(label_files_train, data_dir, labels_map,
                     output_path_train)
    print('TF record file for training created with {} samples: {}'.format(
        len(label_files_train), output_path_train))

    if label_files_eval:
        create_tf_record(label_files_eval, data_dir, labels_map,
                         output_path_eval)
        print(
            'TF record file for validation created with {} samples: {}'.format(
                len(label_files_eval), output_path_eval))
def main(_):
    """Exports an inference graph and a JSON list of label names.

    The pipeline config is read from `FLAGS.result_base`, overridden with
    `FLAGS.config_override`, and exported from either the explicit
    `FLAGS.trained_checkpoint_prefix` or the highest-numbered
    `model.ckpt-<N>` found in `FLAGS.trained_checkpoint_path`. Afterwards
    the label names, ordered by their id, are dumped as a JSON array into
    `FLAGS.model_dir`.

    Exits with status 1 when no checkpoint can be found; in that case an
    existing `FLAGS.model_dir` is left untouched.
    """
    import sys  # Local import: only needed for the failure exit below.

    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    config_path = os.path.join(FLAGS.result_base, FLAGS.pipeline_config_path)
    with tf.gfile.GFile(config_path, 'r') as f:
        text_format.Merge(f.read(), pipeline_config)
    text_format.Merge(FLAGS.config_override, pipeline_config)

    if FLAGS.input_shape:
        # '-1' marks an unknown dimension.
        input_shape = [
            int(dim) if dim != '-1' else None
            for dim in FLAGS.input_shape.split(',')
        ]
    else:
        input_shape = None

    if not FLAGS.trained_checkpoint_prefix:
        path = os.path.join(FLAGS.result_base, FLAGS.trained_checkpoint_path)
        regex = re.compile(r"model\.ckpt-([0-9]+)\.index")
        matches = (regex.search(name) for name in os.listdir(path))
        numbers = [int(match.group(1)) for match in matches if match]
        if not numbers:
            print('No checkpoint found!')
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)
        trained_checkpoint_prefix = os.path.join(
            path, 'model.ckpt-{}'.format(max(numbers)))
    else:
        trained_checkpoint_prefix = FLAGS.trained_checkpoint_prefix

    # Clear a previous export only once a checkpoint is known to exist, so a
    # failed lookup does not destroy the last good export.
    if os.path.isdir(FLAGS.model_dir):
        shutil.rmtree(FLAGS.model_dir)

    exporter.export_inference_graph(
        FLAGS.input_type,
        pipeline_config,
        trained_checkpoint_prefix,
        FLAGS.model_dir,
        input_shape=input_shape,
        write_inference_graph=FLAGS.write_inference_graph)

    label_map = get_label_map_dict(
        os.path.join(FLAGS.result_base, FLAGS.label_map_path))
    # Label names ordered by their numeric id.
    label_array = sorted(label_map, key=label_map.get)
    with open(os.path.join(FLAGS.model_dir, FLAGS.output_label_path),
              'w') as f:
        json.dump(label_array, f)
def main(_):
    """Converts the `anomaly_<set>` examples of a PASCAL-style dataset.

    Example ids are read from `ImageSets/Main/anomaly_<set>.txt`; for the
    `trainval` set the train and val lists are concatenated. The list is
    shuffled, each XML annotation is parsed and converted with
    `dict_to_tf_example`, and the result is appended to `FLAGS.output_path`.

    Raises:
      ValueError: if `FLAGS.set` is not in `SETS`.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))

    data_dir = FLAGS.data_dir
    image_sets_dir = os.path.join(data_dir, 'ImageSets', 'Main')

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    # Close the writer even if an annotation cannot be read or converted.
    try:
        label_map_dict = label_map_util.get_label_map_dict(
            FLAGS.label_map_path)

        # The placeholder previously had no argument and was logged verbatim.
        logging.info('Reading from PASCAL %s dataset.', FLAGS.set)
        annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)

        if FLAGS.set == 'trainval':
            examples_path_val = os.path.join(image_sets_dir,
                                             'anomaly_val.txt')
            examples_path_train = os.path.join(image_sets_dir,
                                               'anomaly_train.txt')
            examples_list_train = dataset_util.read_examples_list(
                examples_path_train)
            examples_list_val = dataset_util.read_examples_list(
                examples_path_val)
            examples_list = examples_list_train + examples_list_val
        else:
            examples_path = os.path.join(image_sets_dir,
                                         'anomaly_' + FLAGS.set + '.txt')
            examples_list = dataset_util.read_examples_list(examples_path)

        # shuffle files
        shuffle(examples_list)

        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                tf.logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(
                data, FLAGS.data_dir, label_map_dict,
                FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())
    finally:
        writer.close()
    tf.logging.info('TFrecord created!')
def main(_):
    """Generates `trainval.txt` from the image folder and writes two records.

    `<data_dir>/annotations/trainval.txt` is (re)written on every run with
    one line per entry of `<data_dir>/images`, holding the file name without
    its extension. The list is then read back, shuffled with a fixed seed
    (42), split 70/30 and written to `pet_train.record` and `pet_val.record`
    in `FLAGS.output_dir`.
    """
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Pet dataset.')
    image_dir = os.path.join(data_dir, 'images')
    annotations_dir = os.path.join(data_dir, 'annotations')
    examples_path = os.path.join(annotations_dir, 'trainval.txt')

    # generate trainval.txt
    # - sorted(): os.listdir order is arbitrary, which would make the seeded
    #   shuffle below irreproducible.
    # - splitext(): keeps dots inside the file name ("a.b.jpg" -> "a.b").
    # - hidden entries such as ".DS_Store" are skipped.
    example_names = [
        os.path.splitext(entry.strip())[0]
        for entry in sorted(os.listdir(image_dir))
        if not entry.startswith('.')
    ]
    with open(examples_path, 'w') as f:
        f.writelines(name + '\n' for name in example_names)

    examples_list = dataset_util.read_examples_list(examples_path)

    # Test images are not included in the downloaded data set, so we shall
    # perform our own split.
    random.seed(42)
    random.shuffle(examples_list)
    num_examples = len(examples_list)
    num_train = int(0.7 * num_examples)
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')

    create_tf_record(train_output_path, label_map_dict, annotations_dir,
                     image_dir, train_examples)
    create_tf_record(val_output_path, label_map_dict, annotations_dir,
                     image_dir, val_examples)
def main(_):
    """Converts every image directory of a dataset into sharded TFRecords.

    If `FLAGS.data_dir` contains the `%TYPE%` placeholder, the `train` and
    `val` variants of the dataset and output paths are processed in turn;
    otherwise the single given pair is used. The shard count is derived from
    `IMG_PER_SHARD` (a remainder below 20% of a shard does not get its own
    shard) unless `FLAGS.no_shard` forces one shard. When `FLAGS.log_path`
    is set, the name of each image directory is appended to that file before
    it is processed.
    """
    LOG_FILE = FLAGS.log_path
    if LOG_FILE is not None:
        with open(LOG_FILE, 'w') as log:
            log.write("IMG_PATH\n")

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    DATASET_PATH = os.path.normpath(FLAGS.data_dir)
    OUTPUT_PATH = os.path.normpath(FLAGS.output_path)
    if '%TYPE%' in DATASET_PATH:
        PATHS = [(DATASET_PATH.replace('%TYPE%', datasetType),
                  OUTPUT_PATH.replace('%TYPE%', datasetType))
                 for datasetType in ['train', 'val']]
    else:
        PATHS = [(DATASET_PATH, FLAGS.output_path)]

    for datasetPath, outputPath in PATHS:
        logging.info(f'Using {datasetPath}')
        start = time()
        imageDirList = os.listdir(datasetPath)
        nbImage = len(imageDirList)

        record_dir = os.path.dirname(outputPath)
        # An output path without a directory part yields '', which
        # os.makedirs rejects; nothing needs creating in that case.
        if record_dir:
            os.makedirs(record_dir, exist_ok=True)

        leftover = nbImage % IMG_PER_SHARD
        extra_shard = 0 if leftover < IMG_PER_SHARD * 0.2 else 1
        num_shards = max(1, nbImage // IMG_PER_SHARD + extra_shard)
        if FLAGS.no_shard:
            num_shards = 1

        with contextlib2.ExitStack() as tf_record_close_stack:
            output_tfrecords = (
                tf_record_creation_util.open_sharded_output_tfrecords(
                    tf_record_close_stack, outputPath, num_shards))

            for idx, imageDir in enumerate(imageDirList):
                if LOG_FILE is not None:
                    # Re-opened per entry so each name is written out before
                    # the image is processed.
                    with open(LOG_FILE, 'a') as log:
                        log.write(f"{imageDir}\n")
                if idx % 50 == 0:
                    logging.info(f'On image {idx} of {len(imageDirList)}')

                IMAGE_DIR_PATH = os.path.join(datasetPath, imageDir)
                data = getImageData(str(IMAGE_DIR_PATH), label_map_dict)
                tf_example = data2TFExample(data)
                output_tfrecords[idx % num_shards].write(
                    tf_example.SerializeToString())

        total_time = time() - start
        m = int(total_time) // 60
        s = int(total_time) % 60
        print(f"{m:02d}:{s:02d}", flush=True)
def main(_):
    """Writes sharded TFRecords from an annotations CSV and a JPEG folder.

    The annotations and the ids of all `*.jpg` files found in
    `FLAGS.input_images_directory` are concatenated and grouped by
    `ImageID`. Each group is converted with
    `tf_example_from_annotations_data_frame`; a non-empty result is written
    to the shard selected by the image id (parsed as hexadecimal) modulo
    `FLAGS.num_shards`.

    Raises:
      ValueError: if one of the required flags is unset.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    required_flags = [
        'input_annotations_csv', 'input_images_directory', 'input_label_map',
        'output_tf_record_path_prefix'
    ]
    for flag_name in required_flags:
        if not getattr(FLAGS, flag_name):
            raise ValueError('Flag --{} is required'.format(flag_name))

    label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
    all_annotations = pd.read_csv(FLAGS.input_annotations_csv)
    all_images = tf.gfile.Glob(
        os.path.join(FLAGS.input_images_directory, '*.jpg'))
    all_image_ids = [
        os.path.splitext(os.path.basename(v))[0] for v in all_images
    ]
    all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
    all_annotations = pd.concat([all_annotations, all_image_ids])

    tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = (
            oid_tfrecord_creation.open_sharded_output_tfrecords(
                tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
                FLAGS.num_shards))

        grouped = all_annotations.groupby('ImageID')
        for counter, (image_id, image_annotations) in enumerate(grouped):
            tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...',
                                   1000, counter)

            # In OID image file names are formed by appending ".jpg" to the
            # image ID. The id is normalised to text once so the path and
            # the shard index agree even for non-string ids.
            image_id_text = str(image_id)
            print(FLAGS.input_images_directory, image_id, '.jpg')
            image_path = os.path.join(FLAGS.input_images_directory,
                                      image_id_text + '.jpg')
            # JPEG data is binary: read it as bytes, not as decoded text.
            with tf.gfile.Open(image_path, 'rb') as image_file:
                encoded_image = image_file.read()

            tf_example = (
                oid_tfrecord_creation.tf_example_from_annotations_data_frame(
                    image_annotations, label_map, encoded_image))
            if tf_example:
                shard_idx = int(image_id_text, 16) % FLAGS.num_shards
                output_tfrecords[shard_idx].write(
                    tf_example.SerializeToString())
def main(_):
    """Creates training and validation TFRecords for the Pet dataset.

    Examples come from `<data_dir>/annotations/trainval.txt`, are shuffled
    with a fixed seed (42) and split 70/30. Records are written to
    `FLAGS.output_dir`; with `FLAGS.faces_only` the `*_with_masks` file
    names are used. `FLAGS.faces_only` and `FLAGS.mask_type` are forwarded
    to `create_tf_record`.
    """
    dataset_root = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Pet dataset.')
    image_dir = os.path.join(dataset_root, 'images')
    annotations_dir = os.path.join(dataset_root, 'annotations')
    # trainval.txt lists the examples used for both training and validation.
    trainval_list = os.path.join(annotations_dir, 'trainval.txt')
    examples = dataset_util.read_examples_list(trainval_list)

    # No test split is available here, so a 70/30 split is made locally.
    random.seed(42)
    random.shuffle(examples)
    train_count = int(0.7 * len(examples))
    train_examples = examples[:train_count]
    val_examples = examples[train_count:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    if FLAGS.faces_only:
        record_names = ('pet_train_with_masks.record',
                        'pet_val_with_masks.record')
    else:
        record_names = ('pet_train.record', 'pet_val.record')

    for record_name, subset in zip(record_names,
                                   (train_examples, val_examples)):
        create_tf_record(
            os.path.join(FLAGS.output_dir, record_name),
            label_map_dict,
            annotations_dir,
            image_dir,
            subset,
            faces_only=FLAGS.faces_only,
            mask_type=FLAGS.mask_type)
def main(_):
    """Converts the labeled images under `FLAGS.labeled_path` to a TFRecord.

    Image/XML pairs are discovered with `find_labeled_images`, converted
    with `create_tf_example` and written to `FLAGS.output_path`.
    """
    logging.info("Starting TF Record conversor ...")
    logging.info("Reading dataset from: {}".format(FLAGS.labeled_path))
    logging.info("Output TF Record in: {}".format(FLAGS.output_path))
    # Report the label map path itself (previously the output path was
    # logged here by mistake).
    logging.info("Using label map file: {}".format(FLAGS.label_map_path))

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    # Close the writer even when discovery or conversion raises.
    try:
        label_map_dict = label_map_util.get_label_map_dict(
            FLAGS.label_map_path)
        images, xmls = find_labeled_images(FLAGS.labeled_path)

        for image, label_img_xml in zip(images, xmls):
            tf_example = create_tf_example(image, label_img_xml,
                                           label_map_dict)
            writer.write(tf_example.SerializeToString())
    finally:
        writer.close()
def main(_):
    """Converts XML annotations to a TFRecord, applying class implications.

    Class implications are taken from `FLAGS.class_implications` (JSON,
    single quotes tolerated). If none are given and both `FLAGS.base_class`
    and `FLAGS.target_class` appear in the class priorities, they are
    derived with `derive_implications`. Every `.xml` file in the annotations
    directory is parsed, converted with `dict_to_tf_example` and written to
    `FLAGS.output_path`.
    """
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    class_impl = json.loads(FLAGS.class_implications.replace(
        "'", '"')) if FLAGS.class_implications else None
    class_prio = json.loads(FLAGS.class_priorities.replace(
        "'", '"')) if FLAGS.class_priorities else DEFAULT_CLASS_PRIORITIES
    assert isinstance(class_prio, list)

    def has_class_arg(target_class):
        return target_class is not None and target_class in class_prio

    if class_impl is None and has_class_arg(
            FLAGS.base_class) and has_class_arg(FLAGS.target_class):
        class_impl = derive_implications(class_prio, FLAGS.base_class,
                                         FLAGS.target_class)
    pprint(class_impl)

    # NOTE(review): the input locations are hard-coded to one machine --
    # consider exposing them as flags.
    image_dir = '/home/gabi/Desktop/AllDATA/DogData/val/JPEGImages'
    annotations_dir = '/home/gabi/Desktop/AllDATA/DogData/val/Annotations'

    logging.info('Reading from dataset: ' + annotations_dir)
    examples_list = os.listdir(annotations_dir)

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    # Close the writer even if an annotation fails to parse or convert.
    try:
        stats['impl_classes_replaced'] = 0
        stats['impl_images_replaced'] = 0

        for idx, example in enumerate(examples_list):
            if not example.endswith('.xml'):
                continue
            if idx % 50 == 0:
                print('On image %d of %d' % (idx, len(examples_list)))

            path = os.path.join(annotations_dir, example)
            with tf.io.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, image_dir, label_map_dict,
                                            class_impl)
            writer.write(tf_example.SerializeToString())
    finally:
        writer.close()

    if class_impl is not None:
        print("Replaced {} classes in {} images with implied classes".format(
            stats['impl_classes_replaced'], stats['impl_images_replaced']))
def main(_):
    """Creates training and validation TFRecords from a mask folder.

    The example list is derived from the file names in
    `<data_dir>/<mask_name>`: the last 8 characters of each name are replaced
    by `.jpg`. The list is shuffled with a fixed seed (42), split 70/30 and
    written to `FLAGS.output_dir`.
    """
    data_dir = FLAGS.data_dir
    mask_name = FLAGS.mask_name
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from LFW dataset.')
    image_dir = os.path.join(data_dir, 'images')
    mask_dir = os.path.join(data_dir, mask_name)
    # presumably mask files are named "<image stem>" plus an 8-character
    # suffix including the extension -- TODO confirm against the data set.
    examples_list = [
        img_path[:-8] + ".jpg" for img_path in os.listdir(mask_dir)
    ]

    # Test images are not included in the downloaded data set, so we shall
    # perform our own split.
    random.seed(42)
    random.shuffle(examples_list)
    num_examples = len(examples_list)
    num_train = int(0.7 * num_examples)
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    # Both modes need output paths; previously they were only defined when
    # faces_only was off, so --faces_only failed with an unbound local.
    if FLAGS.faces_only:
        train_name = 'pictures_faces_train.record'
        val_name = 'pictures_faces_val.record'
    else:
        train_name = 'pictures_with_masks_train.record'
        val_name = 'pictures_with_masks_val.record'
    train_output_path = os.path.join(FLAGS.output_dir, train_name)
    val_output_path = os.path.join(FLAGS.output_dir, val_name)

    create_tf_record(
        train_output_path,
        FLAGS.num_shards,
        label_map_dict,
        image_dir,
        mask_name,
        train_examples,  # names of the images selected for training
        faces_only=FLAGS.faces_only,
        mask_type=FLAGS.mask_type)
    create_tf_record(
        val_output_path,
        FLAGS.num_shards,
        label_map_dict,
        image_dir,  # path to images
        mask_name,
        val_examples,  # names of the images selected for validation
        faces_only=FLAGS.faces_only,
        mask_type=FLAGS.mask_type)
def main(_):
    """Converts the annotations of one collection/set into a TFRecord.

    Annotation file names are read from
    `<data_dir>/<collection>/<set>.txt`; each file is parsed and converted
    with `dict_to_tf_example`. Examples for which no TF example is returned
    are skipped with a warning. The final message reports the summed object
    count returned by `dict_to_tf_example`.

    Raises:
      ValueError: if `args.set` is not in `SETS`.
    """
    args = process_command_line()
    if args.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))

    output = os.path.join(args.data_dir, args.output_path)

    # touch the file if it doesn't already exist
    if not os.path.exists(output):
        with open(output, 'a'):
            os.utime(output)

    writer = tf.python_io.TFRecordWriter(output)
    # Close the writer even if reading or converting an annotation raises.
    try:
        label_map_dict = label_map_util.get_label_map_dict(
            os.path.join(args.data_dir, args.label_map_path))

        # print() does not interpolate logging-style arguments; format here.
        print('Reading from %s dataset.' % args.collection)
        examples_path = os.path.join(args.data_dir, args.collection,
                                     args.set + '.txt')
        annotations_dir = os.path.join(args.data_dir, args.collection,
                                       args.annotations_dir)

        with open(examples_path) as fid:
            examples_list = [line.strip() for line in fid]

        ttl_objs = 0
        for idx, example in enumerate(examples_list):
            if idx % 10 == 0:
                logging.info('Processing image %d of %d', idx,
                             len(examples_list))

            annotation_path = os.path.join(annotations_dir, example)
            with open(annotation_path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example, num_objs = dict_to_tf_example(
                data, args.data_dir, label_map_dict, args.labels,
                conf.PNG_DIR)
            if tf_example:
                ttl_objs += num_objs
                writer.write(tf_example.SerializeToString())
            else:
                logging.warning('No objects found in {0}'.format(example))
    finally:
        writer.close()

    print('Done. Found {0} examples in {1} set'.format(ttl_objs, args.set))
def main(_):
    """Converts PASCAL VOC `Airplane_<set>` annotations into a TFRecord.

    For every selected year the example ids are read from
    `ImageSets/Main/Airplane_<set>.txt`; each XML annotation is parsed,
    converted with `dict_to_tf_example` and appended to `FLAGS.output_path`.
    An example that fails is reported and skipped, and the conversion
    carries on with the next one.

    Raises:
      ValueError: if `FLAGS.set` is not in `SETS` or `FLAGS.year` is not in
        `YEARS`.
    """
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))

    data_dir = FLAGS.data_dir
    years = ['VOC2007', 'VOC2012']
    if FLAGS.year != 'merged':
        years = [FLAGS.year]

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    # Close the writer on every exit path, including Ctrl-C.
    try:
        label_map_dict = label_map_util.get_label_map_dict(
            FLAGS.label_map_path)

        for year in years:
            logging.info('Reading from PASCAL %s dataset.', year)
            examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                         'Airplane_' + FLAGS.set + '.txt')
            annotations_dir = os.path.join(data_dir, year,
                                           FLAGS.annotations_dir)
            examples_list = dataset_util.read_examples_list(examples_path)
            for idx, example in enumerate(examples_list):
                if idx % 100 == 0:
                    logging.info('On image %d of %d', idx,
                                 len(examples_list))
                path = os.path.join(annotations_dir, example + '.xml')
                try:
                    with tf.gfile.GFile(path, 'r') as fid:
                        xml_str = fid.read()
                    xml = etree.fromstring(xml_str)
                    data = dataset_util.recursive_parse_xml_to_dict(
                        xml)['annotation']
                    tf_example = dict_to_tf_example(
                        data, FLAGS.data_dir, label_map_dict,
                        FLAGS.ignore_difficult_instances)
                    writer.write(tf_example.SerializeToString())
                except Exception as err:  # pylint: disable=broad-except
                    # Best effort: skip the broken example but say why.
                    # `Exception` (not a bare except) lets KeyboardInterrupt
                    # and SystemExit propagate.
                    print("error at: ", path, err)
    finally:
        writer.close()
def _create_tf_data(self, annotation_file, ratio=0.7):
    """Builds a label map plus training/validation records from JSON.

    Every entry of the annotation file that has a `boundbox` list
    contributes one example per box. The distinct box labels, in order of
    first appearance, are written to `label_map.pbtxt` with ids starting at
    1. The examples are shuffled with a fixed seed (42) and split by
    `ratio` into `train.record` and `val.record`, all under
    `self.local_path`.

    Args:
      annotation_file: path of the JSON annotation file; assumed to hold a
        list of per-image entries.
      ratio: fraction of the examples used for training.

    Returns:
      0 when the file holds no box annotations (nothing is written in that
      case); otherwise None.
    """
    with open(annotation_file) as f:
        annotation_data = json.load(f)

    labels_seen = []
    object_refs = []  # (image index, box index within that image)
    for image_index in range(len(annotation_data)):
        entry = annotation_data[image_index]
        if 'boundbox' not in entry:
            continue
        for box_index in range(len(entry['boundbox'])):
            object_refs.append((image_index, box_index))
            box_label = entry['boundbox'][box_index]['label']
            if box_label not in labels_seen:
                labels_seen.append(box_label)

    object_total = len(object_refs)
    if object_total == 0:
        return object_total

    label_map_file = os.path.join(self.local_path, 'label_map.pbtxt')
    with open(label_map_file, mode='w') as f:
        # Ids are 1-based.
        for label_id, label_text in enumerate(labels_seen, 1):
            f.write('item { \n id: %d\n name: \'%s\'\n}\n\n' %
                    (label_id, label_text))

    random.seed(42)
    random.shuffle(object_refs)
    split_at = int(ratio * len(object_refs))
    train_refs = object_refs[:split_at]
    val_refs = object_refs[split_at:]

    label_map_dict = label_map_util.get_label_map_dict(label_map_file)
    train_output_path = os.path.join(self.local_path, 'train.record')
    val_output_path = os.path.join(self.local_path, 'val.record')

    for record_path, subset in ((train_output_path, train_refs),
                                (val_output_path, val_refs)):
        self.create_tf_record(record_path, annotation_data, label_map_dict,
                              subset)
def main(_):
    """Writes `train.record` in `FLAGS.output_dir` from the whole data set.

    Images are expected under `<data_dir>/images` and annotations under
    `<data_dir>/annotations`; sharding, `faces_only` and `mask_type` are
    taken from the flags and forwarded to `create_tf_record`.
    """
    dataset_root = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    images_path = os.path.join(dataset_root, 'images')
    annotations_path = os.path.join(dataset_root, 'annotations')
    record_path = os.path.join(FLAGS.output_dir, 'train.record')

    create_tf_record(
        record_path,
        FLAGS.num_shards,
        label_map_dict,
        annotations_path,
        images_path,
        faces_only=FLAGS.faces_only,
        mask_type=FLAGS.mask_type)
def get_detection_graph_and_category_index(self, model_name,
                                           path_to_the_model_database):
    """Loads the frozen inference graph and the label map structures.

    The serialized graph at `self.path_to_the_inference_graph` is imported
    into a fresh `tf.Graph`. The label map at `self.path_to_label_map_file`
    is turned into a category index and a display-name-to-id dictionary;
    `self.num_classes` is set to the largest id in the label map.

    Args:
      model_name: not used by this method.
      path_to_the_model_database: not used by this method.

    Returns:
      A tuple `(detection_graph, category_index, label_map_dict)`.
    """
    graph = tf.Graph()
    with graph.as_default():
        graph_def = tf.GraphDef()
        with tf.gfile.GFile(self.path_to_the_inference_graph,
                            'rb') as graph_file:
            graph_def.ParseFromString(graph_file.read())
        tf.import_graph_def(graph_def, name='')

    label_map_path = self.path_to_label_map_file
    label_map_proto = label_map_util.load_labelmap(label_map_path)
    self.num_classes = label_map_util.get_max_label_map_index(
        label_map_proto)
    category_list = label_map_util.convert_label_map_to_categories(
        label_map_proto,
        max_num_classes=self.num_classes,
        use_display_name=True)
    index_by_id = label_map_util.create_category_index(category_list)
    display_name_to_id = label_map_util.get_label_map_dict(
        label_map_path, use_display_name=True)

    return graph, index_by_id, display_name_to_id
def __init__(self, config: dict):
    """Validates `config` and prepares the folders and paths of a model.

    Recognised keys:
      model: required string, the model name.
      images_folder: required string, an existing folder.
      masks_folder / xml_folder: optional, default to the images folder.
      output_folder: optional string; the model name is appended to it.
        Defaults to `<cwd>/models/<model>`. The folder is created.
      labels_map: optional, a dict (keys are title-cased) or the path of a
        label map file.

    Raises:
      AssertionError: if a required key is missing or has the wrong type, or
        the images folder does not exist.
      Exception: if `labels_map` is neither a dictionary nor an existing
        file.
    """
    self._dataset = PascalVOCDataset()

    # required training parameter
    has_model = "model" in config and isinstance(config["model"], str)
    assert has_model, "`model` parameter is required, and must be an string"

    # required images folder
    has_images = "images_folder" in config and isinstance(
        config["images_folder"], str)
    assert has_images, "`images folder` parameter is required, and must be an string"
    self._images_folder = Path(config["images_folder"])
    assert self._images_folder.exists(), "images folder not found"

    # remaining configuration; masks and XML default to the images folder
    self._model_name = config["model"]
    self._masks_folder = Path(config.get("masks_folder", self._images_folder))
    self._xml_folder = Path(config.get("xml_folder", self._images_folder))

    # pre-trained model paths
    self._checkpoint_model_folder: Path = Path()
    self._checkpoint_model_pipeline_file: Path = Path()

    # output folder: explicit base folder or <cwd>/models, plus model name
    if "output_folder" not in config:
        relative_target = "models" + os.path.sep + self._model_name
        self._out_folder = Path(os.getcwd()).joinpath(relative_target)
    else:
        assert isinstance(config["output_folder"], str), "`output_folder` must be an string"
        self._out_folder = Path(config["output_folder"]).joinpath(
            self._model_name)
    self._out_folder.mkdir(exist_ok=True, parents=True)

    # model attributes
    self._pipeline = None
    self._labels_map = None

    # label map: inline dictionary or path of a label map file
    configured_labels = config.get("labels_map")
    if configured_labels:
        if isinstance(configured_labels, dict):
            self._labels_map = dict(
                (key.title(), value)
                for key, value in configured_labels.items())
        elif isinstance(configured_labels, str) and os.path.isfile(
                configured_labels):
            self._labels_map = get_label_map_dict(configured_labels)
        else:
            raise Exception("`labels map` parameter must be a dictionary or a file")

    # new model paths
    out_folder = self._out_folder
    self._pipeline_file = out_folder.joinpath("pipeline.config")
    self._labels_map_file = out_folder.joinpath("label_map.pbtxt")
    self._val_record_file = out_folder.joinpath("val.record")
    self._train_record_file = out_folder.joinpath("train.record")
def main(_):
    """Write train and validation TFRecords for the Pet dataset.

    The examples listed in ``<data_dir>/annotations/trainval.txt`` are
    shuffled with a fixed seed and split 70/30. Output file names depend on
    ``FLAGS.faces_only``.
    """
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Pet dataset.')
    image_dir = os.path.join(data_dir, 'images')
    annotations_dir = os.path.join(data_dir, 'annotations')
    examples_list = dataset_util.read_examples_list(
        os.path.join(annotations_dir, 'trainval.txt'))

    # No separate test split is available here, so one is carved out of the
    # trainval list; the fixed seed keeps the split reproducible.
    random.seed(42)
    random.shuffle(examples_list)
    split_at = int(0.7 * len(examples_list))
    train_examples = examples_list[:split_at]
    val_examples = examples_list[split_at:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    if FLAGS.faces_only:
        train_name = 'pet_faces_train.record'
        val_name = 'pet_faces_val.record'
    else:
        train_name = 'pets_fullbody_with_masks_train.record'
        val_name = 'pets_fullbody_with_masks_val.record'

    splits = (
        (os.path.join(FLAGS.output_dir, train_name), train_examples),
        (os.path.join(FLAGS.output_dir, val_name), val_examples),
    )
    for output_path, examples in splits:
        create_tf_record(
            output_path,
            FLAGS.num_shards,
            label_map_dict,
            annotations_dir,
            image_dir,
            examples,
            faces_only=FLAGS.faces_only,
            mask_type=FLAGS.mask_type)
def main(_):
    """Write a TFRecord for every ``jpg`` image found in the image folder.

    The image folder is ``<FLAGS.data_dir>/<FLAGS.image_dir>`` and the
    annotations folder is ``<FLAGS.data_dir>/<FLAGS.annotations_dir>``.
    Example names are the image file names with their last four characters
    (the ``.jpg`` suffix) removed.
    """
    data_dir = FLAGS.data_dir
    train_output_path = FLAGS.output_dir
    image_dir = os.path.join(data_dir, FLAGS.image_dir)
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from dataset.')
    # Build a new list instead of deleting from the list being iterated:
    # deleting in place shifts the remaining items and makes the loop skip
    # the entry after each removed one, so non-jpg files could slip through.
    examples_list = [
        file_name[:-4]
        for file_name in os.listdir(image_dir)
        if file_name.endswith('jpg')
    ]

    create_tf_record(train_output_path, FLAGS.num_shards, label_map_dict,
                     annotations_dir, image_dir, examples_list)
def __init__(self, args):
    """Load the saved detection model and the label-map lookups.

    Args:
        args: object providing ``threshold``, ``modelbasefolder`` (passed to
            ``tf.saved_model.load``) and ``labelmappath`` (label map file).
    """
    self.args = args
    self.threshold = args.threshold

    tf.keras.backend.clear_session()
    self.detect_fn = tf.saved_model.load(args.modelbasefolder)

    label_map = label_map_util.load_labelmap(args.labelmappath)
    max_index = label_map_util.get_max_label_map_index(label_map)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=max_index, use_display_name=True)
    self.category_index = label_map_util.create_category_index(categories)

    # NOTE(review): the loaded label map object is passed here, not its
    # path -- confirm the installed label_map_util accepts that.
    name_to_id = label_map_util.get_label_map_dict(label_map, use_display_name=True)
    self.FULL_LABEL_CLASSES = list(name_to_id)
def main(_):
    """Convert the listed images and their XML annotations to one TFRecord.

    Image names come from ``FLAGS.image_list_path``. For each image the
    annotation ``<annotations_dir>/<image stem>.xml`` is parsed and turned
    into an example by ``dict_to_tf_example``, then written to
    ``FLAGS.output_path``.
    """
    images_dir = FLAGS.images_dir
    image_files = dataset_util.read_examples_list(FLAGS.image_list_path)
    annotations_dir = os.path.join(images_dir, FLAGS.annotations_dir)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    # Close the writer even when an annotation is missing or malformed, so
    # the file handle is not leaked.
    try:
        for idx, image_file in enumerate(image_files):
            print(idx, image_file)
            # Only the last '/'-separated component names the annotation.
            image_stem = os.path.splitext(image_file.split('/')[-1])[0]
            annotation_path = os.path.join(annotations_dir, image_stem + '.xml')
            with tf.gfile.GFile(annotation_path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(
                data, image_file, annotations_dir, label_map_dict,
                FLAGS.include_masks, FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())
    finally:
        writer.close()
def main(_):
    """Write train and validation TFRecords for the mask dataset.

    Example names are the entries of ``<data_dir>/annotation_masks``. They
    are shuffled with a fixed seed and split 70/30. ``reset_counters()`` is
    called before each split is written and ``print_counters(tag)`` after.
    """
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading dataset.')
    image_dir = os.path.join(data_dir, 'images')
    annotations_dir = os.path.join(data_dir, 'annotation_masks')
    examples_list = os.listdir(annotations_dir)

    # No separate test split is available here, so one is carved out of the
    # full list; the fixed seed keeps the shuffle reproducible.
    random.seed(42)
    random.shuffle(examples_list)
    split_at = int(0.7 * len(examples_list))
    train_examples = examples_list[:split_at]
    val_examples = examples_list[split_at:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    splits = (
        (os.path.join(FLAGS.output_dir, 'output_train_tf.record'), train_examples, "train"),
        (os.path.join(FLAGS.output_dir, 'output_val_tf.record'), val_examples, "eval"),
    )
    for output_path, examples, tag in splits:
        reset_counters()
        create_tf_record(output_path,
                         FLAGS.num_shards,
                         label_map_dict,
                         annotations_dir,
                         image_dir,
                         examples,
                         mask_type=FLAGS.mask_type)
        print_counters(tag)
def main(_):
    """Convert one or more datasets into a single TFRecord file.

    ``FLAGS.data_dir`` is a comma-separated list. An entry whose text after
    the last ``.`` is ``yaml`` is read with ``get_imgs_from_yaml``. Any other
    entry is treated as a directory: every regular file in it names an
    example whose annotation is
    ``<entry>/<FLAGS.annotations_dir>/<stem>.xml``.
    """
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    # Close the writer even if one of the datasets fails to convert, so the
    # file handle is not leaked.
    try:
        for dataset in FLAGS.data_dir.split(','):
            if dataset.split('.')[-1] == r'yaml':
                # YAML-described dataset.
                for example in get_imgs_from_yaml(dataset):
                    tf_example = create_tf_record(
                        example, label_map_dict, is_yaml=True,
                        ignore_difficult_instances=FLAGS.ignore_difficult_instances)
                    writer.write(tf_example.SerializeToString())
            else:
                # Directory of images with XML annotations.
                annotations_dir = os.path.join(dataset, FLAGS.annotations_dir)
                examples_list = [
                    os.path.splitext(name)[0]
                    for name in os.listdir(dataset)
                    if os.path.isfile(os.path.join(dataset, name))
                ]
                for example in examples_list:
                    path = os.path.join(annotations_dir, example + '.xml')
                    with tf.gfile.GFile(path, 'r') as fid:
                        xml_str = fid.read()
                    xml = etree.fromstring(xml_str)
                    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
                    # Re-root the recorded image path onto this dataset's
                    # directory; only the file name from the XML is kept.
                    data['path'] = os.path.join(os.path.abspath(dataset),
                                                os.path.basename(data['path']))
                    data['path'] = create_jpg_imgs(data['path'])
                    tf_example = create_tf_record(
                        data, label_map_dict,
                        ignore_difficult_instances=FLAGS.ignore_difficult_instances)
                    writer.write(tf_example.SerializeToString())
    finally:
        writer.close()
def __init__(self,
             tensor_key,
             label_map_proto_file,
             shape_keys=None,
             shape=None,
             default_value=''):
    """Initializes the LookupTensor handler.

    Builds a hash table from the label map (keyed by `name`, not
    `display_name`) and stores it on the instance; the remaining arguments
    are forwarded to the parent initializer.

    Args:
        tensor_key: the name of the `TFExample` feature to read the tensor
            from.
        label_map_proto_file: File path to a text format LabelMapProto
            message mapping class text to id.
        shape_keys: Optional name or list of names of the TF-Example feature
            in which the tensor shape is stored. If a list, then each
            corresponds to one dimension of the shape.
        shape: Optional output shape of the `Tensor`. If provided, the
            `Tensor` is reshaped accordingly.
        default_value: The value used when the `tensor_key` is not found in
            a particular `TFExample`.

    Raises:
        ValueError: if both `shape_keys` and `shape` are specified
            (presumably raised by the parent initializer, which is not
            visible here).
    """
    name_to_id = label_map_util.get_label_map_dict(
        label_map_proto_file, use_display_name=False)

    # Prefer the TF v2 lookup module; fall back to contrib when it is absent.
    try:
        lookup = tf.compat.v2.lookup
        hash_table_class = lookup.StaticHashTable
    except AttributeError:
        lookup = contrib_lookup
        hash_table_class = contrib_lookup.HashTable

    label_names = tf.constant(list(name_to_id.keys()))
    label_ids = tf.constant(list(name_to_id.values()), dtype=tf.int64)
    initializer = lookup.KeyValueTensorInitializer(
        keys=label_names, values=label_ids)
    # Unknown labels map to -1, although every label is expected to be
    # present in the label map.
    self._name_to_id_table = hash_table_class(
        initializer=initializer, default_value=-1)

    super(_ClassTensorHandler, self).__init__(tensor_key, shape_keys, shape,
                                              default_value)
def main(_):
    """Write train and validation TFRecords for the Mappy annotation dataset.

    The split is read from ``<data_dir>/ImageSets/train.txt`` (training) and
    ``<data_dir>/ImageSets/test.txt`` (validation); no random split is done.
    """
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from Mappy Annotation dataset.')
    image_dir = os.path.join(data_dir, 'Images')
    annotations_dir = os.path.join(data_dir, 'Annotations')
    image_sets_dir = os.path.join(data_dir, 'ImageSets')

    train_examples = dataset_util.read_examples_list(
        os.path.join(image_sets_dir, 'train.txt'))
    val_examples = dataset_util.read_examples_list(
        os.path.join(image_sets_dir, 'test.txt'))
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    splits = (
        (os.path.join(FLAGS.output_dir, 'mappy_blur_train.record'), train_examples),
        (os.path.join(FLAGS.output_dir, 'mappy_blur_val.record'), val_examples),
    )
    for output_path, examples in splits:
        create_tf_record(output_path,
                         FLAGS.num_shards,
                         label_map_dict,
                         annotations_dir,
                         image_dir,
                         examples,
                         faces_only=FLAGS.faces_only,
                         mask_type=FLAGS.mask_type)
def main(_):
    """Convert every ``*.jpg`` in the data directory into one TFRecord file.

    Each image is paired with the ``.xml`` file of the same stem in the same
    directory. Logs an error and returns early when ``FLAGS.data_dir``,
    ``FLAGS.output_path`` or ``FLAGS.label_map_path`` is empty.
    """
    data_dir = FLAGS.data_dir
    if not data_dir:
        logging.error('Must provide a data directory')
        return

    output_path = FLAGS.output_path
    if not output_path:
        logging.error('Must provide an output path')
        return

    label_map_path = FLAGS.label_map_path
    if not label_map_path:
        logging.error('Must provide a label map path')
        return

    writer = tf.python_io.TFRecordWriter(output_path)
    # Everything after the writer is opened runs under try/finally so the
    # writer is closed even if the label map or an annotation fails to load.
    try:
        label_map_dict = label_map_util.get_label_map_dict(label_map_path)

        logging.info('Reading from data directory.')
        data_dir_jpg_query = os.path.join(data_dir, '*.jpg')
        for idx, image_path in enumerate(glob.glob(data_dir_jpg_query)):
            if idx % 20 == 0:
                logging.info('On image %d (%s)', idx, image_path)

            annotation_path = os.path.splitext(image_path)[0] + '.xml'
            with tf.gfile.GFile(annotation_path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, image_path, label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())
    finally:
        writer.close()
def test_get_label_map_dict(self):
    """A label map written to disk loads back as a name -> id dictionary."""
    label_map_string = """ item { id:2 name:'cat' } item { id:1 name:'dog' } """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
        label_map_file.write(label_map_string)

    label_map_dict = label_map_util.get_label_map_dict(label_map_path)

    for name, expected_id in (('dog', 1), ('cat', 2)):
        self.assertEqual(label_map_dict[name], expected_id)
def main():
    """Dump the label-map class names, numbered, to ``tf_labels.txt``.

    Reads ``<data_dir>/tf_label_map.pbtxt`` and writes
    ``<output_dir>/tf_labels.txt``. Each entry is ``"<index> <name>"``, where
    the index is the position of the name when iterating the label-map
    dictionary.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir')
    parser.add_argument('--output_dir')
    args = parser.parse_args()
    print(args.data_dir)

    label_map_path = os.path.join(args.data_dir, 'tf_label_map.pbtxt')
    class_dict = label_map_util.get_label_map_dict(label_map_path)

    entries = []
    for i, name in enumerate(class_dict):
        # A single pre-formatted argument prints "<i> <name>" on both
        # Python 2 and Python 3; the former `print i, name` statement is a
        # SyntaxError on Python 3.
        print('{0} {1}'.format(i, name))
        entries.append(u"""{0} {1}""".format(i, name))
    # NOTE(review): entries are concatenated with no separator, exactly as
    # before -- confirm whether one entry per line was intended.
    text = u"".join(entries)

    output_path = os.path.join(args.output_dir, 'tf_labels.txt')
    with open(output_path, 'w') as f:
        f.write(text)
def test_get_label_map_dict(self):
    """A label map written to disk loads back as a name -> id dictionary."""
    label_map_string = """ item { id:2 name:'cat' } item { id:1 name:'dog' } """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
        label_map_file.write(label_map_string)

    label_map_dict = label_map_util.get_label_map_dict(label_map_path)

    for name, expected_id in (('dog', 1), ('cat', 2)):
        self.assertEqual(label_map_dict[name], expected_id)
def main(_):
    """Write train and validation TFRecords from pre-made example lists.

    Training examples come from ``<data_dir>/train.txt`` and validation
    examples from ``<data_dir>/eval.txt``.
    """
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from dataset.')
    train_examples = dataset_util.read_examples_list(
        os.path.join(data_dir, 'train.txt'))
    val_examples = dataset_util.read_examples_list(
        os.path.join(data_dir, 'eval.txt'))
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    splits = (
        (os.path.join(FLAGS.output_dir, 'tf_train_all.record'), train_examples),
        (os.path.join(FLAGS.output_dir, 'tf_val_all.record'), val_examples),
    )
    for output_path, examples in splits:
        create_tf_record(output_path, label_map_dict, examples)
def test_get_label_map_dict_with_fill_in_gaps_and_background(self):
    """Missing ids and the background class are filled in on request."""
    label_map_string = """ item { id:3 name:'cat' } item { id:1 name:'dog' } """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
        label_map_file.write(label_map_string)

    label_map_dict = label_map_util.get_label_map_dict(
        label_map_path, fill_in_gaps_and_background=True)

    expected_entries = (
        ('background', 0),
        ('dog', 1),
        ('class_2', 2),
        ('cat', 3),
    )
    for name, expected_id in expected_entries:
        self.assertEqual(label_map_dict[name], expected_id)
    # Ids must be contiguous from 0 up to the largest id.
    self.assertEqual(len(label_map_dict), max(label_map_dict.values()) + 1)
def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
                               label_map_path, validation_set_size):
    """Convert the KITTI detection dataset to TFRecords.

    Args:
        data_dir: The full path to the unzipped folder containing the
            unzipped data from data_object_image_2 and
            data_object_label_2.zip. Folder structure is assumed to be:
            data_dir/training/label_2 (annotations) and
            data_dir/data_object_image_2/training/image_2 (images).
        output_path: The path to which TFRecord files will be written. The
            TFRecord with the training set will be located at:
            <output_path>_train.tfrecord
            And the TFRecord with the validation set will be located at:
            <output_path>_val.tfrecord
        classes_to_use: List of strings naming the classes for which data
            should be converted. Use the same names as presented in the
            KITTI README file. Adding dontcare class will remove all other
            bounding boxes that overlap with areas marked as dontcare
            regions.
        label_map_path: Path to label map proto.
        validation_set_size: How many images should be left as the
            validation set. (Images whose numeric file name is below
            `validation_set_size` are placed in the validation set.)
    """
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)

    annotation_dir = os.path.join(data_dir, 'training', 'label_2')
    image_dir = os.path.join(data_dir, 'data_object_image_2', 'training',
                             'image_2')

    train_writer = tf.python_io.TFRecordWriter('%s_train.tfrecord' % output_path)
    val_writer = tf.python_io.TFRecordWriter('%s_val.tfrecord' % output_path)
    # Both writers are closed even if an image or annotation fails to
    # convert, so the file handles are not leaked.
    try:
        for img_name in sorted(tf.gfile.ListDirectory(image_dir)):
            # The image number is the part of the file name before the
            # first '.'; annotations use the same number zero-padded to 6.
            img_num = int(img_name.split('.')[0])
            is_validation_img = img_num < validation_set_size
            img_anno = read_annotation_file(
                os.path.join(annotation_dir, str(img_num).zfill(6) + '.txt'))
            image_path = os.path.join(image_dir, img_name)

            # Filter all bounding boxes of this frame that are of a legal
            # class, and don't overlap with a dontcare region.
            # TODO(talremez) filter out targets that are truncated or
            # heavily occluded.
            annotation_for_image = filter_annotations(img_anno, classes_to_use)

            example = prepare_example(image_path, annotation_for_image,
                                      label_map_dict)
            target_writer = val_writer if is_validation_img else train_writer
            target_writer.write(example.SerializeToString())
    finally:
        try:
            train_writer.close()
        finally:
            val_writer.close()
def __init__(self,
             load_instance_masks=False,
             instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
             label_map_proto_file=None,
             use_display_name=False,
             dct_method=''):
    """Constructor sets keys_to_features and items_to_handlers.

    Args:
        load_instance_masks: whether or not to load and handle instance
            masks.
        instance_mask_type: type of instance masks. Options are provided in
            input_reader.proto. This is only used if `load_instance_masks`
            is True.
        label_map_proto_file: a file path to a
            object_detection.protos.StringIntLabelMap proto. If provided,
            the class handler looks up 'image/object/class/text' in the
            label map, with 'image/object/class/label' as the backup
            handler.
        use_display_name: whether or not to use the `display_name` for
            label mapping (instead of `name`). Only used if
            label_map_proto_file is provided.
        dct_method: An optional string, defaults to ''. When non-empty it is
            forwarded to the image handler as a hint about the algorithm
            used for jpeg decompression. The original documentation lists
            ['INTEGER_FAST', 'INTEGER_ACCURATE'] as the valid values.

    Raises:
        ValueError: If `load_instance_masks` is True and
            `instance_mask_type` is not one of input_reader_pb2.DEFAULT,
            input_reader_pb2.NUMERICAL_MASKS, or
            input_reader_pb2.PNG_MASKS.
    """
    self.keys_to_features = {
        'image/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
            tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/filename':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/key/sha256':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/source_id':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/height':
            tf.FixedLenFeature((), tf.int64, 1),
        'image/width':
            tf.FixedLenFeature((), tf.int64, 1),
        # Object boxes and classes.
        'image/object/bbox/xmin':
            tf.VarLenFeature(tf.float32),
        'image/object/bbox/xmax':
            tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymin':
            tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymax':
            tf.VarLenFeature(tf.float32),
        'image/object/class/label':
            tf.VarLenFeature(tf.int64),
        'image/object/class/text':
            tf.VarLenFeature(tf.string),
        'image/object/area':
            tf.VarLenFeature(tf.float32),
        'image/object/is_crowd':
            tf.VarLenFeature(tf.int64),
        'image/object/difficult':
            tf.VarLenFeature(tf.int64),
        'image/object/group_of':
            tf.VarLenFeature(tf.int64),
        'image/object/weight':
            tf.VarLenFeature(tf.float32),
    }

    # `dct_method` is only passed along when it was actually requested.
    image_kwargs = {
        'image_key': 'image/encoded',
        'format_key': 'image/format',
        'channels': 3,
    }
    if dct_method:
        image_kwargs['dct_method'] = dct_method
    image = slim_example_decoder.Image(**image_kwargs)

    self.items_to_handlers = {
        fields.InputDataFields.image:
            image,
        fields.InputDataFields.source_id:
            slim_example_decoder.Tensor('image/source_id'),
        fields.InputDataFields.key:
            slim_example_decoder.Tensor('image/key/sha256'),
        fields.InputDataFields.filename:
            slim_example_decoder.Tensor('image/filename'),
        # Object boxes and classes.
        fields.InputDataFields.groundtruth_boxes:
            slim_example_decoder.BoundingBox(
                ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        fields.InputDataFields.groundtruth_area:
            slim_example_decoder.Tensor('image/object/area'),
        fields.InputDataFields.groundtruth_is_crowd:
            slim_example_decoder.Tensor('image/object/is_crowd'),
        fields.InputDataFields.groundtruth_difficult:
            slim_example_decoder.Tensor('image/object/difficult'),
        fields.InputDataFields.groundtruth_group_of:
            slim_example_decoder.Tensor('image/object/group_of'),
        fields.InputDataFields.groundtruth_weights:
            slim_example_decoder.Tensor('image/object/weight'),
    }

    if load_instance_masks:
        # Pick the raw feature type and the decoding callback per mask type.
        if instance_mask_type in (input_reader_pb2.DEFAULT,
                                  input_reader_pb2.NUMERICAL_MASKS):
            mask_feature = tf.VarLenFeature(tf.float32)
            mask_decoder = self._reshape_instance_masks
        elif instance_mask_type == input_reader_pb2.PNG_MASKS:
            mask_feature = tf.VarLenFeature(tf.string)
            mask_decoder = self._decode_png_instance_masks
        else:
            raise ValueError('Did not recognize the `instance_mask_type` option.')
        self.keys_to_features['image/object/mask'] = mask_feature
        self.items_to_handlers[
            fields.InputDataFields.groundtruth_instance_masks] = (
                slim_example_decoder.ItemHandlerCallback(
                    ['image/object/mask', 'image/height', 'image/width'],
                    mask_decoder))

    if not label_map_proto_file:
        label_handler = slim_example_decoder.Tensor('image/object/class/label')
    else:
        label_map = label_map_util.get_label_map_dict(label_map_proto_file,
                                                      use_display_name)
        label_names = tf.constant(list(label_map.keys()))
        label_ids = tf.constant(list(label_map.values()), dtype=tf.int64)
        # We use a default_value of -1, but we expect all labels to be
        # contained in the label map.
        table = tf.contrib.lookup.HashTable(
            initializer=tf.contrib.lookup.KeyValueTensorInitializer(
                keys=label_names, values=label_ids),
            default_value=-1)
        # If the label_map_proto is provided, try to use it in conjunction
        # with the class text, and fall back to a materialized ID.
        # TODO(lzc): note that here we are using BackupHandler defined in
        # this file (which is branching slim_example_decoder.BackupHandler).
        # Need to switch back to slim_example_decoder.BackupHandler once
        # tf 1.5 becomes more popular.
        label_handler = BackupHandler(
            slim_example_decoder.LookupTensor(
                'image/object/class/text', table, default_value=''),
            slim_example_decoder.Tensor('image/object/class/label'))
    self.items_to_handlers[
        fields.InputDataFields.groundtruth_classes] = label_handler