def read_dataset(file_read_func, decode_func, input_files, config):
    """Build the input `tf.data.Dataset` described by `config`.

    The file patterns are expanded, optionally shuffled, repeated, read
    through a parallel interleave, optionally shuffled again at record level,
    decoded and finally prefetched.

    Args:
      file_read_func: Function passed to `parallel_interleave`; it is applied
        to every filename and must return a `tf.data.Dataset`.
      decode_func: Function mapped over every record.
      input_files: A list of file patterns to read.
      config: Reader configuration. The attributes read here are `shuffle`,
        `filenames_shuffle_buffer_size`, `num_readers`, `num_epochs`,
        `read_block_length`, `shuffle_buffer_size`,
        `num_parallel_map_calls` and `prefetch_size`.

    Returns:
      A `tf.data.Dataset` of decoded, prefetched records.
    """
    # Expand every pattern and turn the matches into a dataset of filenames.
    matched = [tf.matching_files(pattern) for pattern in input_files]
    files = tf.data.Dataset.from_tensor_slices(tf.concat(matched, 0))

    if config.shuffle:
        files = files.shuffle(config.filenames_shuffle_buffer_size)
    elif config.num_readers > 1:
        tf.logging.warning('`shuffle` is false, but the input data stream is '
                           'still slightly shuffled since `num_readers` > 1.')

    # A falsy `num_epochs` (0 / None) means "repeat forever".
    files = files.repeat(config.num_epochs or None)

    interleave = tf.contrib.data.parallel_interleave(
        file_read_func,
        cycle_length=config.num_readers,
        block_length=config.read_block_length,
        sloppy=config.shuffle)
    records = files.apply(interleave)
    if config.shuffle:
        records = records.shuffle(config.shuffle_buffer_size)

    decoded = records.map(
        decode_func, num_parallel_calls=config.num_parallel_map_calls)
    return decoded.prefetch(config.prefetch_size)
def __init__(self, in_pattern, batch_size, num_buckets=0, num_epochs=None):
    """Store the batching settings and build the next-batch op.

    Args:
      in_pattern: Either a directory (in which case `/examples.proto` is
        appended to it) or a file pattern used as-is.
      batch_size: Batch size forwarded to `input_pipeline`.
      num_buckets: Stored on the instance and forwarded to `input_pipeline`.
      num_epochs: Stored on the instance and forwarded to `input_pipeline`.
    """
    self._batch_size = batch_size
    self.num_buckets = num_buckets
    self._epoch = 0
    self._step = 1.
    self.num_epochs = num_epochs

    if os.path.isdir(in_pattern):
        file_pattern = in_pattern + '/examples.proto'
    else:
        file_pattern = in_pattern
    matched_files = tf.matching_files(file_pattern)

    # NOTE(review): arguments are passed positionally as (filenames,
    # batch_size, num_buckets, num_epochs) -- confirm this matches the
    # parameter order of `input_pipeline` in this class.
    self.next_batch_op = self.input_pipeline(
        matched_files, self._batch_size, self.num_buckets, self.num_epochs)
def testMatchingFiles(self):
    """Check `tf.matching_files` for exact names and wildcard patterns.

    Temporary files with known prefixes are created in the test's temp
    directory; each pattern must match exactly the expected subset of them.
    The files are closed (and thereby deleted) even when an assertion fails.
    """
    cases = ['ABcDEF.GH', 'ABzDEF.GH', 'ABasdfjklDEF.GH', 'AB3DEF.GH',
             'AB4DEF.GH', 'ABDEF.GH', 'XYZ']
    files = [tempfile.NamedTemporaryFile(
        prefix=c, dir=self.get_temp_dir(), delete=True) for c in cases]

    try:
        with self.test_session():
            # Test exact match without wildcards.
            for f in files:
                self.assertEqual(tf.matching_files(f.name).eval(),
                                 tf.compat.as_bytes(f.name))

            # We will look for files matching "ABxDEF.GH*" where "x" is some
            # wildcard.
            pos = files[0].name.find(cases[0])
            pattern = files[0].name[:pos] + 'AB%sDEF.GH*'

            self.assertEqual(set(tf.matching_files(pattern % 'z').eval()),
                             self._subset(files, [1]))
            self.assertEqual(set(tf.matching_files(pattern % '?').eval()),
                             self._subset(files, [0, 1, 3, 4]))
            self.assertEqual(set(tf.matching_files(pattern % '*').eval()),
                             self._subset(files, [0, 1, 2, 3, 4, 5]))
            # NOTE(mrry): Windows uses PathMatchSpec to match file patterns,
            # which does not support the following expressions.
            if os.name != 'nt':
                self.assertEqual(
                    set(tf.matching_files(pattern % '[cxz]').eval()),
                    self._subset(files, [0, 1]))
                self.assertEqual(
                    set(tf.matching_files(pattern % '[0-9]').eval()),
                    self._subset(files, [3, 4]))
    finally:
        # Closing a NamedTemporaryFile created with delete=True removes it;
        # doing this in `finally` keeps a failed assertion from skipping the
        # cleanup.
        for f in files:
            f.close()
def testMatchingFiles(self):
    """Check `tf.matching_files` for exact names and wildcard patterns."""
    cases = ['ABcDEF.GH', 'ABzDEF.GH', 'ABasdfjklDEF.GH', 'AB3DEF.GH',
             'AB4DEF.GH', 'ABDEF.GH', 'XYZ']
    files = [tempfile.NamedTemporaryFile(prefix=c) for c in cases]

    # (wildcard substituted for "x" in "ABxDEF.GH*", indices of the files
    # expected to match), checked in this order.
    expectations = [
        ('z', [1]),
        ('?', [0, 1, 3, 4]),
        ('*', [0, 1, 2, 3, 4, 5]),
        ('[cxz]', [0, 1]),
        ('[0-9]', [3, 4]),
    ]

    with self.test_session():
        # An exact file name must match only itself.
        for f in files:
            matched = tf.matching_files(f.name).eval()
            self.assertEqual(matched, tf.compat.as_bytes(f.name))

        # Directory part of the temp files followed by the templated name.
        first_name = files[0].name
        pattern = first_name[:first_name.find(cases[0])] + 'AB%sDEF.GH*'

        for wildcard, expected in expectations:
            matched = set(tf.matching_files(pattern % wildcard).eval())
            self.assertEqual(matched, self._subset(files, expected))
def batch_inputs(paths, reference_shape, batch_size=32, is_training=False,
                 num_landmarks=68):
    """Read image files through a filename queue and produce batches.

    Args:
      paths: a list of file patterns; each is expanded with
        `tf.matching_files`.
      reference_shape: passed to `load_image` together with each filename.
      batch_size: the batch size.
      is_training: whether in training mode; enables filename shuffling and
        colour distortion, and is forwarded to `load_image`.
      num_landmarks: the number of landmarks per image.

    Returns:
      images: a batched float32 image tensor with 3 channels (spatial
        dimensions are padded dynamically per batch).
      lms: a tf tensor of shape [batch_size, num_landmarks, 2].
      lms_init: a tf tensor of shape [batch_size, num_landmarks, 2].
    """
    matched = [tf.matching_files(d) for d in paths]
    # NOTE: this is the (axis, values) argument order used by the original.
    files = tf.concat(0, matched)

    filename_queue = tf.train.string_input_producer(
        files, shuffle=is_training, capacity=1000)

    loader = partial(load_image, is_training=is_training)
    loader_inputs = [filename_queue.dequeue(), reference_shape]
    loader_dtypes = [tf.float32, tf.float32, tf.float32]
    image, lms, lms_init = tf.py_func(
        loader, loader_inputs, loader_dtypes, name='load_image')

    # The image has always 3 channels.
    image.set_shape([None, None, 3])
    if is_training:
        image = distort_color(image)

    landmark_shape = [num_landmarks, 2]
    lms = tf.reshape(lms, landmark_shape)
    lms_init = tf.reshape(lms_init, landmark_shape)

    images, lms, inits = tf.train.batch(
        [image, lms, lms_init],
        batch_size=batch_size,
        num_threads=4,
        capacity=1000,
        enqueue_many=False,
        dynamic_pad=True)
    return images, lms, inits
def get_dataset(tfrecords_dir, subset, batch_size):
    """Build a shuffled, repeated, batched dataset from TFRecord shards.

    Shards are the files matching `<tfrecords_dir>/<subset>-*`. Parsing is
    done by `_parse_fn`, with `is_training` set when `subset` is 'train'.
    """
    shard_pattern = os.path.join(tfrecords_dir, '%s-*' % subset)
    files = tf.matching_files(shard_pattern)

    shards = tf.data.Dataset.from_tensor_slices(files)
    # Shuffle buffer as large as the number of matched shard files.
    num_shards = tf.cast(tf.shape(files)[0], tf.int64)
    shards = shards.shuffle(num_shards).repeat()

    records = shards.interleave(tf.data.TFRecordDataset, cycle_length=4)
    records = records.shuffle(buffer_size=8192)

    parser = partial(_parse_fn, is_training=(subset == 'train'))
    batcher = tf.data.experimental.map_and_batch(
        map_func=parser,
        batch_size=batch_size,
        num_parallel_calls=config.NUM_DATA_WORKERS)
    return records.apply(batcher).prefetch(batch_size)
def read_dataset(file_read_func, decode_func, input_files, config):
    """Reads a dataset, and handles repetition and shuffling.

    Args:
      file_read_func: Function to use in the interleave transformation, to
        read every individual file into a tf.data.Dataset.
      decode_func: Function to apply to all records.
      input_files: A list of file paths to read.
      config: A input_reader_builder.InputReader object.

    Returns:
      A tf.data.Dataset based on config.
    """
    # Shard, shuffle, and read files.
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)
    filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
    if config.shuffle:
        filename_dataset = filename_dataset.shuffle(
            config.filenames_shuffle_buffer_size)
    elif config.num_readers > 1:
        tf.logging.warning('`shuffle` is false, but the input data stream is '
                           'still slightly shuffled since `num_readers` > 1.')
    filename_dataset = filename_dataset.repeat(config.num_epochs or None)

    # Only a missing `parallel_interleave` attribute (older TensorFlow)
    # should trigger the fallback; any other error is a real failure and
    # must not be silently swallowed.
    try:
        interleave_fn = tf.contrib.data.parallel_interleave(
            file_read_func,
            cycle_length=config.num_readers,
            block_length=config.read_block_length,
            sloppy=True)
    except AttributeError:
        # For tf < 1.5
        interleave_fn = tf.contrib.data.sloppy_interleave(
            map_func=file_read_func,
            cycle_length=config.num_readers,
            block_length=config.read_block_length)
    records_dataset = filename_dataset.apply(interleave_fn)

    if config.shuffle:
        # Dataset transformations return a new dataset; the result has to be
        # kept, otherwise the record-level shuffle is never applied.
        records_dataset = records_dataset.shuffle(config.shuffle_buffer_size)
    tensor_dataset = records_dataset.map(
        decode_func, num_parallel_calls=config.num_parallel_map_calls)
    return tensor_dataset.prefetch(config.prefetch_size)
def read_dataset(file_read_func, decode_func, input_files, config,
                 num_workers=1, worker_index=0):
    """Build a sharded, optionally shuffled dataset of decoded records.

    Args:
      file_read_func: Function passed to `Dataset.interleave`; it is applied
        to every filename and must return a `tf.data.Dataset`.
      decode_func: Function mapped over every record.
      input_files: A list of file patterns to read.
      config: Reader configuration. The attributes read here are
        `num_epochs`, `shuffle`, `filenames_shuffle_buffer_size`,
        `num_readers`, `shuffle_buffer_size` and `prefetch_buffer_size`.
      num_workers: Number of workers / shards.
      worker_index: Id for the current worker.

    Returns:
      A `tf.data.Dataset` based on config.
    """
    shuffle = config.shuffle

    # Filename stage: expand patterns, keep this worker's shard, repeat.
    matches = [tf.matching_files(pattern) for pattern in input_files]
    filenames = tf.concat(matches, 0)
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset.shard(num_workers, worker_index)
    dataset = dataset.repeat(config.num_epochs or None)
    if shuffle:
        dataset = dataset.shuffle(
            config.filenames_shuffle_buffer_size,
            reshuffle_each_iteration=True)

    # Record stage. If cycle_length is larger than the number of files, more
    # than one reader will be assigned to the same file, leading to
    # repetition -- hence the cap at the number of matched files.
    num_files = tf.size(filenames)
    cycle_length = tf.cast(tf.minimum(config.num_readers, num_files),
                           tf.int64)
    # TODO: find the optimal block_length.
    dataset = dataset.interleave(
        file_read_func, cycle_length=cycle_length, block_length=1)
    if shuffle:
        dataset = dataset.shuffle(
            config.shuffle_buffer_size, reshuffle_each_iteration=True)

    dataset = dataset.map(decode_func, num_parallel_calls=config.num_readers)
    return dataset.prefetch(config.prefetch_buffer_size)
def _load_dataset(self, binaries_fname_pattern):
    """Create a TFRecord dataset from the files matching a pattern.

    Args:
      binaries_fname_pattern (str): Pattern of the files from which to load
        records (e.g. ``some/path/train-00000-of-01024``).

    Returns:
      A tf.data.TFRecordDataset over the matched files, in shuffled file
      order.
    """
    # The ops are created under this object's name scope and pinned to CPU.
    with tf.name_scope(self._name), tf.device("/cpu:0"):
        matched = tf.matching_files(binaries_fname_pattern)
        shuffled = tf.random_shuffle(matched)
        return tf.data.TFRecordDataset(shuffled)
def tfrecords_input_fn(files_name_pattern, feature_spec, label,
                       mode=tf.estimator.ModeKeys.EVAL,
                       num_epochs=None,
                       batch_size=64):
    """Build a batched TFRecord dataset for the files matching a pattern.

    Records are shuffled only in TRAIN mode, then batched, parsed with
    `parse_tf_example(batch, label, feature_spec)` and repeated `num_epochs`
    times.

    Returns:
      The resulting dataset.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    matched_files = tf.matching_files(files_name_pattern)
    dataset = data.TFRecordDataset(filenames=matched_files)
    if is_training:
        dataset = dataset.shuffle(buffer_size=2 * batch_size + 1)

    # Parsing happens after batching, so the parser receives whole batches.
    batches = dataset.batch(batch_size)
    parsed = batches.map(
        lambda tf_example: parse_tf_example(tf_example, label, feature_spec))
    return parsed.repeat(num_epochs)
def input_pipeline(self, file_pattern, batch_size, num_epochs=None,
                   num_threads=10):
    """Build a queue-based op that yields padded batches of parsed examples.

    Args:
      file_pattern: Pattern expanded with `tf.matching_files`.
      batch_size: Number of examples per batch.
      num_epochs: Forwarded to `tf.train.string_input_producer`.
      num_threads: Number of threads used by `tf.train.batch`.

    Returns:
      The batched tensors produced by `tf.train.batch`.
    """
    filename_queue = tf.train.string_input_producer(
        tf.matching_files(file_pattern),
        num_epochs=num_epochs,
        shuffle=True)
    parsed_batch = self.example_parser(filename_queue)

    # Queue capacity: a fixed 10000-element floor plus 12 batches.
    queue_capacity = 10000 + 12 * batch_size
    return tf.train.batch(
        parsed_batch,
        batch_size=batch_size,
        capacity=queue_capacity,
        num_threads=num_threads,
        dynamic_pad=True,
        allow_smaller_final_batch=True)
def read_dataset(
        file_read_func, decode_func, input_files, config,
        num_workers=1, worker_index=0):
    """Create the per-worker dataset of decoded records.

    Args:
      file_read_func: Function used in `Dataset.interleave` to read every
        individual file into a `tf.data.Dataset`.
      decode_func: Function to apply to all records.
      input_files: A list of file patterns to read.
      config: Reader configuration (`num_epochs`, `shuffle`,
        `filenames_shuffle_buffer_size`, `num_readers`,
        `shuffle_buffer_size` and `prefetch_buffer_size` are read).
      num_workers: Number of workers / shards.
      worker_index: Id for the current worker.

    Returns:
      A `tf.data.Dataset` based on config.
    """
    # Shard and repeat the list of matched files.
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)
    file_ds = (tf.data.Dataset.from_tensor_slices(filenames)
               .shard(num_workers, worker_index)
               .repeat(config.num_epochs or None))
    if config.shuffle:
        file_ds = file_ds.shuffle(config.filenames_shuffle_buffer_size,
                                  reshuffle_each_iteration=True)

    # Never use more readers than there are files: surplus readers would be
    # assigned to the same file, leading to repetition.
    readers = tf.minimum(config.num_readers, tf.size(filenames))
    # TODO: find the optimal block_length.
    record_ds = file_ds.interleave(
        file_read_func,
        cycle_length=tf.cast(readers, tf.int64),
        block_length=1)
    if config.shuffle:
        record_ds = record_ds.shuffle(config.shuffle_buffer_size,
                                      reshuffle_each_iteration=True)

    decoded_ds = record_ds.map(
        decode_func, num_parallel_calls=config.num_readers)
    return decoded_ds.prefetch(config.prefetch_buffer_size)
def input_fn(input_dir, mode, batch_size=1, num_epochs=100,
             label_name=None, feature_spec=None):
    """Read TFRecords and return one batch of features and labels.

    Files are matched under `<input_dir>/data/<mode>/<mode>*.tfrecord`, where
    `<mode>` is the lower-cased string form of `mode`.

    Args:
      input_dir: Root directory holding `data/` and, when `feature_spec` is
        not given, `transformed_metadata/`.
      mode: A `tf.estimator.ModeKeys` value.
      batch_size: Number of examples per batch.
      num_epochs: Not used by this function.
      label_name: Key of the label feature in the parsed example.
      feature_spec: Parsing spec; loaded from the tf.Transform output in
        `input_dir` when None.

    Returns:
      `features` alone in PREDICT mode, otherwise `(features, labels)`.
    """
    if feature_spec is None:
        metadata_dir = os.path.join(input_dir, 'transformed_metadata')
        feature_spec = tft.TFTransformOutput(
            metadata_dir).transformed_feature_spec()

    def read_and_decode_fn(example):
        """Parses Serialized Example."""
        parsed = tf.parse_single_example(example, feature_spec)
        image = tf.reshape(parsed['image'], [256, 256, 256])
        label = tf.cast(parsed[label_name], tf.int32)
        return image, label

    prefix = str(mode).lower()
    suffix = '.tfrecord'
    num_cpus = multiprocessing.cpu_count()
    file_pattern = os.path.join(
        input_dir, 'data', prefix, prefix + '*' + suffix)

    dataset = tf.data.TFRecordDataset(
        filenames=tf.matching_files(file_pattern),
        buffer_size=None,
        num_parallel_reads=num_cpus)

    # Only training data is shuffled and repeated.
    if mode == tf.estimator.ModeKeys.TRAIN:
        dataset = dataset.apply(
            tf.data.experimental.shuffle_and_repeat(buffer_size=100))

    dataset = dataset.apply(
        tf.data.experimental.map_and_batch(
            map_func=read_and_decode_fn,
            batch_size=batch_size,
            num_parallel_calls=tf.data.experimental.AUTOTUNE))

    features, labels = dataset.make_one_shot_iterator().get_next()
    if mode == tf.estimator.ModeKeys.PREDICT:
        return features
    return features, labels
def input_fn():
    """Call the user's input function and normalise / batch its result.

    When `prepare_filename_queues` is set, the user function receives a
    filename queue: built from `FLAGS.infer_file` in INFER mode, otherwise
    from the `<mode>_file` flag of the enclosing `mode`. A single dict or
    tensor result is wrapped into a 1-tuple before validation.
    """
    if prepare_filename_queues:
        if mode == tf.contrib.learn.ModeKeys.INFER:
            filenames = tf.matching_files(FLAGS.infer_file)
        else:
            # Filename variables are created for both TRAIN and EVAL, and
            # the one for the current mode is then selected.
            filenames_by_mode = {
                key: tf.train.match_filenames_once(
                    getattr(FLAGS, "{}_file".format(key)),
                    name="{}_filenames".format(key))
                for key in [tf.contrib.learn.ModeKeys.TRAIN,
                            tf.contrib.learn.ModeKeys.EVAL]}
            filenames = filenames_by_mode[mode]
        inputs = user_input_fn(filenames_to_queue(filenames))
    else:
        inputs = user_input_fn()

    # Exact type check (not isinstance), as in the original.
    if type(inputs) in {dict, tf.Tensor}:
        inputs = (inputs,)

    _check_inputs(inputs)

    if batch_inputs:
        return _batch_inputs(inputs, mode)
    return inputs
def tf_loadVideoFromFile(tf_dirname):
    """Build ops that read every JPEG matching a pattern into one tensor.

    The filenames matching `tf_dirname` are enqueued into a FIFO queue, read
    one at a time with a `WholeFileReader`, decoded as 3-channel JPEGs and
    concatenated along a new leading axis inside a `tf.while_loop`.

    Args:
      tf_dirname: File pattern passed to `tf.matching_files`.

    Returns:
      A tuple `(loaded_images, fileQ)`: the concatenated image tensor and the
      filename queue.
    """
    # tf_pattern = tf.concat([tf_dirname, '*.jpg'], 0)
    tf_matchingfiles = tf.matching_files(tf_dirname)
    # Number of matched files; used as the loop bound below.
    n_files = tf.shape(tf_matchingfiles)[0]
    # String queue with a capacity of 100 elements.
    fileQ = tf.FIFOQueue(100, tf.string)
    fQinit = fileQ.enqueue_many((tf_matchingfiles, ))
    # NOTE(review): the source was flattened onto one line, so the extent of
    # this `with` block is reconstructed -- confirm which statements belong
    # inside it.
    # The first read is forced to run after the filenames are enqueued.
    with tf.control_dependencies([fQinit]):
        # s_file = fileQ.dequeue()
        reader = tf.WholeFileReader()
        _, value = reader.read(fileQ)
        image_seq = tf.image.decode_jpeg(value, channels=3)
        # Add a leading axis so later frames can be concatenated onto it.
        image_seq = tf.expand_dims(image_seq, axis=0)
        # image_seq = tf.ones([2,2,2])

    # One file has already been read above, so the counter starts at 1.
    i = tf.constant(1)

    def condition(i, m):
        # Keep looping until `n_files` files have been read in total.
        return tf.less(i, n_files)

    def body(i, m):
        # Read and decode the next file, then append it along axis 0.
        # s_file =fileQ.dequeue()
        _, value = reader.read(fileQ)
        tf_s_image = tf.image.decode_jpeg(value, channels=3)
        tf_s_image = tf.expand_dims(tf_s_image, axis=0)
        # tf_s_image = tf.ones([2, 2, 2])
        # NOTE(review): presumably all frames share the same height/width,
        # otherwise the concatenation cannot succeed -- verify with callers.
        return i + 1, tf.concat(concat_dim=0, values=[m, tf_s_image])

    # The accumulator grows on every iteration, so its shape invariant is
    # left fully unknown (rank 4).
    _, loaded_images = tf.while_loop(cond=condition, body=body,
                                     loop_vars=[i, image_seq],
                                     shape_invariants=[
                                         i.get_shape(),
                                         tf.TensorShape(
                                             [None, None, None, None])
                                     ])
    return loaded_images, fileQ
def dataset_input_fn(data_folder, prefix=None, mode=None, params=None,
                     count=None):
    """Creates a dataset reading examples from CSV files.

    Args:
      data_folder: Location of the files, finishing with a '/'.
      prefix: Start of the file names. None (the default) means no prefix,
        i.e. every `*.csv` file in `data_folder` is matched.
      mode: tf.estimator.ModeKeys (TRAIN, EVAL); records are shuffled only
        in TRAIN mode.
      params: hyperparameters; `params.batch_size` is always read and
        `params.buffer_size` is read when shuffling.
      count: number of times to repeat the dataset; None repeats
        indefinitely.

    Returns:
      features and targets
    """
    shuffle = mode == tf.estimator.ModeKeys.TRAIN

    # Formatting None directly would put the literal text "None" into the
    # glob pattern, so a missing prefix is treated as the empty string.
    file_prefix = '' if prefix is None else prefix

    # Read CSV files into a Dataset
    filenames = tf.matching_files(
        '{}{}*.csv'.format(data_folder, file_prefix))
    dataset = tf.data.TextLineDataset(filenames)

    # Parse the record into tensors.
    dataset = dataset.map(parse_csv)

    # Shuffle the dataset
    if shuffle:
        dataset = dataset.shuffle(buffer_size=params.buffer_size)

    # Repeat the input indefinitely if count is None
    dataset = dataset.repeat(count=count)

    # Generate batches
    dataset = dataset.batch(params.batch_size)

    # Create a one-shot iterator
    iterator = dataset.make_one_shot_iterator()

    # Get batch X and y
    features, target = iterator.get_next()
    return features, target
def input_fn(files_name_pattern,
             mode=tf.estimator.ModeKeys.EVAL,
             skip_header_lines=0,
             num_epochs=1,
             batch_size=200):
    """Read TSV lines from the matching files and return one parsed batch.

    The settings in use are printed first. Lines are shuffled only in TRAIN
    mode, parsed with `parse_tsv_row`, batched, repeated and prefetched.

    Returns:
      `(features, parse_label_column(target))`.
    """
    shuffle = mode == tf.estimator.ModeKeys.TRAIN
    num_threads = 1
    if MULTI_THREADING:
        num_threads = multiprocessing.cpu_count()
    buffer_size = 2 * batch_size + 1

    banner = "================"
    print("")
    print("* data input_fn:")
    print(banner)
    print("Input file(s): {}".format(files_name_pattern))
    print("Batch size: {}".format(batch_size))
    print("Epoch Count: {}".format(num_epochs))
    print("Mode: {}".format(mode))
    print("Thread Count: {}".format(num_threads))
    print("Shuffle: {}".format(shuffle))
    print(banner)
    print("")

    lines = data.TextLineDataset(
        filenames=tf.matching_files(files_name_pattern))
    lines = lines.skip(skip_header_lines)
    if shuffle:
        lines = lines.shuffle(buffer_size)

    rows = lines.map(lambda tsv_row: parse_tsv_row(tsv_row),
                     num_parallel_calls=num_threads)
    batches = rows.batch(batch_size).repeat(num_epochs).prefetch(buffer_size)

    features, target = batches.make_one_shot_iterator().get_next()
    return features, parse_label_column(target)
def read_dataset(
        file_read_func, decode_func, input_files, config,
        num_workers=1, worker_index=0):
    """Read, shard, optionally shuffle and decode a set of input files.

    Args:
      file_read_func: Function used in `Dataset.interleave`; maps a filename
        to a `tf.data.Dataset` of records.
      decode_func: Function mapped over every record.
      input_files: A list of file patterns to read.
      config: Reader configuration (`num_epochs`, `shuffle`,
        `filenames_shuffle_buffer_size`, `num_readers`,
        `shuffle_buffer_size` and `prefetch_buffer_size` are read).
      num_workers: Number of shards the file list is split into.
      worker_index: Index of the shard kept by this worker.

    Returns:
      A prefetched `tf.data.Dataset` of decoded records.
    """
    per_pattern = [tf.matching_files(pattern) for pattern in input_files]
    filenames = tf.concat(per_pattern, 0)

    result = tf.data.Dataset.from_tensor_slices(filenames)
    result = result.shard(num_workers, worker_index)
    result = result.repeat(config.num_epochs or None)
    if config.shuffle:
        result = result.shuffle(
            config.filenames_shuffle_buffer_size,
            reshuffle_each_iteration=True)

    # Cap the number of concurrent readers at the number of matched files.
    capped_readers = tf.minimum(config.num_readers, tf.size(filenames))
    cycle_length = tf.cast(capped_readers, tf.int64)
    result = result.interleave(
        file_read_func, cycle_length=cycle_length, block_length=1)
    if config.shuffle:
        result = result.shuffle(
            config.shuffle_buffer_size, reshuffle_each_iteration=True)

    result = result.map(decode_func, num_parallel_calls=config.num_readers)
    return result.prefetch(config.prefetch_buffer_size)
def csv_input_fn(files_name_pattern,
                 mode=tf.estimator.ModeKeys.EVAL,
                 skip_header_lines=0,
                 num_epochs=1,
                 batch_size=20):
    """Read CSV lines from the matching files and return one parsed batch.

    Lines are shuffled only in TRAIN mode, batched, parsed per batch with
    `parse_csv_row` and repeated `num_epochs` times.

    Returns:
      `(features, target)` tensors from a one-shot iterator.
    """
    if MULTI_THREADING:
        num_threads = multiprocessing.cpu_count()
    else:
        num_threads = 1

    matched_files = tf.matching_files(files_name_pattern)
    lines = data.TextLineDataset(filenames=matched_files)
    lines = lines.skip(skip_header_lines)

    if mode == tf.estimator.ModeKeys.TRAIN:
        lines = lines.shuffle(buffer_size=2 * batch_size + 1)

    # Batching comes before parsing, so the parser sees whole batches.
    batches = lines.batch(batch_size)
    parsed = batches.map(lambda csv_row: parse_csv_row(csv_row),
                         num_parallel_calls=num_threads)
    parsed = parsed.repeat(num_epochs)

    features, target = parsed.make_one_shot_iterator().get_next()
    return features, target
def load_tfrecords_dataset(self, dataset_path_format, image_size,
                           num_parallel_calls=None):
    """
    Load a TFRecord-format dataset.

    :param dataset_path_format: str, pattern of the dataset files
    :param image_size: dict with the original image size under the keys
        'height', 'width' and 'channel'
    :param num_parallel_calls: int or None, parallelism of the parsing map
    :return dataset: tf.data.Dataset of parsed examples
    """
    height = image_size['height']
    width = image_size['width']
    channel = image_size['channel']

    filenames = tf.matching_files(dataset_path_format)
    # Number of matched files; used both as the shuffle buffer size and as
    # the interleave cycle length.
    num_shard = tf.cast(tf.shape(filenames)[0], tf.int64)

    shards = tf.data.Dataset.list_files(filenames)
    shards = shards.shuffle(num_shard)
    records = shards.interleave(
        lambda filename: tf.data.TFRecordDataset(filename),
        cycle_length=num_shard)

    return records.map(
        lambda x: self.parse_example(x, height, width, channel),
        num_parallel_calls=num_parallel_calls)
def csv_input_fn(file_name,
                 mode=tf.estimator.ModeKeys.EVAL,
                 skip_header_lines=0,
                 num_epochs=1,
                 batch_size=500):
    """Read CSV lines from the matching files and return one parsed batch.

    The file pattern is printed first. Lines are shuffled only in TRAIN mode,
    batched, parsed per batch with `parse_csv_row` and repeated `num_epochs`
    times.

    Returns:
      `(features, target)` tensors from a one-shot iterator.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    print(file_name)
    matched_files = tf.matching_files(file_name)
    lines = tf.data.TextLineDataset(filenames=matched_files)
    lines = lines.skip(skip_header_lines)
    if is_training:
        lines = lines.shuffle(buffer_size=2 * batch_size + 1)

    # Batching comes before parsing, so the parser sees whole batches.
    parsed = lines.batch(batch_size).map(
        lambda csv_row: parse_csv_row(csv_row))
    parsed = parsed.repeat(num_epochs)

    features, target = parsed.make_one_shot_iterator().get_next()
    return features, target
def input_fn(input_dir, mode, batch_size, num_epochs, label_name=None,
             shuffle_buffer_size=10000, feature_spec=None):
    """Read TFRecords and return one batch of features (and the label).

    Files are matched under `<input_dir>/data/<mode>/<mode>*.tfrecord`, where
    `<mode>` is the lower-cased string form of `mode`.

    Args:
      input_dir: Root directory holding `data/` and, when `feature_spec` is
        not given, `transformed_metadata/`.
      mode: A `tf.estimator.ModeKeys` value; data is shuffled in TRAIN mode.
      batch_size: Number of examples per batch.
      num_epochs: Number of times the dataset is repeated.
      label_name: Key popped from the parsed features and returned as label.
      shuffle_buffer_size: Buffer size of the TRAIN-mode shuffle.
      feature_spec: Parsing spec; loaded from the tf.Transform output in
        `input_dir` when None.

    Returns:
      `features` alone in PREDICT mode, otherwise `(features, label)`.
    """
    if feature_spec is None:
        metadata_dir = os.path.join(input_dir, 'transformed_metadata')
        feature_spec = tft.TFTransformOutput(
            metadata_dir).transformed_feature_spec()

    prefix = str(mode).lower()
    suffix = '.tfrecord'
    num_cpus = multiprocessing.cpu_count()
    file_pattern = os.path.join(
        input_dir, 'data', prefix, prefix + '*' + suffix)

    dataset = tf.data.TFRecordDataset(
        filenames=tf.matching_files(file_pattern),
        buffer_size=None,
        num_parallel_reads=num_cpus)
    if mode == tf.estimator.ModeKeys.TRAIN:
        dataset = dataset.shuffle(shuffle_buffer_size)

    # Examples are parsed a whole batch at a time.
    dataset = dataset.repeat(num_epochs).batch(batch_size)
    dataset = dataset.map(
        lambda examples: tf.parse_example(examples, feature_spec))

    features = dataset.make_one_shot_iterator().get_next()
    if mode == tf.estimator.ModeKeys.PREDICT:
        return features

    label = features.pop(label_name)
    return features, label
def testMatchingFiles(self):
    """Check `tf.matching_files` for exact names and wildcard patterns."""
    cases = ["ABcDEF.GH", "ABzDEF.GH", "ABasdfjklDEF.GH", "AB3DEF.GH",
             "AB4DEF.GH", "ABDEF.GH", "XYZ"]
    files = [tempfile.NamedTemporaryFile(prefix=c) for c in cases]

    # (wildcard substituted for "x" in "ABxDEF.GH*", indices of the files
    # expected to match), checked in this order.
    wildcard_cases = (
        ("z", [1]),
        ("?", [0, 1, 3, 4]),
        ("*", [0, 1, 2, 3, 4, 5]),
        ("[cxz]", [0, 1]),
        ("[0-9]", [3, 4]),
    )

    with self.test_session():
        # Test exact match without wildcards.
        for f in files:
            self.assertEqual(tf.matching_files(f.name).eval(), f.name)

        # Directory part of the temp files followed by the templated name.
        name0 = files[0].name
        pos = name0.find(cases[0])
        pattern = name0[:pos] + "AB%sDEF.GH*"

        for wildcard, indices in wildcard_cases:
            self.assertEqual(
                set(tf.matching_files(pattern % wildcard).eval()),
                self._subset(files, indices))
def input_fn(self, name, csv_path=None):
    """Create the estimator input: one batch of features and labels.

    Arguments:
      name : string, name of the data [Train or Eval]
      csv_path : the path of the csv on any storage system

    Returns:
      feats : the features of the next batch from a one-shot iterator
      labs : the labels of the next batch from a one-shot iterator
    """
    pattern = self._get_pattern(name, csv_path)
    tf.logging.info('The Pattern of files is : %s', pattern)

    filenames = tf.matching_files(pattern=pattern)
    lines = tf.data.TextLineDataset(filenames)
    # Drop the first line of the line dataset before parsing.
    lines = lines.skip(1)
    parsed = lines.map(self.parse_csv, num_parallel_calls=cpu_count())

    parsed = parsed.shuffle(buffer_size=self.batch_size * 100)
    parsed = parsed.apply(tf.contrib.data.ignore_errors())
    parsed = parsed.repeat(self.num_epochs)

    batches = parsed.batch(self.batch_size)
    # determine the ideal number
    batches = batches.prefetch(self.buffer_size)

    feats, labs = batches.make_one_shot_iterator().get_next()
    return feats, labs
def read_dataset(self, file_read_func, decode_func, input_files,
                 num_epochs=None):
    """Read a dataset, shuffling at both file and record level.

    Args:
      file_read_func: Function passed to `parallel_interleave`; it is applied
        to every filename and must return a `tf.data.Dataset`.
      decode_func: Function mapped over every record.
      input_files: A list of file patterns to read.
      num_epochs: Number of times the file list is repeated; None repeats
        indefinitely.

    Returns:
      A prefetched `tf.data.Dataset` of decoded records.
    """
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)

    # Shard, shuffle, and read files.
    filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
    filename_dataset = filename_dataset.shuffle(
        self.flags.filenames_shuffle_buffer_size)
    filename_dataset = filename_dataset.repeat(num_epochs)

    records_dataset = filename_dataset.apply(
        tf.contrib.data.parallel_interleave(
            file_read_func,
            cycle_length=self.flags.num_readers,
            block_length=self.flags.read_block_length,
            sloppy=True))
    # Dataset transformations return a new dataset; the result has to be
    # kept, otherwise the record-level shuffle is never applied.
    records_dataset = records_dataset.shuffle(self.flags.shuffle_buffer_size)

    tensor_dataset = records_dataset.map(
        decode_func, num_parallel_calls=self.num_parallel_calls)
    return tensor_dataset.prefetch(self.flags.prefetch_size)
def tfrecods_input_fn(files_name_pattern,
                      mode=tf.estimator.ModeKeys.EVAL,
                      num_epochs=None,
                      batch_size=200):
    """Read TFRecords from the matching files and return one parsed batch.

    The settings in use are printed first. Records are shuffled only in TRAIN
    mode, batched, parsed with `parse_tf_example`, optionally run through
    `process_features` (when `PROCESS_FEATURES` is set) and repeated.

    Returns:
      `(features, target)` tensors from a one-shot iterator.
    """
    shuffle = mode == tf.estimator.ModeKeys.TRAIN

    banner = "================"
    print("")
    print("* data input_fn:")
    print(banner)
    print("Input file(s): {}".format(files_name_pattern))
    print("Batch size: {}".format(batch_size))
    print("Epoch Count: {}".format(num_epochs))
    print("Mode: {}".format(mode))
    print("Shuffle: {}".format(shuffle))
    print(banner)
    print("")

    records = data.TFRecordDataset(
        filenames=tf.matching_files(files_name_pattern))
    if shuffle:
        records = records.shuffle(buffer_size=2 * batch_size + 1)

    # Batching comes before parsing, so the parser sees whole batches.
    parsed = records.batch(batch_size).map(
        lambda tf_example: parse_tf_example(tf_example))
    if PROCESS_FEATURES:
        parsed = parsed.map(
            lambda features, target: (process_features(features), target))
    parsed = parsed.repeat(num_epochs)

    features, target = parsed.make_one_shot_iterator().get_next()
    return features, target
def testMatchingFiles(self):
    """Check `tf.matching_files` for exact names and wildcard patterns."""
    cases = ['ABcDEF.GH', 'ABzDEF.GH', 'ABasdfjklDEF.GH', 'AB3DEF.GH',
             'AB4DEF.GH', 'ABDEF.GH', 'XYZ']
    files = [tempfile.NamedTemporaryFile(prefix=c) for c in cases]

    def matches(glob):
        """Evaluate `tf.matching_files` and return the result as a set."""
        return set(tf.matching_files(glob).eval())

    with self.test_session():
        # Test exact match without wildcards.
        for f in files:
            self.assertEqual(tf.matching_files(f.name).eval(), f.name)

        # We will look for files matching "ABxDEF.GH*" where "x" is some
        # wildcard.
        prefix_len = files[0].name.find(cases[0])
        pattern = files[0].name[:prefix_len] + 'AB%sDEF.GH*'

        self.assertEqual(matches(pattern % 'z'),
                         self._subset(files, [1]))
        self.assertEqual(matches(pattern % '?'),
                         self._subset(files, [0, 1, 3, 4]))
        self.assertEqual(matches(pattern % '*'),
                         self._subset(files, [0, 1, 2, 3, 4, 5]))
        self.assertEqual(matches(pattern % '[cxz]'),
                         self._subset(files, [0, 1]))
        self.assertEqual(matches(pattern % '[0-9]'),
                         self._subset(files, [3, 4]))
def load_tfrecord_dataset(batch_size,
                          split='train',
                          size=128,
                          augmentation=False,
                          shuffle=True,
                          classes=None,
                          normalize=True,
                          dequantize=True,
                          reader_threads=32):
    """Read images and labels from the TFRecord shards of `split`.

    Args:
      batch_size: Number of examples per returned batch.
      split: Shard name prefix; shards match
        `<TFRECORD_ROOT>/<split>-00*-of-00*`. The split name also selects the
        on-disk image size (64, 128 or 256).
      size: Side length the images are resized to.
      augmentation: Whether to apply a random left/right flip.
      shuffle: Whether to shuffle, both before parsing and before batching.
      classes: Optional collection of labels to keep; other examples are
        filtered out.
      normalize: Whether to map pixel values with `image / 128.0 - 1.0`.
      dequantize: Whether to add uniform noise in [0, 1/128); only applied
        when `normalize` is set.
      reader_threads: Parallelism of the parsing map.

    Returns:
      `(images, labels)` tensors from a one-shot iterator.
    """
    shard_pattern = os.path.join(TFRECORD_ROOT, "%s-00*-of-00*" % split)
    filenames = tf.matching_files(shard_pattern)
    dataset = tf.data.TFRecordDataset(filenames).repeat()

    # On-disk image side length, inferred from the split name.
    disk_sz = 256
    if 'imagenet64' in split:
        disk_sz = 64
    elif 'sngan_128' in split or 'imagenet128' in split:
        disk_sz = 128

    def parse_single_example(serialized_example, height=disk_sz,
                             width=disk_sz, depth=3):
        """Parses a single tf.Example into image and label tensors."""
        feature_spec = {
            'image': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
        }
        features = tf.parse_single_example(serialized_example,
                                           features=feature_spec)

        image = tf.decode_raw(features['image'], tf.uint8)
        image.set_shape([height * width * depth])
        # Reshape from [height * width * depth] to [height, width, depth].
        image = tf.reshape(image, [height, width, depth])
        image = tf.cast(image, tf.float32)
        image = tf.image.resize_images(image, [size, size])
        if augmentation:
            image = tf.image.random_flip_left_right(image)

        label = features['label']
        if normalize:
            image = (image / 128.0 - 1.0)
            if dequantize:
                noise = tf.random_uniform(
                    shape=[size, size, 3], minval=0.0, maxval=1. / 128)
                image = image + noise
        return image, label

    if shuffle:
        dataset = dataset.shuffle(buffer_size=256)
    dataset = dataset.map(parse_single_example,
                          num_parallel_calls=reader_threads)

    if classes is not None:
        classes = tf.convert_to_tensor(classes, dtype=tf.int64)
        dataset = dataset.filter(
            lambda x, y: tf.reduce_any(tf.equal(y, classes)))

    dataset = dataset.repeat()
    if shuffle:
        dataset = dataset.shuffle(buffer_size=10000)

    batched = dataset.batch(batch_size)
    images, labels = batched.make_one_shot_iterator().get_next()
    return images, labels
def _input_fn():
    """Build the queue-based graph that yields batches of frame-level features.

    Reads the TFRecord files matching `file_pattern`, parses every record as
    a sequence example, decodes and dequantizes the `rgb` and `audio` frame
    features, fixes their frame axis to `MAX_FRAMES`, converts the sparse
    label ids into a dense float vector of length `NUM_CLASSES`, and batches
    everything with `tf.train.shuffle_batch`.

    Returns:
        A tuple `(batch_features, batch_labels)`: a dict with the `video_id`,
        `rgb` and `audio` batches, and the batched label tensor.
    """
    print("\nread_dataset_frame: _input_fn: file_pattern = {}".format(file_pattern))
    print("read_dataset_frame: _input_fn: mode = {}".format(mode))
    print("read_dataset_frame: _input_fn: batch_size = {}".format(batch_size))

    def dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
        """Apply the affine map `x * range / 255 + range / 512 + min` to `feat_vector`."""
        assert max_quantized_value > min_quantized_value
        span = max_quantized_value - min_quantized_value
        step = span / 255.0
        offset = (span / 512.0) + min_quantized_value
        return feat_vector * step + offset

    def resize_axis(tensor, axis, new_size, fill_value=0):
        """Truncate or pad `tensor` along `axis` so its length becomes `new_size`."""
        tensor = tf.convert_to_tensor(value=tensor)
        dims = tf.unstack(value=tf.shape(input=tensor))

        # Dynamic extents of the padding block and of the part that is kept.
        padding_dims = dims[:]
        padding_dims[axis] = tf.maximum(x=0, y=new_size - dims[axis])
        dims[axis] = tf.minimum(x=dims[axis], y=new_size)
        kept_extent = tf.stack(values=dims)

        origin = tf.zeros_like(tensor=kept_extent)
        kept = tf.slice(input_=tensor, begin=origin, size=kept_extent)

        padding_extent = tf.stack(values=padding_dims)
        filler = tf.cast(x=fill_value, dtype=tensor.dtype)
        padding = tf.fill(dims=padding_extent, value=filler)

        resized = tf.concat(values=[kept, padding], axis=axis)

        # Record the now-known size of `axis` in the static shape.
        static_shape = tensor.get_shape().as_list()
        static_shape[axis] = new_size
        resized.set_shape(shape=static_shape)
        return resized

    input_file_names = tf.matching_files(pattern=file_pattern)

    # Training loops over the files forever in shuffled order; any other mode
    # makes a single ordered pass.
    training = mode == tf.estimator.ModeKeys.TRAIN
    num_epochs = None if training else 1
    shuffle = training

    filename_queue = tf.train.string_input_producer(
        string_tensor=input_file_names,
        num_epochs=num_epochs,
        shuffle=shuffle)

    reader = tf.TFRecordReader()
    _record_key, serialized_examples = reader.read(queue=filename_queue)

    contexts, features = tf.parse_single_sequence_example(
        serialized=serialized_examples,
        context_features={
            "video_id": tf.FixedLenFeature(shape=[], dtype=tf.string),
            "labels": tf.VarLenFeature(dtype=tf.int64),
        },
        sequence_features={
            "rgb": tf.FixedLenSequenceFeature(shape=[], dtype=tf.string),
            "audio": tf.FixedLenSequenceFeature(shape=[], dtype=tf.string),
        })
    print("read_dataset_frame: _input_fn: contexts = {}".format(contexts))
    print("read_dataset_frame: _input_fn: features = {}".format(features))

    # Carry the video id along with the frame features.
    features['video_id'] = contexts['video_id']
    print("read_dataset_frame: _input_fn: features = {}".format(features))

    # rgb: raw bytes -> float rows of width 1024 -> dequantized, MAX_FRAMES rows.
    rgb_bytes = tf.decode_raw(bytes=features["rgb"], out_type=tf.uint8)
    rgb_floats = tf.cast(x=rgb_bytes, dtype=tf.float32)
    decoded_rgb = tf.reshape(tensor=rgb_floats, shape=[-1, 1024])
    print("read_dataset_frame: _input_fn: decoded_rgb = {}".format(decoded_rgb))

    rgb_matrix = resize_axis(tensor=dequantize(decoded_rgb), axis=0, new_size=MAX_FRAMES)
    print("read_dataset_frame: _input_fn: rgb_matrix = {}".format(rgb_matrix))

    features['rgb'] = rgb_matrix
    print("read_dataset_frame: _input_fn: features = {}".format(features))

    # audio: same treatment with rows of width 128.
    audio_bytes = tf.decode_raw(bytes=features["audio"], out_type=tf.uint8)
    audio_floats = tf.cast(x=audio_bytes, dtype=tf.float32)
    decoded_audio = tf.reshape(tensor=audio_floats, shape=[-1, 128])
    print("read_dataset_frame: _input_fn: decoded_audio = {}".format(decoded_audio))

    audio_matrix = resize_axis(tensor=dequantize(decoded_audio), axis=0, new_size=MAX_FRAMES)
    print("read_dataset_frame: _input_fn: audio_matrix = {}".format(audio_matrix))

    features['audio'] = audio_matrix
    print("read_dataset_frame: _input_fn: features = {}".format(features))

    # Sparse label ids -> dense float vector with a 1 at every listed id.
    label_ids = contexts['labels'].values
    dense_labels = tf.sparse_to_dense(
        sparse_indices=label_ids,
        output_shape=(NUM_CLASSES,),
        sparse_values=1,
        validate_indices=False)
    features['labels'] = tf.cast(x=dense_labels, dtype=tf.float32)
    print("read_dataset_frame: _input_fn: features = {}".format(features))

    batch_features = tf.train.shuffle_batch(
        tensors=features,
        batch_size=batch_size,
        capacity=batch_size * 10,
        min_after_dequeue=batch_size,
        num_threads=1,
        enqueue_many=False,
        allow_smaller_final_batch=True)
    print("read_dataset_frame: _input_fn: batch_features = {}".format(batch_features))

    # The labels are returned separately from the remaining features.
    batch_labels = batch_features.pop('labels')
    print("read_dataset_frame: _input_fn: batch_labels = {}\n".format(batch_labels))

    return batch_features, batch_labels
def _get_tfrecords_files(self):
    """Return the `tf.matching_files` result for `tfrecords_pattern` inside `tfrecords_dir`."""
    return tf.matching_files(
        os.path.join(self.tfrecords_dir, self.tfrecords_pattern))
def _input_fn():
    """Build the queue-based graph that yields batches of video-level features.

    Reads the TFRecord files matching `file_pattern`, parses every record
    into `video_id`, `mean_rgb`, `mean_audio` and sparse `labels`, converts
    the labels into a dense float vector of length `NUM_CLASSES`, and batches
    everything with `tf.train.shuffle_batch`.

    Returns:
        A tuple `(batch_features, batch_labels)`: a dict with the remaining
        feature batches, and the batched label tensor.
    """
    print("\nread_dataset_video: _input_fn: file_pattern = {}".format(file_pattern))
    print("read_dataset_video: _input_fn: mode = {}".format(mode))
    print("read_dataset_video: _input_fn: batch_size = {}".format(batch_size))

    input_file_names = tf.matching_files(file_pattern)

    # Training loops over the files forever in shuffled order; any other mode
    # makes a single ordered pass.
    training = mode == tf.estimator.ModeKeys.TRAIN
    num_epochs = None if training else 1
    shuffle = training

    filename_queue = tf.train.string_input_producer(
        string_tensor=input_file_names,
        num_epochs=num_epochs,
        shuffle=shuffle)

    reader = tf.TFRecordReader()
    _record_key, serialized_examples = reader.read(queue=filename_queue)

    features = tf.parse_single_example(
        serialized=serialized_examples,
        features={
            'video_id': tf.FixedLenFeature(shape=[], dtype=tf.string),
            'labels': tf.VarLenFeature(dtype=tf.int64),
            'mean_rgb': tf.FixedLenFeature(shape=[1024], dtype=tf.float32),
            'mean_audio': tf.FixedLenFeature(shape=[128], dtype=tf.float32),
        })
    print("read_dataset_video: _input_fn: features = {}".format(features))

    # Sparse label ids -> dense float vector with a 1 at every listed id.
    label_ids = features['labels'].values
    dense_labels = tf.sparse_to_dense(
        sparse_indices=label_ids,
        output_shape=(NUM_CLASSES, ),
        sparse_values=1,
        validate_indices=False)
    features['labels'] = tf.cast(x=dense_labels, dtype=tf.float32)
    print("read_dataset_video: _input_fn: features = {}".format(features))

    batch_features = tf.train.shuffle_batch(
        tensors=features,
        batch_size=batch_size,
        capacity=batch_size * 10,
        min_after_dequeue=batch_size,
        num_threads=1,
        enqueue_many=False,
        allow_smaller_final_batch=True)
    print("read_dataset_video: _input_fn: batch_features = {}".format(batch_features))

    # The labels are returned separately from the remaining features.
    batch_labels = batch_features.pop('labels')
    print("read_dataset_video: _input_fn: batch_labels = {}".format(batch_labels))

    return batch_features, batch_labels
def _make_dataset(self, binaries_fname_pattern, data_augmentation=False, shuffle=True):
    """Build a batched dataset of images and one-hot labels from ``.bin`` files.

    Every fixed-length record consists of one label byte followed by a
    32 x 32 x 3 block of image bytes.

    Args:
        binaries_fname_pattern (str): Glob pattern of the ``.bin`` files to
            read (e.g. ``some/path/data_batch_*.bin``).
        data_augmentation (bool): If ``True``, pad the image by 4 pixels,
            take a random 32 x 32 crop and randomly perturb brightness,
            saturation and contrast. Otherwise the image is only cropped or
            padded to 32 x 32. Standardization is applied in both cases.
        shuffle (bool): If ``True``, shuffle the parsed examples with a
            buffer of 20000. The list of matched file names is shuffled
            regardless of this flag. Defaults to ``True``.

    Returns:
        A tf.data.Dataset yielding batches of ``self._batch_size`` images
        with their one-hot labels; incomplete final batches are dropped.
    """
    # Record layout, in bytes.
    num_classes = 10
    image_size, depth = 32, 3
    label_offset, label_bytes = 0, 1
    image_bytes = image_size * image_size * depth
    record_bytes = label_bytes + label_offset + image_bytes

    def parse_func(raw_record):
        """Decode one raw record into a standardized image and a one-hot label."""
        flat = tf.reshape(tf.decode_raw(raw_record, tf.uint8), [record_bytes])

        label_slice = tf.slice(flat, [label_offset], [label_bytes])
        label = tf.cast(label_slice, tf.int32)

        pixel_slice = tf.slice(flat, [label_bytes], [image_bytes])
        image = tf.reshape(pixel_slice, [image_size, image_size, depth])
        image = tf.cast(image, tf.float32)

        if not data_augmentation:
            image = tf.image.resize_image_with_crop_or_pad(image, 32, 32)
        else:
            image = tf.image.resize_image_with_crop_or_pad(
                image, image_size + 4, image_size + 4)
            image = tf.random_crop(image, [32, 32, 3])
            image = tf.image.random_brightness(image, max_delta=63. / 255.)
            image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            image = tf.image.random_contrast(image, lower=0.2, upper=1.8)

        image = tf.image.per_image_standardization(image)
        label = tf.squeeze(tf.one_hot(label, depth=num_classes))
        return image, label

    map_parallelism = 8 if data_augmentation else 4

    with tf.name_scope(self._name), tf.device('/cpu:0'):
        filenames = tf.random_shuffle(tf.matching_files(binaries_fname_pattern))
        data = tf.data.FixedLengthRecordDataset(
            filenames=filenames, record_bytes=record_bytes)
        data = data.map(parse_func, num_parallel_calls=map_parallelism)
        if shuffle:
            data = data.shuffle(buffer_size=20000)
        data = data.batch(self._batch_size, drop_remainder=True)
        return data.prefetch(buffer_size=4)
def __init__(self,
             filenames,
             vocab_file,
             max_text_length=400,
             batch_size=128,
             skip_header_lines=0,
             text_feature_name='review',
             target_name='class',
             weight_column_name='weight',
             pad_word='#@PAD@#',
             csv_header=['class', 'polarity', 'source', 'fold', 'file', 'review'],
             target_labels=['False', 'True'],
             num_epochs=None,
             multi_threading=True,
             prefetch=1,
             words_to_ids=True,
             shuffle=True
             ):
    """Build the text input pipeline and expose its `features` and `target`.

    Args:
        filenames: File name or glob pattern of the input text files; it is
            resolved with `tf.matching_files`.
        vocab_file: Path of the vocabulary file. `n_words` is set to its
            line count plus 2.
        max_text_length: Stored as `self.max_text_length`.
        batch_size: Examples per batch; a falsy value disables batching. It
            also sizes the shuffle buffer (`2 * batch_size + 1`).
        skip_header_lines: Number of leading lines to drop from EACH input
            file.
        text_feature_name: Key of the text feature in the parsed features.
        target_name: Stored as `self.target_name`.
        weight_column_name: Stored as `self.weight_column_name`.
        pad_word: Stored as `self.pad_word`.
        csv_header: Column names of the input rows.
        target_labels: Label vocabulary of the target column.
        num_epochs: Number of passes over the data; a falsy value means the
            dataset is not repeated.
        multi_threading: If true, parse rows with one parallel call per CPU.
        prefetch: Prefetch buffer size; a falsy value disables prefetching.
        words_to_ids: If true, the text feature is converted with
            `self.words_to_ids`.
        shuffle: If true, shuffle the text lines before parsing.
    """
    #self.filenames = tf.gfile.Glob(filenames)
    self.filenames = filenames

    #'data/op_spam_v1.4/vocab.csv'
    with open(vocab_file) as f:
        self.n_words = sum(1 for line in f) + 2

    self.vocab_file = vocab_file
    # Copy the list arguments: the defaults are shared between all calls, so
    # keeping a reference would let one instance's mutation leak into others.
    self.csv_header = list(csv_header)
    self.max_text_length = max_text_length
    self.text_feature_name = text_feature_name
    self.target_name = target_name
    self.weight_column_name = weight_column_name
    self.pad_word = pad_word
    self.target_labels = list(target_labels)

    num_threads = multiprocessing.cpu_count() if multi_threading else 1

    buffer_size = 2 * batch_size + 1

    print("")
    print("* data input_fn:")
    print("=" * 20)
    print("Input file(s): {}".format(self.filenames))
    print("Batch size: {}".format(batch_size))
    print("Epoch Count: {}".format(num_epochs))
    print("Thread Count: {}".format(num_threads))
    print("Shuffle: {}".format(shuffle))
    print("=" * 20)
    print("")

    # Read the files one after another and drop the header of every file.
    # Skipping on a single multi-file TextLineDataset would only remove the
    # header of the first file and feed the other headers to the parser.
    file_names = tf.matching_files(self.filenames)
    dataset = tf.data.Dataset.from_tensor_slices(file_names).flat_map(
        lambda file_name: tf.data.TextLineDataset(
            filenames=file_name).skip(skip_header_lines))

    if shuffle:
        dataset = dataset.shuffle(buffer_size)

    dataset = dataset.map(
        lambda tsv_row: self.parse_csv_row(tsv_row),
        num_parallel_calls=num_threads)

    if batch_size:
        dataset = dataset.batch(batch_size)

    if num_epochs:
        dataset = dataset.repeat(num_epochs)

    if prefetch:
        dataset = dataset.prefetch(prefetch)

    iterator = dataset.make_one_shot_iterator()

    features, target = iterator.get_next()

    target = self.labels_to_ids(target)
    if words_to_ids:
        features[text_feature_name] = self.words_to_ids(features[text_feature_name])

    self.features = features
    self.target = target
def match_files(self, type):
    """Return the `tf.matching_files` op for every path that starts with `type`.

    The op is named "match" and created inside the "Match" name scope.
    """
    pattern = type + "*"
    with tf.name_scope("Match"):
        return tf.matching_files(pattern, name="match")
# (Tail of a preceding function whose header is outside this chunk.)
return tf.concat(3, tensor_list)


def concat(val, ax):
    """Call `tf.concat` with the arguments swapped: `ax` first, `val` second."""
    # NOTE(review): `tf.concat(axis, values)` looks like the pre-1.0
    # TensorFlow argument order -- confirm the TensorFlow version in use.
    return tf.concat(ax, val)


#get file queue working... https://stackoverflow.com/questions/37126108/how-to-read-data-into-tensorflow-batches-from-example-queue
# Script-level settings.
initstddev = 1
dropout = 0.85
num_epochs = 200
num_batches = 31
batch_size_static = 88

# All JPEG files in ./train_data, fed in directory-listing order
# (shuffle=False) through a filename queue.
filenames = tf.matching_files("./train_data/*.jpg") #test on puck4 pls
print(filenames)
filename_queue = tf.train.string_input_producer(filenames, shuffle=False, name="DEMON_FROM_HELL")
# `getImage` is defined elsewhere in this file.
images, names = getImage(filename_queue)
# Shuffled batching alternative, currently disabled:
#batch, labels= tf.train.shuffle_batch([images, names], batch_size=batch_size_static, capacity=1000 + 3 * batch_size_static, allow_smaller_final_batch=True, min_after_dequeue=1000)
batch, labels = tf.train.batch([images, names], batch_size=batch_size_static, capacity=1000 + 3 * batch_size_static, allow_smaller_final_batch=True)
save_path = 'TrainedModel'
def dataset_input_fn(file_names_pattern,
                     file_encoding='csv',
                     mode=tf.estimator.ModeKeys.EVAL,
                     skip_header_lines=0,
                     num_epochs=1,
                     batch_size=200,
                     multi_threading=True):
    """An input function for training or evaluation.
    This uses the Dataset APIs.

    Args:
        file_names_pattern: [str] - file name or file name patterns from which to read the data.
        file_encoding: type of the text files. 'csv' reads text lines; any other value
            reads the files as TFRecords.
        mode: tf.estimator.ModeKeys - either TRAIN or EVAL.
            Used to determine whether or not to randomize the order of data.
        skip_header_lines: int - number of leading lines to skip in EACH CSV file.
        num_epochs: int - how many times through to read the data.
            If None will loop through data indefinitely
        batch_size: int - first dimension size of the Tensors returned by input_fn
        multi_threading: boolean - indicator to use multi-threading or not
    Returns:
        A tuple (features, target) of tensors taken from a one-shot iterator
        over the batched dataset.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    shuffle = is_training

    # Only used for the summary printed below.
    data_size = parameters.HYPER_PARAMS.train_size if is_training else None

    num_threads = multiprocessing.cpu_count() if multi_threading else 1

    buffer_size = 2 * batch_size + 1

    print("")
    print("* data input_fn:")
    print("================")
    print("Mode: {}".format(mode))
    print("Input file(s): {}".format(file_names_pattern))
    print("Files encoding: {}".format(file_encoding))
    print("Data size: {}".format(data_size))
    print("Batch size: {}".format(batch_size))
    print("Epoch Count: {}".format(num_epochs))
    print("Thread Count: {}".format(num_threads))
    print("Shuffle: {}".format(shuffle))
    print("================")
    print("")

    file_names = tf.matching_files(file_names_pattern)

    if file_encoding == 'csv':
        # Read the files one after another and drop the header of every
        # file. Skipping on a single multi-file TextLineDataset would only
        # remove the header of the first file and feed the other headers to
        # the CSV parser.
        dataset = data.Dataset.from_tensor_slices(file_names).flat_map(
            lambda file_name: data.TextLineDataset(
                filenames=file_name).skip(skip_header_lines))
        dataset = dataset.map(lambda csv_row: parse_csv(csv_row))
    else:
        dataset = data.TFRecordDataset(filenames=file_names)
        dataset = dataset.map(lambda tf_example: parse_tf_example(tf_example),
                              num_parallel_calls=num_threads)

    # Split every parsed row into (features, target), then post-process the
    # features only.
    dataset = dataset.map(lambda features: get_features_target_tuple(features),
                          num_parallel_calls=num_threads)
    dataset = dataset.map(lambda features, target: (process_features(features), target),
                          num_parallel_calls=num_threads)

    if shuffle:
        dataset = dataset.shuffle(buffer_size)

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size)
    dataset = dataset.repeat(num_epochs)

    iterator = dataset.make_one_shot_iterator()
    features, target = iterator.get_next()

    return features, target
def get_edof_training_queue(target_dir, patch_size, batch_size, num_depths=4, color=False,
                            num_threads=4, loop=True, filetype='jpg'):
    """Build a queue-based pipeline of random image patches with a depth map.

    Images of the requested type are read from `target_dir`, scaled by
    1/255, and a random `patch_size` x `patch_size` glimpse is extracted
    from each. Every patch is paired with a fixed label map made of
    `num_depths` horizontal bands holding the band index; `num_depths` depth
    values are additionally sampled uniformly from a list of five candidates.

    Args:
        target_dir: Directory containing the image files.
        patch_size: Side length of the extracted square patches. It should be
            divisible by `num_depths`; otherwise the label map has fewer than
            `patch_size` rows and does not match the declared batch shape.
        batch_size: Number of patches per batch.
        num_depths: Number of depth values to sample and of label bands.
        color: If true, decode with `channels=0` and declare 3-channel
            patches; otherwise decode as single-channel images.
        num_threads: Number of threads filling the batch queue.
        loop: If true, iterate over the files forever in shuffled order;
            otherwise make a single ordered pass.
        filetype: Image type to read, 'jpg' or 'png'.

    Returns:
        A tuple `(image_batch, depth_batch, depth_bins)`.

    Raises:
        ValueError: If `filetype` is neither 'jpg' nor 'png'.
    """
    # Fail fast with a clear message; otherwise an unknown file type would
    # leave the file list and the decoded image undefined further down.
    if filetype not in ('jpg', 'png'):
        raise ValueError(
            "Unsupported filetype {!r}; expected 'jpg' or 'png'.".format(filetype))

    file_list = tf.matching_files(os.path.join(target_dir, '*.' + filetype))

    filename_queue = tf.train.string_input_producer(file_list,
                                                    num_epochs=None if loop else 1,
                                                    shuffle=True if loop else False)

    image_reader = tf.WholeFileReader()
    _, image_file = image_reader.read(filename_queue)

    if color:
        print("Using color images")
    else:
        print("Using black and white images")
    decode = tf.image.decode_jpeg if filetype == 'jpg' else tf.image.decode_png
    image = decode(image_file, channels=0 if color else 1)

    image = tf.cast(image, tf.float32)  # Shape [height, width, channels]
    image = tf.expand_dims(image, 0)
    image /= 255.

    # Get the ratio of the patch size to the smallest side of the image
    img_height_width = tf.cast(tf.shape(image)[1:3], tf.float32)

    size_ratio = patch_size / tf.reduce_min(img_height_width)

    # Extract a glimpse from the image. The center is drawn so that, along
    # the smaller image side, the patch stays inside the image.
    offset_center = tf.random_uniform([1, 2], minval=0.0 + size_ratio / 2,
                                      maxval=1.0 - size_ratio / 2, dtype=tf.float32)
    offset_center = offset_center * img_height_width

    image = tf.image.extract_glimpse(image, size=[patch_size, patch_size],
                                     offsets=offset_center, centered=False,
                                     normalized=False)
    image = tf.squeeze(image, 0)

    # Candidate depth values. Float literals keep the values independent of
    # the interpreter's `/` semantics for integers.
    all_depths = tf.convert_to_tensor([1.0 / 2, 1 / 1.5, 1.0 / 1, 1 / 0.5, 1000], tf.float32)

    # Sample each depth uniformly from the five candidates.
    depth_bins = []
    for _ in range(num_depths):
        depth_idx = tf.multinomial(tf.log([5 * [1.0 / 5]]), num_samples=1)
        depth_bins.append(all_depths[tf.cast(depth_idx[0][0], tf.int32)])

    # Label map: equally tall horizontal bands holding the band index.
    test_depth = np.concatenate(
        [np.ones((patch_size // len(depth_bins), patch_size)) * i
         for i in range(len(depth_bins))], axis=0)[:, :, None]

    if color:
        patch_dims = [patch_size, patch_size, 3]
    else:
        patch_dims = [patch_size, patch_size, 1]

    image_batch, depth_batch = tf.train.batch([image, test_depth],
                                              shapes=[patch_dims, [patch_size, patch_size, 1]],
                                              batch_size=batch_size,
                                              num_threads=num_threads,
                                              capacity=4 * batch_size)
    tf.summary.image("input_img", image_batch)
    tf.summary.scalar("input_img_max", tf.reduce_max(image_batch))
    tf.summary.scalar("input_img_min", tf.reduce_min(image_batch))
    tf.summary.histogram('depth', depth_bins)
    tf.summary.image('depth', tf.cast(depth_batch, tf.float32))

    return image_batch, depth_batch, depth_bins