def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1):
  """Build the tf.data pipeline that feeds training or evaluation.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.

  Returns:
    A dataset that can be used for iteration.
  """
  record_files = get_filenames(is_training, data_dir)
  dataset = tf.data.Dataset.from_tensor_slices(record_files)

  if is_training:
    # Randomize the order in which the input files are read.
    dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)

  # Expand each file into its individual TFRecord examples.
  dataset = dataset.flat_map(tf.data.TFRecordDataset)

  return resnet.process_record_dataset(
      dataset, is_training, batch_size, _SHUFFLE_BUFFER, parse_record,
      num_epochs, num_parallel_calls)
def input_fn(is_training, data, labels, batch_size, num_epochs=1,
             num_parallel_calls=1, multi_gpu=False):
  """Input function which provides batches for train or eval.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data: The array of input examples, sliced in lockstep with `labels`.
    labels: The array of labels corresponding to `data`.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed when that is
      handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
  dataset = tf.data.Dataset.from_tensor_slices((data, labels))

  if is_training:
    # Shuffle the in-memory records so each epoch sees a fresh ordering.
    dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)

  num_images = len(labels)

  # Forward examples_per_epoch and multi_gpu so batch leftovers are handled
  # consistently with the other input_fn variants; previously multi_gpu was
  # accepted (and documented) but silently ignored.
  return resnet.process_record_dataset(
      dataset, is_training, batch_size, num_images, parse_record,
      num_epochs, num_parallel_calls, examples_per_epoch=num_images,
      multi_gpu=multi_gpu)
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1):
  """Build the CIFAR-10 tf.data pipeline for training or evaluation.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.

  Returns:
    A dataset that can be used for iteration.
  """
  record_files = get_filenames(is_training, data_dir)
  # CIFAR-10 binaries are fixed-width records, so no per-record framing
  # is needed — read them directly by byte count.
  raw_records = tf.data.FixedLengthRecordDataset(record_files, _RECORD_BYTES)

  return resnet.process_record_dataset(
      raw_records, is_training, batch_size, _NUM_IMAGES['train'],
      parse_record, num_epochs, num_parallel_calls)
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1, multi_gpu=False):
  """Input function which provides batches for train or eval.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed when that is
      handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  dataset = tf.data.Dataset.from_tensor_slices(filenames)

  if is_training:
    # Shuffle the input files
    dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)

  # Use a conditional expression rather than the fragile `and/or` idiom,
  # which would silently pick the wrong value if the first operand were falsy.
  num_images = _NUM_IMAGES['train'] if is_training else _NUM_IMAGES['validation']

  # Convert to individual records
  dataset = dataset.flat_map(tf.data.TFRecordDataset)

  return resnet.process_record_dataset(
      dataset, is_training, batch_size, _SHUFFLE_BUFFER, parse_record,
      num_epochs, num_parallel_calls, examples_per_epoch=num_images,
      multi_gpu=multi_gpu)
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1, multi_gpu=False):
  """Input_fn using the tf.data input pipeline for CIFAR-10 dataset.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed when that is
      handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)

  # Use a conditional expression rather than the fragile `and/or` idiom,
  # which would silently pick the wrong value if the first operand were falsy.
  num_images = _NUM_IMAGES['train'] if is_training else _NUM_IMAGES['validation']

  # The shuffle buffer intentionally covers the full training set.
  return resnet.process_record_dataset(
      dataset, is_training, batch_size, _NUM_IMAGES['train'], parse_record,
      num_epochs, num_parallel_calls, examples_per_epoch=num_images,
      multi_gpu=multi_gpu)
def input_fn(is_training, data_dir, channel_name, batch_size, num_epochs=1,
             num_parallel_calls=1, multi_gpu=False, mode='File'):
  """Input function which provides batches for train or eval.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data (used in 'File' mode).
    channel_name: Name of the input channel consumed by the record generator
      (used when `mode` is not 'File').
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed when that is
      handled directly by Estimator.
    mode: 'File' reads TFRecord files from data_dir; any other value streams
      serialized records from TFRecordDatasetGenerator(channel_name).

  Returns:
    A dataset that can be used for iteration.
  """
  dataset = None
  num_images = is_training and _NUM_IMAGES['train'] or _NUM_IMAGES[
      'validation']
  if mode == 'File':
    # NOTE(review): unlike the other input_fn variants, get_filenames here
    # takes only data_dir — confirm this matches the local helper's signature.
    filenames = get_filenames(data_dir)
    dataset = tf.data.Dataset.from_tensor_slices(filenames)

    # Convert to individual records
    dataset = dataset.flat_map(tf.data.TFRecordDataset)
  else:
    # Stream serialized records (tf.string tensors) from the channel.
    # NOTE(review): bare `Dataset` — presumably tf.data.Dataset imported
    # at module level; verify the import exists.
    generator = TFRecordDatasetGenerator(channel_name)
    dataset = Dataset.from_generator(generator, tf.string)

  # NOTE(review): this call omits the shuffle-buffer positional argument that
  # the sibling variants pass — confirm it matches this file's
  # resnet.process_record_dataset signature.
  return resnet.process_record_dataset(dataset, is_training, batch_size,
                                       parse_record, num_epochs,
                                       num_parallel_calls,
                                       examples_per_epoch=num_images,
                                       multi_gpu=multi_gpu)
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1, multi_gpu=False):
  """Input_fn using the tf.data input pipeline for CIFAR-10 dataset.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed when that is
      handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)

  # Use a conditional expression rather than the fragile `and/or` idiom,
  # which would silently pick the wrong value if the first operand were falsy.
  num_images = _NUM_IMAGES['train'] if is_training else _NUM_IMAGES['validation']

  # The shuffle buffer intentionally covers the full training set.
  return resnet.process_record_dataset(
      dataset, is_training, batch_size, _NUM_IMAGES['train'], parse_record,
      num_epochs, num_parallel_calls, examples_per_epoch=num_images,
      multi_gpu=multi_gpu)