def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1):
  """Build the tf.data pipeline that feeds training or evaluation.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.

  Returns:
    A dataset that can be used for iteration.
  """
  record_files = get_filenames(is_training, data_dir)
  dataset = tf.data.Dataset.from_tensor_slices(record_files)

  if is_training:
    # Randomize the order in which the input files are read.
    dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)

  # Expand each file into its individual TFRecord examples.
  dataset = dataset.flat_map(tf.data.TFRecordDataset)

  return resnet.process_record_dataset(
      dataset, is_training, batch_size, _SHUFFLE_BUFFER, parse_record,
      num_epochs, num_parallel_calls)
def input_fn(is_training, data, labels, batch_size, num_epochs=1,
             num_parallel_calls=1, multi_gpu=False):
  """Input function which provides batches for train or eval.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data: The array of input examples, sliced in lockstep with `labels`.
    labels: The array of labels corresponding to `data`.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed when that is
      handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
  dataset = tf.data.Dataset.from_tensor_slices((data, labels))

  if is_training:
    # Shuffle the in-memory records so each epoch sees a fresh ordering.
    dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)

  num_images = len(labels)

  # Forward examples_per_epoch and multi_gpu so batch leftovers are handled
  # consistently with the other input_fn variants; previously multi_gpu was
  # accepted (and documented) but silently ignored.
  return resnet.process_record_dataset(
      dataset, is_training, batch_size, num_images, parse_record,
      num_epochs, num_parallel_calls, examples_per_epoch=num_images,
      multi_gpu=multi_gpu)
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1):
  """Build the CIFAR-10 tf.data pipeline for training or evaluation.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.

  Returns:
    A dataset that can be used for iteration.
  """
  record_files = get_filenames(is_training, data_dir)
  # CIFAR-10 binaries are fixed-width records, so no per-record framing
  # is needed — read them directly by byte count.
  raw_records = tf.data.FixedLengthRecordDataset(record_files, _RECORD_BYTES)

  return resnet.process_record_dataset(
      raw_records, is_training, batch_size, _NUM_IMAGES['train'],
      parse_record, num_epochs, num_parallel_calls)
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1, multi_gpu=False):
  """Input function which provides batches for train or eval.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed when that is
      handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  dataset = tf.data.Dataset.from_tensor_slices(filenames)

  if is_training:
    # Shuffle the input files
    dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)

  # Use a conditional expression rather than the fragile `and/or` idiom,
  # which would silently pick the wrong value if the first operand were falsy.
  num_images = _NUM_IMAGES['train'] if is_training else _NUM_IMAGES['validation']

  # Convert to individual records
  dataset = dataset.flat_map(tf.data.TFRecordDataset)

  return resnet.process_record_dataset(
      dataset, is_training, batch_size, _SHUFFLE_BUFFER, parse_record,
      num_epochs, num_parallel_calls, examples_per_epoch=num_images,
      multi_gpu=multi_gpu)
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1, multi_gpu=False):
  """Input_fn using the tf.data input pipeline for CIFAR-10 dataset.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed when that is
      handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)

  # Use a conditional expression rather than the fragile `and/or` idiom,
  # which would silently pick the wrong value if the first operand were falsy.
  num_images = _NUM_IMAGES['train'] if is_training else _NUM_IMAGES['validation']

  # The shuffle buffer intentionally covers the full training set.
  return resnet.process_record_dataset(
      dataset, is_training, batch_size, _NUM_IMAGES['train'], parse_record,
      num_epochs, num_parallel_calls, examples_per_epoch=num_images,
      multi_gpu=multi_gpu)
def input_fn(is_training, data_dir, channel_name, batch_size, num_epochs=1,
             num_parallel_calls=1, multi_gpu=False, mode='File'):
  """Input function which provides batches for train or eval.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data (used in 'File' mode).
    channel_name: Name of the input channel consumed by the record generator
      (used when `mode` is not 'File').
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed when that is
      handled directly by Estimator.
    mode: 'File' reads TFRecord files from data_dir; any other value streams
      serialized records from TFRecordDatasetGenerator(channel_name).

  Returns:
    A dataset that can be used for iteration.
  """
  dataset = None
  num_images = is_training and _NUM_IMAGES['train'] or _NUM_IMAGES[
      'validation']
  if mode == 'File':
    # NOTE(review): unlike the other input_fn variants, get_filenames here
    # takes only data_dir — confirm this matches the local helper's signature.
    filenames = get_filenames(data_dir)
    dataset = tf.data.Dataset.from_tensor_slices(filenames)

    # Convert to individual records
    dataset = dataset.flat_map(tf.data.TFRecordDataset)
  else:
    # Stream serialized records (tf.string tensors) from the channel.
    # NOTE(review): bare `Dataset` — presumably tf.data.Dataset imported
    # at module level; verify the import exists.
    generator = TFRecordDatasetGenerator(channel_name)
    dataset = Dataset.from_generator(generator, tf.string)

  # NOTE(review): this call omits the shuffle-buffer positional argument that
  # the sibling variants pass — confirm it matches this file's
  # resnet.process_record_dataset signature.
  return resnet.process_record_dataset(dataset, is_training, batch_size,
                                       parse_record, num_epochs,
                                       num_parallel_calls,
                                       examples_per_epoch=num_images,
                                       multi_gpu=multi_gpu)
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1, multi_gpu=False):
  """Input_fn using the tf.data input pipeline for CIFAR-10 dataset.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed when that is
      handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)

  # Use a conditional expression rather than the fragile `and/or` idiom,
  # which would silently pick the wrong value if the first operand were falsy.
  num_images = _NUM_IMAGES['train'] if is_training else _NUM_IMAGES['validation']

  # The shuffle buffer intentionally covers the full training set.
  return resnet.process_record_dataset(
      dataset, is_training, batch_size, _NUM_IMAGES['train'], parse_record,
      num_epochs, num_parallel_calls, examples_per_epoch=num_images,
      multi_gpu=multi_gpu)