Example #1
def get_dataset(dir,
                batch_size,
                num_epochs,
                reshape_size,
                num_readers=1,
                augment_func=inception_augmentation,
                shuffle=1000):
    if not num_epochs:
        num_epochs = None
    filenames = [os.path.join(dir, i) for i in os.listdir(dir)]

    with tf.name_scope('input'):
        # TFRecordDataset opens the TFRecord files and yields the serialized
        # records one at a time; the argument can be a single filename or a
        # [list, of, filenames]
        dataset = tf.data.TFRecordDataset(filenames,
                                          num_parallel_reads=num_readers)
        dataset = dataset.repeat(num_epochs)

        # map takes a python function and applies it to every sample
        dataset = dataset.map(decode)
        dataset = dataset.map(extract)
        dataset = dataset.map(cast_type)

        if augment_func is not None:
            print('Enabling Augmentation...')
            dataset = dataset.map(augment_func)
        dataset = dataset.map(set_parameter(reshape,
                                            reshape_size=reshape_size))

        if shuffle is not None:
            # the argument is the shuffle buffer size
            dataset = dataset.shuffle(shuffle + 3 * batch_size)
        dataset = dataset.batch(batch_size)
    return dataset
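
The pipeline above leans on helpers (decode, extract, cast_type, reshape, set_parameter, inception_augmentation) that are defined elsewhere in the module. As a minimal sketch of what three of them could look like, assuming the records hold an 'image_raw' bytes feature and an int64 'label' (placeholder names) and using the tf.io aliases from recent TensorFlow releases:

import functools

import tensorflow as tf


def decode(serialized_example):
    # Parse one serialized tf.train.Example; the feature keys here are
    # placeholders for whatever keys the TFRecords were written with.
    features = tf.io.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.io.FixedLenFeature([], tf.string),
            'label': tf.io.FixedLenFeature([], tf.int64),
        })
    image = tf.io.decode_raw(features['image_raw'], tf.uint8)
    return image, features['label']


def set_parameter(func, **kwargs):
    # Bind keyword arguments so the wrapped function still matches the
    # (image, label) -> (image, label) signature that Dataset.map expects.
    return functools.partial(func, **kwargs)


def reshape(image, label, reshape_size=(300, 300)):
    # Give the flat pixel buffer a [height, width, channels] layout.
    image = tf.reshape(image, [reshape_size[0], reshape_size[1], 3])
    return image, label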
Example #2
def get_dataset(dir,
                batch_size,
                num_epochs,
                reshape_size=[300, 300],
                padding='SAME'):
    if not num_epochs:
        num_epochs = None
    filenames = [os.path.join(dir, i) for i in os.listdir(dir)]

    with tf.name_scope('input'):
        # TFRecordDataset opens the TFRecord files and yields the serialized
        # records one at a time; the argument can be a single filename or a
        # [list, of, filenames]
        dataset = tf.data.TFRecordDataset(filenames)
        dataset = dataset.repeat(num_epochs)

        # map takes a python function and applies it to every sample
        dataset = dataset.map(decode)
        dataset = dataset.map(extract)
        dataset = dataset.map(cast_type)
        dataset = dataset.map(augment)
        dataset = dataset.map(normalize)
        dataset = dataset.map(set_parameter(reshape,
                                            reshape_size=reshape_size))

        # the argument is the shuffle buffer size
        dataset = dataset.shuffle(1000 + 3 * batch_size)
        dataset = dataset.batch(batch_size)
    return dataset
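
As a hedged usage sketch for the function above, assuming a TF 1.x-style graph/session runtime and with 'train_records/' standing in for a real directory of TFRecord files, the returned dataset can be consumed through a one-shot iterator:

import tensorflow as tf

dataset = get_dataset('train_records/', batch_size=32, num_epochs=1)
iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
images, labels = iterator.get_next()

with tf.compat.v1.Session() as sess:
    try:
        while True:
            batch_images, batch_labels = sess.run([images, labels])
            # ... feed batch_images / batch_labels to the model here ...
    except tf.errors.OutOfRangeError:
        pass  # raised once the requested number of epochs is exhausted

Under TensorFlow 2's eager execution the same dataset can instead be iterated directly with a Python for loop (for images, labels in dataset: ...).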
Example #3
def get_dataset(dir,
                batch_size,
                num_epochs,
                reshape_size,
                padding='SAME',
                normalize_data=True):
    """Reads input data num_epochs times. AND Return the dataset

    Args:
      train: Selects between the training (True) and validation (False) data.
      batch_size: Number of examples per returned batch.
      num_epochs: Number of times to read the input data, or 0/None to
         train forever.
      padding:  if 'SAME' , have ceil(#samples / batch_size) * epoch_nums batches
                if 'VALID', have floor(#samples / batch_size) * epoch_nums batches
      normalize: whether normalize

    Returns:
      A tuple (images, labels), where:
      * images is a float tensor with shape [batch_size, mnist.IMAGE_PIXELS]
        in the range [-0.5, 0.5].
      * labels is an int32 tensor with shape [batch_size] with the true label,
        a number in the range [0, mnist.NUM_CLASSES).

      This function creates a one_shot_iterator, meaning that it will only iterate
      over the dataset once. On the other hand there is no special initialization
      required.
    """
    if not num_epochs:
        num_epochs = None
    filenames = [os.path.join(dir, i) for i in os.listdir(dir)]

    with tf.name_scope('input'):
        # TFRecordDataset opens the TFRecord files and yields the serialized
        # records one at a time; the argument can be a single filename or a
        # [list, of, filenames]
        dataset = tf.data.TFRecordDataset(filenames)
        dataset = dataset.repeat(num_epochs)

        # map takes a python function and applies it to every sample
        dataset = dataset.map(decode)
        dataset = dataset.map(extract)
        dataset = dataset.map(cast_type)
        dataset = dataset.map(augment)
        if normalize_data:
            dataset = dataset.map(normalize)
        dataset = dataset.map(set_parameter(reshape,
                                            reshape_size=reshape_size))

        # the argument is the shuffle buffer size
        dataset = dataset.shuffle(1000 + 3 * batch_size)
        dataset = dataset.batch(batch_size)
    return dataset
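
The augment and normalize steps mapped above are likewise defined outside these snippets. The sketch below shows one plausible shape for them, assuming the image tensor already has a [height, width, channels] layout at that point in the pipeline; the flip and the scaling constants are illustrative choices, not the module's actual transforms (the [-0.5, 0.5] range follows the docstring above):

import tensorflow as tf


def augment(image, label):
    # Label-preserving augmentation; assumes image is [height, width, channels].
    image = tf.image.random_flip_left_right(image)
    return image, label


def normalize(image, label):
    # Scale uint8 pixels from [0, 255] into [-0.5, 0.5], the range the
    # docstring above describes.
    image = tf.cast(image, tf.float32) * (1.0 / 255.0) - 0.5
    return image, label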