import os

import tensorflow as tf


def get_dataset(dir, batch_size, num_epochs, reshape_size, num_readers=1,
                augment_func=inception_augmentation, shuffle=1000):
    if not num_epochs:
        num_epochs = None
    filenames = [os.path.join(dir, i) for i in os.listdir(dir)]
    with tf.name_scope('input'):
        # TFRecordDataset opens the record files and reads serialized examples
        # record by record; the argument could also be a [list, of, filenames]
        dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=num_readers)
        # dataset = tf.data.TFRecordDataset(filenames)
        dataset = dataset.repeat(num_epochs)

        # map takes a python function and applies it to every sample
        dataset = dataset.map(decode)
        dataset = dataset.map(extract)
        dataset = dataset.map(cast_type)
        if augment_func is not None:
            print('Enabling Augmentation...')
            dataset = dataset.map(augment_func)
        dataset = dataset.map(set_parameter(reshape, reshape_size=reshape_size))
        if shuffle is not None:
            # the parameter is the shuffle buffer size
            dataset = dataset.shuffle(shuffle + 3 * batch_size)
        dataset = dataset.batch(batch_size)
    return dataset
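# `set_parameter(reshape, reshape_size=reshape_size)` above returns a map
# function with an extra keyword argument already bound. The helper is not
# shown in this snippet; the sketch below is only an assumption about its
# shape, not the project's actual implementation. functools.partial covers
# the same need.
import functools


def set_parameter(map_func, **kwargs):
    """Bind keyword arguments (e.g. reshape_size) to a dataset.map() function."""
    return functools.partial(map_func, **kwargs)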
def get_dataset(dir, batch_size, num_epochs, reshape_size=[300, 300], padding='SAME'):
    if not num_epochs:
        num_epochs = None
    filenames = [os.path.join(dir, i) for i in os.listdir(dir)]
    with tf.name_scope('input'):
        # TFRecordDataset opens the record files and reads serialized examples
        # record by record; the argument could also be a [list, of, filenames]
        dataset = tf.data.TFRecordDataset(filenames)
        dataset = dataset.repeat(num_epochs)

        # map takes a python function and applies it to every sample
        dataset = dataset.map(decode)
        dataset = dataset.map(extract)
        dataset = dataset.map(cast_type)
        dataset = dataset.map(augment)
        dataset = dataset.map(normalize)
        dataset = dataset.map(set_parameter(reshape, reshape_size=reshape_size))

        # the parameter is the shuffle buffer size
        dataset = dataset.shuffle(1000 + 3 * batch_size)
        # 'VALID' drops the final partial batch, 'SAME' keeps it
        dataset = dataset.batch(batch_size, drop_remainder=(padding == 'VALID'))
    return dataset
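# `decode`, `extract`, `cast_type`, `augment`, `normalize` and `reshape` are
# map functions defined elsewhere in the project. Purely as a hypothetical
# sketch of what the decode step could look like for JPEG-encoded TFRecords:
# the feature keys 'image/encoded' and 'image/label' are assumptions, not the
# schema actually used here.
def decode_example(serialized_example):
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image/encoded': tf.FixedLenFeature([], tf.string),
            'image/label': tf.FixedLenFeature([], tf.int64),
        })
    # decode the raw JPEG bytes into an HxWx3 uint8 tensor
    image = tf.image.decode_jpeg(features['image/encoded'], channels=3)
    label = tf.cast(features['image/label'], tf.int32)
    return image, label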
def get_dataset(dir, batch_size, num_epochs, reshape_size, padding='SAME',
                do_normalize=True):
    """Reads input data num_epochs times and returns it as a tf.data.Dataset.

    Args:
      dir: Directory containing the TFRecord files to read.
      batch_size: Number of examples per returned batch.
      num_epochs: Number of times to read the input data, or 0/None to
        train forever.
      reshape_size: Target size passed to the `reshape` map function.
      padding: if 'SAME', produces ceil(#samples / batch_size) * num_epochs
        batches (the final partial batch is kept);
        if 'VALID', produces floor(#samples / batch_size) * num_epochs
        batches (the final partial batch is dropped).
      do_normalize: whether to apply the `normalize` map function.

    Returns:
      A tf.data.Dataset that yields (images, labels) batches, where images is
      a float tensor with a leading dimension of batch_size and labels is an
      int32 tensor with shape [batch_size] holding the true labels.

      The dataset can be consumed with a one_shot_iterator, meaning that it
      will only be iterated over once; on the other hand no special
      initialization is required.
    """
    if not num_epochs:
        num_epochs = None
    filenames = [os.path.join(dir, i) for i in os.listdir(dir)]
    with tf.name_scope('input'):
        # TFRecordDataset opens the record files and reads serialized examples
        # record by record; the argument could also be a [list, of, filenames]
        dataset = tf.data.TFRecordDataset(filenames)
        dataset = dataset.repeat(num_epochs)

        # map takes a python function and applies it to every sample
        dataset = dataset.map(decode)
        dataset = dataset.map(extract)
        dataset = dataset.map(cast_type)
        dataset = dataset.map(augment)
        # the flag is named `do_normalize` so it does not shadow the
        # module-level `normalize` map function
        if do_normalize:
            dataset = dataset.map(normalize)
        dataset = dataset.map(set_parameter(reshape, reshape_size=reshape_size))

        # the parameter is the shuffle buffer size
        dataset = dataset.shuffle(1000 + 3 * batch_size)
        # 'VALID' drops the final partial batch, 'SAME' keeps it
        dataset = dataset.batch(batch_size, drop_remainder=(padding == 'VALID'))
    return dataset
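# Usage sketch for the version above, consuming the returned dataset through a
# one-shot iterator as described in the docstring. The directory path is a
# placeholder, and the TF 1.x graph/session style matches the rest of this
# snippet.
if __name__ == '__main__':
    dataset = get_dataset('/path/to/tfrecords', batch_size=32, num_epochs=1,
                          reshape_size=[300, 300], padding='SAME')
    images, labels = dataset.make_one_shot_iterator().get_next()
    with tf.Session() as sess:
        try:
            while True:
                batch_images, batch_labels = sess.run([images, labels])
        except tf.errors.OutOfRangeError:
            # raised once the single epoch (num_epochs=1) is exhausted
            pass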