Example #1
    def __init__(self, raw_cifar10data, sess, model):
        assert isinstance(raw_cifar10data, CIFAR10Data)
        self.image_size = 32

        # create augmentation computational graph
        self.x_input_placeholder = tf.placeholder(tf.float32,
                                                  shape=[None, 32, 32, 3])
        padded = tf.map_fn(
            lambda img: tf.image.resize_image_with_crop_or_pad(
                img, self.image_size + 4, self.image_size + 4),
            self.x_input_placeholder)
        cropped = tf.map_fn(
            lambda img: tf.random_crop(
                img, [self.image_size, self.image_size, 3]), padded)
        flipped = tf.map_fn(lambda img: tf.image.random_flip_left_right(img),
                            cropped)
        self.augmented = flipped

        self.train_data = AugmentedDataSubset(raw_cifar10data.train_data, sess,
                                              self.x_input_placeholder,
                                              self.augmented)
        self.eval_data = AugmentedDataSubset(raw_cifar10data.eval_data, sess,
                                             self.x_input_placeholder,
                                             self.augmented)
        self.label_names = raw_cifar10data.label_names
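A minimal usage sketch for the graph built above, assuming `cifar` is an instance of this class and `raw_batch` is a float32 array of shape [batch, 32, 32, 3] (both names are illustrative, not from the original):

# Hypothetical usage: run the augmentation graph on a raw image batch.
augmented_batch = sess.run(cifar.augmented,
                           feed_dict={cifar.x_input_placeholder: raw_batch})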
Example #2
def create_input_pipeline(input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder):
    images_and_labels_list = []
    for _ in range(nrof_preprocess_threads):
        filenames, label, control = input_queue.dequeue()
        images = []
        for filename in tf.unstack(filenames):
            file_contents = tf.read_file(filename)
            image = tf.image.decode_image(file_contents, 3)
            image = tf.cond(get_control_flag(control[0], RANDOM_ROTATE),
                            lambda: tf.py_func(random_rotate_image, [image], tf.uint8),
                            lambda: tf.identity(image))
            image = tf.cond(get_control_flag(control[0], RANDOM_CROP),
                            lambda: tf.random_crop(image, image_size + (3,)),
                            lambda: tf.image.resize_image_with_crop_or_pad(image, image_size[0], image_size[1]))
            image = tf.cond(get_control_flag(control[0], RANDOM_FLIP),
                            lambda: tf.image.random_flip_left_right(image),
                            lambda: tf.identity(image))
            image = tf.cond(get_control_flag(control[0], FIXED_STANDARDIZATION),
                            lambda: (tf.cast(image, tf.float32) - 127.5) / 128.0,
                            lambda: tf.image.per_image_standardization(image))
            image = tf.cond(get_control_flag(control[0], FLIP),
                            lambda: tf.image.flip_left_right(image),
                            lambda: tf.identity(image))
            # pylint: disable=no-member
            image.set_shape(image_size + (3,))
            images.append(image)
        images_and_labels_list.append([images, label])

    image_batch, label_batch = tf.train.batch_join(
        images_and_labels_list, batch_size=batch_size_placeholder,
        shapes=[image_size + (3,), ()], enqueue_many=True,
        capacity=4 * nrof_preprocess_threads * 100,
        allow_smaller_final_batch=True)

    return image_batch, label_batch
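The helper get_control_flag is not defined in this snippet; a plausible implementation, assuming the control value packs boolean options as power-of-two bit flags (constants such as RANDOM_ROTATE = 1, RANDOM_CROP = 2, RANDOM_FLIP = 4, FIXED_STANDARDIZATION = 8, FLIP = 16 are assumptions, not shown in the source):

def get_control_flag(control, field):
    # Hypothetical helper: check whether the bit `field` is set in `control`.
    return tf.equal(tf.mod(tf.floor_div(control, field), 2), 1)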
Example #3
def augment(images, height, width):
    """Randomly flips and crops `images` to `height` by `width`."""
    size = [height, width, tf.shape(images)[-1]]
    images = tf.random_crop(images, size)
    images = tf.image.random_flip_left_right(images)
    images = tf.image.random_flip_up_down(images)
    return images
Example #4
  def preprocess_example(self, example, mode, hparams):

    # Crop to target shape instead of down-sampling target, leaving target
    # of maximum available resolution.
    target_shape = (self.output_dim, self.output_dim, self.num_channels)
    example["targets"] = tf.random_crop(example["targets"], target_shape)

    example["inputs"] = image_utils.resize_by_area(example["targets"],
                                                   self.input_dim)

    if self.inpaint_fraction is not None and self.inpaint_fraction > 0:

      if self.input_dim is None:
        raise ValueError("Cannot train in-painting for examples with "
                         "only targets (i.e. input_dim is None, "
                         "implying there are only targets to be "
                         "generated).")

      mask = random_square_mask((self.input_dim,
                                 self.input_dim,
                                 self.num_channels),
                                self.inpaint_fraction)

      example["inputs"] = tf.multiply(
          tf.convert_to_tensor(mask, dtype=tf.int64),
          example["inputs"])

    return example
Example #5
 def _pre_process(x):
     x = tf.pad(x, [[4, 4], [4, 4], [0, 0]])
     x = tf.random_crop(x, [32, 32, 3], seed=self.seed)
     x = tf.image.random_flip_left_right(x, seed=self.seed)
     if self.data_format == "NCHW":
         x = tf.transpose(x, [2, 0, 1])
     return x
Example #6
def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs, 
        random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True):
    
    images = ops.convert_to_tensor(image_list, dtype=tf.string)
    labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
    
    # Makes an input queue
    input_queue = tf.train.slice_input_producer([images, labels],
        num_epochs=max_nrof_epochs, shuffle=shuffle)

    images_and_labels = []
    for _ in range(nrof_preprocess_threads):
        image, label = read_images_from_disk(input_queue)
        if random_rotate:
            image = tf.py_func(random_rotate_image, [image], tf.uint8)
        if random_crop:
            image = tf.random_crop(image, [image_size, image_size, 3])
        else:
            image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size)
        if random_flip:
            image = tf.image.random_flip_left_right(image)
        #pylint: disable=no-member
        image.set_shape((image_size, image_size, 3))
        image = tf.image.per_image_standardization(image)
        images_and_labels.append([image, label])

    image_batch, label_batch = tf.train.batch_join(
        images_and_labels, batch_size=batch_size,
        capacity=4 * nrof_preprocess_threads * batch_size,
        allow_smaller_final_batch=True)
  
    return image_batch, label_batch
Example #7
def aug_train(image, aux):
    aug_image = tf.pad(image, [[4, 4], [4, 4], [0, 0]])
    aug_image = tf.random_crop(aug_image, [32, 32, 3])
    aug_image = tf.image.random_flip_left_right(aug_image)
    aug_image = tf.image.random_contrast(aug_image, 0.75, 1.25)
    aug_image = (aug_image - aux['mean']) / aux['std']
    return aug_image
Example #8
def augmentation(image, aug_img_h, aug_img_w):
    seed = random.randint(0, 2**31 - 1)
    ori_image_shape = tf.shape(image)
    image = tf.image.random_flip_left_right(image, seed=seed)
    image = tf.image.resize_images(image, [aug_img_h, aug_img_w])
    image = tf.random_crop(image, ori_image_shape, seed=seed)
    return image
Example #9
def batch_random_crop(images_list, crop_height, crop_width):
  """Apply efficient batch data transformations.

  Args:
    images_list: a list of image tensors.
    crop_height: the height of the crop to make.
    crop_width: the width of the crop to make.

  Returns:
    Feature list where images_list[0] is unchanged and images in images_list[1] have all been cropped.
  """

  images_list[1] = tf.random_crop(images_list[1], [FLAGS.train_batch_size, crop_height, crop_width, 3])
  # every image in the batch is cropped identically, but separate batches will have differing crops

  # to crop every image in every batch differently (a working variant is
  # sketched after this example):
  # individual_imgs = tf.split(images_list[1], num_or_size_splits=FLAGS.train_batch_size, axis=0)
  # print('individual_imgs[0].shape', individual_imgs[0].shape)
  # for idx, img in individual_imgs:
  #   individual_imgs[idx] = tf.random_crop(img, [1, crop_height, crop_width, 3])
  # cropped_batch = tf.concat(individual_imgs, 0)
  # print('cropped_batch shape', cropped_batch.shape)
  # above gives error: OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed:
  # AutoGraph did not convert this function. Try decorating it directly with @tf.function.
  # even with @tf.function decoration above def batch_random_crop()

  return images_list
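A working sketch of the per-image variant described in the comment above, assuming FLAGS.train_batch_size is a plain Python int; the original error comes from unpacking `for idx, img in individual_imgs`, which iterates over each split tensor, so iterating the Python list directly avoids it:

def batch_random_crop_per_image(images, crop_height, crop_width, batch_size):
  # Hypothetical variant: crop every image in the batch independently.
  individual_imgs = tf.split(images, num_or_size_splits=batch_size, axis=0)
  cropped = [tf.random_crop(img, [1, crop_height, crop_width, 3])
             for img in individual_imgs]
  return tf.concat(cropped, axis=0)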
Example #10
 def preprocess(self, image):
     """Preprocesses a single image in [height, width, depth] layout."""
     if self.subset == 'train' and self.use_distortion:
         # Pad 4 pixels on each dimension of feature map, done in mini-batch
         image = tf.image.resize_image_with_crop_or_pad(image, 40, 40)
         image = tf.random_crop(image, [32, 32, 3])
         image = tf.image.random_flip_left_right(image)
     return image
Example #11
 def random_crop(image, segmentation):
     # crop out [128,128] shape
     crop_shape = [constants.SIZE, constants.SIZE, 2]
     cropped = tf.random_crop(
         tf.stack([image, segmentation], axis=2),
         crop_shape)
     image, segmentation = tf.split(cropped, 2, axis=2)
     return image, segmentation
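Because the snippet stacks along axis=2, `image` and `segmentation` are each assumed to be single-channel [H, W] tensors of the same spatial size, so one random crop applies to both; an illustrative call (variable names are hypothetical):

image_crop, segmentation_crop = random_crop(image_tensor, segmentation_tensor)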
Example #12
def basic_augmentation(image, image_height, image_width, seed=None):
  """Augment image according to NasNet paper (random flip + random crop)."""

  # source: https://arxiv.org/pdf/1707.07012.pdf appendix A.1
  padding = 4
  image = tf.image.random_flip_left_right(image, seed=seed)

  image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])
  image = tf.random_crop(image, [image_height, image_width, 3], seed=seed)
  return image
Example #13
def augment_cifar(batch_data, is_training=False):
  image = batch_data
  if is_training:
    image = tf.image.resize_image_with_crop_or_pad(batch_data, 32 + 8, 32 + 8)
    i = image.get_shape().as_list()[0]
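    # Note: cropping the batched tensor picks one random offset, so every
    # image in the batch shares the same crop window.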
    image = tf.random_crop(image, [i, 32, 32, 3])
    image = tf.image.random_flip_left_right(image)
  image = tf.image.per_image_standardization(image)

  return image
Example #14
def _read_and_decode(filename_queue, image_pixel=96, distort=0):
  """Read a norb tf record file."""
  reader = tf.TFRecordReader()
  _, serialized_example = reader.read(filename_queue)
  features = tf.parse_single_example(
      serialized_example,
      # Defaults are not specified since both keys are required.
      features={
          'image_raw': tf.FixedLenFeature([], tf.string),
          'label': tf.FixedLenFeature([], tf.int64),
          'height': tf.FixedLenFeature([], tf.int64),
          'width': tf.FixedLenFeature([], tf.int64),
          'depth': tf.FixedLenFeature([], tf.int64),
          'meta': tf.FixedLenFeature([4], tf.int64),
      })

  # Convert from a scalar string tensor (whose single string has
  # length image_pixels) to a uint8 tensor with shape
  # [image_pixels].
  image = tf.decode_raw(features['image_raw'], tf.uint8)
  height = tf.cast(features['height'], tf.int32)
  depth = tf.cast(features['depth'], tf.int32)
  image = tf.reshape(image, tf.stack([depth, height, height]))
  image = tf.transpose(image, [1, 2, 0])
  image = tf.cast(image, tf.float32)
  if image_pixel < 96:
    print('image resizing to {}'.format(image_pixel))
    image = tf.image.resize_images(image, [image_pixel, image_pixel])
    orig_images = image

  if image_pixel == 48:
    new_dim = 32
  elif image_pixel == 32:
    new_dim = 22
  if distort == 1:
    image = tf.image.random_brightness(image, max_delta=63)
    image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
    image = tf.random_crop(image, tf.stack([new_dim, new_dim, depth]))
    # 0.26179938779 is 15 degrees in radians
    image = tf.image.per_image_standardization(image)
    image_pixel = new_dim
  elif distort == 2:
    image = tf.image.resize_image_with_crop_or_pad(image, new_dim, new_dim)
    image = tf.image.per_image_standardization(image)
    image_pixel = new_dim
  else:
    image = image * (1.0 / 255.0)
    image = tf.div(
        tf.subtract(image, tf.reduce_min(image)),
        tf.subtract(tf.reduce_max(image), tf.reduce_min(image)))

  # Convert label from a scalar uint8 tensor to an int32 scalar.
  label = tf.cast(features['label'], tf.int32)

  return image, label, image_pixel, orig_images
Example #15
def read_data(image_paths, label_list, image_size, batch_size, max_nrof_epochs, num_threads, shuffle, random_flip,
              random_brightness, random_contrast):
    """
    Creates Tensorflow Queue to batch load images. Applies transformations to images as they are loaded.
    :param random_brightness: 
    :param random_flip: 
    :param image_paths: image paths to load
    :param label_list: class labels for image paths
    :param image_size: size to resize images to
    :param batch_size: num of images to load in batch
    :param max_nrof_epochs: total number of epochs to read through image list
    :param num_threads: num threads to use
    :param shuffle: Shuffle images
    :param random_flip: Random Flip image
    :param random_brightness: Apply random brightness transform to image
    :param random_contrast: Apply random contrast transform to image
    :return: images and labels of batch_size
    """

    images = ops.convert_to_tensor(image_paths, dtype=tf.string)
    labels = ops.convert_to_tensor(label_list, dtype=tf.int32)

    # Makes an input queue
    input_queue = tf.train.slice_input_producer((images, labels),
                                                num_epochs=max_nrof_epochs, shuffle=shuffle, )

    images_labels = []
    imgs = []
    lbls = []
    for _ in range(num_threads):
        image, label = read_image_from_disk(filename_to_label_tuple=input_queue)
        image = tf.random_crop(image, size=[image_size, image_size, 3])
        image.set_shape((image_size, image_size, 3))
        image = tf.image.per_image_standardization(image)

        if random_flip:
            image = tf.image.random_flip_left_right(image)

        if random_brightness:
            image = tf.image.random_brightness(image, max_delta=0.3)

        if random_contrast:
            image = tf.image.random_contrast(image, lower=0.2, upper=1.8)

        imgs.append(image)
        lbls.append(label)
        images_labels.append([image, label])

    image_batch, label_batch = tf.train.batch_join(images_labels,
                                                   batch_size=batch_size,
                                                   capacity=4 * num_threads,
                                                   enqueue_many=False,
                                                   allow_smaller_final_batch=True)
    return image_batch, label_batch
Example #16
def crop(image, is_training, crop_size):
  h, w, c = crop_size[0], crop_size[1], image.shape[-1]

  if is_training:
    return tf.random_crop(image, [h, w, c])
  else:
    # Central crop for now. (See Table 5 in Appendix of
    # https://arxiv.org/pdf/1703.07737.pdf for why)
    dy = (tf.shape(image)[0] - h)//2
    dx = (tf.shape(image)[1] - w)//2
    return tf.image.crop_to_bounding_box(image, dy, dx, h, w)
Example #17
 def augment_each(img):
     if hps.random_crop:
         img = tf.random_crop(img, [hps.height, hps.width, hps.n_col])
     else:
         img = tf.image.central_crop(img, hps.height / hps.height_pad)
     if hps.dataset not in ['mnist', 'gts', 'svhn'] and hps.fl_mirroring:
         img = tf.image.random_flip_left_right(img)
     img = tf.image.random_brightness(img, max_delta=0.1)
     img = tf.minimum(tf.maximum(img, 0.0), 1.0)
     img = tf.image.random_contrast(img, lower=0.6, upper=1.4)
     img = tf.minimum(tf.maximum(img, 0.0), 1.0)
     return img
Example #18
def parse_function_train(image, label):
    """Sampling function for training dataset."""
    image = tf.cast(image, tf.float32)
    image_orig = tf.reshape(image, [IMG_SIZE, IMG_SIZE, NUM_CHANNELS])
    image = tf.image.resize_image_with_crop_or_pad(image_orig,
                                                   IMG_SIZE + CROP_AUG,
                                                   IMG_SIZE + CROP_AUG)
    image = tf.random_crop(image, [IMG_SIZE, IMG_SIZE, NUM_CHANNELS])
    image = tf.image.random_flip_left_right(image)
    image = tf.image.per_image_standardization(image)
    label = tf.cast(label, tf.int32)
    return image, image_orig, label
Example #19
def image_augmentation(images, do_colors=False, crop_size=None):
  """Image augmentation: cropping, flipping, and color transforms."""
  if crop_size is None:
    crop_size = [299, 299]
  images = tf.random_crop(images, crop_size + [3])
  images = tf.image.random_flip_left_right(images)
  if do_colors:  # More augmentation, but might be slow.
    images = tf.image.random_brightness(images, max_delta=32. / 255.)
    images = tf.image.random_saturation(images, lower=0.5, upper=1.5)
    images = tf.image.random_hue(images, max_delta=0.2)
    images = tf.image.random_contrast(images, lower=0.5, upper=1.5)
  return images
Example #20
def resize_and_random_crop_image(image):
    minval = FLAGS.image_size  # 224
    maxval = 280
    new_height = tf.random_uniform(shape=[],
                                   minval=minval,
                                   maxval=maxval,
                                   dtype=tf.int32)
    new_width = new_height
    resized_image = tf.image.resize_images(image, [new_height, new_width])
    crop_image = tf.random_crop(resized_image,
                                [FLAGS.image_size, FLAGS.image_size, 3])
    return crop_image
Example #21
def train_preprocess_fn(image, label):
    """Preprocess a single training image of layout [height, width, depth]."""
    # Resize the image to add four extra pixels on each side.
    image = tf.image.resize_image_with_crop_or_pad(image, _HEIGHT + 8,
                                                   _WIDTH + 8)

    # Randomly crop a [_HEIGHT, _WIDTH] section of the image.
    image = tf.random_crop(image, [_HEIGHT, _WIDTH, _DEPTH])

    # Randomly flip the image horizontally.
    image = tf.image.random_flip_left_right(image)

    return image, label
Example #22
def cifar_image_augmentation(images):
  """Image augmentation suitable for CIFAR-10/100.

  As described in https://arxiv.org/pdf/1608.06993v3.pdf (page 5).

  Args:
    images: a Tensor.
  Returns:
    Tensor of the same shape as images.
  """
  images = tf.image.resize_image_with_crop_or_pad(images, 40, 40)
  images = tf.random_crop(images, [32, 32, 3])
  images = tf.image.random_flip_left_right(images)
  return images
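An illustrative way to apply this per-image augmentation in an input pipeline (a sketch; `dataset` is assumed to be a tf.data.Dataset of single [32, 32, 3] images, which is not part of the original snippet):

# Hypothetical usage: map the augmentation over a training dataset.
dataset = dataset.map(cifar_image_augmentation)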
Example #23
 def random_image(image):
   """Randomly translates and flips images."""
   image = tf.reshape(image, init_shape)
   current_shape = init_shape
   if expand_shape is not None and expand_shape != current_shape:
     if expand_shape[-1] != current_shape[-1]:
       raise ValueError('Number channels is not specified correctly.')
     image = tf.image.resize_image_with_crop_or_pad(
         image, expand_shape[0], expand_shape[1])
     current_shape = expand_shape
   if crop_shape is not None and crop_shape != current_shape:
     image = tf.random_crop(image, crop_shape)
   if vertical_flip:
     image = tf.image.random_flip_left_right(image)
   return image
Example #24
def preprocess_train(x, width, height):
    """Pre-processing applied to training data set.

  Args:
    x: Input image float32 tensor.
    width: int specifying intended width in pixels of image after preprocessing.
    height: int specifying intended height in pixels of image after
      preprocessing.
  Returns:
    x: transformed input with random crops, flips and reflection.
  """
    x = pad_input(x, crop_dim=4)
    x = tf.random_crop(x, [width, height, 3])
    x = tf.image.random_flip_left_right(x)
    return x
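pad_input is not defined in this snippet; a plausible implementation, assuming it zero-pads crop_dim pixels on each spatial side of a [H, W, 3] image:

def pad_input(x, crop_dim):
    # Hypothetical helper: zero-pad the height and width by crop_dim on each side.
    return tf.pad(x, [[crop_dim, crop_dim], [crop_dim, crop_dim], [0, 0]])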
Example #25
def distorted_inputs(data_dir, batch_size):
    """对cifar训练集中的image数据进行变换,图像预处理
    param data_dir: 数据所处文件夹名称
    param batch_size: 批次大小
    return:
           images: 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3]
           labels: 1D tensor of [batch_size] size
    """
    filename = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
                for i in range(1, 6)]
    for f in filename:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)

    filename_queue = tf.train.string_input_producer(filename)

    # Data augmentation
    with tf.name_scope('data_augmentation'):
        read_input = read_cifar10(filename_queue)
        reshaped_image = tf.cast(read_input.uint8image, tf.float32)

        height = IMAGE_SIZE
        width = IMAGE_SIZE

        # tf.random_crop: randomly crop the input image
        distored_image = tf.random_crop(reshaped_image, [height, width, 3])
        # tf.image.random_flip_left_right: randomly flip the image horizontally
        distored_image = tf.image.random_flip_left_right(distored_image)
        # tf.image.random_brightness: randomly adjust the brightness within a range
        distored_image = tf.image.random_brightness(
            distored_image, max_delta=63)
        # tf.image.random_contrast: randomly adjust the contrast within a range
        distored_image = tf.image.random_contrast(
            distored_image, lower=0.2, upper=1.8)
        # Standardize to zero mean and unit variance across the 3-D image tensor
        float_image = tf.image.per_image_standardization(distored_image)

        float_image.set_shape([height, width, 3])
        read_input.label.set_shape([1])

        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(
            NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN * min_fraction_of_examples_in_queue)
    image_batch, label_batch = tf.train.shuffle_batch([float_image, read_input.label], batch_size=batch_size,
                                                      capacity=min_queue_examples + 3 * batch_size,
                                                      min_after_dequeue=min_queue_examples)
    tf.summary.image('image_batch_train', image_batch)
    return image_batch, tf.reshape(label_batch, [batch_size])
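A minimal sketch of consuming this queue-based pipeline under TF1 (the data directory path and batch size are illustrative):

# Hypothetical usage: pull one augmented batch from the input queue.
images, labels = distorted_inputs('cifar-10-batches-bin', batch_size=128)
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    image_batch, label_batch = sess.run([images, labels])
    coord.request_stop()
    coord.join(threads)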
Example #26
def preprocess_for_train(image,
                         output_height,
                         output_width,
                         padding=_PADDING,
                         add_image_summaries=True,
                         use_grayscale=False):
    """Preprocesses the given image for training.

  Note that the actual resizing scale is sampled from
    [`resize_size_min`, `resize_size_max`].

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    padding: The amount of padding before and after each dimension of the image.
    add_image_summaries: Enable image summaries.
    use_grayscale: Whether to convert the image from RGB to grayscale.

  Returns:
    A preprocessed image.
  """
    if add_image_summaries:
        tf.summary.image('image', tf.expand_dims(image, 0))

    # Transform the image to floats.
    image = tf.to_float(image)
    if use_grayscale:
        image = tf.image.rgb_to_grayscale(image)
    if padding > 0:
        image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])
    # Randomly crop a [height, width] section of the image.
    distorted_image = tf.random_crop(image, [output_height, output_width, 3])

    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    if add_image_summaries:
        tf.summary.image('distorted_image', tf.expand_dims(distorted_image, 0))

    # Because these operations are not commutative, consider randomizing
    # the order of their operations.
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2,
                                               upper=1.8)
    # Subtract off the mean and divide by the variance of the pixels.
    return tf.image.per_image_standardization(distorted_image)
Example #27
def _read_and_decode(filename_queue, image_pixel=28, distort=0):
    """Read tf records of MNIST images and labels."""
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'depth': tf.FixedLenFeature([], tf.int64)
        })

    # Convert from a scalar string tensor (whose single string has
    # length image_pixels) to a uint8 tensor with shape
    # [image_pixels].
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.reshape(image, [image_pixel, image_pixel, 1])
    print(image.get_shape()[0].value)
    image.set_shape([image_pixel, image_pixel, 1])

    # OPTIONAL: Could reshape into a 28x28 image and apply distortions
    # here.  Since we are not applying any distortions in this
    # example, and the next step expects the image to be flattened
    # into a vector, we don't bother.

    # Convert from [0, 255] -> [-0.5, 0.5] floats.
    image = tf.cast(image, tf.float32) * (1. / 255)
    if distort == 1:
        image = tf.reshape(image, [28, 28])
        image = tf.random_crop(image, [24, 24])
        # 0.26179938779 is 15 degrees in radians
        # image = contrib_image.rotate(image,
        #                             random.uniform(-0.26179938779, 0.26179938779))
        image = tf.reshape(image, [24, 24, 1])
    elif distort == 2:
        image = tf.reshape(image, [28, 28])
        image = tf.expand_dims(image, 2)
        image = tf.image.central_crop(image, central_fraction=24 / 28)
        image = tf.squeeze(image, 2)
        image = tf.reshape(image, [24, 24, 1])

    # Convert label from a scalar uint8 tensor to an int32 scalar.
    label = tf.cast(features['label'], tf.int32)

    return image, label
Example #28
def prepare_image_with_tensorflow(image,
                                  target_width=299,
                                  target_height=299,
                                  max_zoom=0.2):
    """Zooms and crops the image randomly for data augmentation."""

    # First, let's find the largest bounding box with the target size ratio that fits within the image
    image_shape = tf.cast(tf.shape(image), tf.float32)
    height = image_shape[0]
    width = image_shape[1]
    image_ratio = width / height
    target_image_ratio = target_width / target_height
    crop_vertically = image_ratio < target_image_ratio
    crop_width = tf.cond(crop_vertically, lambda: width,
                         lambda: height * target_image_ratio)
    crop_height = tf.cond(crop_vertically, lambda: width / target_image_ratio,
                          lambda: height)

    # Now let's shrink this bounding box by a random factor (dividing the dimensions
    # by a random number between 1.0 and 1.0 + `max_zoom`).
    resize_factor = tf.random_uniform(shape=[],
                                      minval=1.0,
                                      maxval=1.0 + max_zoom)
    crop_width = tf.cast(crop_width / resize_factor, tf.int32)
    crop_height = tf.cast(crop_height / resize_factor, tf.int32)
    box_size = tf.stack([crop_height, crop_width, 3])  # 3 = number of channels

    # Let's crop the image using a random bounding box of the size we computed
    image = tf.random_crop(image, box_size)

    # Let's also flip the image horizontally with 50% probability:
    image = tf.image.random_flip_left_right(image)

    # The resize_bilinear function requires a 4D tensor (a batch of images)
    # so we need to expand the number of dimensions first:
    image_batch = tf.expand_dims(image, 0)

    # Finally, let's resize the image to the target dimensions. Note that this function
    # returns a float32 tensor.
    image_batch = tf.image.resize_bilinear(image_batch,
                                           [target_height, target_width])
    # Back to a single image, and scale the colors from 0.0 to 1.0.
    image = image_batch[0] / 255
    return image
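A brief usage sketch, assuming raw_image is a decoded RGB array with values in [0, 255] (the placeholder name is illustrative, not from the original):

# Hypothetical usage: build the op once, then run it per image.
image_ph = tf.placeholder(tf.float32, shape=[None, None, 3])
prepared = prepare_image_with_tensorflow(image_ph)
with tf.Session() as sess:
    out = sess.run(prepared, feed_dict={image_ph: raw_image})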
Example #29
def vqa_v2_preprocess_image(
    image,
    height,
    width,
    mode,
    resize_side=512,
    distort=True,
    image_model_fn="resnet_v1_152",
):
    """vqa v2 preprocess image."""

    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    assert resize_side > 0
    if resize_side:
        image = _aspect_preserving_resize(image, resize_side)
    if mode == tf.estimator.ModeKeys.TRAIN:
        image = tf.random_crop(image, [height, width, 3])
    else:
        # Central crop, assuming resize_height > height, resize_width > width.
        image = tf.image.resize_image_with_crop_or_pad(image, height, width)

    image = tf.clip_by_value(image, 0.0, 1.0)

    if mode == tf.estimator.ModeKeys.TRAIN and distort:
        image = _flip(image)
        num_distort_cases = 4
        # pylint: disable=unnecessary-lambda
        image = _apply_with_random_selector(
            image,
            lambda x, ordering: _distort_color(x, ordering),
            num_cases=num_distort_cases)

    if image_model_fn.startswith("resnet_v1"):
        # resnet_v1 uses vgg preprocessing
        image = image * 255.
        image = _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
    elif image_model_fn.startswith("resnet_v2"):
        # resnet v2 uses inception preprocessing
        image = tf.subtract(image, 0.5)
        image = tf.multiply(image, 2.0)

    return image
Example #30
def transform(image, seed):
    r = image
    if a.mode == 'train':  # augment image by flipping and cropping
        if a.fliplr:
            r = tf.image.random_flip_left_right(r, seed=seed)
        if a.flipud:
            r = tf.image.random_flip_up_down(r, seed=seed)
        if a.transpose:
            r = random_transpose(r, seed=seed)

        r = tf.random_crop(r, size=[a.patch_size, a.patch_size, 3], seed=seed)

        r.set_shape(
            [a.patch_size, a.patch_size, 3]
        )  # must do this if tf.image.resize is not used, otherwise shape unknown

    else:  # use full sized original image
        r.set_shape([a.image_height, a.image_width, 3])  # use full size image

    return r
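random_transpose is not shown in this snippet; a plausible implementation, assuming it swaps height and width with 50% probability:

def random_transpose(img, seed=None):
    # Hypothetical helper: transpose the spatial dimensions half of the time.
    flag = tf.random_uniform([], 0.0, 1.0, seed=seed)
    return tf.cond(flag < 0.5,
                   lambda: tf.transpose(img, [1, 0, 2]),
                   lambda: img)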