Example #1
    def image_augmentation(self, train_data, test_data):
        train_data = tf.map_fn(lambda img: tf.image.random_flip_left_right(img), train_data)  # random flip; flip_left_right would mirror every image
        train_data = tf.map_fn(lambda img: tf.image.random_brightness(img,max_delta=63), train_data)
        train_data = tf.map_fn(lambda img: tf.image.random_contrast(img, lower=0.2, upper=1.8),train_data)

        if self.params['use_grayscale']:
            train_data = tf.map_fn(lambda img: tf.image.rgb_to_grayscale(img), train_data)
        if self.params['use_gradient_images']:
            train_data = self.apply_sobel(train_data)
        if self.params['use_grayscale']:
            test_data = tf.map_fn(lambda img: tf.image.rgb_to_grayscale(img), test_data)
        if self.params['use_gradient_images']:
            test_data = self.apply_sobel(test_data)

        # Standardize after the grayscale/Sobel transforms so train and test match.
        train_data = tf.map_fn(lambda img: tf.image.per_image_standardization(img), train_data)
        test_data = tf.map_fn(lambda img: tf.image.per_image_standardization(img), test_data)

        train_data = tf.map_fn(lambda img: tf.image.resize_image_with_crop_or_pad(img,30,30),train_data)
        train_data = tf.map_fn(lambda img: tf.image.resize_image_with_crop_or_pad(img,42,42),train_data)
        if self.params['use_grayscale']:
            train_data = tf.map_fn(lambda img: tf.random_crop(img,[32,32,1]),train_data)
        else:
            train_data = tf.map_fn(lambda img: tf.random_crop(img,[32,32,3]),train_data)

        return train_data, test_data
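
For comparison, a minimal sketch of the same per-image augmentation written against tf.data instead of tf.map_fn (assuming TF 1.4+ and a hypothetical train_dataset of [H, W, 3] float32 images; the grayscale/Sobel branches are omitted):

def augment(img):
    img = tf.image.random_flip_left_right(img)
    img = tf.image.random_brightness(img, max_delta=63)
    img = tf.image.random_contrast(img, lower=0.2, upper=1.8)
    img = tf.image.per_image_standardization(img)
    img = tf.image.resize_image_with_crop_or_pad(img, 42, 42)  # pad before cropping
    return tf.random_crop(img, [32, 32, 3])  # random translation via pad-then-crop

train_dataset = train_dataset.map(augment, num_parallel_calls=4)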
Example #2
def pre_process_img(image):
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=32./255)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    image = tf.random_crop(image, [default_height-np.random.randint(0, 4), default_width-np.random.randint(0, 4), 1])
    image = tf.image.resize_images(image, [default_height, default_width])
    return image
Example #3
  def preprocess_example(self, example, mode, hparams):

    # Crop to target shape instead of down-sampling target, leaving target
    # of maximum available resolution.
    target_shape = (self.output_dim, self.output_dim, self.num_channels)
    example["targets"] = tf.random_crop(example["targets"], target_shape)

    example["inputs"] = image_utils.resize_by_area(example["targets"],
                                                   self.input_dim)

    if self.inpaint_fraction is not None and self.inpaint_fraction > 0:

      # Validate input_dim before using it to build the mask.
      if self.input_dim is None:
        raise ValueError("Cannot train in-painting for examples with "
                         "only targets (i.e. input_dim is None, "
                         "implying there are only targets to be "
                         "generated).")

      mask = random_square_mask((self.input_dim,
                                 self.input_dim,
                                 self.num_channels),
                                self.inpaint_fraction)

      example["inputs"] = tf.multiply(
          tf.convert_to_tensor(mask, dtype=tf.int64),
          example["inputs"])

    return example
Example #4
def preprocess_for_train(image,
                         output_height,
                         output_width,
                         padding=_PADDING):
  """Preprocesses the given image for training.

  Note that the actual resizing scale is sampled from
    [`resize_size_min`, `resize_size_max`].

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    padding: The amount of padding before and after each dimension of the image.

  Returns:
    A preprocessed image.
  """
  padded_image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])
  # Randomly crop a [height, width] section of the image.
  distorted_image = tf.random_crop(padded_image,
                                   [output_height, output_width, 3])

  # Randomly flip the image horizontally.
  distorted_image = tf.image.random_flip_left_right(distorted_image)

  # Because these operations are not commutative, consider randomizing
  # the order of their operations.
  distorted_image = tf.image.random_brightness(distorted_image,
                                               max_delta=63)
  distorted_image = tf.image.random_contrast(distorted_image,
                                             lower=0.2, upper=1.8)

  # Subtract off the mean and divide by the variance of the pixels.
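  # (tf.image.per_image_whitening is the pre-1.0 name of tf.image.per_image_standardization)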
  return tf.image.per_image_whitening(distorted_image)
Example #5
def read_input(image_queue):
    # Read the images and generate the decode from PNG image
    imageReader = tf.WholeFileReader()
    image_key, image_value = imageReader.read(image_queue)
    image_decode = tf.image.decode_png(image_value, channels=1)
    image_decode = tf.cast(image_decode, tf.float32)
    # Preprocess data
    image_key = rename_image_filename(image_key)    # rename image filename 
    label = search_label(image_key)
    # CREATE OBJECT
    class Record(object):
        pass
    record = Record()
    # Instantiate object
    record.key = image_key
    record.label = tf.cast(label, tf.int32)
    record.image = image_decode
    # PROCESSING IMAGES
    # reshaped_image = tf.cast(record.image, tf.float32)
    # height = 245
    # width = 320
    height = 96
    width = 96
    # Image processing for training the network. Note the many random distortions applied to the image.
    # Randomly crop a [height, width] section of the image.
    distorted_image = tf.random_crop(record.image, [height, width, 1])
    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    # Because these operations are not commutative, consider randomizing the order of their operations.
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)
    # Subtract off the mean and divide by the variance of the pixels.
    float_image = tf.image.per_image_whitening(distorted_image)
    return generate_train_batch(record.label, float_image)
Example #6
def random_distort_image(image):
  distorted_image = image
  distorted_image = tf.image.pad_to_bounding_box(
    image, 4, 4, 40, 40)  # pad 4 pixels to each side
  distorted_image = tf.random_crop(distorted_image, [32, 32, 3])
  distorted_image = tf.image.random_flip_left_right(distorted_image)
  return distorted_image
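
For a 32x32 input, the pad-4-then-crop above is equivalent to centering the image on a 40x40 canvas first; a sketch of the same idiom via resize_image_with_crop_or_pad (a hypothetical variant, not from the source):

def random_distort_image_v2(image):
  distorted_image = tf.image.resize_image_with_crop_or_pad(image, 40, 40)  # zero-pad to 40x40
  distorted_image = tf.random_crop(distorted_image, [32, 32, 3])  # random 32x32 window
  return tf.image.random_flip_left_right(distorted_image)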
Example #7
def distort_inputs(reshaped_image):
  distorted_image = tf.random_crop(reshaped_image, imshape)
  distorted_image = tf.image.random_flip_left_right(distorted_image)
  distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
  distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)
  float_image = tf.image.per_image_whitening(distorted_image)
  return float_image
Example #8
 def random_shift(v):
     if random_shift_y:
         v = tf.concat([v[-random_shift_y:], v, v[:random_shift_y]], 0)
     if random_shift_x:
         v = tf.concat([v[:, -random_shift_x:], v, v[:, :random_shift_x]],
                       1)
     return tf.random_crop(v, [resize[0], resize[1], size[2]])
Example #9
 def __imagenet_data_process_function(self, x, y):
     with tf.name_scope("imagenet_data_aug") as scope:
         #random scale
         #apparently, this works better than what we have:
         #https://github.com/facebook/fb.resnet.torch
         #but let's use the 'original' formulation for now
         #randomly sample a size in specified range
         random_size = tf.squeeze(tf.random_uniform((1, 1), 256, 480, dtype=tf.int32, name="random_scale_size"))
         #rescale smaller size with this factor
         x = tf.cond(tf.greater(tf.shape(x)[0], tf.shape(x)[1]),
             lambda: tf.image.resize_images(x, [tf.shape(x)[0] * random_size // tf.shape(x)[1], random_size]),
             lambda: tf.image.resize_images(x, [random_size, tf.shape(x)[1] * random_size // tf.shape(x)[0]]))
         #random flip
         x = tf.image.random_flip_left_right(x)
         #random crop
         x = tf.random_crop(x, [224, 224, 3])
         #colour augmentation
         #this is a little more involved than I first thought
         #lets pick the inception colour distortion
         #https://github.com/tensorflow/models/blob/master/inception/inception/image_processing.py
         x = tf.image.random_brightness(x, max_delta=32. / 255.)
         x = tf.image.random_saturation(x, lower=0.5, upper=1.5)
         x = tf.image.random_hue(x, max_delta=0.2)
         x = tf.image.random_contrast(x, lower=0.5, upper=1.5)
         x = tf.clip_by_value(x, 0.0, 1.0)
         #normalisation
         x = tf.image.per_image_standardization(x)
     return [x, y]
Example #10
def aug_train(image, aux):
    aug_image = tf.pad(image, [[4, 4], [4, 4], [0, 0]])
    aug_image = tf.random_crop(aug_image, [32, 32, 3])
    aug_image = tf.image.random_flip_left_right(aug_image)
    aug_image = tf.image.random_contrast(aug_image, 0.75, 1.25)
    aug_image = (aug_image - aux['mean']) / aux['std']
    return aug_image
Example #11
 def testNoOp(self):
   # No random cropping is performed since the size is value.shape.
   for shape in (2, 1, 1), (2, 1, 3), (4, 5, 3):
     value = np.arange(0, np.prod(shape), dtype=np.int32).reshape(shape)
     with self.test_session():
       crop = tf.random_crop(value, shape).eval()
       self.assertAllEqual(crop, value)
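
The identity behaviour this test asserts can also be checked directly; a minimal sketch (TF 1.x graph mode):

import numpy as np
import tensorflow as tf

value = np.arange(2 * 3 * 3, dtype=np.int32).reshape(2, 3, 3)
crop = tf.random_crop(value, [2, 3, 3])  # size equals value.shape on every axis
with tf.Session() as sess:
    assert (sess.run(crop) == value).all()  # nothing to crop: output == input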
Example #12
def distorted_inputs(data_dir, batch_size):
    filenames = [os.path.join(data_dir, "data_batch_%d.bin" % i) for i in xrange(1, 6)]
    print(filenames)
    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError("Failed to find file: " + f)

    filename_queue = tf.train.string_input_producer(filenames)

    read_input = read_cifar10(filename_queue)
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)

    height = IMAGE_SIZE
    width = IMAGE_SIZE

    distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

    distorted_image = tf.image.random_flip_left_right(distorted_image)

    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)

    float_image = tf.image.per_image_whitening(distorted_image)

    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN * min_fraction_of_examples_in_queue)
    print(
        "Filling queue with %d CIFAR images before starting to train. "
        "This will take a few minutes." % min_queue_examples
    )

    return _generate_image_and_label_batch(float_image, read_input.label, min_queue_examples, batch_size)
Example #13
def image_batch(image_paths, batch_size, load_size=286, crop_size=256, channels=3, shuffle=True,
                num_threads=4, min_after_dequeue=100, allow_smaller_final_batch=False):
    """ for jpg and png files """
    # queue and reader
    img_queue = tf.train.string_input_producer(image_paths, shuffle=shuffle)
    reader = tf.WholeFileReader()

    # preprocessing
    _, img = reader.read(img_queue)
    img = tf.image.decode_image(img, channels=3)
    '''
    tf.image.random_flip_left_right should be used before tf.image.resize_images,
    because tf.image.decode_image returns a tensor without a static shape, which makes
    tf.image.resize_images collapse. Maybe it's a bug!
    '''
    img = tf.image.random_flip_left_right(img)
    img = tf.image.resize_images(img, [load_size, load_size])
    img = tf.random_crop(img, [crop_size, crop_size, channels])
    img = tf.cast(img, tf.float32) / 127.5 - 1

    # batch
    if shuffle:
        capacity = min_after_dequeue + (num_threads + 1) * batch_size
        img_batch = tf.train.shuffle_batch([img],
                                           batch_size=batch_size,
                                           capacity=capacity,
                                           min_after_dequeue=min_after_dequeue,
                                           num_threads=num_threads,
                                           allow_smaller_final_batch=allow_smaller_final_batch)
    else:
        img_batch = tf.train.batch([img],
                                   batch_size=batch_size,
                                   allow_smaller_final_batch=allow_smaller_final_batch)
    return img_batch, len(image_paths)
Example #14
  def _parser(serialized_example):
    """Parses a single tf.Example into image and label tensors."""
    features = tf.parse_single_example(
        serialized_example,
        features={
            "image": tf.FixedLenFeature([], tf.string),
            "label": tf.FixedLenFeature([], tf.int64),
        })
    image = tf.decode_raw(features["image"], tf.uint8)
    # Initially reshaping to [H, W, C] does not work
    image = tf.reshape(image, [NUM_CHANNEL, IMAGE_HEIGHT, IMAGE_WIDTH])
    # This is needed for `tf.image.resize_image_with_crop_or_pad`
    image = tf.transpose(image, [1, 2, 0])

    image = tf.cast(image, dtype)
    label = tf.cast(features["label"], tf.int32)

    if data_aug:
      image = tf.image.resize_image_with_crop_or_pad(image, IMAGE_HEIGHT + 4,
                                                     IMAGE_WIDTH + 4)
      image = tf.random_crop(image, [IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNEL])
      image = tf.image.random_flip_left_right(image)

    if data_format == "channels_first":
      image = tf.transpose(image, [2, 0, 1])

    if div255:
      image /= 255.

    return image, label
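
A sketch of how a parser like this is typically wired into an input pipeline (assuming TF 1.4+; the file list, shuffle buffer, and batch size below are hypothetical):

dataset = tf.data.TFRecordDataset(filenames)
dataset = dataset.map(_parser, num_parallel_calls=4)
dataset = dataset.shuffle(10000).batch(128)
images, labels = dataset.make_one_shot_iterator().get_next()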
Example #15
def create_inputs_norb(is_train: bool, epochs: int):

    import re
    if is_train:
        CHUNK_RE = re.compile(r"train\d+\.tfrecords")
    else:
        CHUNK_RE = re.compile(r"test\d+\.tfrecords")

    processed_dir = './data'
    chunk_files = [os.path.join(processed_dir, fname)
                   for fname in os.listdir(processed_dir)
                   if CHUNK_RE.match(fname)]

    image, label = norb.read_norb_tfrecord(chunk_files, epochs)

    if is_train:
        # TODO: is it the right order: add noise, resize, then crop?
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)

        image = tf.image.resize_images(image, [48, 48])
        image = tf.random_crop(image, [32, 32, 1])
    else:
        image = tf.image.resize_images(image, [48, 48])
        image = tf.slice(image, [8, 8, 0], [32, 32, 1])

    x, y = tf.train.shuffle_batch([image, label], num_threads=cfg.num_threads, batch_size=cfg.batch_size, capacity=cfg.batch_size * 64,
                                  min_after_dequeue=cfg.batch_size * 32, allow_smaller_final_batch=False)

    return x, y
Example #16
def distorted_inputs(data_dir, batch_size):
  """Construct distorted input for CIFAR training using the Reader ops.

  Args:
    data_dir: file name list.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 1] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
  filenames = get_train_filenames(data_dir)
  print(filenames)
  for f in filenames:
    if not gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)

  # Create a queue that produces the filenames to read.
  filename_queue = tf.train.string_input_producer(filenames)

  # Read examples from files in the filename queue.
  read_input = read_aurora(filename_queue)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  height = IMAGE_SIZE
  width = IMAGE_SIZE
  # angle = int(random.random()*360)
  # M = cv2.getRotationMatrix2D((IMAGE_SIZE/2, IMAGE_SIZE/2), angle, 1)
  # dst = cv2.warpAffine(reshaped_image, M, (IMAGE_SIZE, IMAGE_SIZE))
  # # Convert rotated image back to tensor
  # rotated_tensor = tf.convert_to_tensor(np.array(dst))

  # Image processing for training the network. Note the many random
  # distortions applied to the image.

  # Randomly crop a [height, width] section of the image.
  distorted_image = tf.random_crop(reshaped_image, [height, width, 1])
  # distorted_image = tf.image.resize_area()

  # Randomly flip the image horizontally.
  distorted_image = tf.image.random_flip_left_right(distorted_image)

  # Because these operations are not commutative, consider randomizing
  # the order of their operations.
  # distorted_image = tf.image.random_brightness(distorted_image,
  #                                              max_delta=63)
  distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)

  # Subtract off the mean and divide by the variance of the pixels.
  float_image = tf.image.per_image_whitening(distorted_image)

  # Ensure that the random shuffling has good mixing properties.
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN * min_fraction_of_examples_in_queue)
  print ('Filling queue with %d aurora images before starting to train. '
         'This will take a few minutes.' % min_queue_examples)

  # Generate a batch of images and labels by building up a queue of examples.
  return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size)
Example #17
def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs, 
        random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True):
    
    images = ops.convert_to_tensor(image_list, dtype=tf.string)
    labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
    
    # Makes an input queue
    input_queue = tf.train.slice_input_producer([images, labels],
        num_epochs=max_nrof_epochs, shuffle=shuffle)

    images_and_labels = []
    for _ in range(nrof_preprocess_threads):
        image, label = read_images_from_disk(input_queue)
        if random_rotate:
            image = tf.py_func(random_rotate_image, [image], tf.uint8)
        if random_crop:
            image = tf.random_crop(image, [image_size, image_size, 3])
        else:
            image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size)
        if random_flip:
            image = tf.image.random_flip_left_right(image)
        #pylint: disable=no-member
        image.set_shape((image_size, image_size, 3))
        image = tf.image.per_image_standardization(image)
        images_and_labels.append([image, label])

    image_batch, label_batch = tf.train.batch_join(
        images_and_labels, batch_size=batch_size,
        capacity=4 * nrof_preprocess_threads * batch_size,
        allow_smaller_final_batch=True)
  
    return image_batch, label_batch
Example #18
def read_and_preprocess(example_data):
    parsed = tf.parse_single_example(example_data, {
      'image/encoded': tf.FixedLenFeature((), tf.string, ''),
      'image/class/label': tf.FixedLenFeature([], tf.int64, 1),
    })
    image_bytes = tf.reshape(parsed['image/encoded'], shape=[])
    label = tf.cast(
      tf.reshape(parsed['image/class/label'], shape=[]), dtype=tf.int32) - 1

    # end up with pixel values that are in the -1, 1 range
    image = tf.image.decode_jpeg(image_bytes, channels=NUM_CHANNELS)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32) # 0-1
    image = tf.expand_dims(image, 0) # resize_bilinear needs batches

    image = tf.image.resize_bilinear(
      image, [HEIGHT + 10, WIDTH + 10], align_corners=False)
    image = tf.squeeze(image)  # remove batch dimension
    image = tf.random_crop(image, [HEIGHT, WIDTH, NUM_CHANNELS])
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=63.0 / 255.0)
    image = tf.image.random_contrast(image, lower=0.2, upper=1.8)

        
    #pixel values are in range [0,1], convert to [-1,1]
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    #return {'image':image}, label
    return image, label
Example #19
def distorted_inputs(data_dir, batch_size):
  """Construct distorted input for CIFAR training using the Reader ops.
  Args:
    data_dir: Path to the CIFAR-10 data directory.
    batch_size: Number of images per batch.
  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
#  filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
#               for i in xrange(1, 6)]
  filenames = ['/export/ddorroh/datasets/container/batches-bin/train_batch.bin']
  for f in filenames:
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)

  # Create a queue that produces the filenames to read.
  filename_queue = tf.train.string_input_producer(filenames)

  # Read examples from files in the filename queue.
  read_input = read_cifar10(filename_queue)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  height = IMAGE_SIZE
  width = IMAGE_SIZE

  # Image processing for training the network. Note the many random
  # distortions applied to the image.

  # Randomly crop a [height, width] section of the image.
  distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

  # Randomly flip the image horizontally.
  distorted_image = tf.image.random_flip_left_right(distorted_image)

  # Because these operations are not commutative, consider randomizing
  # the order of their operations.
  distorted_image = tf.image.random_brightness(distorted_image,
                                               max_delta=63)
  distorted_image = tf.image.random_contrast(distorted_image,
                                             lower=0.2, upper=1.8)

  # Subtract off the mean and divide by the variance of the pixels.
  float_image = tf.image.per_image_standardization(distorted_image)

  # Ensure that the random shuffling has good mixing properties.
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                           min_fraction_of_examples_in_queue)
  print ('Filling queue with %d CIFAR images before starting to train. '
         'This will take a few minutes.' % min_queue_examples)

  # Generate a batch of images and labels by building up a queue of examples.

  images, labels = _generate_image_and_label_batch(float_image, read_input.label,
                                                   min_queue_examples, batch_size)

  tf.summary.image('distorted_images', images)

  return images, labels
Example #20
    def add_image_distortion(self):
        with tf.variable_scope('distort_image'):
            image = tf.image.decode_jpeg(self.jpeg, channels=3)
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
            crop_scale = tf.random_uniform([], minval=0.5, maxval=1)
            height = tf.cast(INPUT_SIZE[0] / crop_scale, tf.int32)
            width = tf.cast(INPUT_SIZE[1] / crop_scale, tf.int32)
            image = tf.image.resize_images(image, [height, width])

            image = tf.random_crop(image, [INPUT_SIZE[0], INPUT_SIZE[1], 3])
            image = tf.image.random_flip_left_right(image)

            def distort_colors_1():
                i = tf.image.random_brightness(image, max_delta=32. / 255.)
                i = tf.image.random_saturation(i, lower=0.5, upper=1.5)
                i = tf.image.random_hue(i, max_delta=0.2)
                i = tf.image.random_contrast(i, lower=0.5, upper=1.5)
                return i

            def distort_colors_2():
                i = tf.image.random_brightness(image, max_delta=32. / 255.)
                i = tf.image.random_contrast(i, lower=0.5, upper=1.5)
                i = tf.image.random_saturation(i, lower=0.5, upper=1.5)
                i = tf.image.random_hue(i, max_delta=0.2)
                return i

            image = tf.cond(tf.equal(0, tf.random_uniform(shape=[], maxval=2, dtype=tf.int32)),
                            distort_colors_1, distort_colors_2)

            image = tf.sub(image, 0.5)
            image = tf.mul(image, 2.0)
            self.distorted_image = image
Example #21
def inputs(tf_dir, is_train, batch_size, num_epochs=None):
  image, caption_tids, cocoid = records(tf_dir, num_epochs)

  reshaped_image = tf.image.resize_images(image, IM_S, IM_S)

  if is_train:
    distorted_image = tf.random_crop(reshaped_image, [CNN_S, CNN_S, 3])
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=32./255.)
    distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)
    distorted_image = tf.clip_by_value(distorted_image, 0.0, 1.0)
  else:
    distorted_image = tf.image.resize_image_with_crop_or_pad(reshaped_image, CNN_S, CNN_S)

  image = distorted_image

  # [0,1) --> [-1,1)
  image = tf.sub(image, 0.5)
  image = tf.mul(image, 2.0)

  num_preprocess_threads = 4
  min_queue_examples = 20

  outputs = [image, caption_tids, cocoid]

  return tf.train.shuffle_batch(
      outputs,
      batch_size=batch_size,
      num_threads=num_preprocess_threads,
      capacity=min_queue_examples + 3 * batch_size,
      min_after_dequeue=min_queue_examples)
Example #22
def preprocess_for_train(image,
                         output_height,
                         output_width,
                         padding):
  """Preprocesses the given image for training.
  Note that the actual resizing scale is sampled from
    [`resize_size_min`, `resize_size_max`].
  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    padding: The amount of padding before and after each dimension of the image.
  Returns:
    A preprocessed image.
  """

  # Transform the image to floats.
  image = tf.to_float(image)
  if padding > 0:
    image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])
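  # Note: np.random.randint is evaluated once at graph construction, so this
  # rotation angle is fixed for the whole run; tf.random_uniform would draw
  # a fresh angle per image.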
  angles = 0.1 * np.pi * np.random.randint(8,size=1) - 0.4 * np.pi
  image = tf.contrib.image.rotate(image, angles)
  # Randomly crop a [height, width] section of the image.
  distorted_image = tf.random_crop(image,
                                   [output_height, output_width, 3])

  # Randomly flip the image horizontally.
  distorted_image = tf.image.random_flip_left_right(distorted_image)

  distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
  #distorted_image = tf.image.random_contrast(distorted_image,lower=0.2, upper=1.8)
  # Subtract off the mean and divide by the variance of the pixels.
  return tf.image.per_image_standardization(distorted_image)
Example #23
def input_data_t(data_dir, trainfile, batch_size, shuffle=True):
    image_list, label_list = read_labeled_image_list(data_dir, trainfile)

    images = ops.convert_to_tensor(image_list, dtype=dtypes.string)
    labels = ops.convert_to_tensor(label_list, dtype=dtypes.int32)

    # Makes an input queue
    input_queue = tf.train.slice_input_producer([images, labels],
                                            num_epochs=64,
                                            shuffle=True)

    image, label = read_images_from_disk(input_queue)

    distorted_image = tf.random_crop(image, [HEIGHT, WIDTH, 3])
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    distorted_image = tf.image.random_brightness(distorted_image,max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,lower=0.2, upper=1.8)
    float_image = tf.image.per_image_whitening(distorted_image)

    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
            min_fraction_of_examples_in_queue)
    print ('Filling queue with %d CIFAR images before starting to train. '
            'This will take a few minutes.' % min_queue_examples)


    return _generate_image_and_label_batch(float_image, label, min_queue_examples, batch_size, shuffle)
Example #24
        def pr_image(image):

            reshaped_image = random_resize(image, H['arch']['min_scale'],
                                           H['arch']['max_scale'])

            # Randomly crop a [height, width] section of the image.
            distorted_image = tf.random_crop(reshaped_image,
                                             [H['arch']['input_size'],
                                              H['arch']['input_size'],
                                              H['arch']['num_channels']])

            distorted_image.set_shape([H['arch']['input_size'],
                                       H['arch']['input_size'],
                                       H['arch']['num_channels']])

            # Randomly flip the image horizontally.
            distorted_image = tf.image.random_flip_left_right(distorted_image)

            # Because these operations are not commutative,
            # consider randomizing the order of their operations.
            distorted_image = tf.image.random_brightness(distorted_image,
                                                         max_delta=63)
            distorted_image = tf.image.random_contrast(distorted_image,
                                                       lower=0.2, upper=1.8)
            distorted_image = tf.image.random_hue(distorted_image,
                                                  max_delta=0.2)

            distorted_image = tf.image.random_saturation(distorted_image,
                                                         lower=0.5,
                                                         upper=1.5)

            return tf.image.per_image_whitening(distorted_image)
Example #25
 def map_train(image, label):
     image = tf.image.resize_image_with_crop_or_pad(image, image_size + 4,
                                                    image_size + 4)
     image = tf.random_crop(image, [image_size, image_size, 3])
     image = tf.image.random_flip_left_right(image)
     image = tf.image.per_image_standardization(image)
     return (image, label)
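
The eval-time counterpart usually drops the random pad/crop/flip and keeps only the normalization; a sketch under that assumption (train_ds and eval_ds are hypothetical tf.data datasets of (image, label) pairs):

def map_eval(image, label):
    image = tf.image.per_image_standardization(image)
    return (image, label)

train_ds = train_ds.map(map_train)
eval_ds = eval_ds.map(map_eval)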
Example #26
def distorted_inputs(batch_size):
  path = "train"
  read_input = read_cifar10(path)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  height = IMAGE_SIZE_Y
  width = IMAGE_SIZE_X

  distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

  # Randomly flip the image horizontally.
  distorted_image = tf.image.random_flip_left_right(distorted_image)

  # Because these operations are not commutative, consider randomizing
  # the order of their operations.
  distorted_image = tf.image.random_brightness(distorted_image,
                                               max_delta=63)
  distorted_image = tf.image.random_contrast(distorted_image,
                                             lower=0.2, upper=1.8)

  # Subtract off the mean and divide by the variance of the pixels.
  float_image = tf.image.per_image_whitening(distorted_image)

  # Ensure that the random shuffling has good mixing properties.
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                           min_fraction_of_examples_in_queue)
  print ('Filling queue with %d CIFAR images before starting to train. '
         'This will take a few minutes.' % min_queue_examples)

  # Generate a batch of images and labels by building up a queue of examples.
  return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size,
                                         shuffle=True)
Example #27
def random_crop_and_pad_image_and_labels(image, label, crop_h, crop_w, ignore_label=255):
    """
    Randomly crop and pads the input images.

    Args:
      image: Training image to crop/ pad.
      label: Segmentation mask to crop/ pad.
      crop_h: Height of cropped segment.
      crop_w: Width of cropped segment.
      ignore_label: Label to ignore during the training.
    """

    label = tf.cast(label, dtype=tf.float32)
    label = label - ignore_label # Needs to be subtracted and later added due to 0 padding.
    combined = tf.concat(axis=2, values=[image, label]) 
    image_shape = tf.shape(image)
    combined_pad = tf.image.pad_to_bounding_box(combined, 0, 0, tf.maximum(crop_h, image_shape[0]), tf.maximum(crop_w, image_shape[1]))
    
    last_image_dim = tf.shape(image)[-1]
    # last_label_dim = tf.shape(label)[-1]
    combined_crop = tf.random_crop(combined_pad, [crop_h, crop_w, 4])
    img_crop = combined_crop[:, :, :last_image_dim]
    label_crop = combined_crop[:, :, last_image_dim:]
    label_crop = label_crop + ignore_label
    label_crop = tf.cast(label_crop, dtype=tf.uint8)
    
    # Set static shape so that tensorflow knows shape at compile time. 
    img_crop.set_shape((crop_h, crop_w, 3))
    label_crop.set_shape((crop_h,crop_w, 1))
    return img_crop, label_crop  
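
Because the image and mask are concatenated before tf.random_crop, both receive the identical crop offset. Hypothetical usage, assuming image is an [H, W, 3] float32 tensor and label an [H, W, 1] mask:

img_crop, label_crop = random_crop_and_pad_image_and_labels(
    image, label, crop_h=321, crop_w=321, ignore_label=255)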
Example #28
def distorted_inputs(data_dir, batch_size):
  """Construct distorted input for CIFAR training using the Reader ops.

  Args:
    data_dir: Path to the CIFAR-10 data directory.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """  
  global TRAIN
  global VAL_NUM
  # Create a queue that produces the filenames to read.
  filename_queue = tf.train.string_input_producer(get_data(data_dir))

  # Read examples from files in the filename queue.
  read_input = read_cifar10(filename_queue)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  height = IMAGE_SIZE
  width = IMAGE_SIZE

  paddings = [[4, 4], [4, 4], [0, 0]]
  reshaped_image = tf.pad(reshaped_image, paddings, "CONSTANT")

  # Randomly crop a [height, width] section of the image.
  distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

  # Randomly flip the image horizontally.
  distorted_image = tf.image.random_flip_left_right(distorted_image)

  # Image processing for training the network. Note the many random
  # distortions applied to the image.

  # Because these operations are not commutative, consider randomizing
  # the order of their operations.
  distorted_image = tf.image.random_brightness(distorted_image,
                                               max_delta=63)
  distorted_image = tf.image.random_contrast(distorted_image,
                                             lower=0.2, upper=1.8)

  # Subtract off the mean and divide by the variance of the pixels.
  float_image = tf.image.per_image_whitening(distorted_image)

  # Ensure that the random shuffling has good mixing properties.
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                           min_fraction_of_examples_in_queue)
  print ('Filling queue with %d CIFAR images before starting to train. '
         'This will take a few minutes.' % min_queue_examples)

  # Generate a batch of images and labels by building up a queue of examples.
  return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size,
                                         shuffle=True)
Example #29
 def preprocess(self, image):
   """Preprocess a single image in [height, width, depth] layout."""
   if self.subset == 'train' and self.use_distortion:
     # Pad 4 pixels on each dimension of feature map, done in mini-batch
     image = tf.image.resize_image_with_crop_or_pad(image, 40, 40)
     image = tf.random_crop(image, [HEIGHT, WIDTH, DEPTH])
     image = tf.image.random_flip_left_right(image)
   return image
Example #30
def test(is_train=True):
    """Instruction on how to read data from tfrecord"""

    # 1. use regular expression to find all files we want
    import re
    if is_train:
        CHUNK_RE = re.compile(r"train\d+\.tfrecords")
    else:
        CHUNK_RE = re.compile(r"test\d+\.tfrecords")

    processed_dir = './data'
    # 2. parse them into a list of file name
    chunk_files = [os.path.join(processed_dir, fname)
                   for fname in os.listdir(processed_dir)
                   if CHUNK_RE.match(fname)]
    # 3. pass argument into read method
    image, label = read_norb_tfrecord(chunk_files, 2)

    image = tf.image.random_brightness(image, max_delta=32. / 255.)
    image = tf.image.random_contrast(image, lower=0.5, upper=1.5)

    image = tf.image.resize_images(image, [48, 48])

    """Batch Norm"""
    params_shape = [image.get_shape()[-1]]
    beta = tf.get_variable(
        'beta', params_shape, tf.float32,
        initializer=tf.constant_initializer(0.0, tf.float32))
    gamma = tf.get_variable(
        'gamma', params_shape, tf.float32,
        initializer=tf.constant_initializer(1.0, tf.float32))
    mean, variance = tf.nn.moments(image, [0, 1, 2])
    image = tf.nn.batch_normalization(image, mean, variance, beta, gamma, 0.001)

    image = tf.random_crop(image, [32, 32, 1])

    batch_size = 8
    x, y = tf.train.shuffle_batch([image, label], batch_size=batch_size, capacity=batch_size * 64,
                                  min_after_dequeue=batch_size * 32, allow_smaller_final_batch=False)
    logger.debug('x shape: {}, y shape: {}'.format(x.get_shape(), y.get_shape()))

    # Initialize all ops
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(init)
        # Start the queue runners
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for i in range(2):
            val, l = sess.run([x, y])
            # l = to_categorical(l, 12)
            print(val, l)
        coord.join()

    logger.debug('Test read tf record Succeed')
Example #31
def process_image(encoded_image,
                  is_training,
                  height,
                  width,
                  resize_height=346,
                  resize_width=346,
                  thread_id=0,
                  image_format="jpeg"):
    """Decode an image, resize and apply random dirtortions.

    In training, images are distorted slightly differently depending on thread_id.

    Args:
        encoded_image: String Tensor containing the image.
        is_training: Boolean; whether preprocessing for training or eval.
        height: Height of the output image.
        width: Width of the output image.
        resize_height: If > 0, resize height before crop to final dimensions.
        resize_width: If > 0, resize width before crop to final dimensions.
        thread_id: Preprocessing thread id used to select the ordering of color
            distortions. There should be a multiple of 2 preprocessing threads.
        image_format: "jpeg" or "png".

    Returns:
        A float32 Tensor of shape [height, width, 3] with values in [-1,1]

    Raises:
        ValueError: If image_format is invalid.
    """

    # Helper function to log an image summary to the visualizer. Summaries are
    # only logged in thread 0.
    def image_summary(name, image):
        if not thread_id:
            tf.summary.image(name, tf.expand_dims(image, 0))

    # Decode image into a float32 Tensor of shape [?, ?, 3] with values in [0, 1).
    with tf.name_scope("decode", values=[encoded_image]):
        if image_format == "jpeg":
            image = tf.image.decode_jpeg(encoded_image, channels=3)
        elif image_format == "png":
            image = tf.image.decode_png(encoded_image, channels=3)
        else:
            raise ValueError("Invalid image format: %s" % image_format)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    image_summary("original_image", image)

    # Resize image.
    assert (resize_height > 0) == (resize_width > 0)
    if resize_height:
        image = tf.image.resize_images(image,
                                       size=[resize_height, resize_width],
                                       method=tf.image.ResizeMethod.BILINEAR)

    # Crop to final dimensions.
    if is_training:
        image = tf.random_crop(image, [height, width, 3])
    else:
        # Central crop, assuming resize_height > height, resize_width > width
        image = tf.image.resize_image_with_crop_or_pad(image, height, width)

    image_summary("resized_image", image)

    # Randomly distort the image.
    if is_training:
        image = distort_image(image, thread_id)

    image_summary("final_image", image)

    # Rescale to [-1, 1] instead of [0, 1]
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    return image
Example #32
tf.sparse_reduce_max()
tf.sparse_reduce_max_sparse()

tf.reduce_all()
tf.reduce_any()
tf.reduce_join()
tf.reduce_logsumexp()
tf.reduce_max()
tf.reduce_mean()
tf.reduce_min()
tf.reduce_prod()
tf.reduce_sum()
tf.reduced_shape()

tf.random_crop()
tf.random_gamma()
tf.random_normal()
tf.random_poisson()
tf.random_poisson_v2()
tf.random_shuffle()
tf.random_uniform()

tf.where()
tf.while_loop()
tf.write_file()
tf.read_file()

tf.record_input()
tf.reshape()
tf.restore_v2()
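
For reference, the TF 1.x signature is tf.random_crop(value, size, seed=None, name=None): it samples a uniform random offset and slices a window of shape size, which must not exceed value's shape on any axis. A minimal sketch:

image = tf.placeholder(tf.float32, [None, None, 3])
patch = tf.random_crop(image, size=[64, 64, 3], seed=42)  # seed makes the crop reproducible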
Example #33
    dropout2 = tf.nn.dropout(fc2, keep_prob)
    logits = tf.contrib.layers.fully_connected(
        dropout2,
        10,
        activation_fn=None,
        weights_regularizer=tf.contrib.layers.l2_regularizer(
            tf.constant(0.0005, dtype=tf.float32)))

    cost = loss(logits, feed_labels)

    opt_mom = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9)
    opt = opt_mom.minimize(cost)

    acc = accuracy(logits, feed_labels)

img_scale_crop = tf.random_crop(
    tf.image.resize_images(aug_img, get_new_size()), [96, 96, 3])
img_rand_flip_lr = tf.image.random_flip_left_right(aug_img)
img_rand_flip_ud = tf.image.random_flip_up_down(aug_img)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

builder = tf.saved_model.builder.SavedModelBuilder("/output/cnn_model_final")

while (ne < num_epochs):
    stime = time.time()
    print 'epoch::', ne + 1, '...'
    if ne != 0:
        np.random.shuffle(index)
        train_x = train_x[index]
        train_y = train_y[index]
Example #34
def run(image_size, ind):
    
    label = showimage(ind)
    print(label)
    image_name = 'image_test.jpg'
    image_string = open(image_name, 'rb').read()
    image = tf.image.decode_jpeg(image_string, channels=3)

    if image.dtype != tf.float32:
       image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    
    height = image_size
    width = image_size

    distorted_image1 = tf.random_crop(image, [height, width,3])

    # Randomly flip the image horizontally.
    distorted_image2 = tf.image.random_flip_left_right(distorted_image1)
 
    # Because these operations are not commutative, consider randomizing
    # the order of their operations.
    distorted_image3 = tf.image.random_brightness(distorted_image2,  max_delta=63)
    
    distorted_image4 = tf.image.random_contrast(distorted_image3,lower=0.2, upper=1.8)
  
    #Subtract off the mean and divide by the variance of the pixels.
    float_image = tf.image.per_image_standardization(distorted_image4)
    
    # Save the processed image for Fathom
    with tf.Session() as sess:
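        # Note: each .eval() below is an independent session run, so every random
        # op is re-drawn each time; the panels are not stages of one single sample.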
        i1 = image.eval()
        i2 = distorted_image1.eval()
        i3 = distorted_image2.eval()
        i4 = distorted_image3.eval()
        i5 = distorted_image4.eval()
        i6 = float_image.eval()
    i7 = i6
    for i in range(24):
      for j in range(24):
        for k in range(3):
          if i7[i][j][k] > 1:
            i7[i][j][k]=1
          if i7[i][j][k] < -1:
            i7[i][j][k]=-1    

    fig = pylab.figure()
    a1 = fig.add_subplot(231)
    a1.set_title("original image")
    a2 = fig.add_subplot(232)
    a2.set_title("after crop")
    a3 = fig.add_subplot(233)
    a3.set_title("after flip")
    a4 = fig.add_subplot(234)
    a4.set_title("random brightness")
    a5 = fig.add_subplot(235)
    a5.set_title("random contrast")
    a6 = fig.add_subplot(236)
    a6.set_title("standardization")
    a1.imshow(i1)
    a2.imshow(i2)
    a3.imshow(i3)
    a4.imshow(i4)
    a5.imshow(i5)
    a6.imshow(i6)
    pylab.axis("off")
    pylab.show()
    fig.savefig('temp.png',dpi=fig.dpi)
Example #35
def _read_and_decode(filename_queue,
                     image_dim=28,
                     distort=False,
                     split='train'):
    """Reads a single record and converts it to a tensor.

  Args:
    filename_queue: Tensor Queue, list of input files.
    image_dim: Scalar, the height (and width) of the image in pixels.
    distort: Boolean, whether to distort the input or not.
    split: String, the split of the data (test or train) to read from.

  Returns:
    Dictionary of the (Image, label) and the image height.

  """

    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'image_raw':
                                           tf.FixedLenFeature([], tf.string),
                                           'label':
                                           tf.FixedLenFeature([], tf.int64),
                                           'height':
                                           tf.FixedLenFeature([], tf.int64),
                                           'width':
                                           tf.FixedLenFeature([], tf.int64),
                                           'depth':
                                           tf.FixedLenFeature([], tf.int64)
                                       })

    # Convert from a scalar string tensor (whose single string has
    # length image_pixel*image_pixel) to a uint8 tensor with shape
    # [image_pixel, image_pixel, 1].
    image = tf.decode_raw(features['image_raw'], tf.uint8)

    image = tf.reshape(image, [image_dim, image_dim, 1])
    image.set_shape([image_dim, image_dim, 1])

    # Convert from [0, 255] -> [-0.5, 0.5] floats.
    image = tf.cast(image, tf.float32) * (1. / 255)

    if distort:
        cropped_dim = image_dim - 4
        if split == 'train':
            image = tf.reshape(image, [image_dim, image_dim])
            image = tf.random_crop(image, [cropped_dim, cropped_dim])
            # 0.26179938779 is 15 degrees in radians
            image = tf.contrib.image.rotate(
                image, random.uniform(-0.26179938779, 0.26179938779))
            image = tf.reshape(image, [cropped_dim, cropped_dim, 1])
            image.set_shape([cropped_dim, cropped_dim, 1])
        else:
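            # (relies on true division; under Python 2 this floors to 0)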
            fraction = cropped_dim / image_dim
            image = tf.image.central_crop(image, central_fraction=fraction)
            image.set_shape([cropped_dim, cropped_dim, 1])
        image_dim = cropped_dim

    # Convert label from a scalar uint8 tensor to an int32 scalar.
    label = tf.cast(features['label'], tf.int32)
    features = {
        'images': image,
        'labels': tf.one_hot(label, 10),
        'recons_image': image,
        'recons_label': label,
    }

    return features, image_dim
Example #36
def main():
    args = parser.parse_args()

    # We store all arguments in a json file. This has two advantages:
    # 1. We can always get back and see what exactly that experiment was
    # 2. We can resume an experiment as-is without needing to remember all flags.
    args_file = os.path.join(args.experiment_root, 'args.json')
    if args.resume:
        if not os.path.isfile(args_file):
            raise IOError('`args.json` not found in {}'.format(args_file))

        print('Loading args from {}.'.format(args_file))
        with open(args_file, 'r') as f:
            args_resumed = json.load(f)
        args_resumed['resume'] = True  # This would be overwritten.

        # When resuming, we not only want to populate the args object with the
        # values from the file, but we also want to check for some possible
        # conflicts between loaded and given arguments.
        for key, value in args.__dict__.items():
            if key in args_resumed:
                resumed_value = args_resumed[key]
                if resumed_value != value:
                    print('Warning: For the argument `{}` we are using the'
                          ' loaded value `{}`. The provided value was `{}`'
                          '.'.format(key, resumed_value, value))
                    command = input('Would you like to restore it? (yes/no)')
                    if command == 'yes':
                        args.__dict__[key] = resumed_value
                        print(
                            'For the argument `{}` we are using the loaded value `{}`.'
                            .format(key, args.__dict__[key]))
                    else:
                        print(
                            'For the argument `{}` we are using the provided value `{}`.'
                            .format(key, args.__dict__[key]))
            else:
                print('Warning: A new argument was added since the last run:'
                      ' `{}`. Using the new value: `{}`.'.format(key, value))
        os.remove(args_file)
        with open(args_file, 'w') as f:
            json.dump(vars(args),
                      f,
                      ensure_ascii=False,
                      indent=2,
                      sort_keys=True)

    else:
        # If the experiment directory exists already, we bail in fear.
        if os.path.exists(args.experiment_root):
            if os.listdir(args.experiment_root):
                print('The directory {} already exists and is not empty.'
                      ' If you want to resume training, append --resume to'
                      ' your call.'.format(args.experiment_root))
                exit(1)
        else:
            os.makedirs(args.experiment_root)

        # Store the passed arguments for later resuming and grepping in a nice
        # and readable format.
        with open(args_file, 'w') as f:
            json.dump(vars(args),
                      f,
                      ensure_ascii=False,
                      indent=2,
                      sort_keys=True)

    log_file = os.path.join(args.experiment_root, "train")
    logging.config.dictConfig(common.get_logging_dict(log_file))
    log = logging.getLogger('train')

    # Also show all parameter values at the start, for ease of reading logs.
    log.info('Training using the following parameters:')
    for key, value in sorted(vars(args).items()):
        log.info('{}: {}'.format(key, value))

    # Check them here, so they are not required when --resume-ing.
    if not args.train_set:
        parser.print_help()
        log.error("You did not specify the `train_set` argument!")
        sys.exit(1)
    if not args.image_root:
        parser.print_help()
        log.error("You did not specify the required `image_root` argument!")
        sys.exit(1)

    # Load the data from the TxT file. see Common.load_dataset function for details
    pids, fids = common.load_dataset(args.train_set, args.image_root)
    max_fid_len = max(map(len, fids))  # We'll need this later for logfiles.

    # Setup a tf.Dataset where one "epoch" loops over all PIDS.
    # PIDS are shuffled after every epoch and continue indefinitely.
    unique_pids = np.unique(pids)
    dataset = tf.data.Dataset.from_tensor_slices(unique_pids)
    dataset = dataset.shuffle(len(unique_pids))

    # Constrain the dataset size to a multiple of the batch-size, so that
    # we don't get overlap at the end of each epoch.
    dataset = dataset.take((len(unique_pids) // args.batch_p) * args.batch_p)
    dataset = dataset.repeat(None)  # Repeat forever. Funny way of stating it.

    # For every PID, get K images.
    dataset = dataset.map(lambda pid: sample_k_fids_for_pid(
        pid, all_fids=fids, all_pids=pids, batch_k=args.batch_k
    ))  # now the dataset has been modified as [selected_fids
    # , pid] due to the return of the function 'sample_k_fids_for_pid'

    # Ungroup/flatten the batches for easy loading of the files.
    dataset = dataset.apply(tf.contrib.data.unbatch())

    # Convert filenames to actual image tensors.
    net_input_size = (args.net_input_height, args.net_input_width)
    pre_crop_size = (args.pre_crop_height, args.pre_crop_width)
    dataset = dataset.map(
        lambda fid, pid: common.fid_to_image(fid,
                                             pid,
                                             image_root=args.image_root,
                                             image_size=pre_crop_size if args.
                                             crop_augment else net_input_size),
        num_parallel_calls=args.loading_threads
    )  # now the dataset has been modified as [selected_images
    # , fid, pid] due to the return of the function 'fid_to_image'

    # Augment the data if specified by the arguments.
    if args.flip_augment:
        dataset = dataset.map(lambda im, fid, pid:
                              (tf.image.random_flip_left_right(im), fid, pid))
    if args.crop_augment:
        dataset = dataset.map(lambda im, fid, pid: (tf.random_crop(
            im, net_input_size + (3, )), fid, pid))

    # Group it back into PK batches.
    batch_size = args.batch_p * args.batch_k
    dataset = dataset.batch(batch_size)

    # Overlap producing and consuming for parallelism.
    dataset = dataset.prefetch(1)

    # Since we repeat the data infinitely, we only need a one-shot iterator.
    images, fids, pids = dataset.make_one_shot_iterator().get_next()

    # Create the model and an embedding head.
    model = import_module('nets.' + args.model_name)
    head = import_module('heads.' + args.head_name)

    # Feed the image through the model. The returned `body_prefix` will be used
    # further down to load the pre-trained weights for all variables with this
    # prefix.
    endpoints, body_prefix = model.endpoints(images, is_training=True)
    if args.head_name == 'fusion':
        with tf.name_scope('head'):
            endpoints = head.head(endpoints,
                                  args.embedding_dim,
                                  args.model_name,
                                  is_training=True)
    else:
        with tf.name_scope('head'):
            endpoints = head.head(endpoints,
                                  args.embedding_dim,
                                  is_training=True)

    # Create the loss in two steps:
    # 1. Compute all pairwise distances according to the specified metric.
    # 2. For each anchor along the first dimension, compute its loss.
    # dists = loss.cdist(endpoints['emb'], endpoints['emb'], metric=args.metric)
    # losses, train_top1, prec_at_k, _, neg_dists, pos_dists = loss.LOSS_CHOICES[args.loss](
    #     dists, pids, args.margin, batch_precision_at_k=args.batch_k-1)
    # # '_' stands for the boolean matrix shows topK where the correct match of the identities occurs
    # shape=(batch_size,K)


# Modified loss 1
    dists1 = loss.cdist(endpoints['feature1'],
                        endpoints['feature1'],
                        metric=args.metric)
    losses1, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists1, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists2 = loss.cdist(endpoints['feature2'],
                        endpoints['feature2'],
                        metric=args.metric)
    losses2, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists2, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists3 = loss.cdist(endpoints['feature3'],
                        endpoints['feature3'],
                        metric=args.metric)
    losses3, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists3, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists4 = loss.cdist(endpoints['feature4'],
                        endpoints['feature4'],
                        metric=args.metric)
    losses4, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists4, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists_fu = loss.cdist(endpoints['fusion_layer'],
                          endpoints['fusion_layer'],
                          metric=args.metric)
    losses_fu, train_top1, prec_at_k, _, neg_dists, pos_dists = loss.LOSS_CHOICES[
        args.loss](dists_fu,
                   pids,
                   args.margin,
                   batch_precision_at_k=args.batch_k - 1)

    losses = losses1 + losses2 + losses3 + losses4 + losses_fu

    # losses, train_top1, prec_at_k, _, neg_dists, pos_dists = loss_m.LOSS_CHOICES[args.loss](
    #     endpoints, pids, args.margin, args.model_name, batch_precision_at_k=args.batch_k - 1, metric =args.metric
    # )

    # Count the number of active entries, and compute the total batch loss.
    num_active = tf.reduce_sum(tf.cast(tf.greater(losses, 1e-5), tf.float32))

    # Here `losses` is the hinge value max(0, margin + pos_dist - neg_dist),
    # i.e. how much each positive pair fails to beat its negative pair by the margin.
    loss_mean = tf.reduce_mean(losses)

    # Some logging for tensorboard.
    tf.summary.histogram('loss_distribution', losses)
    tf.summary.scalar('loss', loss_mean)
    tf.summary.scalar('batch_top1', train_top1)
    tf.summary.scalar('batch_prec_at_{}'.format(args.batch_k - 1), prec_at_k)
    tf.summary.scalar('active_count', num_active)
    #tf.summary.histogram('embedding_dists', dists)
    tf.summary.histogram('embedding_pos_dists', pos_dists)
    tf.summary.histogram('embedding_neg_dists', neg_dists)
    tf.summary.histogram('embedding_lengths',
                         tf.norm(endpoints['emb_raw'], axis=1))

    # Create the mem-mapped arrays in which we'll log all training details in
    # addition to tensorboard, because tensorboard is annoying for detailed
    # inspection and actually discards data in histogram summaries.
    if args.detailed_logs:
        log_embs = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'embeddings'),
            dtype=np.float32,
            shape=(args.train_iterations, batch_size, args.embedding_dim))
        log_loss = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'losses'),
            dtype=np.float32,
            shape=(args.train_iterations, batch_size))
        log_fids = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'fids'),
            dtype='S' + str(max_fid_len),
            shape=(args.train_iterations, batch_size))

    # These are collected here before we add the optimizer, because depending
    # on the optimizer, it might add extra slots, which are also global
    # variables, with the exact same prefix.
    model_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        body_prefix)

    # Define the optimizer and the learning-rate schedule.
    # Unfortunately, we get NaNs if we don't handle no-decay separately.
    # 'global_step' counts the number of batches the graph has seen.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    if 0 <= args.decay_start_iteration < args.train_iterations:
        learning_rate = tf.train.exponential_decay(
            args.learning_rate,
            # Decay every 'lr_decay_steps' iterations once 'decay_start_iteration' is reached.
            tf.maximum(0, global_step - args.decay_start_iteration),
            # args.train_iterations - args.decay_start_iteration, args.weight_decay_factor)
            args.lr_decay_steps,
            args.lr_decay_factor,
            staircase=True)
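        # With staircase=True, this is effectively:
        #   lr = args.learning_rate * args.lr_decay_factor ** floor(
        #            max(0, step - args.decay_start_iteration) / args.lr_decay_steps)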
    else:
        learning_rate = args.learning_rate  # no decay, e.g. when 'decay_start_iteration' is -1
    tf.summary.scalar('learning_rate', learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1e-3)
    # Feel free to try others!
    # optimizer = tf.train.AdadeltaOptimizer(learning_rate)

    # Update_ops are used to update batchnorm stats.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.minimize(loss_mean, global_step=global_step)

    # Define a saver for the complete model.
    checkpoint_saver = tf.train.Saver(max_to_keep=0)

    with tf.Session(config=config) as sess:
        if args.resume:
            # In case we're resuming, simply load the full checkpoint to init.
            last_checkpoint = tf.train.latest_checkpoint(args.experiment_root)
            log.info('Restoring from checkpoint: {}'.format(last_checkpoint))
            checkpoint_saver.restore(sess, last_checkpoint)
        else:
            # But if we're starting from scratch, we may need to load some
            # variables from the pre-trained weights, and random init others.
            sess.run(tf.global_variables_initializer())
            if args.initial_checkpoint is not None:
                saver = tf.train.Saver(model_variables)
                saver.restore(
                    sess, args.initial_checkpoint
                )  # restore the pre-trained parameter from online model

            # In any case, we also store this initialization as a checkpoint,
            # such that we can run exactly reproducible experiments.
            checkpoint_saver.save(sess,
                                  os.path.join(args.experiment_root,
                                               'checkpoint'),
                                  global_step=0)

        merged_summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(args.experiment_root,
                                               sess.graph)

        start_step = sess.run(global_step)
        log.info('Starting training from iteration {}.'.format(start_step))

        # Finally, here comes the main-loop. This `Uninterrupt` is a handy
        # utility such that an iteration still finishes on Ctrl+C and we can
        # stop the training cleanly.
        with lb.Uninterrupt(sigs=[SIGINT, SIGTERM], verbose=True) as u:
            for i in range(start_step, args.train_iterations):

                # Compute gradients, update weights, store logs!
                start_time = time.time()
                _, summary, step, b_prec_at_k, b_embs, b_loss, b_fids = \
                    sess.run([train_op, merged_summary, global_step,
                              prec_at_k, endpoints['emb'], losses, fids])
                elapsed_time = time.time() - start_time

                # Compute the iteration speed and add it to the summary.
                # We did observe some weird spikes that we couldn't track down.
                summary2 = tf.Summary()
                summary2.value.add(tag='secs_per_iter',
                                   simple_value=elapsed_time)
                summary_writer.add_summary(summary2, step)
                summary_writer.add_summary(summary, step)

                if args.detailed_logs:
                    log_embs[i], log_loss[i], log_fids[i] = b_embs, b_loss, b_fids

                # Do a huge print out of the current progress.
                seconds_todo = (args.train_iterations - step) * elapsed_time
                log.info(
                    'iter:{:6d}, loss min|avg|max: {:.3f}|{:.3f}|{:6.3f}, '
                    'batch-p@{}: {:.2%}, ETA: {} ({:.2f}s/it)'.format(
                        step, float(np.min(b_loss)), float(np.mean(b_loss)),
                        float(np.max(b_loss)), args.batch_k - 1,
                        float(b_prec_at_k),
                        timedelta(seconds=int(seconds_todo)), elapsed_time))
                sys.stdout.flush()
                sys.stderr.flush()

                # Save a checkpoint of training every so often.
                if (args.checkpoint_frequency > 0
                        and step % args.checkpoint_frequency == 0):
                    checkpoint_saver.save(sess,
                                          os.path.join(args.experiment_root,
                                                       'checkpoint'),
                                          global_step=step)

                # Stop the main-loop at the end of the step, if requested.
                if u.interrupted:
                    log.info("Interrupted on request!")
                    break

        # Store one final checkpoint. This might be redundant, but it is crucial
        # in case intermediate storing was disabled and it saves a checkpoint
        # when the process was interrupted.
        checkpoint_saver.save(sess,
                              os.path.join(args.experiment_root, 'checkpoint'),
                              global_step=step)
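
The two-step loss construction commented above can be sketched as follows. This is a minimal illustration, assuming `loss.cdist` computes a pairwise Euclidean distance matrix and `LOSS_CHOICES['batch_hard']` is the batch-hard hinge loss; the repository's actual implementations may differ.

import tensorflow as tf

def cdist_sketch(a, b):
    # Pairwise Euclidean distances between rows of a (n, d) and b (m, d).
    diff = tf.expand_dims(a, 1) - tf.expand_dims(b, 0)  # shape (n, m, d)
    return tf.sqrt(tf.reduce_sum(tf.square(diff), axis=-1) + 1e-12)

def batch_hard_sketch(dists, pids, margin):
    # For each anchor, hinge the hardest positive against the easiest negative.
    same = tf.equal(tf.expand_dims(pids, 1), tf.expand_dims(pids, 0))
    eye = tf.cast(tf.eye(tf.shape(dists)[0]), tf.bool)
    pos_mask = tf.logical_and(same, tf.logical_not(eye))  # exclude the anchor itself
    hardest_pos = tf.reduce_max(tf.where(pos_mask, dists, tf.zeros_like(dists)), axis=1)
    big = tf.reduce_max(dists) + 1.0  # sentinel larger than any real distance
    easiest_neg = tf.reduce_min(
        tf.where(tf.logical_not(same), dists, tf.fill(tf.shape(dists), big)), axis=1)
    return tf.maximum(hardest_pos - easiest_neg + margin, 0.0)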
Ejemplo n.º 37
0
def cnn_model_fn(features, labels, mode, num_classes=20):
    """Model function for CNN."""
    # Input Layer
    N = int(features["x"].shape[0])  # static batch size; must be known at graph-build time
    # input_layer = tf.reshape(features["x"], [-1, 256, 256, 3])

    if mode != tf.estimator.ModeKeys.PREDICT:
        crop_layer = [
            tf.image.random_flip_left_right(
                tf.image.random_flip_up_down(
                    tf.random_crop(features["x"][0, :, :, :], [224, 224, 3])))
        ]
        for i in range(1, N):
            crop_layer = tf.concat([
                crop_layer,
                [
                    tf.image.random_flip_left_right(
                        tf.image.random_flip_up_down(
                            tf.random_crop(features["x"][i, :, :, :],
                                           [224, 224, 3])))
                ]
            ], 0)
        crop_layer = tf.image.resize_images(crop_layer, [256, 256])
    else:
        crop_layer = tf.image.resize_images(features["x"], [256, 256])

    # conv(k, s, n, p)
    # conv(11, 4, 96, 'VALID')
    # relu()
    with tf.variable_scope('conv1') as scope:
        conv1 = tf.layers.conv2d(
            inputs=crop_layer,
            kernel_size=[11, 11],
            strides=4,
            filters=96,
            padding="valid",
            kernel_initializer=tf.truncated_normal_initializer(mean=0,
                                                               stddev=0.01),
            bias_initializer=tf.zeros_initializer(),
            activation=tf.nn.relu)

        scope.reuse_variables()
        weights = tf.get_variable('conv2d/kernel')
        tf.summary.image('conv1/weights', weight_2_grid(weights))

    # max_pool(3, 2)
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[3, 3], strides=2)

    # conv(5, 1, 256, 'SAME')
    # relu()
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        kernel_size=[5, 5],
        strides=1,
        filters=256,
        padding="same",
        kernel_initializer=tf.truncated_normal_initializer(mean=0,
                                                           stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        activation=tf.nn.relu)

    # max_pool(3, 2)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[3, 3], strides=2)

    # conv(3, 1, 384, 'SAME')
    # relu()
    conv3 = tf.layers.conv2d(
        inputs=pool2,
        kernel_size=[3, 3],
        strides=1,
        filters=384,
        padding="same",
        kernel_initializer=tf.truncated_normal_initializer(mean=0,
                                                           stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        activation=tf.nn.relu)

    # conv(3, 1, 384, 'SAME')
    # relu()
    conv4 = tf.layers.conv2d(
        inputs=conv3,
        kernel_size=[3, 3],
        strides=1,
        filters=384,
        padding="same",
        kernel_initializer=tf.truncated_normal_initializer(mean=0,
                                                           stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        activation=tf.nn.relu)

    # conv(3, 1, 256, 'SAME')
    # relu()
    conv5 = tf.layers.conv2d(
        inputs=conv4,
        kernel_size=[3, 3],
        strides=1,
        filters=256,
        padding="same",
        kernel_initializer=tf.truncated_normal_initializer(mean=0,
                                                           stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        activation=tf.nn.relu)

    # max_pool(3, 2)
    pool3 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[3, 3], strides=2)

    # flatten()
    pool3_flat = tf.reshape(pool3, [-1, 6 * 6 * 256])
    # pool3_flat = tf.reshape(pool3, [int((labels.shape)[0]), -1])
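    # Shape check for the 6*6*256 flatten size (assuming 256x256 inputs):
    # conv1 (k=11, s=4, valid) -> 62; pool1 (3, 2) -> 30; conv2 (same) -> 30;
    # pool2 (3, 2) -> 14; conv3-5 (same) -> 14; pool3 (3, 2) -> 6.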

    # fully_connected(4096)
    # relu()
    dense1 = tf.layers.dense(inputs=pool3_flat,
                             units=4096,
                             activation=tf.nn.relu)

    # dropout(0.5)
    dropout1 = tf.layers.dropout(inputs=dense1,
                                 rate=0.5,
                                 training=mode == tf.estimator.ModeKeys.TRAIN)

    # fully_connected(4096)
    # relu()
    dense2 = tf.layers.dense(inputs=dropout1,
                             units=4096,
                             activation=tf.nn.relu)

    # dropout(0.5)
    dropout2 = tf.layers.dropout(inputs=dense2,
                                 rate=0.5,
                                 training=mode == tf.estimator.ModeKeys.TRAIN)

    # fully_connected(20)
    # Logits Layer
    logits = tf.layers.dense(inputs=dropout2, units=20)

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.sigmoid(logits, name="sigmoid_tensor")
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    # onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
    onehot_labels = labels
    loss = tf.identity(tf.losses.sigmoid_cross_entropy(
        multi_class_labels=onehot_labels, logits=logits),
                       name='loss')

    tf.summary.scalar('training_loss', loss)
    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_global_step()
        decay_LR = tf.train.exponential_decay(0.001,
                                              global_step,
                                              10000,
                                              0.5,
                                              staircase=True)
        optimizer = tf.train.MomentumOptimizer(learning_rate=decay_LR,
                                               momentum=0.9)
        train_op = optimizer.minimize(loss=loss, global_step=global_step)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy":
        # Round sigmoid probabilities to 0/1 so they are comparable with the
        # multi-hot labels; raw probabilities would never match exactly.
        tf.metrics.accuracy(labels=labels,
                            predictions=tf.round(
                                predictions["probabilities"]))
    }
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metric_ops)
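
The per-image Python loop above that builds `crop_layer` can be vectorized with tf.map_fn. A sketch under the same assumptions (features["x"] holds a batch of 256x256x3 images); the helper name `_augment` is introduced here for illustration only:

def _augment(img):
    img = tf.random_crop(img, [224, 224, 3])
    img = tf.image.random_flip_up_down(img)
    return tf.image.random_flip_left_right(img)

crop_layer = tf.image.resize_images(tf.map_fn(_augment, features["x"]), [256, 256])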
Ejemplo n.º 38
0
def main(args):

    network = importlib.import_module(args.model_def)

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    train_set = facenet.get_dataset(args.data_dir)
    if args.filter_filename:
        train_set = filter_dataset(train_set, args.filter_filename,
                                   args.filter_percentile,
                                   args.filter_min_nrof_images_per_class)
    nrof_classes = len(train_set)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Get a list of image paths and their labels
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)
        assert len(image_list) > 0, 'The dataset should not be empty'

        # Create a queue that produces indices into the image_list and label_list
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(range_size,
                                                    num_epochs=None,
                                                    shuffle=True,
                                                    seed=None,
                                                    capacity=32)

        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, 'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')

        labels_placeholder = tf.placeholder(tf.int64,
                                            shape=(None, 1),
                                            name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(1, ), (1, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder], name='enqueue_op')

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_png(file_contents)
                if args.random_rotate:
                    image = tf.py_func(facenet.random_rotate_image, [image],
                                       tf.uint8)
                if args.random_crop:
                    image = tf.random_crop(
                        image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                #pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])

        image_batch, label_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')

        print('Total number of classes: %d' % nrof_classes)
        print('Total number of examples: %d' % len(image_list))

        print('Building training graph')

        batch_norm_params = {
            # Decay for the moving averages.
            'decay': 0.995,
            # epsilon to prevent 0s in variance.
            'epsilon': 0.001,
            # force in-place updates of mean and variance estimates
            'updates_collections': None,
            # Moving averages end up in the trainable-variables collection.
            'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
        }
        # Build the inference graph
        prelogits, _ = network.inference(image_batch,
                                         args.keep_probability,
                                         phase_train=phase_train_placeholder,
                                         weight_decay=args.weight_decay)
        bottleneck = slim.fully_connected(
            prelogits,
            args.embedding_size,
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params,
            scope='Bottleneck',
            reuse=False)
        logits = slim.fully_connected(
            bottleneck,
            len(train_set),
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            scope='Logits',
            reuse=False)

        embeddings = tf.nn.l2_normalize(bottleneck,
                                        1,
                                        1e-10,
                                        name='embeddings')

        # Add center loss
        if args.center_loss_factor > 0.0:
            prelogits_center_loss, _ = facenet.center_loss(
                prelogits, label_batch, args.center_loss_alfa, nrof_classes)
            tf.add_to_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES,
                prelogits_center_loss * args.center_loss_factor)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch,
            logits=logits,
            name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables(), args.log_histograms)

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        tf.train.start_queue_runners(sess=sess)

        with sess.as_default():

            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                saver.restore(sess, pretrained_model)

            # Training and validation loop
            print('Running training')
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, epoch, image_list, label_list,
                      index_dequeue_op, enqueue_op, image_paths_placeholder,
                      labels_placeholder, learning_rate_placeholder,
                      phase_train_placeholder, batch_size_placeholder,
                      global_step, total_loss, train_op, summary_op,
                      summary_writer, regularization_losses,
                      args.learning_rate_schedule_file)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)

                # Evaluate on LFW
                if args.lfw_dir:
                    evaluate(sess, enqueue_op, image_paths_placeholder,
                             labels_placeholder, phase_train_placeholder,
                             batch_size_placeholder, embeddings, label_batch,
                             lfw_paths, actual_issame, args.lfw_batch_size,
                             args.lfw_nrof_folds, log_dir, step,
                             summary_writer)
    return model_dir
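
For comparison, the crop/flip/standardize steps above can be written against the tf.data API instead of input queues. This is only a sketch (the random-rotate py_func step is omitted, and `image_list`, `label_list`, and `args` are the objects built above):

def _preprocess(path, label):
    image = tf.image.decode_png(tf.read_file(path), channels=3)
    image = tf.random_crop(image, [args.image_size, args.image_size, 3])
    image = tf.image.random_flip_left_right(image)
    image.set_shape((args.image_size, args.image_size, 3))
    return tf.image.per_image_standardization(image), label

dataset = (tf.data.Dataset.from_tensor_slices((image_list, label_list))
           .shuffle(len(image_list))
           .map(_preprocess, num_parallel_calls=4)
           .batch(args.batch_size)
           .prefetch(1))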
Ejemplo n.º 39
0
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness):
  """Creates the operations to apply the specified distortions.

  During training it can help to improve the results if we run the images
  through simple distortions like crops, scales, and flips. These reflect the
  kind of variations we expect in the real world, and so can help train the
  model to cope with natural data more effectively. Here we take the supplied
  parameters and construct a network of operations to apply them to an image.

  Cropping
  ~~~~~~~~

  Cropping is done by placing a bounding box at a random position in the full
  image. The cropping parameter controls the size of that box relative to the
  input image. If it's zero, then the box is the same size as the input and no
  cropping is performed. If the value is 50%, then the crop box will be half the
  width and height of the input. In a diagram it looks like this:

  <       width         >
  +---------------------+
  |                     |
  |   width - crop%     |
  |    <      >         |
  |    +------+         |
  |    |      |         |
  |    |      |         |
  |    |      |         |
  |    +------+         |
  |                     |
  |                     |
  +---------------------+

  Scaling
  ~~~~~~~

  Scaling is a lot like cropping, except that the bounding box is always
  centered and its size varies randomly within the given range. For example if
  the scale percentage is zero, then the bounding box is the same size as the
  input and no scaling is applied. If it's 50%, then the bounding box size will
  vary randomly between half the input's width and height and its full size.

  Args:
    flip_left_right: Boolean whether to randomly mirror images horizontally.
    random_crop: Integer percentage setting the total margin used around the
    crop box.
    random_scale: Integer percentage of how much to vary the scale by.
    random_brightness: Integer range to randomly multiply the pixel values by.

  Returns:
    The jpeg input layer and the distorted result tensor.
  """

  jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
  decoded_image = tf.image.decode_jpeg(jpeg_data, channels=MODEL_INPUT_DEPTH)
  decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32)
  decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
  margin_scale = 1.0 + (random_crop / 100.0)
  resize_scale = 1.0 + (random_scale / 100.0)
  margin_scale_value = tf.constant(margin_scale)
  resize_scale_value = tf.random_uniform(tensor_shape.scalar(),
                                         minval=1.0,
                                         maxval=resize_scale)
  scale_value = tf.multiply(margin_scale_value, resize_scale_value)
  precrop_width = tf.multiply(scale_value, MODEL_INPUT_WIDTH)
  precrop_height = tf.multiply(scale_value, MODEL_INPUT_HEIGHT)
  precrop_shape = tf.stack([precrop_height, precrop_width])
  precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
  precropped_image = tf.image.resize_bilinear(decoded_image_4d,
                                              precrop_shape_as_int)
  precropped_image_3d = tf.squeeze(precropped_image, squeeze_dims=[0])
  cropped_image = tf.random_crop(precropped_image_3d,
                                 [MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH,
                                  MODEL_INPUT_DEPTH])
  if flip_left_right:
    flipped_image = tf.image.random_flip_left_right(cropped_image)
  else:
    flipped_image = cropped_image
  brightness_min = 1.0 - (random_brightness / 100.0)
  brightness_max = 1.0 + (random_brightness / 100.0)
  brightness_value = tf.random_uniform(tensor_shape.scalar(),
                                       minval=brightness_min,
                                       maxval=brightness_max)
  brightened_image = tf.multiply(flipped_image, brightness_value)
  distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
  return jpeg_data, distort_result
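
Hypothetical usage of the function above (the flag values and `jpeg_bytes` are assumptions for illustration, not from the original script):

jpeg_data, distorted_image = add_input_distortions(
    flip_left_right=True, random_crop=10, random_scale=10, random_brightness=10)
with tf.Session() as sess:
    # jpeg_bytes would hold the raw bytes of one JPEG file read from disk.
    distorted = sess.run(distorted_image, feed_dict={jpeg_data: jpeg_bytes})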
Ejemplo n.º 40
0
def main(args):

    config = config_reader.triplets_afix_config(args.config)

    np.random.seed(seed=config.seed)
    network = importlib.import_module(config.model_def)

    chokepoint_dataset = chokepoint.chokepoint_data(config.chokepoint_still_dir,
                                                    config.chokepoint_video_dir,
                                                    config.chokepoint_pairs)

    fold_list = [([0, 1], [2, 3, 4]),
                 ([1, 2], [3, 4, 0]),
                 ([2, 3], [4, 0, 1]),
                 ([3, 4], [0, 1, 2]),
                 ([4, 0], [1, 2, 3])]

    for fold_idx in range(5):

        print('Fold: {}'.format(fold_idx))

        train_folds, evaluation_folds = fold_list[fold_idx]

        # Train set
        chokepoint_train_set = chokepoint_dataset.get_S2V_dataset(train_folds)
        chokepoint1_paths, chokepoint1_issame = chokepoint_dataset.get_pairs(train_folds)

        # Validation set
        chokepoint2_paths, chokepoint2_issame = chokepoint_dataset.get_pairs(evaluation_folds)

        subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
        log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), 'fold{}_'.format(fold_idx) + subdir)
        if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
            os.makedirs(log_dir)
        model_dir = os.path.join(os.path.expanduser(args.models_base_dir), 'fold{}_'.format(fold_idx) + subdir)
        if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
            os.makedirs(model_dir)

        # Write arguments to a text file
        facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt'))

        # Store some git revision info in a text file in the log directory
        src_path,_ = os.path.split(os.path.realpath(__file__))
        facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

        print('Model directory: %s' % model_dir)
        print('Log directory: %s' % log_dir)
        if args.pretrained_model:
            print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model))

        if args.lfw_dir:
            print('LFW directory: %s' % args.lfw_dir)
            # Read the file containing the pairs used for testing
            pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
            # Get the paths for the corresponding images
            lfw_paths, lfw_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs)

        with tf.Graph().as_default():
            tf.set_random_seed(args.seed)
            global_step = tf.Variable(0, trainable=False)

            # Placeholder for the learning rate
            learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')

            batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

            phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

            image_paths_placeholder = tf.placeholder(tf.string, shape=(None,3), name='image_paths')
            labels_placeholder = tf.placeholder(tf.int64, shape=(None,3), name='labels')

            input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                        dtypes=[tf.string, tf.int64],
                                        shapes=[(3,), (3,)],
                                        shared_name=None, name=None)
            enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder])

            nrof_preprocess_threads = 4
            images_and_labels = []
            for _ in range(nrof_preprocess_threads):
                filenames, label = input_queue.dequeue()
                images = []
                for filename in tf.unstack(filenames):
                    file_contents = tf.read_file(filename)
                    image = tf.image.decode_image(file_contents, channels=3)

                    if args.random_crop:
                        image = tf.random_crop(image, [args.image_size, args.image_size, 3])
                    else:
                        image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size)
                    if args.random_flip:
                        image = tf.image.random_flip_left_right(image)

                    #pylint: disable=no-member
                    image.set_shape((args.image_size, args.image_size, 3))
                    images.append(tf.image.per_image_standardization(image))
                images_and_labels.append([images, label])

            image_batch, labels_batch = tf.train.batch_join(
                images_and_labels, batch_size=batch_size_placeholder,
                shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True,
                capacity=4 * nrof_preprocess_threads * args.batch_size,
                allow_smaller_final_batch=True)
            image_batch = tf.identity(image_batch, 'image_batch')
            image_batch = tf.identity(image_batch, 'input')
            labels_batch = tf.identity(labels_batch, 'label_batch')

            # Build the inference graph
            prelogits, _ = network.inference(image_batch, args.keep_probability,
                phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size,
                weight_decay=args.weight_decay)

            embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')
            # Split embeddings into anchor, positive and negative and calculate triplet loss
            anchor, positive, negative = tf.unstack(tf.reshape(embeddings, [-1,3,args.embedding_size]), 3, 1)
            triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha)

            learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step,
                args.learning_rate_decay_epochs*args.epoch_size, args.learning_rate_decay_factor, staircase=True)
            tf.summary.scalar('learning_rate', learning_rate)

            # Calculate the total losses
            regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss')

            # Build a Graph that trains the model with one batch of examples and updates the model parameters
            train_op = facenet.train(total_loss, global_step, args.optimizer,
                learning_rate, args.moving_average_decay, tf.global_variables())

            # Create a saver
            saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

            # Build the summary operation based on the TF collection of Summaries.
            # summary_op = tf.summary.merge_all()

            # Start running operations on the Graph.
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
            sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

            # Initialize variables
            sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder:True})
            sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder:True})

            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            coord = tf.train.Coordinator()
            tf.train.start_queue_runners(coord=coord, sess=sess)

            with sess.as_default():

                if args.pretrained_model:
                    print('Restoring pretrained model: %s' % args.pretrained_model)
                    saver.restore(sess, os.path.expanduser(args.pretrained_model))

                # Training and validation loop
                epoch = 0
                while epoch < args.max_nrof_epochs:
                    step = sess.run(global_step, feed_dict=None)
                    epoch = step // args.epoch_size

                    # Evaluate on COX
                    evaluate(sess, chokepoint1_paths, embeddings, labels_batch, image_paths_placeholder,
                             labels_placeholder,
                             batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op,
                             chokepoint1_issame, 100,
                             2, log_dir, step, summary_writer, args.embedding_size,
                             tag='chokepoint_train')

                    evaluate(sess, chokepoint2_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder,
                                          batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op,
                                          chokepoint2_issame, 100,
                                          3, log_dir, step, summary_writer, args.embedding_size, tag='chokepoint_eval')



                    # Evaluate on LFW
                    # if args.lfw_dir:
                    #     lfw_result = evaluate(sess, lfw_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder,
                    #                           batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op,
                    #                           lfw_issame, args.batch_size,
                    #                           args.lfw_nrof_folds, log_dir, step, summary_writer, args.embedding_size, tag='lfw')

                    # Train for one epoch
                    train(args, sess, chokepoint_train_set, epoch, image_paths_placeholder, labels_placeholder, labels_batch,
                        batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step,
                        embeddings, total_loss, train_op, summary_writer, args.learning_rate_schedule_file,
                        args.embedding_size)

                    # Save variables and the metagraph if it doesn't exist already
                    save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step)

    return model_dir
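
For reference, the standard FaceNet triplet loss that `facenet.triplet_loss` is expected to compute here; a sketch in which `alpha` is the margin, and the library's actual code may differ in details:

import tensorflow as tf

def triplet_loss_sketch(anchor, positive, negative, alpha):
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), 1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), 1)
    return tf.reduce_mean(tf.maximum(pos_dist - neg_dist + alpha, 0.0))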
Ejemplo n.º 41
0
    def vgg16(self, inp):
        """
        load variable from npy to build the VGG

        :param inp: rgb image [batch, height, width, 3] values scaled [0., 255.]
        """

        start_time = time.time()
        PrintWithTime(BarFormat("build model started (VGG-16)"))

        # Input is a batch of [256, 256, 3] images; randomly crop and flip
        # each one to [224, 224, 3].
        distorted_image = tf.stack([
            tf.random_crop(tf.image.random_flip_left_right(each_image),
                           [224, 224, 3]) for each_image in tf.unstack(inp)
        ])

        self.train_layers = []
        self.train_last_layer = []
        self.classifyLastLayer = []

        # Convert RGB to BGR
        red, green, blue = tf.split(axis=3,
                                    num_or_size_splits=3,
                                    value=distorted_image)
        assert red.get_shape().as_list()[1:] == [224, 224, 1]
        assert green.get_shape().as_list()[1:] == [224, 224, 1]
        assert blue.get_shape().as_list()[1:] == [224, 224, 1]
        bgr = tf.concat(axis=3,
                        values=[
                            blue - VGG_MEAN[0],
                            green - VGG_MEAN[1],
                            red - VGG_MEAN[2],
                        ])
        assert bgr.get_shape().as_list()[1:] == [224, 224, 3]

        radius = 2
        alpha = 2e-05
        beta = 0.75
        bias = 1.0

        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')
        self.lrn1 = tf.nn.local_response_normalization(self.pool1,
                                                       depth_radius=radius,
                                                       alpha=alpha,
                                                       beta=beta,
                                                       bias=bias)

        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')
        self.lrn2 = tf.nn.local_response_normalization(self.pool2,
                                                       depth_radius=radius,
                                                       alpha=alpha,
                                                       beta=beta,
                                                       bias=bias)

        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.pool3 = self.max_pool(self.conv3_3, 'pool3')

        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.pool4 = self.max_pool(self.conv4_3, 'pool4')

        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.pool5 = self.max_pool(self.conv5_3, 'pool5')

        self.fc6 = self.fc_layer(self.pool5, "fc6")
        assert self.fc6.get_shape().as_list()[1:] == [4096]
        self.relu6 = tf.nn.dropout(tf.nn.relu(
            self.fc6), 0.5) if self._train else tf.nn.relu(self.fc6)

        self.fc7 = self.fc_layer(self.relu6, "fc7")
        self.relu7 = tf.nn.dropout(tf.nn.relu(
            self.fc7), 0.5) if self._train else tf.nn.relu(self.fc7)
        ''' ADD ONE MORE DENSE 4096 -> D '''
        # FC8
        # Output output_dim
        with tf.name_scope('fc8') as scope:
            fc8w = tf.Variable(tf.random_normal([4096, 300],
                                                dtype=tf.float32,
                                                stddev=1e-2),
                               name='weights')
            fc8b = tf.Variable(tf.constant(0.0, shape=[300], dtype=tf.float32),
                               name='biases')
            self.fc8l = tf.nn.bias_add(tf.matmul(self.relu7, fc8w), fc8b)
            self.fc8 = tf.nn.tanh(self.fc8l)
            self.train_last_layer += [fc8w, fc8b]
        # Classify
        # Output label_num
        with tf.name_scope('cls') as scope:
            clsw = tf.Variable(tf.random_normal([4096, self.n_class],
                                                dtype=tf.float32,
                                                stddev=1e-2),
                               name='weights')
            clsb = tf.Variable(tf.constant(0.0,
                                           shape=[self.n_class],
                                           dtype=tf.float32),
                               name='biases')
            self.cls = tf.nn.bias_add(tf.matmul(self.relu7, clsw), clsb)
            self.classifyLastLayer += [clsw, clsb]

        PrintWithTime(
            ("build model finished: %ds" % (time.time() - start_time)))
Ejemplo n.º 42
0
    def read_and_decode(self, filename_queue):
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)

        _features = {
            feature: tf.FixedLenFeature([], tf.string)
            for feature in self.features.keys()
        }

        features = tf.parse_single_example(
            serialized_example,
            # Defaults are not specified since both keys are required.
            features=_features)

        outputs = {}

        for feature_name, feature in self.features.items():
            # Convert from a scalar string tensor (whose single string holds
            # the flattened image bytes) to a uint8 tensor.
            image = tf.decode_raw(features[feature_name],
                                  tf.uint8)  # Change to tf.int8
            if 'depth' in feature:
                shape = [
                    feature['in_width'], feature['in_width'], feature['depth']
                ]
            elif 'shape' in feature:
                shape = feature['shape']
            else:
                shape = [feature['in_width'], feature['in_width']]

            raw_shape = np.prod(shape)
            image.set_shape([raw_shape])
            image = tf.reshape(image, shape)
            outputs[feature_name] = image
        outputs = {
            k: tf.cast(v, tf.float32) / 255.0
            for k, v in outputs.items()
        }
        #return outputs
        if len(shape) == 2:
            outputs = {k: tf.expand_dims(v, -1) for k, v in outputs.items()}

        # Flipping - randomly flip up-down and left-right.
        if self.flipping:
            outputs = {
                k: tf.image.random_flip_up_down(v, seed=0)
                for k, v in outputs.items()
            }
            outputs = {
                k: tf.image.random_flip_left_right(v, seed=1)
                for k, v in outputs.items()
            }

        if self.random_brightness:
            max_delta = 0.1
            image_name = list(self.features.keys())[0]  # list() so this works under Python 3
            outputs[image_name] = tf.image.random_brightness(
                outputs[image_name], max_delta, seed=0)
            outputs[image_name] = tf.image.random_contrast(outputs[image_name],
                                                           0.7,
                                                           1,
                                                           seed=0)

        outputs = {k: tf.squeeze(v) for k, v in outputs.items()}
        # Rotation by degree
        if self.rotating:
            angle = tf.random_uniform([1],
                                      -self.max_degree,
                                      self.max_degree,
                                      dtype=tf.float32)
            outputs = {
                k: tip.rotate_image(v, angle)
                for k, v in outputs.items()
            }

        # Translation invariance - crop 712 -> 512 and 324 -> 224
        if self.random_crop:
            outputs = {
                k: tf.random_crop(
                    v, [self.features[k]['width'], self.features[k]['width']],
                    seed=10)
                for k, v in outputs.items()
            }
        else:
            outputs = {
                k: tip.central_crop(
                    v, [self.features[k]['width'], self.features[k]['width']])
                for k, v in outputs.items()
            }

        # Pixel values were already scaled to [0, 1] when decoded above, so do
        # not divide by 255 a second time here.
        outputs = {k: tf.cast(v, tf.float32) for k, v in outputs.items()}
        outputs = list(OrderedDict(sorted(outputs.items())).values())
        return outputs
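
Hypothetical usage of read_and_decode (the `reader` instance, filename, and batch parameters are placeholders for illustration):

filename_queue = tf.train.string_input_producer(['train.tfrecords'])
outputs = reader.read_and_decode(filename_queue)
batch = tf.train.shuffle_batch(outputs, batch_size=32,
                               capacity=2000, min_after_dequeue=1000)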
Ejemplo n.º 43
0
def get_batch(image, label, batch_size, capacity):
    '''
    Args:
        image: list type
        label: list type
        image_W: image width
        image_H: image height
        batch_size: batch size
        capacity: the maximum elements in queue
    Returns:
        image_batch: 4D tensor [batch_size, width, height, 3], dtype=tf.float32
        # The original tutorial used RGB (3 channels); to use grayscale instead, it would be 1 channel.
        label_batch: 1D tensor [batch_size], dtype=tf.int32
    '''

    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # make an input queue
    input_queue = tf.train.slice_input_producer([image, label])

    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)

    ######################################
    # data augmentation should go here
    ######################################

    #    I think it will be better if we pre-process the images first
    #    image = tf.image.resize_image_with_crop_or_pad(image, image_W, image_H)

    #    data augmentation

    image = tf.random_crop(image,
                           [196, 196, 3])  # fails if a source image is smaller than 196x196
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_contrast(image, lower=0.3, upper=1.0)
    image = tf.image.random_brightness(image, max_delta=0.2)

    #    image = tf.image.random_hue(image, max_delta=0.05)
    #    image = tf.image.random_saturation(image, lower=0.0, upper=2.0)

    # Limit the image pixels between [0, 1] in case of overflow.
    #    image = tf.minimum(image, 1.0)
    #    image = tf.maximum(image, 0.0)

    # If you want to inspect the generated image batches, comment out the
    # standardization below and the `image_batch = tf.cast(...)` cast so the
    # images still look normal -- but do not comment them out when training!
    image = tf.image.per_image_standardization(image)

    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=64,
                                              capacity=capacity)

    #you can also use shuffle_batch
    #    image_batch, label_batch = tf.train.shuffle_batch([image,label],
    #                                                      batch_size=BATCH_SIZE,
    #                                                      num_threads=64,
    #                                                      capacity=CAPACITY,
    #                                                      min_after_dequeue=CAPACITY-1)

    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)

    return image_batch, label_batch
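
One way to resolve the crop-size problem flagged above (source images smaller than the 196x196 crop) is to resize to a safe minimum before cropping. A sketch, with an assumed intermediate size:

image = tf.image.resize_images(image, [256, 256])
image = tf.random_crop(image, [196, 196, 3])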
Ejemplo n.º 44
0
def distorted_inputs(
        data_dir,
        batch_size,
        distort=2,
        num_examples_per_epoch_for_train=NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN):
    """Construct distorted input for training using the Reader ops.

  Args:
    data_dir: Path to the dataset data directory.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, INPUT_IMAGE_CHANNELS] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
    # for CIFAR-10
    print("From within distorted_inputs, data_dir = {}here".format(data_dir))
    #filenames = [os.path.join(data_dir, 'patches_%d.bin' % i) for i in xrange(0, 8)]
    #filenames = [os.path.join(data_dir, 'patches_train_%d.bin' % i) for i in xrange(0, 1)]
    #filenames = [os.path.join(data_dir, 'train_crop.bin')]
    filenames = [
        os.path.join(data_dir, 'train_%d.bin' % i) for i in xrange(0, 10)
    ]

    print("Expected filenames: {}".format(filenames))

    myfilenames = []
    for f in filenames:
        if tf.gfile.Exists(f):
            myfilenames.append(f)

    print("Found filenames: {}".format(myfilenames))

    filenames = myfilenames
    if len(filenames) == 0:
        raise ValueError('Failed to find any files to process')

    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.string_input_producer(filenames)

    # Read examples from files in the filename queue.
    read_input = read_dataset(filename_queue)
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)

    height = IMAGE_HEIGHT
    width = IMAGE_WIDTH

    # Image processing for training the network. Note the many random
    # distortions applied to the image.

    if distort == 1:
        # Randomly crop a [height, width] section of the image.
        distorted_image = tf.random_crop(reshaped_image,
                                         [height, width, INPUT_IMAGE_CHANNELS])

        # Randomly flip the image horizontally.
        distorted_image = tf.image.random_flip_left_right(distorted_image)

        # Because these operations are not commutative, consider randomizing
        # the order of their operation.
        distorted_image = tf.image.random_brightness(distorted_image,
                                                     max_delta=63)
        distorted_image = tf.image.random_contrast(distorted_image,
                                                   lower=0.2,
                                                   upper=1.8)
    elif distort == 2:
        distorted_image = tf.random_crop(reshaped_image,
                                         [height, width, INPUT_IMAGE_CHANNELS])

    else:
        distorted_image = tf.image.resize_image_with_crop_or_pad(
            reshaped_image, width, height)

    # Subtract off the mean and divide by the variance of the pixels.
    float_image = tf.image.per_image_standardization(distorted_image)

    # Ensure that the random shuffling has good mixing properties.
    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(num_examples_per_epoch_for_train *
                             min_fraction_of_examples_in_queue)
    print('Filling queue with %d images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(float_image,
                                           read_input.label,
                                           min_queue_examples,
                                           batch_size,
                                           shuffle=True)
Ejemplo n.º 45
0
def read_image_from_filename(filename,
                             batch_size,
                             num_threads=4,
                             output_height=128,
                             output_width=128,
                             min_after_dequeue=5000,
                             num_channels=1,
                             use_shuffle_batch=True,
                             scope=None):
    with tf.variable_scope(scope, "image_producer"):
        textReader = tf.TextLineReader()

        csv_path = tf.train.string_input_producer([filename])
        _, csv_content = textReader.read(csv_path)
        artifact_filenames, reference_filenames, quality = tf.decode_csv(
            csv_content, record_defaults=[[""], [""], [""]])

        # When training, use_shuffle_batch must be True. Otherwise (e.g. for
        # evaluation) the code runs for a single epoch and uses tf.train.batch
        # instead of tf.train.shuffle_batch.
        if use_shuffle_batch:
            num_epochs = None
        else:
            num_epochs = 1
        """
        # this method is from https://stackoverflow.com/q/34340489
        # use tf.train.slice_input_producer instead of string_input_producer
        # and tf.read_file instead of tf.WholeFileReader.read
        input_queue = tf.train.slice_input_producer(
            [artifact_filenames, reference_filenames, labels],
            num_epochs=num_epochs, shuffle=False)

        artifact_data  = tf.read_file(input_queue[0])
        reference_data = tf.read_file(input_queue[1])
        label_data     = tf.read_file(input_queue[2])
        """
        artifact_data = tf.read_file(artifact_filenames)
        reference_data = tf.read_file(reference_filenames)
        artifact_im = tf.image.decode_png(artifact_data, channels=num_channels)
        reference_im = tf.image.decode_png(reference_data,
                                           channels=num_channels)

        # concat all images in channel axis to randomly crop together
        concated_im = tf.concat([artifact_im, reference_im], axis=2)

        if use_shuffle_batch:
            concated_im = tf.random_crop(
                concated_im,
                [output_height, output_width, num_channels + num_channels])
        elif output_height > 0 and output_width > 0:
            concated_im = tf.image.resize_image_with_crop_or_pad(
                concated_im, output_height, output_width)

        if use_shuffle_batch:
            capacity = min_after_dequeue + 10 * batch_size
            im_batch = tf.train.shuffle_batch(
                [concated_im],
                batch_size=batch_size,
                capacity=capacity,
                num_threads=num_threads,
                min_after_dequeue=min_after_dequeue,
                allow_smaller_final_batch=True,
                name="shuffle_batch")
        else:
            # tf.train.batch of a single tensor returns a single tensor, so
            # there is no separate label batch to unpack here.
            im_batch = tf.train.batch(
                [concated_im],
                batch_size=batch_size,
                num_threads=num_threads,
                allow_smaller_final_batch=True,
                name="batch")

        # split concatenated data
        artifact_batch, reference_batch = tf.split(
            im_batch, [num_channels, num_channels], axis=3)
        artifact_batch = tf.cast(artifact_batch, tf.float32) / 127.5 - 1.0
        reference_batch = tf.cast(reference_batch, tf.float32) / 127.5 - 1.0

        return artifact_batch, reference_batch
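# Hedged standalone sketch of the concat-then-crop trick used above:
# concatenating the artifact/reference pair along the channel axis makes a
# single tf.random_crop pick the same window for both images.
import tensorflow as tf

artifact = tf.placeholder(tf.uint8, [None, None, 1], name="artifact")
reference = tf.placeholder(tf.uint8, [None, None, 1], name="reference")
pair = tf.concat([artifact, reference], axis=2)     # H x W x 2
crop = tf.random_crop(pair, [128, 128, 2])          # one window for both
artifact_crop, reference_crop = tf.split(crop, [1, 1], axis=2)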
Ejemplo n.º 46
0
def train_data(input_data, is_training):
    a = tf.pad(input_data, [[0, 0], [padding_pixel, padding_pixel],
                            [padding_pixel, padding_pixel], [0, 0]])
    a = tf.random_crop(a, size=[BATCH_SIZE, 32, 32, 3])
    return tf.cond(is_training, lambda: a, lambda: input_data)
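# Hedged self-contained sketch of the same pad-then-crop augmentation;
# padding_pixel and BATCH_SIZE are module-level constants in the original,
# so the values below are assumptions.
import tensorflow as tf

padding_pixel = 4   # assumed
BATCH_SIZE = 128    # assumed
is_training = tf.placeholder(tf.bool, [])
input_data = tf.placeholder(tf.float32, [BATCH_SIZE, 32, 32, 3])
padded = tf.pad(input_data, [[0, 0], [padding_pixel, padding_pixel],
                             [padding_pixel, padding_pixel], [0, 0]])
cropped = tf.random_crop(padded, size=[BATCH_SIZE, 32, 32, 3])
augmented = tf.cond(is_training, lambda: cropped, lambda: input_data)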
Ejemplo n.º 47
0
		def _random_distord(images, labels):
			images = tf.image.random_flip_left_right(images)
			images = tf.image.random_flip_up_down(images)
			# angle = tf.random_uniform(shape=(1,), minval=0, maxval=90)
			# images = tf.contrib.image.rotate(images, angle * math.pi / 180, interpolation='BILINEAR')

			# Rotation and transformation
			# print(images.shape)  # = (?, 299, 299, ?)
			print('images.shape:', images.shape)
			w, h = IMAGE_SIZE
			a = max(w, h)
			d = math.ceil(a * (math.sqrt(2) - 1) / 2)
			print('paddings d =', d)
			paddings = tf.constant([[0, 0], [d, d], [d, d], [0, 0]])
			images = tf.pad(images, paddings, "SYMMETRIC")
			#images = tf.image.resize_image_with_crop_or_pad(images, w+d, h+d)
			print('images.shape:', images.shape)
			angle = tf.random_uniform(shape=(1,), minval=0, maxval=settings.rotation_max_angle)
			images = tf.contrib.image.rotate(images, angle * math.pi / 180, interpolation='BILINEAR')
			#images = tf.image.crop_to_bounding_box(images, d, d, s+d, s+d)
			
			# Transformation
			#transform1 = tf.constant([1.0, 0.2, -30.0, 0.2, 1.0, 0.0, 0.0, 0.0], dtype=tf.float32)			
			# transform is  vector of length 8 or tensor of size N x 8
			# [a0, a1, a2, b0, b1, b2, c0, c1]			
			a0 = tf.constant([1.0])
			a1 = tf.random_uniform(shape=(1,), minval=0.0, maxval=settings.transform_maxval)
			a2 = tf.constant([-30.0])
			b0 = tf.random_uniform(shape=(1,), minval=0.0, maxval=settings.transform_maxval)
			b1 = tf.constant([1.0])
			b2 = tf.constant([-30.0])
			c0 = tf.constant([0.0])
			c1 = tf.constant([0.0])
			transform1 = tf.concat(axis=0, values=[a0, a1, a2, b0, b1, b2, c0, c1])
			#transform = tf.tile(tf.expand_dims(transform1, 0), [batch, 1])
			#print('Added transformations:', transform)
			images = tf.contrib.image.transform(images, transform1)			
			images = tf.image.resize_image_with_crop_or_pad(images, h, w)
			# ---			
			zoom = 1.1
			w_crop = math.ceil(w / zoom)
			h_crop = math.ceil(h / zoom)
			#batch_size = int(images.shape[0])
			#print(images.shape)
			batch_size = tf.shape(images)[0]  # dynamic batch size as an int32 scalar
			images = tf.random_crop(images, [batch_size, h_crop, w_crop, 3])

			images = tf.image.resize_images(images, [h, w])			
			# ---
			# end of Rotation and Transformation block
			
			images = tf.image.random_hue(images, max_delta=0.05)
			images = tf.image.random_contrast(images, lower=0.9, upper=1.5)
			images = tf.image.random_brightness(images, max_delta=0.1)
			images = tf.image.random_saturation(images, lower=1.0, upper=1.5)

			#images = tf.image.per_image_standardization(images)
			images = tf.map_fn(lambda frame: tf.image.per_image_standardization(frame), images)

			#images = tf.minimum(images, 1.0)
			#images = tf.maximum(images, 0.0)

			#images.set_shape([None, None, None, 3])
			images.set_shape([None, 299, 299, 3])
			return images, labels
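# Hedged wiring sketch: the enclosing code is not shown, but a distortion
# function with this (images, labels) signature is typically mapped over a
# batched tf.data.Dataset. Assumes _random_distord, IMAGE_SIZE and settings
# are in scope; the stand-in batch below is illustrative only.
import tensorflow as tf

images = tf.zeros([8, 299, 299, 3])        # stand-in image batch
labels = tf.zeros([8], dtype=tf.int64)     # stand-in labels
dataset = tf.data.Dataset.from_tensors((images, labels))
dataset = dataset.map(_random_distord)     # distorts whole batches at once
aug_images, aug_labels = dataset.make_one_shot_iterator().get_next()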
Ejemplo n.º 48
0
def get_batch(paths, options):
    """Returns a data split of the RECOLA dataset, which was saved in tfrecords format.
    Args:
        split_name: A train/test/valid split name.
    Returns:
        The raw audio examples and the corresponding arousal/valence
        labels.
    """
    shuffle = options['shuffle']
    batch_size = options['batch_size']
    num_classes = options['num_classes']
    crop_size = options['crop_size']
    horizontal_flip = options['horizontal_flip']

    # root_path = Path(dataset_dir) / split_name
    # paths = [str(x) for x in root_path.glob('*.tfrecords')]

    filename_queue = tf.train.string_input_producer(paths, shuffle=shuffle)

    reader = tf.TFRecordReader()

    _, serialized_example = reader.read(filename_queue)

    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'video':
                                           tf.FixedLenFeature([], tf.string),
                                           'label':
                                           tf.FixedLenFeature([], tf.int64)
                                       })

    video = tf.cast(tf.decode_raw(features['video'], tf.uint8),
                    tf.float32)  #/ 255.
    label = features['label']  #tf.decode_raw(features['label'], tf.int64)

    # Number of threads should always be one, in order to load samples
    # sequentially.
    videos, labels = tf.train.batch([video, label],
                                    batch_size,
                                    num_threads=1,
                                    capacity=1000,
                                    dynamic_pad=True)

    videos = tf.reshape(videos, (batch_size, 29, 118, 118, 1))
    #labels = tf.reshape(labels, (batch_size,  1))
    labels = tf.contrib.layers.one_hot_encoding(labels, num_classes)

    # if is_training:
    # resized_image = tf.image.resize_images(frame, [crop_size, 110])
    # random cropping
    if crop_size is not None:
        videos = tf.random_crop(videos,
                                [batch_size, 29, crop_size, crop_size, 1])
    # random left right flip
    if horizontal_flip:
        sample = tf.random_uniform(shape=[],
                                   minval=0,
                                   maxval=1,
                                   dtype=tf.float32)
        option = tf.less(sample, 0.5)
        videos = tf.cond(option,
                         lambda: tf.map_fn(video_left_right_flip, videos),
                         lambda: tf.map_fn(tf.identity, videos))
        # lambda: video_left_right_flip(videos),
        # lambda: tf.identity(videos))
    videos = normalize(
        videos)  #tf.cast(videos, tf.float32) * (1. / 255.) - 0.5

    return videos, labels
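# Hedged sketches of the two helpers used above but defined elsewhere in the
# repo; normalize follows the inline hint at the end of the function.
import tensorflow as tf

def video_left_right_flip(video):
    # Flip every frame of a [T, H, W, C] clip along the width axis.
    return tf.reverse(video, axis=[2])

def normalize(videos):
    # Map the uint8 pixel range [0, 255] to [-0.5, 0.5].
    return tf.cast(videos, tf.float32) * (1. / 255.) - 0.5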
Ejemplo n.º 49
0
def image_augmentations(image,
                        data_augmentations,
                        model_input_image_size,
                        label=None):
    """Coordinating image augmentations for both image and heatmap."""
    im_size = [int(x) for x in image.get_shape()]
    im_size_check = np.any(
        np.less_equal(model_input_image_size[:2], im_size[:2]))
    if data_augmentations is not None:
        # Pixel/image-level augmentations
        if 'singleton' in data_augmentations:
            image = tf.expand_dims(image, axis=-1)
            print 'Adding singleton dimension to image.'
        if 'singleton_label' in data_augmentations:
            label = tf.expand_dims(label, axis=-1)
            print 'Adding singleton dimension to label.'
        if 'bsds_crop' in data_augmentations and im_size_check:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            # intermediate_size = [171, 256, 3]
            # intermediate_size = [256, 384, 3]
            intermediate_size = [324, 484, 3]
            image = tf.image.resize_image_with_crop_or_pad(
                image, intermediate_size[0], intermediate_size[1])
            label = tf.image.resize_image_with_crop_or_pad(
                label, intermediate_size[0], intermediate_size[1])
            print 'Applying BSDS crop.'
        if 'uint8_rescale' in data_augmentations:
            image = tf.cast(image, tf.float32) / 255.
            print 'Applying uint8 rescale to the image.'
        if 'uint8_rescale_label' in data_augmentations:
            label = tf.cast(label, tf.float32) / 255.
            print 'Applying uint8 rescale to the label.'
        if 'uint8_rescale_-1_1' in data_augmentations:
            image = 2 * (tf.cast(image, tf.float32) / 255.) - 1
            print 'Applying uint8 rescale.'
        if 'image_to_bgr' in data_augmentations:
            image = tf.stack([image[:, :, 2], image[:, :, 1], image[:, :, 0]],
                             axis=-1)
        if 'pascal_normalize' in data_augmentations:
            image = image - [123.68, 116.78, 103.94]
        if 'random_contrast' in data_augmentations:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
            print 'Applying random contrast.'
        if 'random_brightness' in data_augmentations:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            image = tf.image.random_brightness(image, max_delta=63.)
            print 'Applying random brightness.'
        if 'grayscale' in data_augmentations and im_size_check:
            # image = tf.image.rgb_to_grayscale(image)
            image = tf.expand_dims(image[:, :, 0], axis=-1)  # ABOVE INSTEAD?
            print 'Converting to grayscale.'
        # Affine augmentations
        if 'rotate' in data_augmentations and im_size_check:
            max_theta = 22.
            angle_rad = (max_theta / 180.) * math.pi
            angles = tf.random_uniform([], -angle_rad, angle_rad)
            transform = tf.contrib.image.angles_to_projective_transforms(
                angles, im_size[0], im_size[1])
            image = tf.contrib.image.transform(
                image,
                tf.contrib.image.compose_transforms(transform),
                interpolation='BILINEAR')  # or 'NEAREST'
            print 'Applying random rotate.'
        if 'rotate_image_label' in data_augmentations and im_size_check:
            max_theta = 30.
            angle_rad = (max_theta / 180.) * math.pi
            angles = tf.random_uniform([], -angle_rad, angle_rad)
            transform = tf.contrib.image.angles_to_projective_transforms(
                angles, im_size[0], im_size[1])
            image = tf.contrib.image.transform(
                image,
                tf.contrib.image.compose_transforms(transform),
                interpolation='BILINEAR')  # or 'NEAREST'
            label = tf.contrib.image.transform(
                label,
                tf.contrib.image.compose_transforms(transform),
                interpolation='BILINEAR')  # or 'NEAREST'
            print 'Applying random rotate.'
        if 'random_scale_crop_image_label' in data_augmentations\
                and im_size_check:
            scale_choices = tf.convert_to_tensor([1., 1.02, 1.04, 1.06, 1.08])
            samples = tf.multinomial(tf.log([tf.ones_like(scale_choices)]), 1)
            image_shape = image.get_shape().as_list()
            scale = scale_choices[tf.cast(samples[0][0], tf.int32)]
            scale_tf = tf.cast(
                tf.round(
                    np.asarray(model_input_image_size[:2]).astype(np.float32) *
                    scale), tf.int32)
            combined = tf.concat([image, label], axis=-1)
            combo_shape = combined.get_shape().as_list()
            combined_crop = tf.random_crop(
                combined, tf.concat([scale_tf, [combo_shape[-1]]], 0))
            combined_resize = tf.squeeze(tf.image.resize_bicubic(
                tf.expand_dims(combined_crop, axis=0),
                model_input_image_size[:2],
                align_corners=True),
                                         axis=0)
            image = combined_resize[:, :, :image_shape[-1]]
            label = combined_resize[:, :, image_shape[-1]:]
            image.set_shape(model_input_image_size)
            label.set_shape(model_input_image_size[:2] +
                            [combo_shape[-1] - model_input_image_size[-1]])
        if 'rc_res' in data_augmentations and im_size_check:
            image = random_crop(image, model_input_image_size)
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            ms = [x // 2 for x in model_input_image_size]
            image = resize_image_label(im=image,
                                       model_input_image_size=ms,
                                       f='bicubic')
            print 'Applying random crop and resize.'
        if 'cc_res' in data_augmentations and im_size_check:
            image = center_crop(image, model_input_image_size)
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            ms = [x // 2 for x in model_input_image_size]
            image = resize_image_label(im=image,
                                       model_input_image_size=ms,
                                       f='bicubic')
            print 'Applying center crop and resize.'
        if 'random_crop' in data_augmentations and im_size_check:
            image = random_crop(image, model_input_image_size)
            print 'Applying random crop.'
        if 'center_crop' in data_augmentations and im_size_check:
            image = center_crop(image, model_input_image_size)
            print 'Applying center crop.'
        if 'random_crop_image_label' in data_augmentations and im_size_check:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            image, label = crop_image_label(image=image,
                                            label=label,
                                            size=model_input_image_size,
                                            crop='random')
        if 'center_crop_image_label' in data_augmentations and im_size_check:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            image, label = crop_image_label(image=image,
                                            label=label,
                                            size=model_input_image_size,
                                            crop='center')
        if 'resize' in data_augmentations and im_size_check:
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            image = resize_image_label(
                im=image,
                model_input_image_size=model_input_image_size,
                f='bicubic')
            print 'Applying bicubic resize.'
        if 'jk_resize' in data_augmentations and im_size_check:
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            image = tf.image.resize_image_with_crop_or_pad(
                image, model_input_image_size[0], model_input_image_size[1])
            print 'Applying crop-or-pad resize.'
        if 'resize_and_crop' in data_augmentations and im_size_check:
            model_input_image_size_1 = np.asarray(
                model_input_image_size[:2]) + 28
            image = resize_image_label(
                im=image,
                model_input_image_size=model_input_image_size_1,
                f='area')
            image = center_crop(image, model_input_image_size)
            print 'Applying area resize and center crop.'
        if 'resize_nn' in data_augmentations and im_size_check:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            image = resize_image_label(
                im=image,
                model_input_image_size=model_input_image_size,
                f='nearest')
            print 'Applying nearest resize.'
        if 'resize_image_label' in data_augmentations and im_size_check:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            image = resize_image_label(
                im=image,
                model_input_image_size=model_input_image_size,
                f='bicubic')
            label = resize_image_label(
                im=label,
                model_input_image_size=model_input_image_size,
                f='bicubic')
            print 'Applying bicubic resize.'
        elif 'resize_nn_image_label' in data_augmentations and im_size_check:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            image = resize_image_label(
                im=image,
                model_input_image_size=model_input_image_size,
                f='nearest')
            label = resize_image_label(
                im=label,
                model_input_image_size=model_input_image_size,
                f='nearest')
            print 'Applying nearest resize.'
        else:
            pass
        if 'left_right' in data_augmentations:
            image = image_flip(image, direction='left_right')
            print 'Applying random flip left-right.'
        if 'up_down' in data_augmentations:
            image = image_flip(image, direction='up_down')
            print 'Applying random flip up-down.'
        if 'lr_flip_image_label' in data_augmentations:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            image, label = lr_flip_image_label(image, label)
        if 'ud_flip_image_label' in data_augmentations:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            image, label = ud_flip_image_label(image, label)
        if 'gaussian_noise' in data_augmentations:
            im_shape = image.get_shape().as_list()
            assert len(im_shape) == 3, '4D not implemented yet.'
            sigma = 1. / 10.
            mu = 0.
            image = image + tf.random_normal(im_shape, mean=mu, stddev=sigma)
            print 'Applying gaussian noise.'
        if 'gaussian_noise_small' in data_augmentations:
            im_shape = image.get_shape().as_list()
            assert len(im_shape) == 3, '4D not implemented yet.'
            sigma = 1. / 20.
            mu = 0.
            image = image + tf.random_normal(im_shape, mean=mu, stddev=sigma)
            print 'Applying gaussian noise.'
        if 'calculate_rate_time_crop' in data_augmentations:
            im_shape = image.get_shape().as_list()
            minval = im_shape[0] // 3
            time_crop = tf.random_uniform([],
                                          minval=minval,
                                          maxval=im_shape[0],
                                          dtype=tf.int32)

            # For now always pull from the beginning
            indices = tf.range(0, time_crop, dtype=tf.int32)
            selected_image = tf.gather(image, indices)
            padded_image = tf.zeros([im_shape[0] - time_crop] + im_shape[1:],
                                    dtype=selected_image.dtype)

            # Randomly concatenate pad to front or back
            image = tf.cond(pred=tf.greater(
                tf.random_uniform([], minval=0, maxval=1, dtype=tf.float32),
                0.5),
                            true_fn=lambda: tf.concat(
                                [selected_image, padded_image], axis=0),
                            false_fn=lambda: tf.concat(
                                [padded_image, selected_image], axis=0))
            image.set_shape(im_shape)

            # Convert label to rate
            label = label / im_shape[0]
        if 'calculate_rate' in data_augmentations:
            label = label / image.get_shape().as_list()[0]
            print 'Applying rate transformation.'
        if 'threshold' in data_augmentations:
            image = tf.cast(tf.greater(image, 0.1), tf.float32)
            print 'Applying threshold.'
        if 'nonzero_label' in data_augmentations:
            label = tf.cast(tf.greater(label, 0.2), tf.float32)
            print 'Applying threshold.'
        if 'zero_one' in data_augmentations:
            image = tf.minimum(tf.maximum(image, 0.), 1.)
            print 'Applying clip to [0, 1].'
        if 'timestep_duplication' in data_augmentations:
            image = tf.stack([image for iid in range(7)])
            print 'Applying timestep duplication.'
        if 'per_image_standardization' in data_augmentations:
            image = tf.image.per_image_standardization(image)
            print 'Applying per-image zscore.'
        if 'flip_polarity' in data_augmentations:
            image = tf.abs(image - 1.)
        if 'NCHW' in data_augmentations:
            image = tf.transpose(image, (2, 0, 1))
    else:
        assert len(image.get_shape()) == 3, '4D not implemented yet.'
        image = tf.image.resize_image_with_crop_or_pad(
            image, model_input_image_size[0], model_input_image_size[1])
    return image, label
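# Hedged usage sketch: the crop/resize/flip helpers this dispatcher calls
# (random_crop, center_crop, crop_image_label, resize_image_label, ...) are
# defined elsewhere in the repo; the augmentation names below are taken from
# the branches above.
import tensorflow as tf

image = tf.placeholder(tf.float32, [256, 256, 3])
label = tf.placeholder(tf.float32, [256, 256, 1])
aug_image, aug_label = image_augmentations(
    image,
    data_augmentations=['random_crop_image_label', 'lr_flip_image_label'],
    model_input_image_size=[224, 224, 3],
    label=label)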
Ejemplo n.º 50
0
def main(args):

    network = importlib.import_module(args.model_def, 'inference')

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    with open(os.path.join(model_dir, 'args.txt'), 'w') as f:
        for arg in vars(args):
            f.write(arg + ' ' + str(getattr(args, arg)) + '\n')

    # Store some git revision info in a text file in the log directory
    if not args.no_store_revision_info:
        src_path, _ = os.path.split(os.path.realpath(__file__))
        facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    train_set = facenet.get_dataset(args.data_dir)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    if args.pretrained_model:
        print('Pre-trained model: %s' %
              os.path.expanduser(args.pretrained_model))

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for the learning rate
        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 3),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64,
                                            shape=(None, 3),
                                            name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(3, ), (3, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder])

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                #image = tf.image.decode_png(file_contents)
                image = tf.image.decode_jpeg(file_contents, channels=3)
                if args.random_rotate:
                    image = tf.py_func(facenet.random_rotate_image, [image],
                                       tf.uint8)
                if args.random_crop:
                    image = tf.random_crop(
                        image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                #pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                image = tf.cast(image, tf.float32)
                #image = tf.image.per_image_standardization(image)
                distorted_image = tf.image.random_brightness(image,
                                                             max_delta=32)
                image = tf.image.random_contrast(distorted_image,
                                                 lower=0.5,
                                                 upper=1.5)
                #images.append(tf.image.per_image_standardization(image))
                images.append(image)
            images_and_labels.append([images, label])

        image_batch, labels_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)

        batch_norm_params = {
            # Decay for the moving averages
            'decay': 0.995,
            # epsilon to prevent 0s in variance
            'epsilon': 0.001,
            # force in-place updates of mean and variance estimates
            'updates_collections': None,
            # Moving averages end up in the trainable variables collection
            'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
            # Only update statistics during training mode
            'is_training': phase_train_placeholder
        }
        # Build the inference graph
        prelogits, _ = network.inference(image_batch,
                                         args.keep_probability,
                                         phase_train=phase_train_placeholder,
                                         weight_decay=args.weight_decay)
        #pre_embeddings = slim.fully_connected(prelogits, args.embedding_size, activation_fn=None,
        #        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
        #        weights_regularizer=slim.l2_regularizer(args.weight_decay),
        #        normalizer_fn=slim.batch_norm,
        #        normalizer_params=batch_norm_params,
        #        scope='Bottleneck', reuse=False)
        pre_embeddings = _fully_connected(prelogits,
                                          args.embedding_size,
                                          name='Bottleneck')
        embeddings = tf.nn.l2_normalize(pre_embeddings,
                                        1,
                                        1e-10,
                                        name='embeddings')
        # Split embeddings into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1)
        triplet_loss = facenet.triplet_loss(anchor, positive, negative,
                                            args.alpha)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([triplet_loss] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables())

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # Initialize variables
        sess.run(tf.global_variables_initializer(),
                 feed_dict={phase_train_placeholder: True})
        sess.run(tf.local_variables_initializer(),
                 feed_dict={phase_train_placeholder: True})

        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if args.pretrained_model:
                print('Restoring pretrained model: %s' % args.pretrained_model)
                saver.restore(sess, os.path.expanduser(args.pretrained_model))

            # Training and validation loop
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, train_set, epoch, image_paths_placeholder,
                      labels_placeholder, labels_batch, batch_size_placeholder,
                      learning_rate_placeholder, phase_train_placeholder,
                      enqueue_op, input_queue, global_step, embeddings,
                      total_loss, train_op, summary_op, summary_writer,
                      args.learning_rate_schedule_file, args.embedding_size,
                      anchor, positive, negative, triplet_loss)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)

                evaluate_Training(sess, train_set, embeddings, labels_batch,
                                  image_paths_placeholder, labels_placeholder,
                                  batch_size_placeholder,
                                  learning_rate_placeholder,
                                  phase_train_placeholder, enqueue_op,
                                  args.batch_size, log_dir, step,
                                  summary_writer, args.embedding_size)

                # Evaluate on LFW
                #if args.lfw_dir:
                #    evaluate(sess, lfw_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder,
                #            batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, actual_issame, args.batch_size,
                #            args.lfw_nrof_folds, log_dir, step, summary_writer, args.embedding_size)

    sess.close()
    return model_dir
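# facenet.triplet_loss comes from the FaceNet codebase; a hedged sketch of
# the standard formulation it implements (details may differ):
import tensorflow as tf

def triplet_loss_sketch(anchor, positive, negative, alpha):
    # Squared L2 distances from the anchor to its positive/negative.
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=1)
    # Hinge: only triplets that violate the margin alpha contribute.
    return tf.reduce_mean(tf.maximum(pos_dist - neg_dist + alpha, 0.0))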
Ejemplo n.º 51
0
def input_pipeline(mode, batch_size=BATCH_SIZE, num_epochs=NUM_EPOCHS):
    with tf.name_scope('img_pipeline'):
        if mode == 'train':
            filenames = [TRAIN_FILENAME]
            image_feature = 'train/image'
            label_feature = 'train/label'
        else:
            filenames = [VAL_FILENAME]
            image_feature = 'val/image'
            label_feature = 'val/label'

        feature = {
            image_feature: tf.FixedLenFeature([], tf.string),
            label_feature: tf.FixedLenFeature([], tf.int64)
        }

        # Create a list of filenames and pass it to a queue
        filename_queue = tf.train.string_input_producer(
            filenames, num_epochs=num_epochs + 1)
        # Define a reader and read the next record
        options = tf_record.TFRecordOptions(
            compression_type=tf_record.TFRecordCompressionType.GZIP)
        reader = tf.TFRecordReader(options=options)
        _, serialized_example = reader.read(filename_queue)
        # Decode the record read by the reader
        features = tf.parse_single_example(serialized_example,
                                           features=feature)
        # Convert the image data from string back to numbers
        image = tf.decode_raw(features[image_feature], tf.uint8)

        # Cast the label to int32, then one-hot encode it
        label = tf.cast(features[label_feature], tf.int32)
        label = tf.one_hot(label, NUM_CLASSES)
        # Reshape image data into the original shape
        image = tf.reshape(image, [256, 256, 3])

        # Any preprocessing here ...
        # 1. random cropping 224x224
        # 2. random LR-flipping
        image = tf.random_crop(image, [224, 224, 3])
        image = tf.image.random_flip_left_right(image)

        #print_features(image)

        # Creates batches by randomly shuffling tensors
        # min_after_dequeue defines how big a buffer we will randomly sample
        #   from -- bigger means better shuffling but slower start up and more
        #   memory used.
        # capacity must be larger than min_after_dequeue and the amount larger
        #   determines the maximum we will prefetch.  Recommendation:
        #   min_after_dequeue + (num_threads + a small safety margin) * batch_size
        min_after_dequeue = 100
        num_threads = 6
        capacity = min_after_dequeue + (num_threads + 2) * batch_size
        images, labels = tf.train.shuffle_batch(
            [image, label],
            batch_size=batch_size,
            capacity=capacity,
            num_threads=num_threads,
            min_after_dequeue=min_after_dequeue)

        #print("input_pipeline will return now.")
        return images, labels
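# Hedged driver sketch for the pipeline above: string_input_producer with
# num_epochs creates local variables, so the local initializer must run too
# before the queue runners are started.
import tensorflow as tf

images, labels = input_pipeline('train')
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            batch_images, batch_labels = sess.run([images, labels])
    except tf.errors.OutOfRangeError:
        pass  # num_epochs exhausted
    finally:
        coord.request_stop()
        coord.join(threads)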
Ejemplo n.º 52
0
def build_graph(train):
    with tf.device("/cpu:0"):
        with tf.name_scope('X'):
            x = tf.placeholder(tf.float32, [None, 284, 284, 1], name='x')
            mlp = x

        if train:
            with tf.name_scope('RANDOM-CROP-FLIP'):
                crop_x = tf.map_fn(lambda img: tf.random_crop(img, [272, 272, 1]), mlp)
                # crop_x = tf.map_fn(lambda img: tf.image.random_flip_up_down(img), crop_x)  # random_flip_up_down would break the per-pose X-Y-MASK visualization; flips are instead pre-applied once as augmentation during data loading, which doubles memory use
                mlp = crop_x
        else:
            with tf.name_scope('CENTER-CROP'):
                crop_x = tf.map_fn(lambda img: tf.image.resize_image_with_crop_or_pad(img, 272, 272), mlp)
                mlp = crop_x

    with tf.name_scope('CONV-1'):
        c1 = 16
        res = tf.pad(mlp, [[0, 0], [0, 0], [0, 0], [0, c1-1]])
        mlp = conv(mlp, weight([3, 3, 1, c1], name='w11')) + positive_bias([c1], name='b11')
        mlp = tf.nn.relu(mlp, name='conv1')
        mlp = conv(mlp, weight([3, 3, c1, c1], name='w12')) + positive_bias([c1], name='b12')
        mlp = tf.nn.relu(mlp, name='conv2')
        mlp = conv(mlp, weight([3, 3, c1, c1], name='w13')) + positive_bias([c1], name='b13')
        mlp = tf.nn.relu(mlp, name='conv3')
        # mlp = conv(mlp, weight([3, 3, c1, c1], name='w14')) + positive_bias([c1], name='b14')
        # mlp = tf.nn.relu(mlp, name='conv4')
        mlp = tf.add(mlp, res, name='res')
        mlp = pool(mlp, name='pool')

    with tf.name_scope('CONV-2'):
        c2 = 32
        res = tf.pad(mlp, [[0, 0], [0, 0], [0, 0], [0, c2-c1]])
        mlp = conv(mlp, weight([3, 3, c1, c2], name='w21')) + positive_bias([c2], name='b21')
        mlp = tf.nn.relu(mlp, name='conv1')
        mlp = conv(mlp, weight([3, 3, c2, c2], name='w22')) + positive_bias([c2], name='b22')
        mlp = tf.nn.relu(mlp, name='conv2')
        mlp = conv(mlp, weight([3, 3, c2, c2], name='w23')) + positive_bias([c2], name='b23')
        mlp = tf.nn.relu(mlp, name='conv3')
        # mlp = conv(mlp, weight([3, 3, c2, c2], name='w24')) + positive_bias([c2], name='b24')
        # mlp = tf.nn.relu(mlp, name='conv4')
        mlp = tf.add(mlp, res, name='res')
        mlp = pool(mlp, name='pool')

    with tf.name_scope('CONV-3'):
        c3 = 64
        res = tf.pad(mlp, [[0, 0], [0, 0], [0, 0], [0, c3-c2]])
        mlp = conv(mlp, weight([3, 3, c2, c3], name='w31')) + positive_bias([c3], name='b31')
        mlp = tf.nn.relu(mlp, name='conv1')
        mlp = conv(mlp, weight([3, 3, c3, c3], name='w32')) + positive_bias([c3], name='b32')
        mlp = tf.nn.relu(mlp, name='conv2')
        # mlp = conv(mlp, weight([3, 3, c3, c3], name='w33')) + positive_bias([c3], name='b33')
        # mlp = tf.nn.relu(mlp, name='conv3')
        # mlp = conv(mlp, weight([3, 3, c3, c3], name='w34')) + positive_bias([c3], name='b34')
        # mlp = tf.nn.relu(mlp, name='conv4')
        mlp = tf.add(mlp, res, name='res')
        mlp = pool(mlp, name='pool')

    with tf.name_scope('CONV-4'):
        c4 = 128
        res = tf.pad(mlp, [[0, 0], [0, 0], [0, 0], [0, c4-c3]])
        mlp = conv(mlp, weight([3, 3, c3, c4], name='w41')) + positive_bias([c4], name='b41')
        mlp = tf.nn.relu(mlp, name='conv1')
        mlp = conv(mlp, weight([3, 3, c4, c4], name='w42')) + positive_bias([c4], name='b42')
        mlp = tf.nn.relu(mlp, name='conv2')
        # mlp = conv(mlp, weight([3, 3, c4, c4], name='w43')) + positive_bias([c4], name='b43')
        # mlp = tf.nn.relu(mlp, name='conv3')
        # mlp = conv(mlp, weight([3, 3, c4, c4], name='w44')) + positive_bias([c4], name='b44')
        # mlp = tf.nn.relu(mlp, name='conv4')
        mlp = tf.add(mlp, res, name='res')
        mlp = pool(mlp, name='pool')

    '''
    with tf.name_scope('MASK'):
        ca = 66
        mask = tf.reshape(mlp, [-1, c4])
        mask = tf.nn.xw_plus_b(mask, weight([c4, ca], 'w5'), zero_bias([ca], 'b5'))
        mask = tf.tanh(mask)
        mask = tf.nn.xw_plus_b(mask, weight([ca, 1], 'w6'), zero_bias([1], 'b6'))
        mask = tf.reshape(mask, [-1, 17*17])
        mask = tf.nn.softmax(mask)
        mask = tf.reshape(mask, [-1, 17, 17, 1])

        mlp = tf.mul(mlp, mask)
        mlp = tf.reduce_sum(mlp, [1, 2], True)
    '''

    if train:
        with tf.name_scope('DROPOUT'):
            mlp = tf.nn.dropout(mlp, 0.5, noise_shape=tf.shape(mlp)*[1, 0, 0, 1]+[0, 1, 1, 0], name='dropout')  # dropout by map

    with tf.name_scope('FLAT'):
        mlp = tf.reshape(mlp, [-1, 17*17*c4], name='flat')

    '''
    if train:
        with tf.name_scope('DROPOUT'):
            mlp = tf.nn.dropout(mlp, 0.5, name='dropout')
    '''

    # 1FC
    with tf.name_scope('FC'):
        logit_exp = tf.nn.xw_plus_b(mlp, weight([17*17*c4, 7], name='w7_exp'), zero_bias([7], name='b7_exp'), name='logit_exp')
        logit_pse = tf.nn.xw_plus_b(mlp, weight([17*17*c4, 5], name='w7_pse'), zero_bias([5], name='b7_pse'), name='logit_pse')
        del mlp

    with tf.name_scope('Y'):
        y_exp = tf.placeholder(tf.float32, [None, 7], name='y_exp')
        y_pse = tf.placeholder(tf.float32, [None, 5], name='y_pse')

    with tf.name_scope('SOFTMAX-WITH-LOSS'):
        loss_exp = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit_exp, labels=y_exp), name='loss_exp')
        loss_pse = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit_pse, labels=y_pse), name='loss_pse')
        lambda_ = 0
        loss = loss_exp + lambda_ * loss_pse

    with tf.name_scope('SOFTMAX'):
        prob_exp = tf.nn.softmax(logit_exp, name='prob_exp')
        prob_pse = tf.nn.softmax(logit_pse, name='prob_pse')

    with tf.name_scope('ACC'):
        acc_exp = tf.equal(tf.argmax(prob_exp, 1), tf.argmax(y_exp, 1), name='correct_exp')
        acc_exp = tf.reduce_mean(tf.cast(acc_exp, tf.float32), name='acc_exp')
        acc_pse = tf.equal(tf.argmax(prob_pse, 1), tf.argmax(y_pse, 1), name='correct_pse')
        acc_pse = tf.reduce_mean(tf.cast(acc_pse, tf.float32), name='acc_pse')

    if train:
        with tf.name_scope('OPT'):
            opt = tf.train.AdamOptimizer(name='opt')
            train_op = opt.minimize(loss, name='train_op')
    else:
        train_op = None

    # Create summaries
    '''
    with tf.name_scope('SUM'):
        # mask: only rescaled, for display convenience
        # mask_m = tf.reduce_min(mask, [1, 2], True)
        mask_M = tf.reduce_max(mask, [1, 2], True)
        mask_visual = mask / mask_M * 255.0  # mask_visual = (mask-mask_m) / (mask_M-mask_m) * 255.0

        # prj_mask: only rescaled, for display convenience
        prj_mask = mask
        prj_mask = tf.nn.conv2d(prj_mask, tf.ones([3, 3, 1, 1])/9.0, strides=[1, 1, 1, 1], padding='SAME')
        prj_mask = tf.nn.conv2d(prj_mask, tf.ones([3, 3, 1, 1])/9.0, strides=[1, 1, 1, 1], padding='SAME')
        prj_mask = tf.image.resize_nearest_neighbor(prj_mask, tf.shape(prj_mask)[1:3]*2)
        prj_mask = tf.nn.conv2d(prj_mask, tf.ones([3, 3, 1, 1])/9.0, strides=[1, 1, 1, 1], padding='SAME')
        prj_mask = tf.nn.conv2d(prj_mask, tf.ones([3, 3, 1, 1])/9.0, strides=[1, 1, 1, 1], padding='SAME')
        prj_mask = tf.image.resize_nearest_neighbor(prj_mask, tf.shape(prj_mask)[1:3]*2)
        prj_mask = tf.nn.conv2d(prj_mask, tf.ones([3, 3, 1, 1])/9.0, strides=[1, 1, 1, 1], padding='SAME')
        prj_mask = tf.nn.conv2d(prj_mask, tf.ones([3, 3, 1, 1])/9.0, strides=[1, 1, 1, 1], padding='SAME')
        prj_mask = tf.nn.conv2d(prj_mask, tf.ones([3, 3, 1, 1])/9.0, strides=[1, 1, 1, 1], padding='SAME')
        prj_mask = tf.image.resize_nearest_neighbor(prj_mask, tf.shape(prj_mask)[1:3]*2)
        prj_mask = tf.nn.conv2d(prj_mask, tf.ones([3, 3, 1, 1])/9.0, strides=[1, 1, 1, 1], padding='SAME')
        prj_mask = tf.nn.conv2d(prj_mask, tf.ones([3, 3, 1, 1])/9.0, strides=[1, 1, 1, 1], padding='SAME')
        prj_mask = tf.nn.conv2d(prj_mask, tf.ones([3, 3, 1, 1])/9.0, strides=[1, 1, 1, 1], padding='SAME')
        prj_mask = tf.image.resize_nearest_neighbor(prj_mask, tf.shape(prj_mask)[1:3]*2)
        # prj_mask_m = tf.reduce_min(prj_mask, [1, 2], True)
        prj_mask_M = tf.reduce_max(prj_mask, [1, 2], True)
        prj_mask_visual = prj_mask / prj_mask_M * 255.0   # prj_mask_visual = (prj_mask-prj_mask_m) / (prj_mask_M-prj_mask_m) * 255.0

        # mask_crop_x: dynamic range rescaled for display convenience
        mask_crop_x = prj_mask * crop_x
        mask_crop_x_m = tf.reduce_min(mask_crop_x, [1, 2], True)
        mask_crop_x_M = tf.reduce_max(mask_crop_x, [1, 2], True)
        mask_crop_x_visual = (mask_crop_x - mask_crop_x_m) / (mask_crop_x_M - mask_crop_x_m) * 255.0

        # y_exp
        y_exp_visual = tf.reshape(y_exp, [-1, 1, 7, 1]) * 255.0

        # y_pse
        y_pse_visual = tf.reshape(y_pse, [-1, 1, 5, 1]) * 255.0

        # prob
        prob_visual = tf.reshape(prob, [-1, 1, 7, 1]) * 255.0
        '''

    if train:
        summary = tf.merge_summary([
                # tf.image_summary('train mask', mask_visual),  # 1 17 17 1
                # tf.image_summary('train prj_mask', prj_mask_visual),  # 1 272 272 1
                # tf.image_summary('train crop_x', crop_x),  # 1 272 272 1
                # tf.image_summary('train mask_crop_x', mask_crop_x_visual),  # 1 272 272 1
                # tf.image_summary('train y_exp', y_exp_visual),  # 1 1 7 1
                # tf.image_summary('train y_pse', y_pse_visual),  # 1 1 5 1
                # tf.image_summary('train prob', prob_visual),  # 1 1 7 1
                tf.scalar_summary('train loss', loss),
                tf.scalar_summary('train loss_exp', loss_exp),
                tf.scalar_summary('train loss_pse', loss_pse),
                tf.scalar_summary('train acc_exp', acc_exp),
                tf.scalar_summary('train acc_pse', acc_pse),
        ])
    else:
        summary = tf.merge_summary([
                # tf.image_summary('val mask', mask_visual),  # 1 17 17 1
                # tf.image_summary('val prj_mask', prj_mask_visual),  # 1 272 272 1
                # tf.image_summary('val crop_x', crop_x),  # 1 272 272 1
                # tf.image_summary('val mask_crop_x', mask_crop_x_visual),  # 1 272 272 1
                # tf.image_summary('val y_exp', y_exp_visual),  # 1 1 7 1
                # tf.image_summary('val y_pse', y_pse_visual),  # 1 1 5 1
                # tf.image_summary('val prob', prob_visual),  # 1 1 7 1
                tf.scalar_summary('val loss', loss),
                tf.scalar_summary('val loss_exp', loss_exp),
                tf.scalar_summary('val loss_pse', loss_pse),
                tf.scalar_summary('val acc_exp', acc_exp),
                tf.scalar_summary('val acc_pse', acc_pse),
        ])

    return [x, y_exp, y_pse, loss, acc_exp, acc_pse, train_op, summary, crop_x]
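# Hedged standalone sketch of the zero-padding shortcut used in the CONV
# blocks above: when channels grow from c_in to c_out, the input is padded
# with zero channels so it can be added to the block output without a
# learned projection.
import tensorflow as tf

def pad_shortcut(x, c_in, c_out):
    return tf.pad(x, [[0, 0], [0, 0], [0, 0], [0, c_out - c_in]])

x = tf.zeros([1, 68, 68, 16])
res = pad_shortcut(x, 16, 32)   # [1, 68, 68, 32], ready for tf.add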
Ejemplo n.º 53
0
tf.summary.scalar('loss', loss)

# The SGD Optimizer with momentum
learning_rate = tf.placeholder(tf.float32, [])
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.MomentumOptimizer(learning_rate,
                                            momentum=0.9).minimize(loss)

correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_actual, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) * 100
tf.summary.scalar('acc', accuracy)

img = tf.placeholder(tf.float32, [32, 32, 3])
norm_image = tf.image.per_image_standardization(img)
img_rand_crop = tf.random_crop(img, [28, 28, 3])

sess = tf.InteractiveSession()
tensorboard_data = tf.summary.merge_all()
current_time = str(time.time())
train_writer = tf.summary.FileWriter('../Tensorboard/inception/train/' +\
                                     current_time, sess.graph)
test_writer = tf.summary.FileWriter('../Tensorboard/inception/test/' +\
                                    current_time, sess.graph)
tf.global_variables_initializer().run()

cifar10_train_images = []
cifar10_train_labels = []
print "Loading training images..."
for i in range(1, 6):
    train_file = open('../../cifar-10-batches-py/data_batch_' + str(i), 'rb')
Ejemplo n.º 54
0
def cnn_model_fn(features, labels, mode, num_classes=20):
    """Model function for CNN."""
    # Input Layer
    input_layer = tf.reshape(features["x"], [-1, 256, 256, 3])

    # Data Augmentation
    # Train: Random crops and left-right flips
    if mode == tf.estimator.ModeKeys.TRAIN:
        tmp = tf.map_fn(lambda img: tf.image.random_flip_left_right(img), input_layer)
        tmp = tf.map_fn(lambda img: tf.random_crop(img, size=[224,224,3]), tmp)
        augment_input = tf.map_fn(lambda img: tf.image.resize_images(img, size=[256,256]), tmp)
        # add to tensorboard
        tf.summary.image('training_images', augment_input)
    # Test/Eval: Center crop (an else branch, so augment_input is defined for
    # PREDICT and EVAL modes alike)
    else:
        tmp = tf.map_fn(lambda img: tf.image.central_crop(img, central_fraction=0.8), input_layer)
        augment_input = tf.map_fn(lambda img: tf.image.resize_images(img, size=[256,256]), tmp)


    # add Network Graph to tensorboard
    # convolution layer #1: conv3-64
    with tf.variable_scope('conv1'):
        conv1 = tf.layers.conv2d(
            inputs=augment_input,
            kernel_size=[3, 3],
            strides=1,
            filters=64,
            padding="same",
            activation=tf.nn.relu,
            name = "conv1_1")

        # convolution layer #2: conv3-64
        conv2 = tf.layers.conv2d(
            inputs=conv1,
            kernel_size=[3, 3],
            strides=1,
            filters=64,
            padding="same",
            activation=tf.nn.relu,
            name = "conv1_2")
    with tf.variable_scope('pool1'):
        # pooling layer #1
        pool1 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2, name = "pool1")

    # convolution layer #3: conv3-128
    with tf.variable_scope('conv2'):
        conv3 = tf.layers.conv2d(
            inputs=pool1,
            kernel_size=[3, 3],
            strides=1,
            filters=128,
            padding="same",
            activation=tf.nn.relu,
            name = "conv2_1")
        # convolution layer #4: conv3-128
        conv4 = tf.layers.conv2d(
            inputs=conv3,
            kernel_size=[3, 3],
            strides=1,
            filters=128,
            padding="same",
            activation=tf.nn.relu,
            name = "conv2_2")

    with tf.variable_scope('pool2'):
        # pooling layer #2
        pool2 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2, name = "pool2")

    # convolution layer #5: conv3-256
    with tf.variable_scope('conv3'):
        conv5 = tf.layers.conv2d(
            inputs=pool2,
            kernel_size=[3, 3],
            strides=1,
            filters=256,
            padding="same",
            activation=tf.nn.relu,
            name = "conv3_1")
        # convolution layer #6: conv3-256
        conv6 = tf.layers.conv2d(
            inputs=conv5,
            kernel_size=[3, 3],
            strides=1,
            filters=256,
            padding="same",
            activation=tf.nn.relu,
            name = "conv3_2")
        # convolution layer #7: conv3-256
        conv7 = tf.layers.conv2d(
            inputs=conv6,
            kernel_size=[3, 3],
            strides=1,
            filters=256,
            padding="same",
            activation=tf.nn.relu,
            name = "conv3_3")

    with tf.variable_scope('pool3'):
        # pooling layer #3
        pool3 = tf.layers.max_pooling2d(inputs=conv7, pool_size=[2, 2], strides=2, name = "pool3")

    # convolution layer #8: conv3-512
    with tf.variable_scope('conv4'):
        conv8 = tf.layers.conv2d(
            inputs=pool3,
            kernel_size=[3, 3],
            strides=1,
            filters=512,
            padding="same",
            activation=tf.nn.relu,
            name = "conv4_1")
        # convolution layer #9: conv3-512
        conv9 = tf.layers.conv2d(
            inputs=conv8,
            kernel_size=[3, 3],
            strides=1,
            filters=512,
            padding="same",
            activation=tf.nn.relu,
            name = "conv4_2")
        # convolution layer #10: conv3-512
        conv10 = tf.layers.conv2d(
            inputs=conv9,
            kernel_size=[3, 3],
            strides=1,
            filters=512,
            padding="same",
            activation=tf.nn.relu,
            name = "conv4_3")
    with tf.variable_scope('pool4'):
        # pooling layer #4
        pool4 = tf.layers.max_pooling2d(inputs=conv10, pool_size=[2, 2], strides=2, name = "pool4")

    # convolution layer #11: conv3-512
    with tf.variable_scope('conv5'):
        conv11 = tf.layers.conv2d(
            inputs=pool4,
            kernel_size=[3, 3],
            strides=1,
            filters=512,
            padding="same",
            activation=tf.nn.relu,
            name = "conv5_1")
        # convolution layer #12: conv3-512
        conv12 = tf.layers.conv2d(
            inputs=conv11,
            kernel_size=[3, 3],
            strides=1,
            filters=512,
            padding="same",
            activation=tf.nn.relu,
            name = "conv5_2")
        # convolution layer #13: conv3-512
        conv13 = tf.layers.conv2d(
            inputs=conv12,
            kernel_size=[3, 3],
            strides=1,
            filters=512,
            padding="same",
            activation=tf.nn.relu,
            name = "conv5_3")
    with tf.variable_scope('pool5'):
        # pooling layer #5
        pool5 = tf.layers.max_pooling2d(inputs=conv13, pool_size=[2, 2], strides=2, name = "pool5")

    # flatten
    pool5_flat = tf.reshape(pool5, [-1, 8 * 8 * 512])
    # fc(4096)
    dense1 = tf.layers.dense(inputs=pool5_flat, units=4096,
                            activation=tf.nn.relu, name = "fc6")
    # dropout
    dropout1 = tf.layers.dropout(
        inputs=dense1, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN)

    # fc(4096)
    dense2 = tf.layers.dense(inputs=dropout1, units=4096,
                            activation=tf.nn.relu, name = "fc7")
    # dropout
    dropout2 = tf.layers.dropout(
        inputs=dense2, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits Layer
    logits = tf.layers.dense(inputs=dropout2, units=20, name = "fc8")

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        #"classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.sigmoid(logits, name="sigmoid_tensor")
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.identity(tf.losses.sigmoid_cross_entropy(
        multi_class_labels=labels, logits=logits), name='loss')

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        tf.summary.scalar("training_loss", loss)

        decayed_learning_rate = tf.train.exponential_decay(
            0.001,  # Base learning rate
            global_step=tf.train.get_global_step(),
            decay_steps=100,  # Decay step
            decay_rate=0.5,    # Decay rate
            staircase=True)
        # add lr to tensorboard
        tf.summary.scalar('learning_rate', decayed_learning_rate)

        # SGD + Momentum optimizer
        optimizer = tf.train.MomentumOptimizer(learning_rate=decayed_learning_rate,
                                               momentum = 0.9)
        # Compute gradients once, log their histograms to TensorBoard, and
        # apply them as the training op (minimize() would recompute them).
        grads_and_vars = optimizer.compute_gradients(loss)
        for g, v in grads_and_vars:
            if g is not None:
                tf.summary.histogram(
                    "{}/grad_histogram".format(v.name[:-2]), g)
        train_op = optimizer.apply_gradients(
            grads_and_vars, global_step=tf.train.get_global_step())

        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op)

    tf.summary.scalar('test_loss', loss)
    # Threshold the per-class probabilities at 0.5 so accuracy compares
    # binary predictions against the multi-hot labels.
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels,
            predictions=tf.round(predictions["probabilities"]))}
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
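# A minimal sketch of wiring a model_fn like the fragment above into
# tf.estimator; `vgg_model_fn`, `train_input_fn`, `eval_input_fn`, and the
# model_dir are illustrative names, not taken from the source.
# The input functions are assumed to yield ({"x": images}, multi_hot_labels).
estimator = tf.estimator.Estimator(model_fn=vgg_model_fn,
                                   model_dir='/tmp/vgg_model')
estimator.train(input_fn=train_input_fn, steps=1000)
metrics = estimator.evaluate(input_fn=eval_input_fn)
print(metrics)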
Example #55
0
def distorted_inputs(data_dir, batch_size, noise_ratio=0):
    """Construct distorted input for CIFAR training using the Reader ops.

  Args:
    data_dir: Path to the CIFAR-10 data directory.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
    filenames = [
        os.path.join(
            data_dir,
            'data_batch_%d_noise_%.2f_with_index.bin' % (v, noise_ratio))
        for v in xrange(1, 6)
    ]
    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.string_input_producer(filenames)

    with tf.name_scope('data_augmentation'):
        # Read examples from files in the filename queue.
        read_input = read_cifar10(filename_queue)
        reshaped_image = tf.cast(read_input.uint8image, tf.float32)

        height = IMAGE_SIZE
        width = IMAGE_SIZE

        # Image processing for training the network. Note the many random
        # distortions applied to the image.

        # Pad to (height + 4, width + 4) so the random crop below can shift
        # the crop window by up to 4 pixels in each dimension.
        distorted_image = tf.image.resize_image_with_crop_or_pad(
            reshaped_image, height + 4, width + 4)

        # Randomly crop a [height, width] section of the image.
        distorted_image = tf.random_crop(distorted_image, [height, width, 3])

        # Randomly flip the image horizontally.
        distorted_image = tf.image.random_flip_left_right(distorted_image)

        # Subtract off the mean and divide by the variance of the pixels.
        float_image = tf.image.per_image_standardization(distorted_image)
        #float_image = (distorted_image/255 - tf.reshape(tf.constant([0.507, 0.487, 0.441]),[1,1,3]))/tf.reshape(tf.constant([0.267, 0.256, 0.276]),[1,1,3])

        # Set the shapes of tensors.
        float_image.set_shape([height, width, 3])
        read_input.index.set_shape([1])
        read_input.label.set_shape([1])

        # Ensure that the random shuffling has good mixing properties.
        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                                 min_fraction_of_examples_in_queue)
        print('Filling queue with %d CIFAR images before starting to train. '
              'This will take a few minutes.' % min_queue_examples)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(read_input.index,
                                           float_image,
                                           read_input.label,
                                           min_queue_examples,
                                           batch_size,
                                           shuffle=True)
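# The _generate_image_and_label_batch helper is not shown in the source; a
# plausible sketch, assuming the standard CIFAR-10 tutorial batching pattern
# built on tf.train.shuffle_batch, extended with the per-example index.
def _generate_image_and_label_batch(index, image, label, min_queue_examples,
                                    batch_size, shuffle):
    """Batches (index, image, label) triples, with optional shuffling."""
    num_preprocess_threads = 16
    if shuffle:
        indices, images, label_batch = tf.train.shuffle_batch(
            [index, image, label],
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_size,
            min_after_dequeue=min_queue_examples)
    else:
        indices, images, label_batch = tf.train.batch(
            [index, image, label],
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_size)
    return indices, images, tf.reshape(label_batch, [batch_size])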
Example #56
0
def cnn_model_fn(features, labels, mode, num_classes=20):

    if mode == tf.estimator.ModeKeys.PREDICT:
        features["x"] = tf.image.resize_image_with_crop_or_pad(
            features["x"], 224, 224)
    else:
        augmented_data = tf.map_fn(
            lambda img: tf.image.random_flip_left_right(img), features["x"])
        augmented_data = tf.map_fn(
            lambda img: tf.random_crop(img, [224, 224, 3]), augmented_data)
        features["x"] = augmented_data

    input_layer = tf.reshape(features["x"], [-1, 224, 224, 3])

    # Convolutional Layer #1
    conv1 = tf.layers.conv2d(inputs=input_layer,
                             filters=96,
                             strides=4,
                             kernel_size=[11, 11],
                             kernel_initializer=tf.initializers.random_normal(
                                 0, 0.01),
                             bias_initializer=tf.initializers.zeros(),
                             padding="valid",
                             activation=tf.nn.relu)

    # Pooling Layer #1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[3, 3], strides=2)

    # Convolutional Layer #2 and Pooling Layer #2
    conv2 = tf.layers.conv2d(inputs=pool1,
                             filters=256,
                             kernel_size=[5, 5],
                             strides=1,
                             kernel_initializer=tf.initializers.random_normal(
                                 0, 0.01),
                             bias_initializer=tf.initializers.zeros(),
                             padding="same",
                             activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[3, 3], strides=2)

    # Convolutional Layer #3,#4,#5 and Pooling Layer #3
    conv3 = tf.layers.conv2d(inputs=pool2,
                             filters=384,
                             kernel_size=[3, 3],
                             strides=1,
                             kernel_initializer=tf.initializers.random_normal(
                                 0, 0.01),
                             bias_initializer=tf.initializers.zeros(),
                             padding="same",
                             activation=tf.nn.relu)

    conv4 = tf.layers.conv2d(inputs=conv3,
                             filters=384,
                             kernel_size=[3, 3],
                             strides=1,
                             kernel_initializer=tf.initializers.random_normal(
                                 0, 0.01),
                             bias_initializer=tf.initializers.zeros(),
                             padding="same",
                             # AlexNet applies ReLU after every conv layer.
                             activation=tf.nn.relu)

    conv5 = tf.layers.conv2d(inputs=conv4,
                             filters=256,
                             kernel_size=[3, 3],
                             strides=1,
                             kernel_initializer=tf.initializers.random_normal(
                                 0, 0.01),
                             bias_initializer=tf.initializers.zeros(),
                             padding="same",
                             activation=tf.nn.relu)

    pool3 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[3, 3], strides=2)

    # Flatten: with 224x224 inputs the feature map here is 5x5x256.
    pool3_flat = tf.reshape(pool3, [-1, 5 * 5 * 256])

    dense1 = tf.layers.dense(inputs=pool3_flat,
                             units=4096,
                             activation=tf.nn.relu)
    dropout1 = tf.layers.dropout(inputs=dense1,
                                 rate=0.5,
                                 training=mode == tf.estimator.ModeKeys.TRAIN)
    dense2 = tf.layers.dense(inputs=dropout1,
                             units=4096,
                             activation=tf.nn.relu)
    dropout2 = tf.layers.dropout(inputs=dense2,
                                 rate=0.5,
                                 training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits Layer
    logits = tf.layers.dense(inputs=dropout2, units=num_classes)

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `sigmoid_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.sigmoid(logits, name="sigmoid_tensor")
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)

    loss = tf.identity(tf.losses.sigmoid_cross_entropy(
        multi_class_labels=labels, logits=logits),
                       name='loss')

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        '''
        summary_hook = tf.train.SummarySaverHook(
            400,
            output_dir="/tmp/pascal_model_alexnet",
            summary_op=tf.summary.merge_all())
        '''

        # The Estimator manages the global step; no manual variable needed.
        starter_learning_rate = 0.001
        learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                   tf.train.get_global_step(),
                                                   10000,
                                                   0.5,
                                                   staircase=True)

        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=0.9)

        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)

    # Add evaluation metrics (for EVAL mode); threshold the sigmoid
    # probabilities at 0.5 so they are comparable with the multi-hot labels.
    eval_metric_ops = {
        "accuracy":
        tf.metrics.accuracy(labels=labels,
                            predictions=tf.round(predictions["probabilities"]))
    }
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metric_ops)
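# A sketch of one way to feed cnn_model_fn with an in-memory dataset via
# tf.estimator.inputs.numpy_input_fn; the arrays and model_dir are
# illustrative. Images must be larger than 224x224 (256x256 assumed here)
# so the training-time tf.random_crop has room to move.
import numpy as np

# train_images: [N, 256, 256, 3] float32; train_labels: [N, 20] multi-hot.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": train_images},
    y=train_labels,
    batch_size=10,
    num_epochs=None,
    shuffle=True)

classifier = tf.estimator.Estimator(
    model_fn=lambda features, labels, mode: cnn_model_fn(
        features, labels, mode, num_classes=20),
    model_dir='/tmp/pascal_model_alexnet')
classifier.train(input_fn=train_input_fn, steps=1000)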
Example #57
0
def distorted_inputs(data_dir, batch_size):
    """Construct distorted input for CIFAR training using the Reader ops.

  Args:
    data_dir: Path to the CIFAR-10 data directory.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
    filenames = [
        os.path.join(data_dir, 'data_batch_%d.bin' % i) for i in xrange(1, 6)
    ]
    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.string_input_producer(filenames)

    with tf.name_scope('data_augmentation'):
        # Read examples from files in the filename queue.
        read_input = read_cifar10(filename_queue)
        reshaped_image = tf.cast(read_input.uint8image, tf.float32)

        height = IMAGE_SIZE
        width = IMAGE_SIZE

        # Image processing for training the network. Note the many random
        # distortions applied to the image.

        # Randomly crop a [height, width] section of the image.
        distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

        # Randomly flip the image horizontally.
        distorted_image = tf.image.random_flip_left_right(distorted_image)

        # Because these operations are not commutative, consider randomizing
        # the order of their operation.
        # NOTE: since per_image_standardization zeros the mean and makes
        # the stddev unit, this likely has no effect; see tensorflow#1458.
        distorted_image = tf.image.random_brightness(distorted_image,
                                                     max_delta=63)
        distorted_image = tf.image.random_contrast(distorted_image,
                                                   lower=0.2,
                                                   upper=1.8)

        # Subtract off the mean and divide by the variance of the pixels.
        float_image = tf.image.per_image_standardization(distorted_image)

        # Set the shapes of tensors.
        float_image.set_shape([height, width, 3])
        read_input.label.set_shape([1])

        # Ensure that the random shuffling has good mixing properties.
        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                                 min_fraction_of_examples_in_queue)
        print('Filling queue with %d CIFAR images before starting to train. '
              'This will take a few minutes.' % min_queue_examples)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(float_image,
                                           read_input.label,
                                           min_queue_examples,
                                           batch_size,
                                           shuffle=True)
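# Queue-based pipelines like distorted_inputs() only start producing data
# once the queue runners are launched; a minimal consumption sketch (the
# data directory and batch size here are illustrative).
images, labels = distorted_inputs(data_dir='/tmp/cifar10_data',
                                  batch_size=128)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Fetch one batch of standardized images and their labels.
        image_batch, label_batch = sess.run([images, labels])
    finally:
        coord.request_stop()
        coord.join(threads)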
Example #58
0
def distorted_inputs(data_dir, batch_size):
    """Construct distorted input for training using the Reader ops.

  Args:
    data_dir: Path to the dogcat data directory.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """

    onlyfiles = [
        f for f in listdir(data_dir)
        if (isfile(join(data_dir, f)) and f.endswith('.jpg'))
    ]
    filepaths = [join(data_dir, f) for f in onlyfiles]
    labels = [label_by_name(f) for f in onlyfiles]
    # Create a queue that produces (filepath, label) pairs to read.
    filepaths_tensor = tf.convert_to_tensor(filepaths, dtype=tf.string)
    labels_tensor = tf.convert_to_tensor(labels, dtype=tf.int32)

    input_queue = tf.train.slice_input_producer(
        [filepaths_tensor, labels_tensor], shuffle=False)

    with tf.name_scope('data_augmentation'):
        # Read examples from files in the filename queue.
        read_input = read_data(input_queue)
        reshaped_image = tf.cast(read_input.uint8image, tf.float32)

        height = IMAGE_SIZE
        width = IMAGE_SIZE

        # Image processing for training the network. Note the many random
        # distortions applied to the image.

        # Randomly crop a [height, width] section of the image.
        distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

        # Randomly flip the image horizontally.
        distorted_image = tf.image.random_flip_left_right(distorted_image)

        # Because these operations are not commutative, consider randomizing
        # the order of their operation.
        # NOTE: since per_image_standardization zeros the mean and makes
        # the stddev unit, this likely has no effect; see tensorflow#1458.
        distorted_image = tf.image.random_brightness(distorted_image,
                                                     max_delta=63)
        distorted_image = tf.image.random_contrast(distorted_image,
                                                   lower=0.2,
                                                   upper=1.8)

        # Subtract off the mean and divide by the variance of the pixels.
        float_image = tf.image.per_image_standardization(distorted_image)

        # Set the shapes of tensors.
        float_image.set_shape([height, width, 3])
        read_input.label.set_shape([1])

        # Ensure that the random shuffling has good mixing properties.
        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                                 min_fraction_of_examples_in_queue)
        print(
            'Filling queue with %d cat and dog images before starting to train. '
            'This will take a few minutes...' % min_queue_examples)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(float_image,
                                           read_input.label,
                                           min_queue_examples,
                                           batch_size,
                                           shuffle=True)
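# A hedged sketch of the label_by_name and read_data helpers referenced
# above (neither is shown in the source); it assumes Kaggle-style
# "cat.###.jpg" / "dog.###.jpg" file names and JPEG inputs resized to an
# assumed working size larger than IMAGE_SIZE before the random crop.
import collections

Record = collections.namedtuple('Record', ['uint8image', 'label'])

def label_by_name(filename):
    # Assumption: cats are class 0, dogs are class 1.
    return 0 if filename.startswith('cat') else 1

def read_data(input_queue):
    # input_queue is [filepath, label] from tf.train.slice_input_producer.
    file_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(file_contents, channels=3)
    # Resize so the later tf.random_crop has room to move.
    image = tf.cast(tf.image.resize_images(image, [256, 256]), tf.uint8)
    return Record(uint8image=image, label=tf.reshape(input_queue[1], [1]))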
Example #59
0
def _random_crop_image(img):
    # `rows` and `cols` are captured from the enclosing scope.
    return tf.random_crop(img, [rows, cols, 3])
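# Snippets like this usually act as a per-image closure over rows and cols;
# a minimal usage sketch, where image_batch is a hypothetical [N, H, W, 3]
# tensor with H >= rows and W >= cols.
cropped_batch = tf.map_fn(_random_crop_image, image_batch)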
Example #60
0
    def build_model(self):

        self.subject_num = SUBJECT_NUM_VGG2

        self.input_labels = tf.placeholder(tf.int64, [self.batch_size],
                                           name='positive_labels')
        self.input_filenames = [
            tf.placeholder(dtype=tf.string) for _ in range(self.batch_size)
        ]

        self.sample_images = tf.placeholder(
            tf.float32, [self.sample_size] +
            [self.output_size, self.output_size, self.c_dim],
            name='sample_images')
        self.sample_input_images = tf.placeholder(
            tf.float32, [1, self.output_size, self.output_size, self.c_dim],
            name='sample_input_images')

        # Networks
        self.images = []
        for i in range(self.batch_size):
            file_contents = tf.read_file(self.input_filenames[i])
            image = tf.image.decode_jpeg(file_contents, channels=3)
            image = tf.image.resize_images(
                image, [self.before_crop_size, self.before_crop_size])
            #if self.random_rotate:
            #    image = tf.py_func(random_rotate_image, [image], tf.uint8)
            if self.padding > 0:
                image = tf.random_crop(image,
                                       [self.output_size, self.output_size, 3])

            #if args.random_crop:
            #    image = tf.random_crop(image, [args.image_size, args.image_size, 3])
            #else:
            #    image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size)
            if self.random_flip:
                image = tf.image.random_flip_left_right(image)
            # Scale pixel values to [-1, 1].
            self.images.append(
                tf.subtract(tf.div(tf.cast(image, dtype=tf.float32), 127.5),
                            1.0))
        self.images = tf.stack(self.images)

        opt = tf.train.AdamOptimizer(self.learning_rate, beta1=self.beta1)

        self.images_splits = tf.split(self.images, self.num_gpus)
        self.input_labels_splits = tf.split(self.input_labels, self.num_gpus)
        self.d_loss_real_id = []
        self.d_acc = []
        self.d_loss_real_center = []
        tower_grads = []
        for gpu_id in range(self.num_gpus):
            with tf.device(
                    tf.DeviceSpec(device_type="GPU", device_index=gpu_id)):

                # Reuse variables on every tower after the first GPU.
                reuse = (gpu_id != 0)

                self.D_R_id_logits, self.D_R_fx, self.D_R_ln_w = \
                    self.discriminator(self.images_splits[gpu_id],
                                       is_reuse=reuse)
                # Squared Frobenius norm of D_R_fx x D_R_ln_w (the logit
                # matrix), scaled by 1e-7.
                self.m_l2 = tf.pow(
                    tf.norm(tf.matmul(self.D_R_fx, self.D_R_ln_w),
                            ord='euclidean',
                            axis=[-2, -1]), 2) * 0.0000001

                self.d_acc_i = slim.metrics.accuracy(
                    tf.argmax(self.D_R_id_logits, 1),
                    self.input_labels_splits[gpu_id],
                    weights=100.0)

                #self.D_R_id_logits = AMSoftmax_logit_v2(self.D_R_id_logits, self.D_R_ln_w,
                #                                        label_batch=self.input_labels_splits[gpu_id],
                #                                        nrof_classes=self.subject_num)
                # tf.global_variables_initializer().run()

                # D Loss
                self.d_loss_real_id_i = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=self.D_R_id_logits,
                        labels=self.input_labels_splits[gpu_id]))

                # self.d_loss_real_center_i, self.real_centers = center_loss(self.D_R_fx, self.input_labels_splits[gpu_id], 0.5, self.subject_num)
                # self.d_loss_real_center_i *= 0.03
                # The center loss and regularizer are disabled; keep scalar
                # zeros so the corresponding summaries below still work.
                self.d_loss_regularizer = tf.zeros(shape=[])
                self.d_loss_real_center = tf.zeros(shape=[])

                grads = opt.compute_gradients(self.d_loss_real_id_i)
                tower_grads.append(grads)

                self.d_loss_real_id.append(self.d_loss_real_id_i)
                self.d_acc.append(self.d_acc_i)
                # self.d_loss_real_center.append(self.d_loss_real_center_i)
        # self.d_loss_real_center = tf.reduce_mean(self.d_loss_real_center)
        grads = average_gradients(tower_grads)
        self.train_op = opt.apply_gradients(grads)

        self.d_loss_real_id = tf.reduce_mean(self.d_loss_real_id)
        self.d_acc = tf.reduce_mean(self.d_acc)
        self.d_loss = self.d_loss_real_id

        # Summaries (underscores instead of spaces keep TensorBoard tags clean).
        tf.summary.scalar("total_loss", self.d_loss)
        tf.summary.scalar("id_softmax_loss", self.d_loss_real_id)
        tf.summary.scalar("center_loss", self.d_loss_real_center)
        tf.summary.scalar("regularizer_loss", self.d_loss_regularizer)
        tf.summary.scalar("train_accuracy", self.d_acc)
        tf.summary.scalar("m_l2_loss", self.m_l2)

        self.summary_op = tf.summary.merge_all()
        #self.summary_writer = tf.summary.FileWriter(self.checkpoint_dir+"/"+self.model_dir+"/log", self.sess.graph)

        # Vars
        self.t_vars = tf.trainable_variables()
        self.d_vars = [
            var for var in self.t_vars
            if ('d_' in var.name
                and 'd_k6_id_31239_pai3pi' not in var.name
                and 'd_k6_id_FactoryOne' not in var.name
                and 'd_k6_id_FactoryTwo' not in var.name)
        ]

        for var in self.d_vars:
            print(var.op.name)
        self.d_saver = tf.train.Saver(self.d_vars,
                                      keep_checkpoint_every_n_hours=.5,
                                      max_to_keep=20)
        self.saver = tf.train.Saver(keep_checkpoint_every_n_hours=.5,
                                    max_to_keep=10)
        '''