def image_augmentation(self, train_data, test_data):
    # NOTE: flip_left_right flips every image deterministically;
    # random_flip_left_right is the usual choice for augmentation.
    train_data = tf.map_fn(lambda img: tf.image.flip_left_right(img), train_data)
    train_data = tf.map_fn(lambda img: tf.image.random_brightness(img, max_delta=63), train_data)
    train_data = tf.map_fn(lambda img: tf.image.random_contrast(img, lower=0.2, upper=1.8), train_data)
    if self.params['use_grayscale']:
        train_data = tf.map_fn(lambda img: tf.image.rgb_to_grayscale(img), train_data)
    if self.params['use_gradient_images']:
        train_data = self.apply_sobel(train_data)
    train_data = tf.map_fn(lambda img: tf.image.per_image_standardization(img), train_data)
    test_data = tf.map_fn(lambda img: tf.image.per_image_standardization(img), test_data)
    if self.params['use_grayscale']:
        test_data = tf.map_fn(lambda img: tf.image.rgb_to_grayscale(img), test_data)
    if self.params['use_gradient_images']:
        test_data = self.apply_sobel(test_data)
    # Crop to 30x30, pad back out to 42x42, then take a random 32x32 crop.
    train_data = tf.map_fn(lambda img: tf.image.resize_image_with_crop_or_pad(img, 30, 30), train_data)
    train_data = tf.map_fn(lambda img: tf.image.resize_image_with_crop_or_pad(img, 42, 42), train_data)
    if self.params['use_grayscale']:
        train_data = tf.map_fn(lambda img: tf.random_crop(img, [32, 32, 1]), train_data)
    else:
        train_data = tf.map_fn(lambda img: tf.random_crop(img, [32, 32, 3]), train_data)
    return train_data, test_data
def pre_process_img(image):
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=32. / 255)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    image = tf.random_crop(image, [default_height - np.random.randint(0, 4),
                                   default_width - np.random.randint(0, 4), 1])
    image = tf.image.resize_images(image, [default_height, default_width])
    return image
def preprocess_example(self, example, mode, hparams):
    # Crop to target shape instead of down-sampling target, leaving target
    # of maximum available resolution.
    target_shape = (self.output_dim, self.output_dim, self.num_channels)
    example["targets"] = tf.random_crop(example["targets"], target_shape)

    # Check input_dim before it is used below.
    if self.input_dim is None:
        raise ValueError("Cannot train in-painting for examples with "
                         "only targets (i.e. input_dim is None, "
                         "implying there are only targets to be "
                         "generated).")

    example["inputs"] = image_utils.resize_by_area(example["targets"],
                                                   self.input_dim)

    if self.inpaint_fraction is not None and self.inpaint_fraction > 0:
        mask = random_square_mask(
            (self.input_dim, self.input_dim, self.num_channels),
            self.inpaint_fraction)
        example["inputs"] = tf.multiply(
            tf.convert_to_tensor(mask, dtype=tf.int64),
            example["inputs"])

    return example
def preprocess_for_train(image, output_height, output_width,
                         padding=_PADDING):
    """Preprocesses the given image for training.

    Note that the actual resizing scale is sampled from
    [`resize_size_min`, `resize_size_max`].

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      output_height: The height of the image after preprocessing.
      output_width: The width of the image after preprocessing.
      padding: The amount of padding before and after each dimension of the
        image.

    Returns:
      A preprocessed image.
    """
    padded_image = tf.pad(image, [[padding, padding],
                                  [padding, padding], [0, 0]])
    # Randomly crop a [height, width] section of the image.
    distorted_image = tf.random_crop(padded_image,
                                     [output_height, output_width, 3])
    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    # Because these operations are not commutative, consider randomizing
    # the order of their operation.
    distorted_image = tf.image.random_brightness(distorted_image,
                                                 max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2, upper=1.8)
    # Subtract off the mean and divide by the variance of the pixels.
    return tf.image.per_image_whitening(distorted_image)
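# NOTE: `tf.image.per_image_whitening` above is the pre-TF-0.12 name of this
# op; in later TF 1.x releases it was renamed to
# `tf.image.per_image_standardization`. A minimal sketch of the same
# pad/crop/flip/distort pipeline under the newer name (the padding default
# here is an assumption):
import tensorflow as tf

def preprocess_for_train_v1x(image, output_height, output_width, padding=4):
    padded = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])
    distorted = tf.random_crop(padded, [output_height, output_width, 3])
    distorted = tf.image.random_flip_left_right(distorted)
    distorted = tf.image.random_brightness(distorted, max_delta=63)
    distorted = tf.image.random_contrast(distorted, lower=0.2, upper=1.8)
    # Renamed from per_image_whitening in TF 0.12.
    return tf.image.per_image_standardization(distorted)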
def read_input(image_queue):
    # Read the images and decode them from PNG.
    imageReader = tf.WholeFileReader()
    image_key, image_value = imageReader.read(image_queue)
    image_decode = tf.image.decode_png(image_value, channels=1)
    image_decode = tf.cast(image_decode, tf.float32)

    # Preprocess data.
    image_key = rename_image_filename(image_key)  # rename image filename
    label = search_label(image_key)

    # Create a record object holding key, label and image.
    class Record(object):
        pass
    record = Record()
    record.key = image_key
    record.label = tf.cast(label, tf.int32)
    record.image = image_decode

    # height = 245
    # width = 320
    height = 96
    width = 96

    # Image processing for training the network. Note the many random
    # distortions applied to the image.
    # Randomly crop a [height, width] section of the image.
    distorted_image = tf.random_crop(record.image, [height, width, 1])
    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    # Because these operations are not commutative, consider randomizing
    # the order of their operation.
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2, upper=1.8)
    # Subtract off the mean and divide by the variance of the pixels.
    float_image = tf.image.per_image_whitening(distorted_image)

    return generate_train_batch(record.label, float_image)
def random_distort_image(image):
    # Pad 4 pixels on each side, then take a random 32x32 crop.
    distorted_image = tf.image.pad_to_bounding_box(image, 4, 4, 40, 40)
    distorted_image = tf.random_crop(distorted_image, [32, 32, 3])
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    return distorted_image
def distort_inputs(reshaped_image):
    distorted_image = tf.random_crop(reshaped_image, imshape)
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2, upper=1.8)
    float_image = tf.image.per_image_whitening(distorted_image)
    return float_image
def random_shift(v):
    # Wrap the borders around, then crop back to the original size, which
    # amounts to a random circular shift of up to random_shift_y/x pixels.
    if random_shift_y:
        v = tf.concat([v[-random_shift_y:], v, v[:random_shift_y]], 0)
    if random_shift_x:
        v = tf.concat([v[:, -random_shift_x:], v, v[:, :random_shift_x]], 1)
    return tf.random_crop(v, [resize[0], resize[1], size[2]])
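# The concat-then-crop trick above implements a random circular shift. On
# TF >= 1.6 a roughly equivalent sketch can use tf.manip.roll; the function
# and parameter names here are assumptions, not from the original code:
import tensorflow as tf

def random_circular_shift(v, max_dy, max_dx):
    dy = tf.random_uniform([], -max_dy, max_dy + 1, dtype=tf.int32)
    dx = tf.random_uniform([], -max_dx, max_dx + 1, dtype=tf.int32)
    return tf.manip.roll(v, shift=tf.stack([dy, dx]), axis=[0, 1])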
def __imagenet_data_process_function(self, x, y):
    with tf.name_scope("imagenet_data_aug") as scope:
        # Random scale. Apparently, this works better than what we have:
        # https://github.com/facebook/fb.resnet.torch
        # but let's use the 'original' formulation for now.
        # Randomly sample a size in the specified range.
        random_size = tf.squeeze(tf.random_uniform(
            (1, 1), 256, 480, dtype=tf.int32, name="random_scale_size"))
        # Rescale the smaller side with this factor (the original discarded
        # the tf.cond result; it is assigned to x here).
        x = tf.cond(
            tf.greater(tf.shape(x)[0], tf.shape(x)[1]),
            lambda: tf.image.resize_images(
                x, [tf.shape(x)[0] * (tf.shape(x)[1] / random_size),
                    random_size]),
            lambda: tf.image.resize_images(
                x, [random_size,
                    tf.shape(x)[1] * (tf.shape(x)[0] / random_size)]))
        # NOTE: this fixed resize makes the random scaling above (and the
        # 224x224 crop below) a no-op; kept as in the original.
        x = tf.image.resize_images(x, [224, 224])
        # Random flip.
        x = tf.image.random_flip_left_right(x)
        # Random crop.
        x = tf.random_crop(x, [224, 224, 3])
        # Colour augmentation. This is a little more involved than I first
        # thought; let's pick the Inception colour distortion:
        # https://github.com/tensorflow/models/blob/master/inception/inception/image_processing.py
        x = tf.image.random_brightness(x, max_delta=32. / 255.)
        x = tf.image.random_saturation(x, lower=0.5, upper=1.5)
        x = tf.image.random_hue(x, max_delta=0.2)
        x = tf.image.random_contrast(x, lower=0.5, upper=1.5)
        x = tf.clip_by_value(x, 0.0, 1.0)
        # Normalisation.
        x = tf.image.per_image_standardization(x)
        return [x, y]
def aug_train(image, aux):
    aug_image = tf.pad(image, [[4, 4], [4, 4], [0, 0]])
    aug_image = tf.random_crop(aug_image, [32, 32, 3])
    aug_image = tf.image.random_flip_left_right(aug_image)
    aug_image = tf.image.random_contrast(aug_image, 0.75, 1.25)
    aug_image = (aug_image - aux['mean']) / aux['std']
    return aug_image
def testNoOp(self):
    # No random cropping is performed since the crop size equals value.shape.
    for shape in (2, 1, 1), (2, 1, 3), (4, 5, 3):
        value = np.arange(0, np.prod(shape), dtype=np.int32).reshape(shape)
        with self.test_session():
            crop = tf.random_crop(value, shape).eval()
            self.assertAllEqual(crop, value)
def distorted_inputs(data_dir, batch_size):
    filenames = [os.path.join(data_dir, "data_batch_%d.bin" % i)
                 for i in xrange(1, 6)]
    print(filenames)
    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError("Failed to find file: " + f)

    filename_queue = tf.train.string_input_producer(filenames)
    read_input = read_cifar10(filename_queue)
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)

    height = IMAGE_SIZE
    width = IMAGE_SIZE

    distorted_image = tf.random_crop(reshaped_image, [height, width, 3])
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2, upper=1.8)
    float_image = tf.image.per_image_whitening(distorted_image)

    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                             min_fraction_of_examples_in_queue)
    print("Filling queue with %d CIFAR images before starting to train. "
          "This will take a few minutes." % min_queue_examples)

    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples, batch_size)
def image_batch(image_paths, batch_size, load_size=286, crop_size=256,
                channels=3, shuffle=True, num_threads=4,
                min_after_dequeue=100, allow_smaller_final_batch=False):
    """For jpg and png files."""
    # Queue and reader.
    img_queue = tf.train.string_input_producer(image_paths, shuffle=shuffle)
    reader = tf.WholeFileReader()

    # Preprocessing.
    _, img = reader.read(img_queue)
    img = tf.image.decode_image(img, channels=3)
    # tf.image.random_flip_left_right should be used before
    # tf.image.resize_images, because tf.image.decode_image returns a tensor
    # without a static shape, which makes tf.image.resize_images collapse.
    # Maybe it's a bug!
    img = tf.image.random_flip_left_right(img)
    img = tf.image.resize_images(img, [load_size, load_size])
    img = tf.random_crop(img, [crop_size, crop_size, channels])
    img = tf.cast(img, tf.float32) / 127.5 - 1

    # Batch.
    if shuffle:
        capacity = min_after_dequeue + (num_threads + 1) * batch_size
        img_batch = tf.train.shuffle_batch(
            [img], batch_size=batch_size, capacity=capacity,
            min_after_dequeue=min_after_dequeue, num_threads=num_threads,
            allow_smaller_final_batch=allow_smaller_final_batch)
    else:
        img_batch = tf.train.batch(
            [img], batch_size=batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch)
    return img_batch, len(image_paths)
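# Regarding the shape issue noted in the docstring above:
# tf.image.decode_image returns a tensor of unknown rank (it may decode an
# animated GIF to 4-D), which is why resize_images fails on its output. A
# commonly used workaround is to assert a static rank before resizing; a
# minimal sketch (raw_bytes and the file name are hypothetical):
import tensorflow as tf

raw_bytes = tf.read_file('some_image.jpg')  # hypothetical input
img = tf.image.decode_image(raw_bytes, channels=3)
img.set_shape([None, None, 3])  # assert rank 3 so resize_images works
img = tf.image.resize_images(img, [286, 286])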
def _parser(serialized_example):
    """Parses a single tf.Example into image and label tensors."""
    features = tf.parse_single_example(
        serialized_example,
        features={
            "image": tf.FixedLenFeature([], tf.string),
            "label": tf.FixedLenFeature([], tf.int64),
        })
    image = tf.decode_raw(features["image"], tf.uint8)
    # Initially reshaping to [H, W, C] does not work.
    image = tf.reshape(image, [NUM_CHANNEL, IMAGE_HEIGHT, IMAGE_WIDTH])
    # This is needed for `tf.image.resize_image_with_crop_or_pad`.
    image = tf.transpose(image, [1, 2, 0])
    image = tf.cast(image, dtype)
    label = tf.cast(features["label"], tf.int32)
    if data_aug:
        image = tf.image.resize_image_with_crop_or_pad(
            image, IMAGE_HEIGHT + 4, IMAGE_WIDTH + 4)
        image = tf.random_crop(image, [IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNEL])
        image = tf.image.random_flip_left_right(image)
    if data_format == "channels_first":
        image = tf.transpose(image, [2, 0, 1])
    if div255:
        image /= 255.
    return image, label
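# A minimal sketch of wiring a parser like `_parser` into a tf.data input
# pipeline (the file name, batch size, and the parser's module-level flags
# are assumptions):
import tensorflow as tf

dataset = tf.data.TFRecordDataset("train.tfrecords")  # assumed file name
dataset = dataset.map(_parser, num_parallel_calls=4)
dataset = dataset.shuffle(10000).batch(128).prefetch(1)
images, labels = dataset.make_one_shot_iterator().get_next()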
def create_inputs_norb(is_train: bool, epochs: int):
    import re
    if is_train:
        CHUNK_RE = re.compile(r"train\d+\.tfrecords")
    else:
        CHUNK_RE = re.compile(r"test\d+\.tfrecords")

    processed_dir = './data'
    chunk_files = [os.path.join(processed_dir, fname)
                   for fname in os.listdir(processed_dir)
                   if CHUNK_RE.match(fname)]
    image, label = norb.read_norb_tfrecord(chunk_files, epochs)

    if is_train:
        # TODO: is it the right order: add noise, resize, then crop?
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.resize_images(image, [48, 48])
        image = tf.random_crop(image, [32, 32, 1])
    else:
        image = tf.image.resize_images(image, [48, 48])
        # Deterministic central 32x32 crop for evaluation.
        image = tf.slice(image, [8, 8, 0], [32, 32, 1])

    x, y = tf.train.shuffle_batch([image, label],
                                  num_threads=cfg.num_threads,
                                  batch_size=cfg.batch_size,
                                  capacity=cfg.batch_size * 64,
                                  min_after_dequeue=cfg.batch_size * 32,
                                  allow_smaller_final_batch=False)
    return x, y
def distorted_inputs(data_dir, batch_size):
    """Construct distorted input for CIFAR training using the Reader ops.

    Args:
      data_dir: file name list.
      batch_size: Number of images per batch.

    Returns:
      images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 1] size.
      labels: Labels. 1D tensor of [batch_size] size.
    """
    filenames = get_train_filenames(data_dir)
    print(filenames)
    for f in filenames:
        if not gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.string_input_producer(filenames)

    # Read examples from files in the filename queue.
    read_input = read_aurora(filename_queue)
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)

    height = IMAGE_SIZE
    width = IMAGE_SIZE

    # angle = int(random.random() * 360)
    # M = cv2.getRotationMatrix2D((IMAGE_SIZE / 2, IMAGE_SIZE / 2), angle, 1)
    # dst = cv2.warpAffine(reshaped_image, M, (IMAGE_SIZE, IMAGE_SIZE))
    # # Convert rotated image back to tensor
    # rotated_tensor = tf.convert_to_tensor(np.array(dst))

    # Image processing for training the network. Note the many random
    # distortions applied to the image.
    # Randomly crop a [height, width] section of the image.
    distorted_image = tf.random_crop(reshaped_image, [height, width, 1])

    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    # Because these operations are not commutative, consider randomizing
    # the order of their operation.
    # distorted_image = tf.image.random_brightness(distorted_image,
    #                                              max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2, upper=1.8)
    # Subtract off the mean and divide by the variance of the pixels.
    float_image = tf.image.per_image_whitening(distorted_image)

    # Ensure that the random shuffling has good mixing properties.
    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                             min_fraction_of_examples_in_queue)
    print('Filling queue with %d aurora images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples, batch_size)
def read_and_augment_data(image_list, label_list, image_size, batch_size,
                          max_nrof_epochs, random_crop, random_flip,
                          random_rotate, nrof_preprocess_threads,
                          shuffle=True):
    images = ops.convert_to_tensor(image_list, dtype=tf.string)
    labels = ops.convert_to_tensor(label_list, dtype=tf.int32)

    # Makes an input queue.
    input_queue = tf.train.slice_input_producer([images, labels],
                                                num_epochs=max_nrof_epochs,
                                                shuffle=shuffle)

    images_and_labels = []
    for _ in range(nrof_preprocess_threads):
        image, label = read_images_from_disk(input_queue)
        if random_rotate:
            image = tf.py_func(random_rotate_image, [image], tf.uint8)
        if random_crop:
            image = tf.random_crop(image, [image_size, image_size, 3])
        else:
            image = tf.image.resize_image_with_crop_or_pad(image, image_size,
                                                           image_size)
        if random_flip:
            image = tf.image.random_flip_left_right(image)
        # pylint: disable=no-member
        image.set_shape((image_size, image_size, 3))
        image = tf.image.per_image_standardization(image)
        images_and_labels.append([image, label])

    image_batch, label_batch = tf.train.batch_join(
        images_and_labels, batch_size=batch_size,
        capacity=4 * nrof_preprocess_threads * batch_size,
        allow_smaller_final_batch=True)

    return image_batch, label_batch
def read_and_preprocess(example_data):
    parsed = tf.parse_single_example(example_data, {
        'image/encoded': tf.FixedLenFeature((), tf.string, ''),
        'image/class/label': tf.FixedLenFeature([], tf.int64, 1),
    })
    image_bytes = tf.reshape(parsed['image/encoded'], shape=[])
    label = tf.cast(
        tf.reshape(parsed['image/class/label'], shape=[]), dtype=tf.int32) - 1

    # End up with pixel values that are in the [-1, 1] range.
    image = tf.image.decode_jpeg(image_bytes, channels=NUM_CHANNELS)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)  # 0-1
    image = tf.expand_dims(image, 0)  # resize_bilinear needs batches
    image = tf.image.resize_bilinear(
        image, [HEIGHT + 10, WIDTH + 10], align_corners=False)
    image = tf.squeeze(image)  # remove batch dimension
    image = tf.random_crop(image, [HEIGHT, WIDTH, NUM_CHANNELS])
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=63.0 / 255.0)
    image = tf.image.random_contrast(image, lower=0.2, upper=1.8)

    # Pixel values are in range [0, 1]; convert to [-1, 1].
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    # return {'image': image}, label
    return image, label
def distorted_inputs(data_dir, batch_size):
    """Construct distorted input for CIFAR training using the Reader ops.

    Args:
      data_dir: Path to the CIFAR-10 data directory.
      batch_size: Number of images per batch.

    Returns:
      images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
      labels: Labels. 1D tensor of [batch_size] size.
    """
    # filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
    #              for i in xrange(1, 6)]
    filenames = ['/export/ddorroh/datasets/container/batches-bin/train_batch.bin']
    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.string_input_producer(filenames)

    # Read examples from files in the filename queue.
    read_input = read_cifar10(filename_queue)
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)

    height = IMAGE_SIZE
    width = IMAGE_SIZE

    # Image processing for training the network. Note the many random
    # distortions applied to the image.
    # Randomly crop a [height, width] section of the image.
    distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    # Because these operations are not commutative, consider randomizing
    # the order of their operation.
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2, upper=1.8)

    # Subtract off the mean and divide by the variance of the pixels.
    float_image = tf.image.per_image_standardization(distorted_image)

    # Ensure that the random shuffling has good mixing properties.
    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                             min_fraction_of_examples_in_queue)
    print('Filling queue with %d CIFAR images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Generate a batch of images and labels by building up a queue of examples.
    images, labels = _generate_image_and_label_batch(float_image,
                                                     read_input.label,
                                                     min_queue_examples,
                                                     batch_size)
    tf.summary.image('distorted_images', images)
    return images, labels
def add_image_distortion(self):
    with tf.variable_scope('distort_image'):
        image = tf.image.decode_jpeg(self.jpeg, channels=3)
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # Randomly zoom in by up to 2x before cropping.
        crop_scale = tf.random_uniform([], minval=0.5, maxval=1)
        height = tf.cast(INPUT_SIZE[0] / crop_scale, tf.int32)
        width = tf.cast(INPUT_SIZE[1] / crop_scale, tf.int32)
        image = tf.image.resize_images(image, height, width)
        image = tf.random_crop(image, [INPUT_SIZE[0], INPUT_SIZE[1], 3])
        image = tf.image.random_flip_left_right(image)

        def distort_colors_1():
            i = tf.image.random_brightness(image, max_delta=32. / 255.)
            i = tf.image.random_saturation(i, lower=0.5, upper=1.5)
            i = tf.image.random_hue(i, max_delta=0.2)
            i = tf.image.random_contrast(i, lower=0.5, upper=1.5)
            return i

        def distort_colors_2():
            i = tf.image.random_brightness(image, max_delta=32. / 255.)
            i = tf.image.random_contrast(i, lower=0.5, upper=1.5)
            i = tf.image.random_saturation(i, lower=0.5, upper=1.5)
            i = tf.image.random_hue(i, max_delta=0.2)
            return i

        # Apply one of the two color-distortion orderings at random.
        image = tf.cond(
            tf.equal(0, tf.random_uniform(shape=[], maxval=2, dtype=tf.int32)),
            distort_colors_1, distort_colors_2)

        # Rescale from [0, 1] to [-1, 1].
        image = tf.sub(image, 0.5)
        image = tf.mul(image, 2.0)
        self.distorted_image = image
def inputs(tf_dir, is_train, batch_size, num_epochs=None):
    image, caption_tids, cocoid = records(tf_dir, num_epochs)
    reshaped_image = tf.image.resize_images(image, IM_S, IM_S)
    if is_train:
        distorted_image = tf.random_crop(reshaped_image, [CNN_S, CNN_S, 3])
        distorted_image = tf.image.random_brightness(distorted_image,
                                                     max_delta=32. / 255.)
        distorted_image = tf.image.random_contrast(distorted_image,
                                                   lower=0.2, upper=1.8)
        distorted_image = tf.clip_by_value(distorted_image, 0.0, 1.0)
    else:
        distorted_image = tf.image.resize_image_with_crop_or_pad(
            reshaped_image, CNN_S, CNN_S)
    image = distorted_image
    # [0,1) --> [-1,1)
    image = tf.sub(image, 0.5)
    image = tf.mul(image, 2.0)

    num_preprocess_threads = 4
    min_queue_examples = 20
    outputs = [image, caption_tids, cocoid]
    return tf.train.shuffle_batch(
        outputs,
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size,
        min_after_dequeue=min_queue_examples)
def preprocess_for_train(image, output_height, output_width, padding):
    """Preprocesses the given image for training.

    Note that the actual resizing scale is sampled from
    [`resize_size_min`, `resize_size_max`].

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      output_height: The height of the image after preprocessing.
      output_width: The width of the image after preprocessing.
      padding: The amount of padding before and after each dimension of the
        image.

    Returns:
      A preprocessed image.
    """
    # Transform the image to floats.
    image = tf.to_float(image)
    if padding > 0:
        image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])

    # Rotate by a random multiple of 0.1*pi, offset by -0.4*pi.
    angles = 0.1 * np.pi * np.random.randint(8, size=1) - 0.4 * np.pi
    image = tf.contrib.image.rotate(image, angles)

    # Randomly crop a [height, width] section of the image.
    distorted_image = tf.random_crop(image, [output_height, output_width, 3])
    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    # distorted_image = tf.image.random_contrast(distorted_image,
    #                                            lower=0.2, upper=1.8)
    # Subtract off the mean and divide by the variance of the pixels.
    return tf.image.per_image_standardization(distorted_image)
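# NOTE: `np.random.randint` above is evaluated once at graph-construction
# time, so every example processed by this graph is rotated by the same
# angle. A sketch of sampling the angle in-graph instead, so each execution
# draws a fresh angle (the helper name and the intent are assumptions):
import numpy as np
import tensorflow as tf

def rotate_randomly(image):
    k = tf.random_uniform([], minval=0, maxval=8, dtype=tf.int32)
    angle = 0.1 * np.pi * tf.to_float(k) - 0.4 * np.pi
    return tf.contrib.image.rotate(image, angle)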
def input_data_t(data_dir, trainfile, batch_size, shuffle=True):
    image_list, label_list = read_labeled_image_list(data_dir, trainfile)
    images = ops.convert_to_tensor(image_list, dtype=dtypes.string)
    labels = ops.convert_to_tensor(label_list, dtype=dtypes.int32)

    # Makes an input queue.
    input_queue = tf.train.slice_input_producer([images, labels],
                                                num_epochs=64, shuffle=True)
    image, label = read_images_from_disk(input_queue)

    distorted_image = tf.random_crop(image, [HEIGHT, WIDTH, 3])
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2, upper=1.8)
    float_image = tf.image.per_image_whitening(distorted_image)

    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                             min_fraction_of_examples_in_queue)
    print('Filling queue with %d CIFAR images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    return _generate_image_and_label_batch(float_image, label,
                                           min_queue_examples, batch_size,
                                           shuffle)
def pr_image(image):
    reshaped_image = random_resize(image, H['arch']['min_scale'],
                                   H['arch']['max_scale'])
    # Randomly crop a [height, width] section of the image.
    distorted_image = tf.random_crop(
        reshaped_image,
        [H['arch']['input_size'], H['arch']['input_size'],
         H['arch']['num_channels']])
    distorted_image.set_shape([H['arch']['input_size'],
                               H['arch']['input_size'],
                               H['arch']['num_channels']])
    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    # Because these operations are not commutative, consider randomizing
    # the order of their operation.
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2, upper=1.8)
    distorted_image = tf.image.random_hue(distorted_image, max_delta=0.2)
    distorted_image = tf.image.random_saturation(distorted_image,
                                                 lower=0.5, upper=1.5)
    return tf.image.per_image_whitening(distorted_image)
def map_train(image, label):
    image = tf.image.resize_image_with_crop_or_pad(image, image_size + 4,
                                                   image_size + 4)
    image = tf.random_crop(image, [image_size, image_size, 3])
    image = tf.image.random_flip_left_right(image)
    image = tf.image.per_image_standardization(image)
    return (image, label)
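# `map_train` has the (image, label) -> (image, label) signature expected by
# Dataset.map; a minimal usage sketch (the array names and sizes are
# assumptions):
import tensorflow as tf

image_size = 32  # assumed crop target, matching the function above
dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
dataset = dataset.map(map_train, num_parallel_calls=4)
dataset = dataset.shuffle(10000).batch(128)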
def distorted_inputs(batch_size):
    path = "train"
    read_input = read_cifar10(path)
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)

    height = IMAGE_SIZE_Y
    width = IMAGE_SIZE_X

    distorted_image = tf.random_crop(reshaped_image, [height, width, 3])
    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    # Because these operations are not commutative, consider randomizing
    # the order of their operation.
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2, upper=1.8)
    # Subtract off the mean and divide by the variance of the pixels.
    float_image = tf.image.per_image_whitening(distorted_image)

    # Ensure that the random shuffling has good mixing properties.
    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                             min_fraction_of_examples_in_queue)
    print('Filling queue with %d CIFAR images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples, batch_size,
                                           shuffle=True)
def random_crop_and_pad_image_and_labels(image, label, crop_h, crop_w,
                                         ignore_label=255):
    """Randomly crops and pads the input images.

    Args:
      image: Training image to crop/pad.
      label: Segmentation mask to crop/pad.
      crop_h: Height of cropped segment.
      crop_w: Width of cropped segment.
      ignore_label: Label to ignore during the training.
    """
    label = tf.cast(label, dtype=tf.float32)
    # Needs to be subtracted and later added due to 0 padding.
    label = label - ignore_label
    combined = tf.concat(axis=2, values=[image, label])
    image_shape = tf.shape(image)
    combined_pad = tf.image.pad_to_bounding_box(
        combined, 0, 0,
        tf.maximum(crop_h, image_shape[0]),
        tf.maximum(crop_w, image_shape[1]))

    last_image_dim = tf.shape(image)[-1]
    combined_crop = tf.random_crop(combined_pad, [crop_h, crop_w, 4])
    img_crop = combined_crop[:, :, :last_image_dim]
    label_crop = combined_crop[:, :, last_image_dim:]
    label_crop = label_crop + ignore_label
    label_crop = tf.cast(label_crop, dtype=tf.uint8)

    # Set static shape so that tensorflow knows shape at compile time.
    img_crop.set_shape((crop_h, crop_w, 3))
    label_crop.set_shape((crop_h, crop_w, 1))
    return img_crop, label_crop
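# Cropping the image and its segmentation mask jointly, as above, keeps them
# spatially aligned; two independent tf.random_crop calls would draw
# different offsets. A minimal usage sketch (the shapes and crop size here
# are assumptions):
import tensorflow as tf

image = tf.random_uniform([512, 512, 3])
label = tf.cast(tf.random_uniform([512, 512, 1], maxval=21, dtype=tf.int32),
                tf.uint8)
img_crop, label_crop = random_crop_and_pad_image_and_labels(
    image, label, crop_h=321, crop_w=321)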
def distorted_inputs(data_dir, batch_size):
    """Construct distorted input for CIFAR training using the Reader ops.

    Args:
      data_dir: Path to the CIFAR-10 data directory.
      batch_size: Number of images per batch.

    Returns:
      images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
      labels: Labels. 1D tensor of [batch_size] size.
    """
    global TRAIN
    global VAL_NUM

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.string_input_producer(get_data(data_dir))

    # Read examples from files in the filename queue.
    read_input = read_cifar10(filename_queue)
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)

    height = IMAGE_SIZE
    width = IMAGE_SIZE

    # Pad 4 pixels on each side before cropping.
    paddings = [[4, 4], [4, 4], [0, 0]]
    reshaped_image = tf.pad(reshaped_image, paddings, "CONSTANT")

    # Randomly crop a [height, width] section of the image.
    distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    # Image processing for training the network. Note the many random
    # distortions applied to the image.
    # Because these operations are not commutative, consider randomizing
    # the order of their operation.
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2, upper=1.8)

    # Subtract off the mean and divide by the variance of the pixels.
    float_image = tf.image.per_image_whitening(distorted_image)

    # Ensure that the random shuffling has good mixing properties.
    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                             min_fraction_of_examples_in_queue)
    print('Filling queue with %d CIFAR images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples, batch_size,
                                           shuffle=True)
def preprocess(self, image):
    """Preprocess a single image in [height, width, depth] layout."""
    if self.subset == 'train' and self.use_distortion:
        # Pad 4 pixels on each dimension of feature map, done per image
        # within the mini-batch.
        image = tf.image.resize_image_with_crop_or_pad(image, 40, 40)
        image = tf.random_crop(image, [HEIGHT, WIDTH, DEPTH])
        image = tf.image.random_flip_left_right(image)
    return image
def test(is_train=True):
    """Instruction on how to read data from tfrecord."""
    # 1. Use a regular expression to find all files we want.
    import re
    if is_train:
        CHUNK_RE = re.compile(r"train\d+\.tfrecords")
    else:
        CHUNK_RE = re.compile(r"test\d+\.tfrecords")

    processed_dir = './data'
    # 2. Parse them into a list of file names.
    chunk_files = [os.path.join(processed_dir, fname)
                   for fname in os.listdir(processed_dir)
                   if CHUNK_RE.match(fname)]
    # 3. Pass the arguments into the read method.
    image, label = read_norb_tfrecord(chunk_files, 2)

    image = tf.image.random_brightness(image, max_delta=32. / 255.)
    image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
    image = tf.image.resize_images(image, [48, 48])

    # Batch norm.
    params_shape = [image.get_shape()[-1]]
    beta = tf.get_variable(
        'beta', params_shape, tf.float32,
        initializer=tf.constant_initializer(0.0, tf.float32))
    gamma = tf.get_variable(
        'gamma', params_shape, tf.float32,
        initializer=tf.constant_initializer(1.0, tf.float32))
    mean, variance = tf.nn.moments(image, [0, 1, 2])
    image = tf.nn.batch_normalization(image, mean, variance, beta, gamma,
                                      0.001)

    image = tf.random_crop(image, [32, 32, 1])
    batch_size = 8
    x, y = tf.train.shuffle_batch([image, label], batch_size=batch_size,
                                  capacity=batch_size * 64,
                                  min_after_dequeue=batch_size * 32,
                                  allow_smaller_final_batch=False)
    logger.debug('x shape: {}, y shape: {}'.format(x.get_shape(),
                                                   y.get_shape()))

    # Initialize all ops.
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(init)
        # Start the input queues.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for i in range(2):
            val, l = sess.run([x, y])
            # l = to_categorical(l, 12)
            print(val, l)
        coord.join()
    logger.debug('Test read tf record Succeed')
def process_image(encoded_image,
                  is_training,
                  height,
                  width,
                  resize_height=346,
                  resize_width=346,
                  thread_id=0,
                  image_format="jpeg"):
    """Decode an image, resize and apply random distortions.

    In training, images are distorted slightly differently depending on
    thread_id.

    Args:
      encoded_image: String Tensor containing the image.
      is_training: Boolean; whether preprocessing for training or eval.
      height: Height of the output image.
      width: Width of the output image.
      resize_height: If > 0, resize height before crop to final dimensions.
      resize_width: If > 0, resize width before crop to final dimensions.
      thread_id: Preprocessing thread id used to select the ordering of color
        distortions. There should be a multiple of 2 preprocessing threads.
      image_format: "jpeg" or "png".

    Returns:
      A float32 Tensor of shape [height, width, 3] with values in [-1, 1].

    Raises:
      ValueError: If image_format is invalid.
    """
    # Helper function to log an image summary to the visualizer. Summaries
    # are only logged in thread 0.
    def image_summary(name, image):
        if not thread_id:
            tf.summary.image(name, tf.expand_dims(image, 0))

    # Decode image into a float32 Tensor of shape [?, ?, 3] with values in
    # [0, 1).
    with tf.name_scope("decode", values=[encoded_image]):
        if image_format == "jpeg":
            image = tf.image.decode_jpeg(encoded_image, channels=3)
        elif image_format == "png":
            image = tf.image.decode_png(encoded_image, channels=3)
        else:
            raise ValueError("Invalid image format: %s" % image_format)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    image_summary("original_image", image)

    # Resize image.
    assert (resize_height > 0) == (resize_width > 0)
    if resize_height:
        image = tf.image.resize_images(image,
                                       size=[resize_height, resize_width],
                                       method=tf.image.ResizeMethod.BILINEAR)

    # Crop to final dimensions.
    if is_training:
        image = tf.random_crop(image, [height, width, 3])
    else:
        # Central crop, assuming resize_height > height, resize_width > width.
        image = tf.image.resize_image_with_crop_or_pad(image, height, width)
    image_summary("resized_image", image)

    # Randomly distort the image.
    if is_training:
        image = distort_image(image, thread_id)
    image_summary("final_image", image)

    # Rescale to [-1, 1] instead of [0, 1].
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    return image
tf.sparse_reduce_max()
tf.sparse_reduce_max_sparse()
tf.reduce_all()
tf.reduce_any()
tf.reduce_join()
tf.reduce_logsumexp()
tf.reduce_max()
tf.reduce_mean()
tf.reduce_min()
tf.reduce_prod()
tf.reduce_sum()
tf.reduced_shape()
tf.random_crop()
tf.random_gamma()
tf.random_normal()
tf.random_poisson()
tf.random_poisson_v2()
tf.random_shuffle()
tf.random_uniform()
tf.where()
tf.while_loop()
tf.write_file()
tf.read_file()
tf.record_input()
tf.reshape()
tf.restore_v2()
dropout2 = tf.nn.dropout(fc2, keep_prob)
logits = tf.contrib.layers.fully_connected(
    dropout2, 10, activation_fn=None,
    weights_regularizer=tf.contrib.layers.l2_regularizer(
        tf.constant(0.0005, dtype=tf.float32)))

cost = loss(logits, feed_labels)
opt_mom = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9)
opt = opt_mom.minimize(cost)
acc = accuracy(logits, feed_labels)

# Augmentation ops: random scale-and-crop plus random flips.
img_scale_crop = tf.random_crop(
    tf.image.resize_images(aug_img, get_new_size()), [96, 96, 3])
img_rand_flip_lr = tf.image.random_flip_left_right(aug_img)
img_rand_flip_ud = tf.image.random_flip_up_down(aug_img)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
builder = tf.saved_model.builder.SavedModelBuilder("/output/cnn_model_final")

while (ne < num_epochs):
    stime = time.time()
    print 'epoch::', ne + 1, '...'
    if ne != 0:
        np.random.shuffle(index)
        train_x = train_x[index]
        train_y = train_y[index]
def run(image_size, ind):
    label = showimage(ind)
    print(label)
    image_name = 'image_test.jpg'
    image_string = open(image_name, 'rb').read()
    image = tf.image.decode_jpeg(image_string, channels=3)
    if image.dtype != tf.float32:
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    height = image_size
    width = image_size

    distorted_image1 = tf.random_crop(image, [height, width, 3])
    # Randomly flip the image horizontally.
    distorted_image2 = tf.image.random_flip_left_right(distorted_image1)
    # Because these operations are not commutative, consider randomizing
    # the order of their operation.
    distorted_image3 = tf.image.random_brightness(distorted_image2,
                                                  max_delta=63)
    distorted_image4 = tf.image.random_contrast(distorted_image3,
                                                lower=0.2, upper=1.8)
    # Subtract off the mean and divide by the variance of the pixels.
    float_image = tf.image.per_image_standardization(distorted_image4)

    # Save the processed image for Fathom.
    with tf.Session() as sess:
        i1 = image.eval()
        i2 = distorted_image1.eval()
        i3 = distorted_image2.eval()
        i4 = distorted_image3.eval()
        i5 = distorted_image4.eval()
        i6 = float_image.eval()
        # Clip the standardized image to [-1, 1] for display.
        i7 = i6
        for i in range(24):
            for j in range(24):
                for k in range(3):
                    if i7[i][j][k] > 1:
                        i7[i][j][k] = 1
                    if i7[i][j][k] < -1:
                        i7[i][j][k] = -1

        fig = pylab.figure()
        a1 = fig.add_subplot(231)
        a1.set_title("original image")
        a2 = fig.add_subplot(232)
        a2.set_title("after crop")
        a3 = fig.add_subplot(233)
        a3.set_title("after flip")
        a4 = fig.add_subplot(234)
        a4.set_title("random brightness")
        a5 = fig.add_subplot(235)
        a5.set_title("random contrast")
        a6 = fig.add_subplot(236)
        a6.set_title("standardization")
        a1.imshow(i1)
        a2.imshow(i2)
        a3.imshow(i3)
        a4.imshow(i4)
        a5.imshow(i5)
        a6.imshow(i6)
        pylab.axis("off")
        pylab.show()
        fig.savefig('temp.png', dpi=fig.dpi)
def _read_and_decode(filename_queue, image_dim=28, distort=False,
                     split='train'):
    """Reads a single record and converts it to a tensor.

    Args:
      filename_queue: Tensor Queue, list of input files.
      image_dim: Scalar, the height (and width) of the image in pixels.
      distort: Boolean, whether to distort the input or not.
      split: String, the split of the data (test or train) to read from.

    Returns:
      Dictionary of the (Image, label) and the image height.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'depth': tf.FixedLenFeature([], tf.int64)
        })

    # Convert from a scalar string tensor (whose single string has
    # length image_pixel*image_pixel) to a uint8 tensor with shape
    # [image_pixel, image_pixel, 1].
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.reshape(image, [image_dim, image_dim, 1])
    image.set_shape([image_dim, image_dim, 1])

    # Convert from [0, 255] -> [-0.5, 0.5] floats.
    image = tf.cast(image, tf.float32) * (1. / 255)
    if distort:
        cropped_dim = image_dim - 4
        if split == 'train':
            image = tf.reshape(image, [image_dim, image_dim])
            image = tf.random_crop(image, [cropped_dim, cropped_dim])
            # 0.26179938779 is 15 degrees in radians.
            image = tf.contrib.image.rotate(
                image, random.uniform(-0.26179938779, 0.26179938779))
            image = tf.reshape(image, [cropped_dim, cropped_dim, 1])
            image.set_shape([cropped_dim, cropped_dim, 1])
        else:
            # NOTE: this relies on true division (Python 3 or
            # `from __future__ import division`); integer division would
            # yield a fraction of 0.
            fraction = cropped_dim / image_dim
            image = tf.image.central_crop(image, central_fraction=fraction)
            image.set_shape([cropped_dim, cropped_dim, 1])
        image_dim = cropped_dim

    # Convert label from a scalar uint8 tensor to an int32 scalar.
    label = tf.cast(features['label'], tf.int32)
    features = {
        'images': image,
        'labels': tf.one_hot(label, 10),
        'recons_image': image,
        'recons_label': label,
    }
    return features, image_dim
def main():
    args = parser.parse_args()

    # We store all arguments in a json file. This has two advantages:
    # 1. We can always get back and see what exactly that experiment was.
    # 2. We can resume an experiment as-is without needing to remember all
    #    flags.
    args_file = os.path.join(args.experiment_root, 'args.json')
    if args.resume:
        if not os.path.isfile(args_file):
            raise IOError('`args.json` not found in {}'.format(args_file))

        print('Loading args from {}.'.format(args_file))
        with open(args_file, 'r') as f:
            args_resumed = json.load(f)
        args_resumed['resume'] = True  # This would be overwritten.

        # When resuming, we not only want to populate the args object with
        # the values from the file, but we also want to check for some
        # possible conflicts between loaded and given arguments.
        for key, value in args.__dict__.items():
            if key in args_resumed:
                resumed_value = args_resumed[key]
                if resumed_value != value:
                    print('Warning: For the argument `{}` we are using the'
                          ' loaded value `{}`. The provided value was `{}`'
                          '.'.format(key, resumed_value, value))
                    command = input('Would you like to restore it? (yes/no)')
                    if command == 'yes':
                        args.__dict__[key] = resumed_value
                        print('For the argument `{}` we are using the loaded'
                              ' value `{}`.'.format(key, args.__dict__[key]))
                    else:
                        print('For the argument `{}` we are using the'
                              ' provided value `{}`.'.format(
                                  key, args.__dict__[key]))
            else:
                print('Warning: A new argument was added since the last run:'
                      ' `{}`. Using the new value: `{}`.'.format(key, value))
        os.remove(args_file)
        with open(args_file, 'w') as f:
            json.dump(vars(args), f, ensure_ascii=False, indent=2,
                      sort_keys=True)
    else:
        # If the experiment directory exists already, we bail in fear.
        if os.path.exists(args.experiment_root):
            if os.listdir(args.experiment_root):
                print('The directory {} already exists and is not empty.'
                      ' If you want to resume training, append --resume to'
                      ' your call.'.format(args.experiment_root))
                exit(1)
        else:
            os.makedirs(args.experiment_root)

        # Store the passed arguments for later resuming and grepping in a
        # nice and readable format.
        with open(args_file, 'w') as f:
            json.dump(vars(args), f, ensure_ascii=False, indent=2,
                      sort_keys=True)

    log_file = os.path.join(args.experiment_root, "train")
    logging.config.dictConfig(common.get_logging_dict(log_file))
    log = logging.getLogger('train')

    # Also show all parameter values at the start, for ease of reading logs.
    log.info('Training using the following parameters:')
    for key, value in sorted(vars(args).items()):
        log.info('{}: {}'.format(key, value))

    # Check them here, so they are not required when --resume-ing.
    if not args.train_set:
        parser.print_help()
        log.error("You did not specify the `train_set` argument!")
        sys.exit(1)
    if not args.image_root:
        parser.print_help()
        log.error("You did not specify the required `image_root` argument!")
        sys.exit(1)

    # Load the data from the TxT file; see common.load_dataset for details.
    pids, fids = common.load_dataset(args.train_set, args.image_root)
    max_fid_len = max(map(len, fids))  # We'll need this later for logfiles.

    # Setup a tf.Dataset where one "epoch" loops over all PIDs.
    # PIDs are shuffled after every epoch and continue indefinitely.
    unique_pids = np.unique(pids)
    dataset = tf.data.Dataset.from_tensor_slices(unique_pids)
    dataset = dataset.shuffle(len(unique_pids))

    # Constrain the dataset size to a multiple of the batch-size, so that
    # we don't get overlap at the end of each epoch.
    dataset = dataset.take((len(unique_pids) // args.batch_p) * args.batch_p)
    dataset = dataset.repeat(None)  # Repeat forever.
    # Funny way of stating it.

    # For every PID, get K images.
    dataset = dataset.map(lambda pid: sample_k_fids_for_pid(
        pid, all_fids=fids, all_pids=pids, batch_k=args.batch_k))
    # The dataset now yields [selected_fids, pid], as returned by
    # sample_k_fids_for_pid.

    # Ungroup/flatten the batches for easy loading of the files.
    dataset = dataset.apply(tf.contrib.data.unbatch())

    # Convert filenames to actual image tensors.
    net_input_size = (args.net_input_height, args.net_input_width)
    pre_crop_size = (args.pre_crop_height, args.pre_crop_width)
    dataset = dataset.map(
        lambda fid, pid: common.fid_to_image(
            fid, pid, image_root=args.image_root,
            image_size=pre_crop_size if args.crop_augment
            else net_input_size),
        num_parallel_calls=args.loading_threads)
    # The dataset now yields [selected_images, fid, pid], as returned by
    # fid_to_image.

    # Augment the data if specified by the arguments.
    if args.flip_augment:
        dataset = dataset.map(
            lambda im, fid, pid: (tf.image.random_flip_left_right(im),
                                  fid, pid))
    if args.crop_augment:
        dataset = dataset.map(
            lambda im, fid, pid: (tf.random_crop(im, net_input_size + (3,)),
                                  fid, pid))

    # Group it back into PK batches.
    batch_size = args.batch_p * args.batch_k
    dataset = dataset.batch(batch_size)

    # Overlap producing and consuming for parallelism.
    dataset = dataset.prefetch(1)

    # Since we repeat the data infinitely, we only need a one-shot iterator.
    images, fids, pids = dataset.make_one_shot_iterator().get_next()

    # Create the model and an embedding head.
    model = import_module('nets.' + args.model_name)
    head = import_module('heads.' + args.head_name)

    # Feed the image through the model. The returned `body_prefix` will be
    # used further down to load the pre-trained weights for all variables
    # with this prefix.
    endpoints, body_prefix = model.endpoints(images, is_training=True)
    if args.head_name == 'fusion':
        with tf.name_scope('head'):
            endpoints = head.head(endpoints, args.embedding_dim,
                                  args.model_name, is_training=True)
    else:
        with tf.name_scope('head'):
            endpoints = head.head(endpoints, args.embedding_dim,
                                  is_training=True)

    # Create the loss in two steps:
    # 1. Compute all pairwise distances according to the specified metric.
    # 2. For each anchor along the first dimension, compute its loss.
    # dists = loss.cdist(endpoints['emb'], endpoints['emb'],
    #                    metric=args.metric)
    # losses, train_top1, prec_at_k, _, neg_dists, pos_dists = \
    #     loss.LOSS_CHOICES[args.loss](
    #         dists, pids, args.margin, batch_precision_at_k=args.batch_k-1)
    # '_' stands for the boolean matrix showing the top-K entries where the
    # correct match of the identities occurs; shape=(batch_size, K).

    # Modified loss: one triplet loss per feature level plus the fusion
    # layer.
    dists1 = loss.cdist(endpoints['feature1'], endpoints['feature1'],
                        metric=args.metric)
    losses1, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists1, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists2 = loss.cdist(endpoints['feature2'], endpoints['feature2'],
                        metric=args.metric)
    losses2, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists2, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists3 = loss.cdist(endpoints['feature3'], endpoints['feature3'],
                        metric=args.metric)
    losses3, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists3, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists4 = loss.cdist(endpoints['feature4'], endpoints['feature4'],
                        metric=args.metric)
    losses4, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists4, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists_fu = loss.cdist(endpoints['fusion_layer'],
                          endpoints['fusion_layer'], metric=args.metric)
    losses_fu, train_top1, prec_at_k, _, neg_dists, pos_dists = \
        loss.LOSS_CHOICES[args.loss](
            dists_fu, pids, args.margin,
            batch_precision_at_k=args.batch_k - 1)

    losses = losses1 + losses2 + losses3 + losses4 + losses_fu
    # losses, train_top1, prec_at_k, _, neg_dists, pos_dists = \
    #     loss_m.LOSS_CHOICES[args.loss](
    #         endpoints, pids, args.margin, args.model_name,
    #         batch_precision_at_k=args.batch_k - 1, metric=args.metric)

    # Count the number of active entries, and compute the total batch loss.
    # `losses` is non-negligible exactly where the positive-pair distance
    # exceeds the negative-pair distance plus the margin.
    num_active = tf.reduce_sum(tf.cast(tf.greater(losses, 1e-5), tf.float32))
    loss_mean = tf.reduce_mean(losses)

    # Some logging for tensorboard.
    tf.summary.histogram('loss_distribution', losses)
    tf.summary.scalar('loss', loss_mean)
    tf.summary.scalar('batch_top1', train_top1)
    tf.summary.scalar('batch_prec_at_{}'.format(args.batch_k - 1), prec_at_k)
    tf.summary.scalar('active_count', num_active)
    # tf.summary.histogram('embedding_dists', dists)
    tf.summary.histogram('embedding_pos_dists', pos_dists)
    tf.summary.histogram('embedding_neg_dists', neg_dists)
    tf.summary.histogram('embedding_lengths',
                         tf.norm(endpoints['emb_raw'], axis=1))

    # Create the mem-mapped arrays in which we'll log all training detail in
    # addition to tensorboard, because tensorboard is annoying for detailed
    # inspection and actually discards data in histogram summaries.
    if args.detailed_logs:
        log_embs = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'embeddings'),
            dtype=np.float32,
            shape=(args.train_iterations, batch_size, args.embedding_dim))
        log_loss = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'losses'),
            dtype=np.float32,
            shape=(args.train_iterations, batch_size))
        log_fids = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'fids'),
            dtype='S' + str(max_fid_len),
            shape=(args.train_iterations, batch_size))

    # These are collected here before we add the optimizer, because depending
    # on the optimizer, it might add extra slots, which are also global
    # variables, with the exact same prefix.
    model_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        body_prefix)

    # Define the optimizer and the learning-rate schedule.
    # Unfortunately, we get NaNs if we don't handle no-decay separately.
    # 'global_step' means the number of batches seen by the graph.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    if 0 <= args.decay_start_iteration < args.train_iterations:
        learning_rate = tf.train.exponential_decay(
            args.learning_rate,
            tf.maximum(0, global_step - args.decay_start_iteration),
            # Decay every 'lr_decay_steps' after the 'decay_start_iteration'.
            # args.train_iterations - args.decay_start_iteration,
            # args.weight_decay_factor)
            args.lr_decay_steps,
            args.lr_decay_factor,
            staircase=True)
    else:
        # The case when we set 'decay_start_iteration' to -1.
        learning_rate = args.learning_rate
    tf.summary.scalar('learning_rate', learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1e-3)
    # Feel free to try others!
    # optimizer = tf.train.AdadeltaOptimizer(learning_rate)

    # Update_ops are used to update batchnorm stats.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.minimize(loss_mean, global_step=global_step)

    # Define a saver for the complete model.
    checkpoint_saver = tf.train.Saver(max_to_keep=0)

    with tf.Session(config=config) as sess:
        if args.resume:
            # In case we're resuming, simply load the full checkpoint to
            # init.
            last_checkpoint = tf.train.latest_checkpoint(args.experiment_root)
            log.info('Restoring from checkpoint: {}'.format(last_checkpoint))
            checkpoint_saver.restore(sess, last_checkpoint)
        else:
            # But if we're starting from scratch, we may need to load some
            # variables from the pre-trained weights, and random init others.
            sess.run(tf.global_variables_initializer())
            if args.initial_checkpoint is not None:
                saver = tf.train.Saver(model_variables)
                # Restore the pre-trained parameters from the online model.
                saver.restore(sess, args.initial_checkpoint)

            # In any case, we also store this initialization as a checkpoint,
            # such that we could run exactly reproducible experiments.
            checkpoint_saver.save(sess,
                                  os.path.join(args.experiment_root,
                                               'checkpoint'),
                                  global_step=0)

        merged_summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(args.experiment_root,
                                               sess.graph)

        start_step = sess.run(global_step)
        log.info('Starting training from iteration {}.'.format(start_step))

        # Finally, here comes the main-loop. This `Uninterrupt` is a handy
        # utility such that an iteration still finishes on Ctrl+C and we can
        # stop the training cleanly.
        with lb.Uninterrupt(sigs=[SIGINT, SIGTERM], verbose=True) as u:
            for i in range(start_step, args.train_iterations):

                # Compute gradients, update weights, store logs!
                start_time = time.time()
                _, summary, step, b_prec_at_k, b_embs, b_loss, b_fids = \
                    sess.run([train_op, merged_summary, global_step,
                              prec_at_k, endpoints['emb'], losses, fids])
                elapsed_time = time.time() - start_time

                # Compute the iteration speed and add it to the summary.
                # We did observe some weird spikes that we couldn't track
                # down.
                summary2 = tf.Summary()
                summary2.value.add(tag='secs_per_iter',
                                   simple_value=elapsed_time)
                summary_writer.add_summary(summary2, step)
                summary_writer.add_summary(summary, step)

                if args.detailed_logs:
                    log_embs[i], log_loss[i], log_fids[i] = \
                        b_embs, b_loss, b_fids

                # Do a huge print out of the current progress.
                seconds_todo = (args.train_iterations - step) * elapsed_time
                log.info(
                    'iter:{:6d}, loss min|avg|max: {:.3f}|{:.3f}|{:6.3f}, '
                    'batch-p@{}: {:.2%}, ETA: {} ({:.2f}s/it)'.format(
                        step,
                        float(np.min(b_loss)),
                        float(np.mean(b_loss)),
                        float(np.max(b_loss)),
                        args.batch_k - 1,
                        float(b_prec_at_k),
                        timedelta(seconds=int(seconds_todo)),
                        elapsed_time))
                sys.stdout.flush()
                sys.stderr.flush()

                # Save a checkpoint of training every so often.
                if (args.checkpoint_frequency > 0 and
                        step % args.checkpoint_frequency == 0):
                    checkpoint_saver.save(sess,
                                          os.path.join(args.experiment_root,
                                                       'checkpoint'),
                                          global_step=step)

                # Stop the main-loop at the end of the step, if requested.
                if u.interrupted:
                    log.info("Interrupted on request!")
                    break

        # Store one final checkpoint. This might be redundant, but it is
        # crucial in case intermediate storing was disabled and it saves a
        # checkpoint when the process was interrupted.
        checkpoint_saver.save(sess,
                              os.path.join(args.experiment_root,
                                           'checkpoint'),
                              global_step=step)
def cnn_model_fn(features, labels, mode, num_classes=20):
    # Write this function
    """Model function for CNN."""
    # Input Layer.
    N = features["x"].shape[0]
    # input_layer = tf.reshape(features["x"], [-1, 256, 256, 3])
    if mode != tf.estimator.ModeKeys.PREDICT:
        # Random 224x224 crop plus random flips, applied per image.
        crop_layer = [
            tf.image.random_flip_left_right(
                tf.image.random_flip_up_down(
                    tf.random_crop(features["x"][0, :, :, :],
                                   [224, 224, 3])))
        ]
        for i in range(1, N):
            crop_layer = tf.concat([
                crop_layer,
                [
                    tf.image.random_flip_left_right(
                        tf.image.random_flip_up_down(
                            tf.random_crop(features["x"][i, :, :, :],
                                           [224, 224, 3])))
                ]
            ], 0)
        crop_layer = tf.image.resize_images(crop_layer, [256, 256])
    else:
        crop_layer = tf.image.resize_images(features["x"], [256, 256])

    # conv(k, s, n, p)
    # conv(11, 4, 96, 'VALID') + relu()
    with tf.variable_scope('conv1') as scope:
        conv1 = tf.layers.conv2d(
            inputs=crop_layer,
            kernel_size=[11, 11],
            strides=4,
            filters=96,
            padding="valid",
            kernel_initializer=tf.truncated_normal_initializer(mean=0,
                                                               stddev=0.01),
            bias_initializer=tf.zeros_initializer(),
            activation=tf.nn.relu)
        scope.reuse_variables()
        weights = tf.get_variable('conv2d/kernel')
        tf.summary.image('conv1/weights', weight_2_grid(weights))

    # max_pool(3, 2)
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[3, 3], strides=2)

    # conv(5, 1, 256, 'SAME') + relu()
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        kernel_size=[5, 5],
        strides=1,
        filters=256,
        padding="same",
        kernel_initializer=tf.truncated_normal_initializer(mean=0,
                                                           stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        activation=tf.nn.relu)

    # max_pool(3, 2)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[3, 3], strides=2)

    # conv(3, 1, 384, 'SAME') + relu()
    conv3 = tf.layers.conv2d(
        inputs=pool2,
        kernel_size=[3, 3],
        strides=1,
        filters=384,
        padding="same",
        kernel_initializer=tf.truncated_normal_initializer(mean=0,
                                                           stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        activation=tf.nn.relu)

    # conv(3, 1, 384, 'SAME') + relu()
    conv4 = tf.layers.conv2d(
        inputs=conv3,
        kernel_size=[3, 3],
        strides=1,
        filters=384,
        padding="same",
        kernel_initializer=tf.truncated_normal_initializer(mean=0,
                                                           stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        activation=tf.nn.relu)

    # conv(3, 1, 256, 'SAME') + relu()
    conv5 = tf.layers.conv2d(
        inputs=conv4,
        kernel_size=[3, 3],
        strides=1,
        filters=256,
        padding="same",
        kernel_initializer=tf.truncated_normal_initializer(mean=0,
                                                           stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        activation=tf.nn.relu)

    # max_pool(3, 2)
    pool3 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[3, 3], strides=2)

    # flatten()
    pool3_flat = tf.reshape(pool3, [-1, 6 * 6 * 256])
    # pool3_flat = tf.reshape(pool3, [int((labels.shape)[0]), -1])

    # fully_connected(4096) + relu()
    dense1 = tf.layers.dense(inputs=pool3_flat, units=4096,
                             activation=tf.nn.relu)
    # dropout(0.5)
    dropout1 = tf.layers.dropout(
        inputs=dense1, rate=0.5,
        training=mode == tf.estimator.ModeKeys.TRAIN)

    # fully_connected(4096) + relu()
    dense2 = tf.layers.dense(inputs=dropout1, units=4096,
                             activation=tf.nn.relu)
    # dropout(0.5)
    dropout2 = tf.layers.dropout(
        inputs=dense2, rate=0.5,
        training=mode == tf.estimator.ModeKeys.TRAIN)

    # fully_connected(20) -- Logits Layer.
    logits = tf.layers.dense(inputs=dropout2, units=20)

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode).
        "classes": tf.argmax(input=logits, axis=1),
        # Add `sigmoid_tensor` to the graph. It is used for PREDICT and by
        # the `logging_hook`.
"probabilities": tf.sigmoid(logits, name="sigmoid_tensor") } if mode == tf.estimator.ModeKeys.PREDICT: return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) # Calculate Loss (for both TRAIN and EVAL modes) # onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10) onehot_labels = labels loss = tf.identity(tf.losses.sigmoid_cross_entropy( multi_class_labels=onehot_labels, logits=logits), name='loss') tf.summary.scalar('training_loss', loss) # Configure the Training Op (for TRAIN mode) if mode == tf.estimator.ModeKeys.TRAIN: global_step = tf.train.get_global_step() decay_LR = tf.train.exponential_decay(0.001, global_step, 10000, 0.5, staircase=True) optimizer = tf.train.MomentumOptimizer(learning_rate=decay_LR, momentum=0.9) train_op = optimizer.minimize(loss=loss, global_step=global_step) return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op) # Add evaluation metrics (for EVAL mode) eval_metric_ops = { "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["probabilities"]) } return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def main(args): network = importlib.import_module(args.model_def) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir( log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir( model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) # Store some git revision info in a text file in the log directory src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) random.seed(args.seed) train_set = facenet.get_dataset(args.data_dir) if args.filter_filename: train_set = filter_dataset(train_set, args.filter_filename, args.filter_percentile, args.filter_min_nrof_images_per_class) nrof_classes = len(train_set) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) pretrained_model = None if args.pretrained_model: pretrained_model = os.path.expanduser(args.pretrained_model) print('Pre-trained model: %s' % pretrained_model) if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, actual_issame = lfw.get_paths( os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Get a list of image paths and their labels image_list, label_list = facenet.get_image_paths_and_labels(train_set) assert len(image_list) > 0, 'The dataset should not be empty' # Create a queue that produces indices into the image_list and label_list labels = ops.convert_to_tensor(label_list, dtype=tf.int32) range_size = array_ops.shape(labels)[0] index_queue = tf.train.range_input_producer(range_size, num_epochs=None, shuffle=True, seed=None, capacity=32) index_dequeue_op = index_queue.dequeue_many( args.batch_size * args.epoch_size, 'index_dequeue') learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int64, shape=(None, 1), name='labels') input_queue = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(1, ), (1, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder], name='enqueue_op') nrof_preprocess_threads = 4 images_and_labels = [] for _ in range(nrof_preprocess_threads): filenames, label = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_png(file_contents) if args.random_rotate: image = tf.py_func(facenet.random_rotate_image, [image], tf.uint8) if args.random_crop: image = tf.random_crop( image, [args.image_size, args.image_size, 3]) else: image = tf.image.resize_image_with_crop_or_pad( image, args.image_size, args.image_size) if args.random_flip: image = tf.image.random_flip_left_right(image) #pylint: disable=no-member image.set_shape((args.image_size, args.image_size, 3)) images.append(tf.image.per_image_standardization(image)) 
images_and_labels.append([images, label]) image_batch, label_batch = tf.train.batch_join( images_and_labels, batch_size=batch_size_placeholder, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') print('Total number of classes: %d' % nrof_classes) print('Total number of examples: %d' % len(image_list)) print('Building training graph') batch_norm_params = { # Decay for the moving averages. 'decay': 0.995, # epsilon to prevent 0s in variance. 'epsilon': 0.001, # force in-place updates of mean and variance estimates 'updates_collections': None, # Moving averages ends up in the trainable variables collection 'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES], } # Build the inference graph prelogits, _ = network.inference(image_batch, args.keep_probability, phase_train=phase_train_placeholder, weight_decay=args.weight_decay) bottleneck = slim.fully_connected( prelogits, args.embedding_size, activation_fn=None, weights_initializer=tf.truncated_normal_initializer(stddev=0.1), weights_regularizer=slim.l2_regularizer(args.weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, scope='Bottleneck', reuse=False) logits = slim.fully_connected( bottleneck, len(train_set), activation_fn=None, weights_initializer=tf.truncated_normal_initializer(stddev=0.1), weights_regularizer=slim.l2_regularizer(args.weight_decay), scope='Logits', reuse=False) embeddings = tf.nn.l2_normalize(bottleneck, 1, 1e-10, name='embeddings') # Add center loss if args.center_loss_factor > 0.0: prelogits_center_loss, _ = facenet.center_loss( prelogits, label_batch, args.center_loss_alfa, nrof_classes) tf.add_to_collection( tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor) learning_rate = tf.train.exponential_decay( learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # Calculate the average cross entropy loss across the batch cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=label_batch, logits=logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') tf.add_to_collection('losses', cross_entropy_mean) # Calculate the total losses regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms) # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Start running operations on the Graph. 
gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) tf.train.start_queue_runners(sess=sess) with sess.as_default(): if pretrained_model: print('Restoring pretrained model: %s' % pretrained_model) saver.restore(sess, pretrained_model) # Training and validation loop print('Running training') epoch = 0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size # Train for one epoch train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, global_step, total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file) # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) # Evaluate on LFW if args.lfw_dir: evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer) return model_dir
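# Sketch (assumption): the FIFOQueue/enqueue_many pipeline above predates
# tf.data; on TF >= 1.4 the same decode -> crop/pad -> flip -> standardize
# chain can be written as a dataset. Argument names mirror the args used above.
def make_dataset(image_paths, label_list, image_size, batch_size,
                 random_crop=True, random_flip=True):
    ds = tf.data.Dataset.from_tensor_slices((image_paths, label_list))

    def _parse(path, label):
        image = tf.image.decode_png(tf.read_file(path), channels=3)
        if random_crop:
            image = tf.random_crop(image, [image_size, image_size, 3])
        else:
            image = tf.image.resize_image_with_crop_or_pad(
                image, image_size, image_size)
        if random_flip:
            image = tf.image.random_flip_left_right(image)
        image.set_shape((image_size, image_size, 3))
        return tf.image.per_image_standardization(image), label

    return ds.map(_parse, num_parallel_calls=4).shuffle(10000).batch(batch_size)

# images, labels = make_dataset(...).make_one_shot_iterator().get_next()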
def add_input_distortions(flip_left_right, random_crop, random_scale, random_brightness): """Creates the operations to apply the specified distortions. During training it can help to improve the results if we run the images through simple distortions like crops, scales, and flips. These reflect the kind of variations we expect in the real world, and so can help train the model to cope with natural data more effectively. Here we take the supplied parameters and construct a network of operations to apply them to an image. Cropping ~~~~~~~~ Cropping is done by placing a bounding box at a random position in the full image. The cropping parameter controls the size of that box relative to the input image. If it's zero, then the box is the same size as the input and no cropping is performed. If the value is 50%, then the crop box will be half the width and height of the input. In a diagram it looks like this: < width > +---------------------+ | | | width - crop% | | < > | | +------+ | | | | | | | | | | | | | | +------+ | | | | | +---------------------+ Scaling ~~~~~~~ Scaling is a lot like cropping, except that the bounding box is always centered and its size varies randomly within the given range. For example if the scale percentage is zero, then the bounding box is the same size as the input and no scaling is applied. If it's 50%, then the bounding box will be in a random range between half the width and height and full size. Args: flip_left_right: Boolean whether to randomly mirror images horizontally. random_crop: Integer percentage setting the total margin used around the crop box. random_scale: Integer percentage of how much to vary the scale by. random_brightness: Integer range to randomly multiply the pixel values by. graph. Returns: The jpeg input layer and the distorted result tensor. """ jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput') decoded_image = tf.image.decode_jpeg(jpeg_data, channels=MODEL_INPUT_DEPTH) decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32) decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0) margin_scale = 1.0 + (random_crop / 100.0) resize_scale = 1.0 + (random_scale / 100.0) margin_scale_value = tf.constant(margin_scale) resize_scale_value = tf.random_uniform(tensor_shape.scalar(), minval=1.0, maxval=resize_scale) scale_value = tf.multiply(margin_scale_value, resize_scale_value) precrop_width = tf.multiply(scale_value, MODEL_INPUT_WIDTH) precrop_height = tf.multiply(scale_value, MODEL_INPUT_HEIGHT) precrop_shape = tf.stack([precrop_height, precrop_width]) precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32) precropped_image = tf.image.resize_bilinear(decoded_image_4d, precrop_shape_as_int) precropped_image_3d = tf.squeeze(precropped_image, squeeze_dims=[0]) cropped_image = tf.random_crop(precropped_image_3d, [MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH, MODEL_INPUT_DEPTH]) if flip_left_right: flipped_image = tf.image.random_flip_left_right(cropped_image) else: flipped_image = cropped_image brightness_min = 1.0 - (random_brightness / 100.0) brightness_max = 1.0 + (random_brightness / 100.0) brightness_value = tf.random_uniform(tensor_shape.scalar(), minval=brightness_min, maxval=brightness_max) brightened_image = tf.multiply(flipped_image, brightness_value) distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult') return jpeg_data, distort_result
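# Usage sketch (assumption): the distortion graph is driven by the
# 'DistortJPGInput' string placeholder, so distortions are applied per image
# at run time by feeding raw JPEG bytes. The file path here is hypothetical.
jpeg_data, distort_result = add_input_distortions(
    flip_left_right=True, random_crop=10, random_scale=10, random_brightness=10)
with tf.Session() as sess:
    image_bytes = tf.gfile.FastGFile('/tmp/example.jpg', 'rb').read()
    distorted = sess.run(distort_result, feed_dict={jpeg_data: image_bytes})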
def main(args): config = config_reader.triplets_afix_config(args.config) np.random.seed(seed=config.seed) network = importlib.import_module(config.model_def) chokepoint_dataset = chokepoint.chokepoint_data(config.chokepoint_still_dir, config.chokepoint_video_dir, config.chokepoint_pairs) fold_list = [([0, 1], [2, 3, 4]), ([1, 2], [3, 4, 0]), ([2, 3], [4, 0, 1]), ([3, 4], [0, 1, 2]), ([4, 0], [1, 2, 3])] for fold_idx in range(5): print('Fold: {}'.format(fold_idx)) train_folds, evaluation_folds = fold_list[fold_idx] # Train set chokepoint_train_set = chokepoint_dataset.get_S2V_dataset(train_folds) chokepoint1_paths, chokepoint1_issame = chokepoint_dataset.get_pairs(train_folds) # Validation set chokepoint2_paths, chokepoint2_issame = chokepoint_dataset.get_pairs(evaluation_folds) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), 'fold{}_'.format(fold_idx) + subdir) if not os.path.isdir(log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), 'fold{}_'.format(fold_idx) + subdir) if not os.path.isdir(model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) # Write arguments to a text file facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) # Store some git revision info in a text file in the log directory src_path,_ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) if args.pretrained_model: print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model)) if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, lfw_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Placeholder for the learning rate learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None,3), name='image_paths') labels_placeholder = tf.placeholder(tf.int64, shape=(None,3), name='labels') input_queue = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(3,), (3,)], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder]) nrof_preprocess_threads = 4 images_and_labels = [] for _ in range(nrof_preprocess_threads): filenames, label = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_image(file_contents, channels=3) if args.random_crop: image = tf.random_crop(image, [args.image_size, args.image_size, 3]) else: image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size) if args.random_flip: image = tf.image.random_flip_left_right(image) #pylint: disable=no-member image.set_shape((args.image_size, args.image_size, 3)) images.append(tf.image.per_image_standardization(image)) images_and_labels.append([images, label]) image_batch, labels_batch = tf.train.batch_join( 
images_and_labels, batch_size=batch_size_placeholder, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') labels_batch = tf.identity(labels_batch, 'label_batch') # Build the inference graph prelogits, _ = network.inference(image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # Split embeddings into anchor, positive and negative and calculate triplet loss anchor, positive, negative = tf.unstack(tf.reshape(embeddings, [-1,3,args.embedding_size]), 3, 1) triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha) learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step, args.learning_rate_decay_epochs*args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # Calculate the total losses regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables()) # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. # summary_op = tf.summary.merge_all() # Start running operations on the Graph. 
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) # Initialize variables sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder:True}) sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder:True}) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if args.pretrained_model: print('Restoring pretrained model: %s' % args.pretrained_model) saver.restore(sess, os.path.expanduser(args.pretrained_model)) # Training and validation loop epoch = 0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size # Evaluate on COX evaluate(sess, chokepoint1_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, chokepoint1_issame, 100, 2, log_dir, step, summary_writer, args.embedding_size, tag='chokepoint_train') evaluate(sess, chokepoint2_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, chokepoint2_issame, 100, 3, log_dir, step, summary_writer, args.embedding_size, tag='chokepoint_eval') # Evaluate on LFW # if args.lfw_dir: # lfw_result = evaluate(sess, lfw_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, # batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, # lfw_issame, args.batch_size, # args.lfw_nrof_folds, log_dir, step, summary_writer, args.embedding_size, tag='lfw') # Train for one epoch train(args, sess, chokepoint_train_set, epoch, image_paths_placeholder, labels_placeholder, labels_batch, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step, embeddings, total_loss, train_op, summary_writer, args.learning_rate_schedule_file, args.embedding_size) # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) return model_dir
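# Sketch (assumption): facenet.triplet_loss is used as a black box above; the
# standard FaceNet formulation hinges the anchor-positive vs. anchor-negative
# squared distances at margin alpha, so a minimal version looks like:
def triplet_loss_sketch(anchor, positive, negative, alpha):
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), 1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), 1)
    return tf.reduce_mean(tf.maximum(pos_dist - neg_dist + alpha, 0.0))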
def vgg16(self, inp): """ load variable from npy to build the VGG :param inp: rgb image [batch, height, width, 3] values scaled [0., 255.] """ start_time = time.time() PrintWithTime(BarFormat("build model started (VGG-16)")) # input is images of [256, 256, 3], random crop and flip to [224, 224, # 3] distorted_image = tf.stack([ tf.random_crop(tf.image.random_flip_left_right(each_image), [224, 224, 3]) for each_image in tf.unstack(inp) ]) self.train_layers = [] self.train_last_layer = [] self.classifyLastLayer = [] # Convert RGB to BGR red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=distorted_image) assert red.get_shape().as_list()[1:] == [224, 224, 1] assert green.get_shape().as_list()[1:] == [224, 224, 1] assert blue.get_shape().as_list()[1:] == [224, 224, 1] bgr = tf.concat(axis=3, values=[ blue - VGG_MEAN[0], green - VGG_MEAN[1], red - VGG_MEAN[2], ]) assert bgr.get_shape().as_list()[1:] == [224, 224, 3] radius = 2 alpha = 2e-05 beta = 0.75 bias = 1.0 self.conv1_1 = self.conv_layer(bgr, "conv1_1") self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2") self.pool1 = self.max_pool(self.conv1_2, 'pool1') self.lrn1 = tf.nn.local_response_normalization(self.pool1, depth_radius=radius, alpha=alpha, beta=beta, bias=bias) self.conv2_1 = self.conv_layer(self.pool1, "conv2_1") self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2") self.pool2 = self.max_pool(self.conv2_2, 'pool2') self.lrn2 = tf.nn.local_response_normalization(self.pool2, depth_radius=radius, alpha=alpha, beta=beta, bias=bias) self.conv3_1 = self.conv_layer(self.pool2, "conv3_1") self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2") self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3") self.pool3 = self.max_pool(self.conv3_3, 'pool3') self.conv4_1 = self.conv_layer(self.pool3, "conv4_1") self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2") self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3") self.pool4 = self.max_pool(self.conv4_3, 'pool4') self.conv5_1 = self.conv_layer(self.pool4, "conv5_1") self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2") self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3") self.pool5 = self.max_pool(self.conv5_3, 'pool5') self.fc6 = self.fc_layer(self.pool5, "fc6") assert self.fc6.get_shape().as_list()[1:] == [4096] self.relu6 = tf.nn.dropout(tf.nn.relu( self.fc6), 0.5) if self._train else tf.nn.relu(self.fc6) self.fc7 = self.fc_layer(self.relu6, "fc7") self.relu7 = tf.nn.dropout(tf.nn.relu( self.fc7), 0.5) if self._train else tf.nn.relu(self.fc7) ''' ADD ONE MORE DENSE 4096 -> D ''' # FC8 # Output output_dim with tf.name_scope('fc8') as scope: fc8w = tf.Variable(tf.random_normal([4096, 300], dtype=tf.float32, stddev=1e-2), name='weights') fc8b = tf.Variable(tf.constant(0.0, shape=[300], dtype=tf.float32), name='biases') self.fc8l = tf.nn.bias_add(tf.matmul(self.relu7, fc8w), fc8b) self.fc8 = tf.nn.tanh(self.fc8l) self.train_last_layer += [fc8w, fc8b] # Classify # Output label_num with tf.name_scope('cls') as scope: clsw = tf.Variable(tf.random_normal([4096, self.n_class], dtype=tf.float32, stddev=1e-2), name='weights') clsb = tf.Variable(tf.constant(0.0, shape=[self.n_class], dtype=tf.float32), name='biases') self.cls = tf.nn.bias_add(tf.matmul(self.relu7, clsw), clsb) self.classifyLastLayer += [clsw, clsb] PrintWithTime( ("build model finished: %ds" % (time.time() - start_time)))
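# Sketch (assumption): the split/concat RGB->BGR conversion in vgg16 can also
# be written as a channel reversal plus a broadcast mean subtraction; `rgb`
# stands for the distorted_image tensor above, VGG_MEAN for the same
# [blue, green, red] means.
bgr = tf.reverse(rgb, axis=[-1]) - tf.constant(VGG_MEAN, dtype=tf.float32)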
def read_and_decode(self, filename_queue): reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) _features = { feature: tf.FixedLenFeature([], tf.string) for feature in self.features.keys() } features = tf.parse_single_example( serialized_example, # Defaults are not specified since both keys are required. features=_features) outputs = {} for feature_name, feature in self.features.items(): # Convert from a scalar string tensor (whose single string has image = tf.decode_raw(features[feature_name], tf.uint8) # Change to tf.int8 if 'depth' in feature: shape = [ feature['in_width'], feature['in_width'], feature['depth'] ] elif 'shape' in feature: shape = feature['shape'] else: shape = [feature['in_width'], feature['in_width']] raw_shape = np.prod(shape) image.set_shape([raw_shape]) image = tf.reshape(image, shape) outputs[feature_name] = image outputs = { k: tf.cast(v, tf.float32) / 255.0 for k, v in outputs.items() } #return outputs if len(shape) == 2: outputs = {k: tf.expand_dims(v, -1) for k, v in outputs.items()} # Rotation - Random Flip left, right, random, up down if self.flipping: outputs = { k: tf.image.random_flip_up_down(v, seed=0) for k, v in outputs.items() } outputs = { k: tf.image.random_flip_left_right(v, seed=1) for k, v in outputs.items() } if self.random_brightness: max_delta = 0.1 image_name = self.features.keys()[0] outputs[image_name] = tf.image.random_brightness( outputs[image_name], max_delta, seed=0) outputs[image_name] = tf.image.random_contrast(outputs[image_name], 0.7, 1, seed=0) outputs = {k: tf.squeeze(v) for k, v in outputs.items()} # Rotation by degree if self.rotating: angle = tf.random_uniform([1], -self.max_degree, self.max_degree, dtype=tf.float32) outputs = { k: tip.rotate_image(v, angle) for k, v in outputs.items() } # Translation Invariance - Crop 712 - > 512 and 324 -> 224 if self.random_crop: outputs = { k: tf.random_crop( v, [self.features[k]['width'], self.features[k]['width']], seed=10) for k, v in outputs.items() } else: outputs = { k: tip.central_crop( v, [self.features[k]['width'], self.features[k]['width']]) for k, v in outputs.items() } # Convert from [0, 255] -> [-0.5, 0.5] floats. outputs = { k: tf.cast(v, tf.float32) / 255.0 for k, v in outputs.items() } outputs = list(OrderedDict(sorted(outputs.items())).values()) return outputs
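# Note: read_and_decode divides the decoded pixels by 255 twice (once right
# after reshaping and again after the crop), so values end up in [0, 1/255]
# rather than the [-0.5, 0.5] the final comment claims. A single-pass rescale
# to that commented range would be:
outputs = {k: tf.cast(v, tf.float32) / 255.0 - 0.5 for k, v in outputs.items()}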
def get_batch(image, label, batch_size, capacity):
    '''
    Args:
        image: list type
        label: list type
        batch_size: batch size
        capacity: the maximum number of elements in the queue
    Returns:
        image_batch: 4D tensor [batch_size, width, height, 3], dtype=tf.float32
                     (the original tutorial used RGB, 3 channels; for grayscale
                     this would be 1 channel)
        label_batch: 1D tensor [batch_size], dtype=tf.int32
    '''
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # make an input queue
    input_queue = tf.train.slice_input_producer([image, label])

    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)

    ######################################
    # data augmentation should go here
    ######################################
    # It may be better to pre-process the images first.
    # image = tf.image.resize_image_with_crop_or_pad(image, image_W, image_H)

    # data augmentation
    # Note: tf.random_crop fails if the decoded image is smaller than 196x196.
    image = tf.random_crop(image, [196, 196, 3])
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_contrast(image, lower=0.3, upper=1.0)
    image = tf.image.random_brightness(image, max_delta=0.2)
    # image = tf.image.random_hue(image, max_delta=0.05)
    # image = tf.image.random_saturation(image, lower=0.0, upper=2.0)

    # Limit the image pixels between [0, 1] in case of overflow.
    # image = tf.minimum(image, 1.0)
    # image = tf.maximum(image, 0.0)

    # If you want to look at normal-looking images from the generated batches,
    # comment out the standardization below and the
    # `image_batch = tf.cast(image_batch, tf.float32)` line.
    # Do NOT comment them out during training!
    image = tf.image.per_image_standardization(image)

    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=64,
                                              capacity=capacity)
    # you can also use shuffle_batch
    # image_batch, label_batch = tf.train.shuffle_batch([image, label],
    #                                                   batch_size=BATCH_SIZE,
    #                                                   num_threads=64,
    #                                                   capacity=CAPACITY,
    #                                                   min_after_dequeue=CAPACITY-1)

    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)

    return image_batch, label_batch
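# Usage sketch (assumption): to inspect batches, the queue created by
# slice_input_producer has to be serviced by queue runners. image_list and
# label_list are hypothetical Python lists of file paths and integer labels.
image_batch, label_batch = get_batch(image_list, label_list,
                                     batch_size=4, capacity=64)
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    imgs, lbls = sess.run([image_batch, label_batch])
    print(imgs.shape, lbls)  # (4, 196, 196, 3) and 4 labels
    coord.request_stop()
    coord.join(threads)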
def distorted_inputs( data_dir, batch_size, distort=2, num_examples_per_epoch_for_train=NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN): """Construct distorted input for training using the Reader ops. Args: data_dir: Path to the dataset data directory. batch_size: Number of images per batch. Returns: images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, INPUT_IMAGE_CHANNELS] size. labels: Labels. 1D tensor of [batch_size] size. """ # for CIFAR-10 print("From within distorted_inputs, data_dir = {}here".format(data_dir)) #filenames = [os.path.join(data_dir, 'patches_%d.bin' % i) for i in xrange(0, 8)] #filenames = [os.path.join(data_dir, 'patches_train_%d.bin' % i) for i in xrange(0, 1)] #filenames = [os.path.join(data_dir, 'train_crop.bin')] filenames = [ os.path.join(data_dir, 'train_%d.bin' % i) for i in xrange(0, 10) ] print("Expected filenames: {}".format(filenames)) myfilenames = [] for f in filenames: if tf.gfile.Exists(f): myfilenames.append(f) print("Found filenames: {}".format(myfilenames)) filenames = myfilenames if len(filenames) == 0: raise ValueError('Failed to find any files to process') for f in filenames: if not tf.gfile.Exists(f): raise ValueError('Failed to find file: ' + f) # Create a queue that produces the filenames to read. filename_queue = tf.train.string_input_producer(filenames) # Read examples from files in the filename queue. read_input = read_dataset(filename_queue) reshaped_image = tf.cast(read_input.uint8image, tf.float32) height = IMAGE_HEIGHT width = IMAGE_WIDTH # Image processing for training the network. Note the many random # distortions applied to the image. if distort == 1: # Randomly crop a [height, width] section of the image. distorted_image = tf.random_crop(reshaped_image, [height, width, INPUT_IMAGE_CHANNELS]) # Randomly flip the image horizontally. distorted_image = tf.image.random_flip_left_right(distorted_image) # Because these operations are not commutative, consider randomizing # the order their operation. distorted_image = tf.image.random_brightness(distorted_image, max_delta=63) distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8) elif distort == 2: distorted_image = tf.random_crop(reshaped_image, [height, width, INPUT_IMAGE_CHANNELS]) else: distorted_image = tf.image.resize_image_with_crop_or_pad( reshaped_image, width, height) # Subtract off the mean and divide by the variance of the pixels. float_image = tf.image.per_image_standardization(distorted_image) # Ensure that the random shuffling has good mixing properties. min_fraction_of_examples_in_queue = 0.4 min_queue_examples = int(num_examples_per_epoch_for_train * min_fraction_of_examples_in_queue) print('Filling queue with %d images before starting to train. ' 'This will take a few minutes.' % min_queue_examples) # Generate a batch of images and labels by building up a queue of examples. return _generate_image_and_label_batch(float_image, read_input.label, min_queue_examples, batch_size, shuffle=True)
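# Sketch (assumption): tf.image.per_image_standardization, used in all three
# branches above, subtracts the per-image mean and divides by a standard
# deviation floored at 1/sqrt(num_pixels) to avoid division by zero:
def per_image_standardization_sketch(image):
    image = tf.cast(image, tf.float32)
    mean = tf.reduce_mean(image)
    stddev = tf.sqrt(tf.reduce_mean(tf.square(image - mean)))
    num_elements = tf.cast(tf.size(image), tf.float32)
    return (image - mean) / tf.maximum(stddev, tf.rsqrt(num_elements))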
def read_image_from_filename(filename, batch_size, num_threads=4, output_height=128, output_width=128, min_after_dequeue=5000, num_channels=1, use_shuffle_batch=True, scope=None): with tf.variable_scope(scope, "image_producer"): textReader = tf.TextLineReader() csv_path = tf.train.string_input_producer([filename]) _, csv_content = textReader.read(csv_path) artifact_filenames, reference_filenames, quality = tf.decode_csv( csv_content, record_defaults=[[""], [""], [""]]) # when training use_shuffle_batch must be True # else (e.g. evaluation) evaluation code runs in single epoch and # use tf.train.batch instead tf.train.shuffle_batch if use_shuffle_batch: num_epochs = None else: num_epochs = 1 """ # this method is from https://stackoverflow.com/q/34340489 # use tf.train.slice_input_producer instead of string_input_producer # and tf.read_file instead of tf.WholeFileReader.read input_queue = tf.train.slice_input_producer( [artifact_filenames, reference_filenames, labels], num_epochs=num_epochs, shuffle=False) artifact_data = tf.read_file(input_queue[0]) reference_data = tf.read_file(input_queue[1]) label_data = tf.read_file(input_queue[2]) """ artifact_data = tf.read_file(artifact_filenames) reference_data = tf.read_file(reference_filenames) artifact_im = tf.image.decode_png(artifact_data, channels=num_channels) reference_im = tf.image.decode_png(reference_data, channels=num_channels) # concat all images in channel axis to randomly crop together concated_im = tf.concat([artifact_im, reference_im], axis=2) if use_shuffle_batch: concated_im = tf.random_crop( concated_im, [output_height, output_width, num_channels + num_channels]) elif output_height > 0 and output_width > 0 and not use_shuffle_batch: concated_im = tf.image.resize_image_with_crop_or_pad( concated_im, output_height, output_width) if use_shuffle_batch: capacity = min_after_dequeue + 10 * batch_size im_batch = tf.train.shuffle_batch( [concated_im], batch_size=batch_size, capacity=capacity, num_threads=num_threads, min_after_dequeue=min_after_dequeue, allow_smaller_final_batch=True, name="shuffle_batch") else: im_batch, label_batch = tf.train.batch( [concated_im], batch_size=batch_size, num_threads=num_threads, allow_smaller_final_batch=True, name="batch") # split concatenated data artifact_batch, reference_batch = tf.split( im_batch, [num_channels, num_channels], axis=3) artifact_batch = tf.cast(artifact_batch, tf.float32) / 127.5 - 1.0 reference_batch = tf.cast(reference_batch, tf.float32) / 127.5 - 1.0 return artifact_batch, reference_batch
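# Sketch: the core trick above is cropping the artifact/reference pair with a
# single aligned window by stacking them on the channel axis first. Reduced to
# its essentials (names follow the function above):
pair = tf.concat([artifact_im, reference_im], axis=2)
pair = tf.random_crop(pair, [output_height, output_width, 2 * num_channels])
artifact_crop, reference_crop = tf.split(pair, 2, axis=2)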
def train_data(input_data, is_training):
    # Pad each image, then take a random 32x32 crop
    # (standard CIFAR-style augmentation).
    a = tf.pad(input_data, [[0, 0], [padding_pixel, padding_pixel],
                            [padding_pixel, padding_pixel], [0, 0]])
    a = tf.random_crop(a, size=[BATCH_SIZE, 32, 32, 3])
    # Only apply the augmentation in training mode.
    return tf.cond(is_training, lambda: a, lambda: input_data)
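# Usage sketch (assumption): both tf.cond branches must yield the same shape,
# which holds here because the random crop restores [BATCH_SIZE, 32, 32, 3].
# `images` is a hypothetical input batch of that shape.
is_training = tf.placeholder(tf.bool, name='is_training')
batch = train_data(images, is_training)
# feed {is_training: True} while training, False during evaluation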
def _random_distord(images, labels): images = tf.image.random_flip_left_right(images) images = tf.image.random_flip_up_down(images) # angle = tf.random_uniform(shape=(1,), minval=0, maxval=90) # images = tf.contrib.image.rotate(images, angle * math.pi / 180, interpolation='BILINEAR') # Rotation and transformation # print(images.shape) # = (?, 299, 299, ?) print('images.shape:', images.shape) w, h = IMAGE_SIZE a = max(w, h) d = math.ceil(a * (math.sqrt(2) - 1) / 2) print('paddings d =', d) paddings = tf.constant([[0, 0], [d, d], [d, d], [0, 0]]) images = tf.pad(images, paddings, "SYMMETRIC") #images = tf.image.resize_image_with_crop_or_pad(images, w+d, h+d) print('images.shape:', images.shape) angle = tf.random_uniform(shape=(1,), minval=0, maxval=settings.rotation_max_angle) images = tf.contrib.image.rotate(images, angle * math.pi / 180, interpolation='BILINEAR') #images = tf.image.crop_to_bounding_box(images, d, d, s+d, s+d) # Transformation #transform1 = tf.constant([1.0, 0.2, -30.0, 0.2, 1.0, 0.0, 0.0, 0.0], dtype=tf.float32) # transform is vector of length 8 or tensor of size N x 8 # [a0, a1, a2, b0, b1, b2, c0, c1] a0 = tf.constant([1.0]) a1 = tf.random_uniform(shape=(1,), minval=0.0, maxval=settings.transform_maxval) a2 = tf.constant([-30.0]) b0 = tf.random_uniform(shape=(1,), minval=0.0, maxval=settings.transform_maxval) b1 = tf.constant([1.0]) b2 = tf.constant([-30.0]) c0 = tf.constant([0.0]) c1 = tf.constant([0.0]) transform1 = tf.concat(axis=0, values=[a0, a1, a2, b0, b1, b2, c0, c1]) #transform = tf.tile(tf.expand_dims(transform1, 0), [batch, 1]) #print('Added transformations:', transform) images = tf.contrib.image.transform(images, transform1) images = tf.image.resize_image_with_crop_or_pad(images, h, w) # --- zoom = 1.1 w_crop = math.ceil(w / zoom) h_crop = math.ceil(h / zoom) #batch_size = int(images.shape[0]) #print(images.shape) batch_size = tf.size(images) / (3*h*w) images = tf.random_crop(images, [batch_size, h_crop, w_crop, 3]) images = tf.image.resize_images(images, [h, w]) # --- # end of Rotation and Transformation block images = tf.image.random_hue(images, max_delta=0.05) images = tf.image.random_contrast(images, lower=0.9, upper=1.5) images = tf.image.random_brightness(images, max_delta=0.1) images = tf.image.random_saturation(images, lower=1.0, upper=1.5) #images = tf.image.per_image_standardization(images) images = tf.map_fn(lambda frame: tf.image.per_image_standardization(frame), images) #images = tf.minimum(images, 1.0) #images = tf.maximum(images, 0.0) #images.set_shape([None, None, None, 3]) images.set_shape([None, 299, 299, 3]) return images, labels
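# Sketch (assumption): tf.contrib.image.transform reads its 8-vector
# [a0, a1, a2, b0, b1, b2, c0, c1] as mapping each *output* pixel (x, y) to
# input ((a0*x + a1*y + a2)/k, (b0*x + b1*y + b2)/k) with k = c0*x + c1*y + 1,
# so the block above applies a random shear. A pure horizontal shear would be:
shear = tf.random_uniform([], 0.0, 0.2)
transform = tf.stack([1.0, shear, 0.0,   # x' = x + shear * y
                      0.0, 1.0, 0.0,     # y' = y
                      0.0, 0.0])         # no projective terms
sheared = tf.contrib.image.transform(images, transform)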
def get_batch(paths, options): """Returns a data split of the RECOLA dataset, which was saved in tfrecords format. Args: split_name: A train/test/valid split name. Returns: The raw audio examples and the corresponding arousal/valence labels. """ shuffle = options['shuffle'] batch_size = options['batch_size'] num_classes = options['num_classes'] crop_size = options['crop_size'] horizontal_flip = options['horizontal_flip'] # root_path = Path(dataset_dir) / split_name # paths = [str(x) for x in root_path.glob('*.tfrecords')] filename_queue = tf.train.string_input_producer(paths, shuffle=shuffle) reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) features = tf.parse_single_example(serialized_example, features={ 'video': tf.FixedLenFeature([], tf.string), 'label': tf.FixedLenFeature([], tf.int64) }) video = tf.cast(tf.decode_raw(features['video'], tf.uint8), tf.float32) #/ 255. label = features['label'] #tf.decode_raw(features['label'], tf.int64) # Number of threads should always be one, in order to load samples # sequentially. videos, labels = tf.train.batch([video, label], batch_size, num_threads=1, capacity=1000, dynamic_pad=True) videos = tf.reshape(videos, (batch_size, 29, 118, 118, 1)) #labels = tf.reshape(labels, (batch_size, 1)) labels = tf.contrib.layers.one_hot_encoding(labels, num_classes) # if is_training: # resized_image = tf.image.resize_images(frame, [crop_size, 110]) # random cropping if crop_size is not None: videos = tf.random_crop(videos, [batch_size, 29, crop_size, crop_size, 1]) # random left right flip if horizontal_flip: sample = tf.random_uniform(shape=[], minval=0, maxval=1, dtype=tf.float32) option = tf.less(sample, 0.5) videos = tf.cond(option, lambda: tf.map_fn(video_left_right_flip, videos), lambda: tf.map_fn(tf.identity, videos)) # lambda: video_left_right_flip(videos), # lambda: tf.identity(videos)) videos = normalize( videos) #tf.cast(videos, tf.float32) * (1. / 255.) - 0.5 return videos, labels
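# Sketch: the false branch of the tf.cond above maps tf.identity over the
# videos; since cond only requires both branches to agree in shape and dtype,
# it can simply return the tensor unchanged:
flip = tf.less(tf.random_uniform([], 0.0, 1.0), 0.5)
videos = tf.cond(flip,
                 lambda: tf.map_fn(video_left_right_flip, videos),
                 lambda: videos)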
def image_augmentations(image, data_augmentations, model_input_image_size, label=None): """Coordinating image augmentations for both image and heatmap.""" im_size = [int(x) for x in image.get_shape()] im_size_check = np.any( np.less_equal(model_input_image_size[:2], im_size[:2])) if data_augmentations is not None: # Pixel/image-level augmentations if 'singleton' in data_augmentations: image = tf.expand_dims(image, axis=-1) print 'Adding singleton dimension to image.' if 'singleton_label' in data_augmentations: label = tf.expand_dims(label, axis=-1) print 'Adding singleton dimension to label.' if 'bsds_crop' in data_augmentations and im_size_check: assert len(image.get_shape()) == 3, '4D not implemented yet.' # intermediate_size = [171, 256, 3] # intermediate_size = [256, 384, 3] intermediate_size = [324, 484, 3] image = tf.image.resize_image_with_crop_or_pad( image, intermediate_size[0], intermediate_size[1]) label = tf.image.resize_image_with_crop_or_pad( label, intermediate_size[0], intermediate_size[1]) print 'Applying BSDS crop.' if 'uint8_rescale' in data_augmentations: image = tf.cast(image, tf.float32) / 255. print 'Applying uint8 rescale to the image.' if 'uint8_rescale_label' in data_augmentations: label = tf.cast(label, tf.float32) / 255. print 'Applying uint8 rescale to the label.' if 'uint8_rescale_-1_1' in data_augmentations: image = 2 * (tf.cast(image, tf.float32) / 255.) - 1 print 'Applying uint8 rescale.' if 'image_to_bgr' in data_augmentations: image = tf.stack([image[:, :, 2], image[:, :, 1], image[:, :, 0]], axis=-1) if 'pascal_normalize' in data_augmentations: image = image - [123.68, 116.78, 103.94] if 'random_contrast' in data_augmentations: assert len(image.get_shape()) == 3, '4D not implemented yet.' image = tf.image.random_contrast(image, lower=0.2, upper=1.8) print 'Applying random contrast.' if 'random_brightness' in data_augmentations: assert len(image.get_shape()) == 3, '4D not implemented yet.' image = tf.image.random_brightness(image, max_delta=63.) print 'Applying random brightness.' if 'grayscale' in data_augmentations and im_size_check: # image = tf.image.rgb_to_grayscale(image) image = tf.expand_dims(image[:, :, 0], axis=-1) # ABOVE INSTEAD? print 'Converting to grayscale.' # Affine augmentations if 'rotate' in data_augmentations and im_size_check: max_theta = 22. angle_rad = (max_theta / 180.) * math.pi angles = tf.random_uniform([], -angle_rad, angle_rad) transform = tf.contrib.image.angles_to_projective_transforms( angles, im_size[0], im_size[1]) image = tf.contrib.image.transform( image, tf.contrib.image.compose_transforms(transform), interpolation='BILINEAR') # or 'NEAREST' print 'Applying random rotate.' if 'rotate_image_label' in data_augmentations and im_size_check: max_theta = 30. angle_rad = (max_theta / 180.) * math.pi angles = tf.random_uniform([], -angle_rad, angle_rad) transform = tf.contrib.image.angles_to_projective_transforms( angles, im_size[0], im_size[1]) image = tf.contrib.image.transform( image, tf.contrib.image.compose_transforms(transform), interpolation='BILINEAR') # or 'NEAREST' label = tf.contrib.image.transform( label, tf.contrib.image.compose_transforms(transform), interpolation='BILINEAR') # or 'NEAREST' print 'Applying random rotate.' 
if 'random_scale_crop_image_label' in data_augmentations\ and im_size_check: scale_choices = tf.convert_to_tensor([1., 1.02, 1.04, 1.06, 1.08]) samples = tf.multinomial(tf.log([tf.ones_like(scale_choices)]), 1) image_shape = image.get_shape().as_list() scale = scale_choices[tf.cast(samples[0][0], tf.int32)] scale_tf = tf.cast( tf.round( np.asarray(model_input_image_size[:2]).astype(np.float32) * scale), tf.int32) combined = tf.concat([image, label], axis=-1) combo_shape = combined.get_shape().as_list() combined_crop = tf.random_crop( combined, tf.concat([scale_tf, [combo_shape[-1]]], 0)) combined_resize = tf.squeeze(tf.image.resize_bicubic( tf.expand_dims(combined_crop, axis=0), model_input_image_size[:2], align_corners=True), axis=0) image = combined_resize[:, :, :image_shape[-1]] label = combined_resize[:, :, image_shape[-1]:] image.set_shape(model_input_image_size) label.set_shape(model_input_image_size[:2] + [combo_shape[-1] - model_input_image_size[-1]]) if 'rc_res' in data_augmentations and im_size_check: image = random_crop(image, model_input_image_size) if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] ms = [x // 2 for x in model_input_image_size] image = resize_image_label(im=image, model_input_image_size=ms, f='bicubic') print 'Applying random crop and resize.' if 'cc_res' in data_augmentations and im_size_check: image = center_crop(image, model_input_image_size) if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] ms = [x // 2 for x in model_input_image_size] image = resize_image_label(im=image, model_input_image_size=ms, f='bicubic') print 'Applying center crop and resize.' if 'random_crop' in data_augmentations and im_size_check: image = random_crop(image, model_input_image_size) print 'Applying random crop.' if 'center_crop' in data_augmentations and im_size_check: image = center_crop(image, model_input_image_size) print 'Applying center crop.' if 'random_crop_image_label' in data_augmentations and im_size_check: assert len(image.get_shape()) == 3, '4D not implemented yet.' image, label = crop_image_label(image=image, label=label, size=model_input_image_size, crop='random') if 'center_crop_image_label' in data_augmentations and im_size_check: assert len(image.get_shape()) == 3, '4D not implemented yet.' image, label = crop_image_label(image=image, label=label, size=model_input_image_size, crop='center') if 'resize' in data_augmentations and im_size_check: if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] image = resize_image_label( im=image, model_input_image_size=model_input_image_size, f='bicubic') print 'Applying area resize.' if 'jk_resize' in data_augmentations and im_size_check: if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] image = tf.image.resize_image_with_crop_or_pad( image, model_input_image_size[0], model_input_image_size[1]) print 'Applying area resize.' if 'resize_and_crop' in data_augmentations and im_size_check: model_input_image_size_1 = np.asarray( model_input_image_size[:2]) + 28 image = resize_image_label( im=image, model_input_image_size=model_input_image_size_1, f='area') image = center_crop(image, model_input_image_size) print 'Applying area resize.' if 'resize_nn' in data_augmentations and im_size_check: assert len(image.get_shape()) == 3, '4D not implemented yet.' 
if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] image = resize_image_label( im=image, model_input_image_size=model_input_image_size, f='nearest') print 'Applying nearest resize.' if 'resize_image_label' in data_augmentations and im_size_check: assert len(image.get_shape()) == 3, '4D not implemented yet.' if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] image = resize_image_label( im=image, model_input_image_size=model_input_image_size, f='bicubic') label = resize_image_label( im=label, model_input_image_size=model_input_image_size, f='bicubic') print 'Applying bilinear resize.' elif 'resize_nn_image_label' in data_augmentations and im_size_check: assert len(image.get_shape()) == 3, '4D not implemented yet.' if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] image = resize_image_label( im=image, model_input_image_size=model_input_image_size, f='nearest') label = resize_image_label( im=label, model_input_image_size=model_input_image_size, f='nearest') print 'Applying nearest resize.' else: pass if 'left_right' in data_augmentations: image = image_flip(image, direction='left_right') print 'Applying random flip left-right.' if 'up_down' in data_augmentations: image = image_flip(image, direction='up_down') print 'Applying random flip up-down.' if 'lr_flip_image_label' in data_augmentations: assert len(image.get_shape()) == 3, '4D not implemented yet.' image, label = lr_flip_image_label(image, label) if 'ud_flip_image_label' in data_augmentations: assert len(image.get_shape()) == 3, '4D not implemented yet.' image, label = ud_flip_image_label(image, label) if 'gaussian_noise' in data_augmentations: im_shape = image.get_shape().as_list() assert len(im_shape) == 3, '4D not implemented yet.' sigma = 1. / 10. mu = 0. image = image + tf.random_normal(im_shape, mean=mu, stddev=sigma) print 'Applying gaussian noise.' if 'gaussian_noise_small' in data_augmentations: im_shape = image.get_shape().as_list() assert len(im_shape) == 3, '4D not implemented yet.' sigma = 1. / 20. mu = 0. image = image + tf.random_normal(im_shape, mean=mu, stddev=sigma) print 'Applying gaussian noise.' if 'calculate_rate_time_crop' in data_augmentations: im_shape = image.get_shape().as_list() minval = im_shape[0] // 3 time_crop = tf.random_uniform([], minval=minval, maxval=im_shape[0], dtype=tf.int32) # For now always pull from the beginning indices = tf.range(0, time_crop, dtype=tf.int32) selected_image = tf.gather(image, indices) padded_image = tf.zeros([im_shape[0] - time_crop] + im_shape[1:], dtype=selected_image.dtype) # Randomly concatenate pad to front or back image = tf.cond(pred=tf.greater( tf.random_uniform([], minval=0, maxval=1, dtype=tf.float32), 0.5), true_fn=lambda: tf.concat( [selected_image, padded_image], axis=0), false_fn=lambda: tf.concat( [padded_image, selected_image], axis=0)) image.set_shape(im_shape) # Convert label to rate label = label / im_shape[0] if 'calculate_rate' in data_augmentations: label = label / image.get_shape().as_list()[0] print 'Applying rate transformation.' if 'threshold' in data_augmentations: image = tf.cast(tf.greater(image, 0.1), tf.float32) print 'Applying threshold.' if 'nonzero_label' in data_augmentations: label = tf.cast(tf.greater(label, 0.2), tf.float32) print 'Applying threshold.' if 'zero_one' in data_augmentations: image = tf.minimum(tf.maximum(image, 0.), 1.) print 'Applying threshold.' 
if 'timestep_duplication' in data_augmentations: image = tf.stack([image for iid in range(7)]) print 'Applying timestep duplication.' if 'per_image_standardization' in data_augmentations: image = tf.image.per_image_standardization(image) print 'Applying per-image zscore.' if 'flip_polarity' in data_augmentations: image = tf.abs(image - 1.) if 'NCHW' in data_augmentations: image = tf.transpose(image, (2, 0, 1)) else: assert len(image.get_shape()) == 3, '4D not implemented yet.' image = tf.image.resize_image_with_crop_or_pad( image, model_input_image_size[0], model_input_image_size[1]) return image, label
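# Usage sketch (assumption): image_augmentations dispatches on string keys, so
# a pipeline is configured as a list of the branch names above. The size and
# key choices here are illustrative only.
data_augmentations = ['random_crop', 'left_right', 'uint8_rescale',
                      'per_image_standardization']
image, label = image_augmentations(image,
                                   data_augmentations,
                                   model_input_image_size=[224, 224, 3],
                                   label=label)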
def main(args): network = importlib.import_module(args.model_def, 'inference') subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir( log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir( model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) with open(os.path.join(model_dir, 'args.txt'), 'w') as f: for arg in vars(args): f.write(arg + ' ' + str(getattr(args, arg)) + '\n') # Store some git revision info in a text file in the log directory if not args.no_store_revision_info: src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) train_set = facenet.get_dataset(args.data_dir) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) if args.pretrained_model: print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model)) if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, actual_issame = lfw.get_paths( os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Placeholder for the learning rate learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 3), name='image_paths') labels_placeholder = tf.placeholder(tf.int64, shape=(None, 3), name='labels') input_queue = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(3, ), (3, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder]) nrof_preprocess_threads = 4 images_and_labels = [] for _ in range(nrof_preprocess_threads): filenames, label = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) #image = tf.image.decode_png(file_contents) image = tf.image.decode_jpeg(file_contents, channels=3) if args.random_rotate: image = tf.py_func(facenet.random_rotate_image, [image], tf.uint8) if args.random_crop: image = tf.random_crop( image, [args.image_size, args.image_size, 3]) else: image = tf.image.resize_image_with_crop_or_pad( image, args.image_size, args.image_size) if args.random_flip: image = tf.image.random_flip_left_right(image) #pylint: disable=no-member image.set_shape((args.image_size, args.image_size, 3)) image = tf.cast(image, tf.float32) #image = tf.image.per_image_standardization(image) distorted_image = tf.image.random_brightness(image, max_delta=32) image = tf.image.random_contrast(distorted_image, lower=0.5, upper=1.5) #images.append(tf.image.per_image_standardization(image)) images.append(image) images_and_labels.append([images, label]) image_batch, labels_batch = tf.train.batch_join( images_and_labels, batch_size=batch_size_placeholder, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) batch_norm_params = { # 
Decay for the moving averages 'decay': 0.995, # epsilon to prevent 0s in variance 'epsilon': 0.001, # force in-place updates of mean and variance estimates 'updates_collections': None, # Moving averages ends up in the trainable variables collection 'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES], # Only update statistics during training mode 'is_training': phase_train_placeholder } # Build the inference graph prelogits, _ = network.inference(image_batch, args.keep_probability, phase_train=phase_train_placeholder, weight_decay=args.weight_decay) #pre_embeddings = slim.fully_connected(prelogits, args.embedding_size, activation_fn=None, # weights_initializer=tf.truncated_normal_initializer(stddev=0.1), # weights_regularizer=slim.l2_regularizer(args.weight_decay), # normalizer_fn=slim.batch_norm, # normalizer_params=batch_norm_params, # scope='Bottleneck', reuse=False) pre_embeddings = _fully_connected(prelogits, args.embedding_size, name='Bottleneck') embeddings = tf.nn.l2_normalize(pre_embeddings, 1, 1e-10, name='embeddings') # Split embeddings into anchor, positive and negative and calculate triplet loss anchor, positive, negative = tf.unstack( tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1) triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha) learning_rate = tf.train.exponential_decay( learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # Calculate the total losses regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables()) # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Start running operations on the Graph. 
gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) # Initialize variables sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder: True}) sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder: True}) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if args.pretrained_model: print('Restoring pretrained model: %s' % args.pretrained_model) saver.restore(sess, os.path.expanduser(args.pretrained_model)) # Training and validation loop epoch = 0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size # Train for one epoch train(args, sess, train_set, epoch, image_paths_placeholder, labels_placeholder, labels_batch, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step, embeddings, total_loss, train_op, summary_op, summary_writer, args.learning_rate_schedule_file, args.embedding_size, anchor, positive, negative, triplet_loss) # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) evaluate_Training(sess, train_set, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, args.batch_size, log_dir, step, summary_writer, args.embedding_size) # Evaluate on LFW #if args.lfw_dir: # evaluate(sess, lfw_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, # batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, actual_issame, args.batch_size, # args.lfw_nrof_folds, log_dir, step, summary_writer, args.embedding_size) sess.close() return model_dir
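# Sketch (assumption): with staircase=True, tf.train.exponential_decay as
# configured above is equivalent to this closed form, dropping the rate by
# decay_factor once every decay_epochs * epoch_size global steps:
decay_steps = args.learning_rate_decay_epochs * args.epoch_size
lr = learning_rate_placeholder * tf.pow(
    args.learning_rate_decay_factor,
    tf.cast(global_step // decay_steps, tf.float32))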
def input_pipeline(mode, batch_size=BATCH_SIZE, num_epochs=NUM_EPOCHS): with tf.name_scope('img_pipeline'): if mode == 'train': filenames = [TRAIN_FILENAME] image_feature = 'train/image' label_feature = 'train/label' else: filenames = [VAL_FILENAME] image_feature = 'val/image' label_feature = 'val/label' feature = { image_feature: tf.FixedLenFeature([], tf.string), label_feature: tf.FixedLenFeature([], tf.int64) } # Create a list of filenames and pass it to a queue filename_queue = tf.train.string_input_producer(filenames, num_epochs=NUM_EPOCHS + 1) # Define a reader and read the next record options = tf_record.TFRecordOptions( compression_type=tf_record.TFRecordCompressionType.GZIP) reader = tf.TFRecordReader(options=options) _, serialized_example = reader.read(filename_queue) # Decode the record read by the reader features = tf.parse_single_example(serialized_example, features=feature) # Convert the image data from string back to the numbers image = tf.decode_raw(features[image_feature], tf.uint8) # Cast label data into one_hot encoded label = tf.cast(features[label_feature], tf.int32) label = tf.one_hot(label, NUM_CLASSES) # Reshape image data into the original shape image = tf.reshape(image, [256, 256, 3]) # Any preprocessing here ... # 1. random cropping 224x224 # 2. random LR-flipping image = tf.random_crop(image, [224, 224, 3]) image = tf.image.random_flip_left_right(image) #print_features(image) # Creates batches by randomly shuffling tensors # min_after_dequeue defines how big a buffer we will randomly sample # from -- bigger means better shuffling but slower start up and more # memory used. # capacity must be larger than min_after_dequeue and the amount larger # determines the maximum we will prefetch. Recommendation: # min_after_dequeue + (num_threads + a small safety margin) * batch_size min_after_dequeue = 100 num_threads = 6 capacity = min_after_dequeue + (num_threads + 2) * BATCH_SIZE images, labels = tf.train.shuffle_batch( [image, label], batch_size=BATCH_SIZE, capacity=capacity, num_threads=num_threads, min_after_dequeue=min_after_dequeue) #print("input_pipeline will return now.") return images, labels
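# Launch sketch: string_input_producer with num_epochs set keeps its epoch
# counter in a local variable, so the pipeline only runs once local variables
# are initialized and the queue runners are started.
images, labels = input_pipeline('train')
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    batch_imgs, batch_labels = sess.run([images, labels])
    coord.request_stop()
    coord.join(threads)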
def build_graph(train):
    with tf.device("/cpu:0"):
        with tf.name_scope('X'):
            x = tf.placeholder(tf.float32, [None, 284, 284, 1], name='x')
            mlp = x
        if train:
            with tf.name_scope('RANDOM-CROP-FLIP'):
                crop_x = tf.map_fn(lambda img: tf.random_crop(img, [272, 272, 1]), mlp)
                # crop_x = tf.map_fn(lambda img: tf.image.random_flip_up_down(img), crop_x)
                # random_flip_up_down would break the per-pose X-Y-MASK visualization;
                # flipping is instead applied once as augmentation during data loading,
                # which requires twice the memory.
                mlp = crop_x
        else:
            with tf.name_scope('CENTER-CROP'):
                crop_x = tf.map_fn(
                    lambda img: tf.image.resize_image_with_crop_or_pad(img, 272, 272), mlp)
                mlp = crop_x
        with tf.name_scope('CONV-1'):
            c1 = 16
            res = tf.pad(mlp, [[0, 0], [0, 0], [0, 0], [0, c1 - 1]])
            mlp = conv(mlp, weight([3, 3, 1, c1], name='w11')) + positive_bias([c1], name='b11')
            mlp = tf.nn.relu(mlp, name='conv1')
            mlp = conv(mlp, weight([3, 3, c1, c1], name='w12')) + positive_bias([c1], name='b12')
            mlp = tf.nn.relu(mlp, name='conv2')
            mlp = conv(mlp, weight([3, 3, c1, c1], name='w13')) + positive_bias([c1], name='b13')
            mlp = tf.nn.relu(mlp, name='conv3')
            # mlp = conv(mlp, weight([3, 3, c1, c1], name='w14')) + positive_bias([c1], name='b14')
            # mlp = tf.nn.relu(mlp, name='conv4')
            mlp = tf.add(mlp, res, name='res')
            mlp = pool(mlp, name='pool')
        with tf.name_scope('CONV-2'):
            c2 = 32
            res = tf.pad(mlp, [[0, 0], [0, 0], [0, 0], [0, c2 - c1]])
            mlp = conv(mlp, weight([3, 3, c1, c2], name='w21')) + positive_bias([c2], name='b21')
            mlp = tf.nn.relu(mlp, name='conv1')
            mlp = conv(mlp, weight([3, 3, c2, c2], name='w22')) + positive_bias([c2], name='b22')
            mlp = tf.nn.relu(mlp, name='conv2')
            mlp = conv(mlp, weight([3, 3, c2, c2], name='w23')) + positive_bias([c2], name='b23')
            mlp = tf.nn.relu(mlp, name='conv3')
            # mlp = conv(mlp, weight([3, 3, c2, c2], name='w24')) + positive_bias([c2], name='b24')
            # mlp = tf.nn.relu(mlp, name='conv4')
            mlp = tf.add(mlp, res, name='res')
            mlp = pool(mlp, name='pool')
        with tf.name_scope('CONV-3'):
            c3 = 64
            res = tf.pad(mlp, [[0, 0], [0, 0], [0, 0], [0, c3 - c2]])
            mlp = conv(mlp, weight([3, 3, c2, c3], name='w31')) + positive_bias([c3], name='b31')
            mlp = tf.nn.relu(mlp, name='conv1')
            mlp = conv(mlp, weight([3, 3, c3, c3], name='w32')) + positive_bias([c3], name='b32')
            mlp = tf.nn.relu(mlp, name='conv2')
            # mlp = conv(mlp, weight([3, 3, c3, c3], name='w33')) + positive_bias([c3], name='b33')
            # mlp = tf.nn.relu(mlp, name='conv3')
            # mlp = conv(mlp, weight([3, 3, c3, c3], name='w34')) + positive_bias([c3], name='b34')
            # mlp = tf.nn.relu(mlp, name='conv4')
            mlp = tf.add(mlp, res, name='res')
            mlp = pool(mlp, name='pool')
        with tf.name_scope('CONV-4'):
            c4 = 128
            res = tf.pad(mlp, [[0, 0], [0, 0], [0, 0], [0, c4 - c3]])
            mlp = conv(mlp, weight([3, 3, c3, c4], name='w41')) + positive_bias([c4], name='b41')
            mlp = tf.nn.relu(mlp, name='conv1')
            mlp = conv(mlp, weight([3, 3, c4, c4], name='w42')) + positive_bias([c4], name='b42')
            mlp = tf.nn.relu(mlp, name='conv2')
            # mlp = conv(mlp, weight([3, 3, c4, c4], name='w43')) + positive_bias([c4], name='b43')
            # mlp = tf.nn.relu(mlp, name='conv3')
            # mlp = conv(mlp, weight([3, 3, c4, c4], name='w44')) + positive_bias([c4], name='b44')
            # mlp = tf.nn.relu(mlp, name='conv4')
            mlp = tf.add(mlp, res, name='res')
            mlp = pool(mlp, name='pool')
        '''
        with tf.name_scope('MASK'):
            ca = 66
            mask = tf.reshape(mlp, [-1, c4])
            mask = tf.nn.xw_plus_b(mask, weight([c4, ca], 'w5'), zero_bias([ca], 'b5'))
            mask = tf.tanh(mask)
            mask = tf.nn.xw_plus_b(mask, weight([ca, 1], 'w6'), zero_bias([1], 'b6'))
            mask = tf.reshape(mask, [-1, 17*17])
            mask = tf.nn.softmax(mask)
            mask = tf.reshape(mask, [-1, 17, 17, 1])
            mlp = tf.multiply(mlp, mask)
            mlp = tf.reduce_sum(mlp, [1, 2], True)
        '''
        if train:
            with tf.name_scope('DROPOUT'):
                # dropout by feature map
                mlp = tf.nn.dropout(mlp, 0.5,
                                    noise_shape=tf.shape(mlp) * [1, 0, 0, 1] + [0, 1, 1, 0],
                                    name='dropout')
        with tf.name_scope('FLAT'):
            mlp = tf.reshape(mlp, [-1, 17*17*c4], name='flat')
        '''
        if train:
            with tf.name_scope('DROPOUT'):
                mlp = tf.nn.dropout(mlp, 0.5, name='dropout')
        '''
        # 1 FC layer
        with tf.name_scope('FC'):
            logit_exp = tf.nn.xw_plus_b(mlp, weight([17*17*c4, 7], name='w7_exp'),
                                        zero_bias([7], name='b7_exp'), name='logit_exp')
            logit_pse = tf.nn.xw_plus_b(mlp, weight([17*17*c4, 5], name='w7_pse'),
                                        zero_bias([5], name='b7_pse'), name='logit_pse')
            del mlp
        with tf.name_scope('Y'):
            y_exp = tf.placeholder(tf.float32, [None, 7], name='y_exp')
            y_pse = tf.placeholder(tf.float32, [None, 5], name='y_pse')
        with tf.name_scope('SOFTMAX-WITH-LOSS'):
            loss_exp = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=y_exp, logits=logit_exp),
                name='loss_exp')
            loss_pse = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=y_pse, logits=logit_pse),
                name='loss_pse')
            lambda_ = 0
            loss = loss_exp + lambda_ * loss_pse
        with tf.name_scope('SOFTMAX'):
            prob_exp = tf.nn.softmax(logit_exp, name='prob_exp')
            prob_pse = tf.nn.softmax(logit_pse, name='prob_pse')
        with tf.name_scope('ACC'):
            acc_exp = tf.equal(tf.argmax(prob_exp, 1), tf.argmax(y_exp, 1), name='correct_exp')
            acc_exp = tf.reduce_mean(tf.cast(acc_exp, tf.float32), name='acc_exp')
            acc_pse = tf.equal(tf.argmax(prob_pse, 1), tf.argmax(y_pse, 1), name='correct_pse')
            acc_pse = tf.reduce_mean(tf.cast(acc_pse, tf.float32), name='acc_pse')
        if train:
            with tf.name_scope('OPT'):
                opt = tf.train.AdamOptimizer(name='opt')
                train_op = opt.minimize(loss, name='train_op')
        else:
            train_op = None
        # Create summaries
        '''
        with tf.name_scope('SUM'):
            # mask: only rescaled, for display convenience
            # mask_m = tf.reduce_min(mask, [1, 2], True)
            mask_M = tf.reduce_max(mask, [1, 2], True)
            mask_visual = mask / mask_M * 255.0
            # mask_visual = (mask-mask_m) / (mask_M-mask_m) * 255.0
            # prj_mask: only rescaled, for display convenience
            smooth = tf.ones([3, 3, 1, 1]) / 9.0
            prj_mask = mask
            prj_mask = tf.nn.conv2d(prj_mask, smooth, strides=[1, 1, 1, 1], padding='SAME')
            prj_mask = tf.nn.conv2d(prj_mask, smooth, strides=[1, 1, 1, 1], padding='SAME')
            prj_mask = tf.image.resize_nearest_neighbor(prj_mask, tf.shape(prj_mask)[1:3]*2)
            prj_mask = tf.nn.conv2d(prj_mask, smooth, strides=[1, 1, 1, 1], padding='SAME')
            prj_mask = tf.nn.conv2d(prj_mask, smooth, strides=[1, 1, 1, 1], padding='SAME')
            prj_mask = tf.image.resize_nearest_neighbor(prj_mask, tf.shape(prj_mask)[1:3]*2)
            prj_mask = tf.nn.conv2d(prj_mask, smooth, strides=[1, 1, 1, 1], padding='SAME')
            prj_mask = tf.nn.conv2d(prj_mask, smooth, strides=[1, 1, 1, 1], padding='SAME')
            prj_mask = tf.nn.conv2d(prj_mask, smooth, strides=[1, 1, 1, 1], padding='SAME')
            prj_mask = tf.image.resize_nearest_neighbor(prj_mask, tf.shape(prj_mask)[1:3]*2)
            prj_mask = tf.nn.conv2d(prj_mask, smooth, strides=[1, 1, 1, 1], padding='SAME')
            prj_mask = tf.nn.conv2d(prj_mask, smooth, strides=[1, 1, 1, 1], padding='SAME')
            prj_mask = tf.nn.conv2d(prj_mask, smooth, strides=[1, 1, 1, 1], padding='SAME')
            prj_mask = tf.image.resize_nearest_neighbor(prj_mask, tf.shape(prj_mask)[1:3]*2)
            # prj_mask_m = tf.reduce_min(prj_mask, [1, 2], True)
            prj_mask_M = tf.reduce_max(prj_mask, [1, 2], True)
            prj_mask_visual = prj_mask / prj_mask_M * 255.0
            # prj_mask_visual = (prj_mask-prj_mask_m) / (prj_mask_M-prj_mask_m) * 255.0
            # mask_crop_x: dynamic range rescaled for display convenience
            mask_crop_x = prj_mask * crop_x
            mask_crop_x_m = tf.reduce_min(mask_crop_x, [1, 2], True)
            mask_crop_x_M = tf.reduce_max(mask_crop_x, [1, 2], True)
            mask_crop_x_visual = (mask_crop_x - mask_crop_x_m) / (mask_crop_x_M - mask_crop_x_m) * 255.0
            # y_exp
            y_exp_visual = tf.reshape(y_exp, [-1, 1, 7, 1]) * 255.0
            # y_pse
            y_pse_visual = tf.reshape(y_pse, [-1, 1, 5, 1]) * 255.0
            # prob
            prob_visual = tf.reshape(prob_exp, [-1, 1, 7, 1]) * 255.0
        '''
        if train:
            summary = tf.summary.merge([
                # tf.summary.image('train mask', mask_visual),                # 1 17 17 1
                # tf.summary.image('train prj_mask', prj_mask_visual),        # 1 272 272 1
                # tf.summary.image('train crop_x', crop_x),                   # 1 272 272 1
                # tf.summary.image('train mask_crop_x', mask_crop_x_visual),  # 1 272 272 1
                # tf.summary.image('train y_exp', y_exp_visual),              # 1 1 7 1
                # tf.summary.image('train y_pse', y_pse_visual),              # 1 1 5 1
                # tf.summary.image('train prob', prob_visual),                # 1 1 7 1
                tf.summary.scalar('train loss', loss),
                tf.summary.scalar('train loss_exp', loss_exp),
                tf.summary.scalar('train loss_pse', loss_pse),
                tf.summary.scalar('train acc_exp', acc_exp),
                tf.summary.scalar('train acc_pse', acc_pse),
            ])
        else:
            summary = tf.summary.merge([
                # tf.summary.image('val mask', mask_visual),                  # 1 17 17 1
                # tf.summary.image('val prj_mask', prj_mask_visual),          # 1 272 272 1
                # tf.summary.image('val crop_x', crop_x),                     # 1 272 272 1
                # tf.summary.image('val mask_crop_x', mask_crop_x_visual),    # 1 272 272 1
                # tf.summary.image('val y_exp', y_exp_visual),                # 1 1 7 1
                # tf.summary.image('val y_pse', y_pse_visual),                # 1 1 5 1
                # tf.summary.image('val prob', prob_visual),                  # 1 1 7 1
                tf.summary.scalar('val loss', loss),
                tf.summary.scalar('val loss_exp', loss_exp),
                tf.summary.scalar('val loss_pse', loss_pse),
                tf.summary.scalar('val acc_exp', acc_exp),
                tf.summary.scalar('val acc_pse', acc_pse),
            ])
        return [x, y_exp, y_pse, loss, acc_exp, acc_pse, train_op, summary, crop_x]
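# Hedged usage sketch for build_graph above: feed 284x284 grayscale batches with one-hot
# expression (7-way) and pose (5-way) labels. batch_x / batch_y_exp / batch_y_pse are
# hypothetical numpy arrays, not defined anywhere in this collection.
x, y_exp, y_pse, loss, acc_exp, acc_pse, train_op, summary, crop_x = build_graph(train=True)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, loss_val = sess.run([train_op, loss],
                           feed_dict={x: batch_x, y_exp: batch_y_exp, y_pse: batch_y_pse})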
tf.summary.scalar('loss', loss)

# The SGD optimizer with momentum
learning_rate = tf.placeholder(tf.float32, [])
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.MomentumOptimizer(learning_rate,
                                            momentum=0.9).minimize(loss)

correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_actual, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) * 100
tf.summary.scalar('acc', accuracy)

img = tf.placeholder(tf.float32, [32, 32, 3])
norm_image = tf.image.per_image_standardization(img)
img_rand_crop = tf.random_crop(img, [28, 28, 3])

sess = tf.InteractiveSession()
tensorboard_data = tf.summary.merge_all()
current_time = str(time.time())
train_writer = tf.summary.FileWriter('../Tensorboard/inception/train/' + current_time,
                                     sess.graph)
test_writer = tf.summary.FileWriter('../Tensorboard/inception/test/' + current_time,
                                    sess.graph)
tf.global_variables_initializer().run()

cifar10_train_images = []
cifar10_train_labels = []
print "Loading training images..."
for i in range(1, 6):
    # Open in binary mode: the CIFAR-10 batch files are pickled binary data.
    train_file = open('../../cifar-10-batches-py/data_batch_' + str(i), 'rb')
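    # Hedged sketch of what presumably follows inside this loop (the snippet is cut off
    # here): each CIFAR-10 python batch unpickles to a dict with 'data'
    # (10000x3072 uint8, channel-major) and 'labels'; reshape to HWC before collecting.
    # Python 2, matching the print statement above; assumes `import cPickle` at the top.
    batch = cPickle.load(train_file)
    train_file.close()
    images = batch['data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
    cifar10_train_images.append(images)
    cifar10_train_labels.extend(batch['labels'])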
def cnn_model_fn(features, labels, mode, num_classes=20):
    """Model function for CNN."""
    # Input Layer
    input_layer = tf.reshape(features["x"], [-1, 256, 256, 3])

    # Data Augmentation
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Train: random crops and left-right flips
        tmp = tf.map_fn(lambda img: tf.image.random_flip_left_right(img), input_layer)
        tmp = tf.map_fn(lambda img: tf.random_crop(img, size=[224, 224, 3]), tmp)
        augment_input = tf.map_fn(lambda img: tf.image.resize_images(img, size=[256, 256]),
                                  tmp)
        # add to tensorboard
        tf.summary.image('training_images', augment_input)
    else:
        # Eval/Predict: center crop (the branch must also cover EVAL mode,
        # otherwise augment_input is undefined there)
        tmp = tf.map_fn(lambda img: tf.image.central_crop(img, central_fraction=0.8),
                        input_layer)
        augment_input = tf.map_fn(lambda img: tf.image.resize_images(img, size=[256, 256]),
                                  tmp)

    # add Network Graph to tensorboard
    # convolution layer #1: conv3-64
    with tf.variable_scope('conv1'):
        conv1 = tf.layers.conv2d(inputs=augment_input, kernel_size=[3, 3], strides=1,
                                 filters=64, padding="same", activation=tf.nn.relu,
                                 name="conv1_1")
        # convolution layer #2: conv3-64
        conv2 = tf.layers.conv2d(inputs=conv1, kernel_size=[3, 3], strides=1, filters=64,
                                 padding="same", activation=tf.nn.relu, name="conv1_2")
    with tf.variable_scope('pool1'):
        # pooling layer #1
        pool1 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2,
                                        name="pool1")
    # convolution layer #3: conv3-128
    with tf.variable_scope('conv2'):
        conv3 = tf.layers.conv2d(inputs=pool1, kernel_size=[3, 3], strides=1, filters=128,
                                 padding="same", activation=tf.nn.relu, name="conv2_1")
        # convolution layer #4: conv3-128
        conv4 = tf.layers.conv2d(inputs=conv3, kernel_size=[3, 3], strides=1, filters=128,
                                 padding="same", activation=tf.nn.relu, name="conv2_2")
    with tf.variable_scope('pool2'):
        # pooling layer #2
        pool2 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2,
                                        name="pool2")
    # convolution layer #5: conv3-256
    with tf.variable_scope('conv3'):
        conv5 = tf.layers.conv2d(inputs=pool2, kernel_size=[3, 3], strides=1, filters=256,
                                 padding="same", activation=tf.nn.relu, name="conv3_1")
        # convolution layer #6: conv3-256
        conv6 = tf.layers.conv2d(inputs=conv5, kernel_size=[3, 3], strides=1, filters=256,
                                 padding="same", activation=tf.nn.relu, name="conv3_2")
        # convolution layer #7: conv3-256
        conv7 = tf.layers.conv2d(inputs=conv6, kernel_size=[3, 3], strides=1, filters=256,
                                 padding="same", activation=tf.nn.relu, name="conv3_3")
    with tf.variable_scope('pool3'):
        # pooling layer #3
        pool3 = tf.layers.max_pooling2d(inputs=conv7, pool_size=[2, 2], strides=2,
                                        name="pool3")
    # convolution layer #8: conv3-512
    with tf.variable_scope('conv4'):
        conv8 = tf.layers.conv2d(inputs=pool3, kernel_size=[3, 3], strides=1, filters=512,
                                 padding="same", activation=tf.nn.relu, name="conv4_1")
        # convolution layer #9: conv3-512
        conv9 = tf.layers.conv2d(inputs=conv8, kernel_size=[3, 3], strides=1, filters=512,
                                 padding="same", activation=tf.nn.relu, name="conv4_2")
        # convolution layer #10: conv3-512
        conv10 = tf.layers.conv2d(inputs=conv9, kernel_size=[3, 3], strides=1, filters=512,
                                  padding="same", activation=tf.nn.relu, name="conv4_3")
    with tf.variable_scope('pool4'):
        # pooling layer #4
        pool4 = tf.layers.max_pooling2d(inputs=conv10, pool_size=[2, 2], strides=2,
                                        name="pool4")
    # convolution layer #11: conv3-512
    with tf.variable_scope('conv5'):
        conv11 = tf.layers.conv2d(inputs=pool4, kernel_size=[3, 3], strides=1, filters=512,
                                  padding="same", activation=tf.nn.relu, name="conv5_1")
        # convolution layer #12: conv3-512
        conv12 = tf.layers.conv2d(inputs=conv11, kernel_size=[3, 3], strides=1, filters=512,
                                  padding="same", activation=tf.nn.relu, name="conv5_2")
        # convolution layer #13: conv3-512
        conv13 = tf.layers.conv2d(inputs=conv12, kernel_size=[3, 3], strides=1, filters=512,
                                  padding="same", activation=tf.nn.relu, name="conv5_3")
    with tf.variable_scope('pool5'):
        # pooling layer #5
        pool5 = tf.layers.max_pooling2d(inputs=conv13, pool_size=[2, 2], strides=2,
                                        name="pool5")

    # flatten
    pool5_flat = tf.reshape(pool5, [-1, 8 * 8 * 512])
    # fc(4096)
    dense1 = tf.layers.dense(inputs=pool5_flat, units=4096, activation=tf.nn.relu,
                             name="fc6")
    # dropout
    dropout1 = tf.layers.dropout(inputs=dense1, rate=0.5,
                                 training=mode == tf.estimator.ModeKeys.TRAIN)
    # fc(4096)
    dense2 = tf.layers.dense(inputs=dropout1, units=4096, activation=tf.nn.relu,
                             name="fc7")
    # dropout
    dropout2 = tf.layers.dropout(inputs=dense2, rate=0.5,
                                 training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits Layer
    logits = tf.layers.dense(inputs=dropout2, units=num_classes, name="fc8")

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        # "classes": tf.argmax(input=logits, axis=1),
        # Add `sigmoid_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.sigmoid(logits, name="sigmoid_tensor")
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.identity(tf.losses.sigmoid_cross_entropy(multi_class_labels=labels,
                                                       logits=logits), name='loss')

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        tf.summary.scalar("training_loss", loss)
        decayed_learning_rate = tf.train.exponential_decay(
            0.001,                                   # base learning rate
            global_step=tf.train.get_global_step(),
            decay_steps=100,                         # decay step
            decay_rate=0.5,                          # decay rate
            staircase=True)
        # add lr to tensorboard
        tf.summary.scalar('learning_rate', decayed_learning_rate)
        # SGD + momentum optimizer
        optimizer = tf.train.MomentumOptimizer(learning_rate=decayed_learning_rate,
                                               momentum=0.9)
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        # add histogram of gradients to tensorboard
        train_summary = []
        grads_and_vars = optimizer.compute_gradients(loss)
        # tf.summary.histogram("grad_histogram", grads_and_vars)
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram(
                    "{}/grad_histogram".format(v.name[:-2]), g)
                # sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                #                                      tf.nn.zero_fraction(g))
                train_summary.append(grad_hist_summary)
                # train_summary.append(sparsity_summary)
        tf.summary.merge(train_summary)
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    tf.summary.scalar('test_loss', loss)
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(labels=labels,
                                        predictions=predictions["probabilities"])
    }
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                      eval_metric_ops=eval_metric_ops)
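# Hedged usage sketch for the VGG-16-style cnn_model_fn above: wrap it in a
# tf.estimator.Estimator and train from in-memory arrays. train_images / train_labels
# and the model_dir path are hypothetical; the 'x' key must match the features key
# used inside the model function.
classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, model_dir='/tmp/pascal_vgg')
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': train_images}, y=train_labels, batch_size=10, num_epochs=None, shuffle=True)
classifier.train(input_fn=train_input_fn, steps=1000)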
def distorted_inputs(data_dir, batch_size, noise_ratio=0):
    """Construct distorted input for CIFAR training using the Reader ops.

    Args:
        data_dir: Path to the CIFAR-10 data directory.
        batch_size: Number of images per batch.

    Returns:
        images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
        labels: Labels. 1D tensor of [batch_size] size.
    """
    filenames = [
        os.path.join(data_dir,
                     'data_batch_%d_noise_%.2f_with_index.bin' % (v, noise_ratio))
        for v in xrange(1, 6)
    ]
    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.string_input_producer(filenames)

    with tf.name_scope('data_augmentation'):
        # Read examples from files in the filename queue.
        read_input = read_cifar10(filename_queue)
        reshaped_image = tf.cast(read_input.uint8image, tf.float32)

        height = IMAGE_SIZE
        width = IMAGE_SIZE

        # Image processing for training the network. Note the many random
        # distortions applied to the image.

        # Pad, then randomly crop a [height, width] section of the image.
        distorted_image = tf.image.resize_image_with_crop_or_pad(
            reshaped_image, height + 4, width + 4)
        distorted_image = tf.random_crop(distorted_image, [height, width, 3])

        # Randomly flip the image horizontally.
        distorted_image = tf.image.random_flip_left_right(distorted_image)

        # Subtract off the mean and divide by the variance of the pixels.
        float_image = tf.image.per_image_standardization(distorted_image)
        # float_image = (distorted_image/255
        #                - tf.reshape(tf.constant([0.507, 0.487, 0.441]), [1, 1, 3])) \
        #               / tf.reshape(tf.constant([0.267, 0.256, 0.276]), [1, 1, 3])

        # Set the shapes of tensors.
        float_image.set_shape([height, width, 3])
        read_input.index.set_shape([1])
        read_input.label.set_shape([1])

        # Ensure that the random shuffling has good mixing properties.
        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                                 min_fraction_of_examples_in_queue)
        print('Filling queue with %d CIFAR images before starting to train. '
              'This will take a few minutes.' % min_queue_examples)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(read_input.index, float_image,
                                           read_input.label, min_queue_examples,
                                           batch_size, shuffle=True)
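# _generate_image_and_label_batch is not shown in this collection; a hedged
# reconstruction in the spirit of the CIFAR-10 tutorial helper, extended for the
# extra `index` tensor this variant passes through:
def _generate_image_and_label_batch(index, image, label, min_queue_examples,
                                    batch_size, shuffle):
    num_preprocess_threads = 16
    if shuffle:
        indices, images, label_batch = tf.train.shuffle_batch(
            [index, image, label], batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_size,
            min_after_dequeue=min_queue_examples)
    else:
        indices, images, label_batch = tf.train.batch(
            [index, image, label], batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_size)
    # Display the training images in the visualizer.
    tf.summary.image('images', images)
    return indices, images, tf.reshape(label_batch, [batch_size])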
def cnn_model_fn(features, labels, mode, num_classes=20):
    if mode == tf.estimator.ModeKeys.PREDICT:
        features["x"] = tf.image.resize_image_with_crop_or_pad(features["x"], 224, 224)
    else:
        augmentedData = tf.map_fn(lambda img: tf.image.random_flip_left_right(img),
                                  features["x"])
        augmentedData = tf.map_fn(lambda img: tf.random_crop(img, [224, 224, 3]),
                                  augmentedData)
        features["x"] = augmentedData
        # features_flipped = tf.image.random_flip_left_right(features["x"])
        # features["x"].append(features_flipped)

    input_layer = tf.reshape(features["x"], [-1, 224, 224, 3])

    # Convolutional Layer #1
    conv1 = tf.layers.conv2d(inputs=input_layer, filters=96, strides=4,
                             kernel_size=[11, 11],
                             kernel_initializer=tf.initializers.random_normal(0, 0.01),
                             bias_initializer=tf.initializers.zeros(),
                             padding="valid", activation=tf.nn.relu)
    # Pooling Layer #1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[3, 3], strides=2)

    # Convolutional Layer #2 and Pooling Layer #2
    conv2 = tf.layers.conv2d(inputs=pool1, filters=256, kernel_size=[5, 5], strides=1,
                             kernel_initializer=tf.initializers.random_normal(0, 0.01),
                             bias_initializer=tf.initializers.zeros(),
                             padding="same", activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[3, 3], strides=2)

    # Convolutional Layers #3, #4, #5 and Pooling Layer #3
    conv3 = tf.layers.conv2d(inputs=pool2, filters=384, kernel_size=[3, 3], strides=1,
                             kernel_initializer=tf.initializers.random_normal(0, 0.01),
                             bias_initializer=tf.initializers.zeros(),
                             padding="same", activation=tf.nn.relu)
    conv4 = tf.layers.conv2d(inputs=conv3, filters=384, kernel_size=[3, 3], strides=1,
                             kernel_initializer=tf.initializers.random_normal(0, 0.01),
                             bias_initializer=tf.initializers.zeros(),
                             padding="same", activation=None)
    conv5 = tf.layers.conv2d(inputs=conv4, filters=256, kernel_size=[3, 3], strides=1,
                             kernel_initializer=tf.initializers.random_normal(0, 0.01),
                             bias_initializer=tf.initializers.zeros(),
                             padding="same", activation=None)
    pool3 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[3, 3], strides=2)

    pool3_flat = tf.reshape(pool3, [-1, 5 * 5 * 256])
    dense1 = tf.layers.dense(inputs=pool3_flat, units=4096, activation=tf.nn.relu)
    dropout1 = tf.layers.dropout(inputs=dense1, rate=0.5,
                                 training=mode == tf.estimator.ModeKeys.TRAIN)
    dense2 = tf.layers.dense(inputs=dropout1, units=4096, activation=tf.nn.relu)
    dropout2 = tf.layers.dropout(inputs=dense2, rate=0.5,
                                 training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits Layer
    logits = tf.layers.dense(inputs=dropout2, units=num_classes)

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `sigmoid_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.sigmoid(logits, name="sigmoid_tensor")
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.identity(tf.losses.sigmoid_cross_entropy(multi_class_labels=labels,
                                                       logits=logits), name='loss')

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        # summary_hook = tf.train.SummarySaverHook(
        #     400, output_dir="/tmp/pascal_model_alexnet",
        #     summary_op=tf.summary.merge_all())
        starter_learning_rate = 0.001
        learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                   tf.train.get_global_step(),
                                                   10000, 0.5, staircase=True)
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(labels=labels,
                                        predictions=predictions["classes"])
    }
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                      eval_metric_ops=eval_metric_ops)
def distorted_inputs(data_dir, batch_size):
    """Construct distorted input for CIFAR training using the Reader ops.

    Args:
        data_dir: Path to the CIFAR-10 data directory.
        batch_size: Number of images per batch.

    Returns:
        images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
        labels: Labels. 1D tensor of [batch_size] size.
    """
    filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i) for i in xrange(1, 6)]
    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.string_input_producer(filenames)

    with tf.name_scope('data_augmentation'):
        # Read examples from files in the filename queue.
        read_input = read_cifar10(filename_queue)
        reshaped_image = tf.cast(read_input.uint8image, tf.float32)

        height = IMAGE_SIZE
        width = IMAGE_SIZE

        # Image processing for training the network. Note the many random
        # distortions applied to the image.

        # Randomly crop a [height, width] section of the image.
        distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

        # Randomly flip the image horizontally.
        distorted_image = tf.image.random_flip_left_right(distorted_image)

        # Because these operations are not commutative, consider randomizing
        # the order of their operation.
        # NOTE: since per_image_standardization zeros the mean and makes
        # the stddev unit, this likely has no effect; see tensorflow#1458.
        distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
        distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)

        # Subtract off the mean and divide by the variance of the pixels.
        float_image = tf.image.per_image_standardization(distorted_image)

        # Set the shapes of tensors.
        float_image.set_shape([height, width, 3])
        read_input.label.set_shape([1])

        # Ensure that the random shuffling has good mixing properties.
        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                                 min_fraction_of_examples_in_queue)
        print('Filling queue with %d CIFAR images before starting to train. '
              'This will take a few minutes.' % min_queue_examples)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples, batch_size,
                                           shuffle=True)
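# The NOTE above (tensorflow#1458) can be checked directly: per_image_standardization
# subtracts the mean and divides by max(stddev, 1/sqrt(num_elements)), so a constant
# brightness offset cancels out exactly. A minimal numpy check of the same arithmetic:
import numpy as np

def standardize(x):
    return (x - x.mean()) / max(x.std(), 1.0 / np.sqrt(x.size))

demo_img = np.random.rand(24, 24, 3).astype(np.float32)
np.testing.assert_allclose(standardize(demo_img), standardize(demo_img + 0.3), atol=1e-5)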
def distorted_inputs(data_dir, batch_size):
    """Construct distorted input for training using the Reader ops.

    Args:
        data_dir: Path to the dogcat data directory.
        batch_size: Number of images per batch.

    Returns:
        images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
        labels: Labels. 1D tensor of [batch_size] size.
    """
    onlyfiles = [f for f in listdir(data_dir)
                 if (isfile(join(data_dir, f)) and f.endswith('.jpg'))]
    filepaths = [join(data_dir, f) for f in onlyfiles]
    labels = [label_by_name(f) for f in onlyfiles]

    # Create a queue that produces the (filename, label) slices to read.
    # filenames_queue = tf.train.string_input_producer(filepaths)
    # labels_queue = tf.train.string_input_producer(labels)
    images_tensor = tf.convert_to_tensor(filepaths, dtype=tf.string)
    labels_tensor = tf.convert_to_tensor(labels, dtype=tf.int32)
    input_queue = tf.train.slice_input_producer([images_tensor, labels_tensor],
                                                shuffle=False)

    with tf.name_scope('data_augmentation'):
        # Read examples from files in the filename queue.
        read_input = read_data(input_queue)
        reshaped_image = tf.cast(read_input.uint8image, tf.float32)

        height = IMAGE_SIZE
        width = IMAGE_SIZE

        # Image processing for training the network. Note the many random
        # distortions applied to the image.

        # Randomly crop a [height, width] section of the image.
        distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

        # Randomly flip the image horizontally.
        distorted_image = tf.image.random_flip_left_right(distorted_image)

        # Because these operations are not commutative, consider randomizing
        # the order of their operation.
        # NOTE: since per_image_standardization zeros the mean and makes
        # the stddev unit, this likely has no effect; see tensorflow#1458.
        distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
        distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)

        # Subtract off the mean and divide by the variance of the pixels.
        float_image = tf.image.per_image_standardization(distorted_image)

        # Set the shapes of tensors.
        float_image.set_shape([height, width, 3])
        read_input.label.set_shape([1])

        # Ensure that the random shuffling has good mixing properties.
        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                                 min_fraction_of_examples_in_queue)
        print('Filling queue with %d cat and dog images before starting to train. '
              'This will take a few minutes...' % min_queue_examples)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples, batch_size,
                                           shuffle=True)
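# label_by_name is not shown in this collection; a hypothetical implementation,
# assuming Kaggle-style dogs-vs-cats filenames such as 'cat.123.jpg' / 'dog.456.jpg':
def label_by_name(filename):
    # 0 = cat, 1 = dog (assumed convention)
    return 0 if filename.startswith('cat') else 1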
def _random_crop_image(img):
    # `rows` and `cols` are module-level globals giving the target crop size.
    return tf.random_crop(img, [rows, cols, 3])
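# Hedged usage sketch: applied per image over a batch via tf.map_fn, as elsewhere in
# this collection; the 256x256 placeholder shape is an assumption for illustration.
demo_batch = tf.placeholder(tf.float32, [None, 256, 256, 3])
cropped_batch = tf.map_fn(_random_crop_image, demo_batch)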
def build_model(self):
    self.subject_num = SUBJECT_NUM_VGG2
    self.input_labels = tf.placeholder(tf.int64, [self.batch_size],
                                       name='positive_labels')
    self.input_filenames = [tf.placeholder(dtype=tf.string)
                            for _ in range(self.batch_size)]
    self.sample_images = tf.placeholder(
        tf.float32,
        [self.sample_size] + [self.output_size, self.output_size, self.c_dim],
        name='sample_images')
    self.sample_input_images = tf.placeholder(
        tf.float32, [1, self.output_size, self.output_size, self.c_dim],
        name='sample_input_images')

    # Networks
    self.images = []
    for i in range(self.batch_size):
        file_contents = tf.read_file(self.input_filenames[i])
        image = tf.image.decode_jpeg(file_contents, channels=3)
        image = tf.image.resize_images(
            image, [self.before_crop_size, self.before_crop_size])
        # if self.random_rotate:
        #     image = tf.py_func(random_rotate_image, [image], tf.uint8)
        if self.padding > 0:
            image = tf.random_crop(image, [self.output_size, self.output_size, 3])
        # if args.random_crop:
        #     image = tf.random_crop(image, [args.image_size, args.image_size, 3])
        # else:
        #     image = tf.image.resize_image_with_crop_or_pad(image, args.image_size,
        #                                                    args.image_size)
        if self.random_flip:
            image = tf.image.random_flip_left_right(image)
        self.images.append(
            tf.subtract(tf.div(tf.cast(image, dtype=tf.float32), 127.5), 1.0))
    self.images = tf.stack(self.images)

    opt = tf.train.AdamOptimizer(self.learning_rate, beta1=self.beta1)
    self.images_splits = tf.split(self.images, self.num_gpus)
    self.input_labels_splits = tf.split(self.input_labels, self.num_gpus)
    self.d_loss_real_id = []
    self.d_acc = []
    self.d_loss_real_center = []
    tower_grads = []
    for gpu_id in range(self.num_gpus):
        with tf.device(tf.DeviceSpec(device_type="GPU", device_index=gpu_id)):
            # Reuse variables on all towers after the first.
            reuse = gpu_id > 0
            self.D_R_id_logits, self.D_R_fx, self.D_R_ln_w = self.discriminator(
                self.images_splits[gpu_id], is_reuse=reuse)
            self.m_l2 = tf.pow(
                tf.norm(tf.matmul(self.D_R_fx, self.D_R_ln_w),
                        ord='euclidean', axis=[-2, -1]), 2) * 0.0000001
            self.d_acc_i = slim.metrics.accuracy(
                tf.argmax(self.D_R_id_logits, 1),
                self.input_labels_splits[gpu_id], weights=100.0)
            # self.D_R_id_logits = AMSoftmax_logit_v2(
            #     self.D_R_id_logits, self.D_R_ln_w,
            #     label_batch=self.input_labels_splits[gpu_id],
            #     nrof_classes=self.subject_num)
            # tf.global_variables_initializer().run()
            # D loss
            self.d_loss_real_id_i = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.D_R_id_logits,
                    labels=self.input_labels_splits[gpu_id]))
            # self.d_loss_real_center_i, self.real_centers = center_loss(
            #     self.D_R_fx, self.input_labels_splits[gpu_id], 0.5, self.subject_num)
            # self.d_loss_real_center_i *= 0.03
            self.d_loss_regularizer = tf.zeros(shape=[])
            self.d_loss_real_center = tf.zeros(shape=[])
            grads = opt.compute_gradients(self.d_loss_real_id_i)
            tower_grads.append(grads)
            self.d_loss_real_id.append(self.d_loss_real_id_i)
            self.d_acc.append(self.d_acc_i)
            # self.d_loss_real_center.append(self.d_loss_real_center_i)

    # self.d_loss_real_center = tf.reduce_mean(self.d_loss_real_center)
    grads = average_gradients(tower_grads)
    self.train_op = opt.apply_gradients(grads)
    self.d_loss_real_id = tf.reduce_mean(self.d_loss_real_id)
    self.d_acc = tf.reduce_mean(self.d_acc)
    self.d_loss = self.d_loss_real_id

    # Summaries
    tf.summary.scalar("Total loss", self.d_loss)
    tf.summary.scalar("ID - softmax loss", self.d_loss_real_id)
    tf.summary.scalar("Center loss", self.d_loss_real_center)
    tf.summary.scalar("Regularizer loss", self.d_loss_regularizer)
    tf.summary.scalar("Train accuracy", self.d_acc)
    tf.summary.scalar("M - L2 loss", self.m_l2)
    self.summary_op = tf.summary.merge_all()
    # self.summary_writer = tf.summary.FileWriter(
    #     self.checkpoint_dir + "/" + self.model_dir + "/log", self.sess.graph)

    self.d_loss = tf.reduce_mean(self.d_loss)
    self.d_acc = tf.reduce_mean(self.d_acc)

    # Vars
    self.t_vars = tf.trainable_variables()
    self.d_vars = [
        var for var in self.t_vars
        if not ('d_' not in var.name or
                'd_k6_id_31239_pai3pi' in var.name or
                'd_k6_id_FactoryOne' in var.name or
                'd_k6_id_FactoryTwo' in var.name)
    ]
    for var in self.d_vars:
        print var.op.name
    self.d_saver = tf.train.Saver(self.d_vars,
                                  keep_checkpoint_every_n_hours=.5,
                                  max_to_keep=20)
    self.saver = tf.train.Saver(keep_checkpoint_every_n_hours=.5,
                                max_to_keep=10)
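# Hedged usage sketch for build_model above: one filename placeholder per batch element
# is fed alongside the integer identity labels. `model`, `paths`, `label_ids`, and the
# `model.sess` session attribute are hypothetical, not defined in this collection.
feed = {model.input_labels: label_ids}
for ph, path in zip(model.input_filenames, paths):
    feed[ph] = path
_, loss_val, acc_val = model.sess.run([model.train_op, model.d_loss, model.d_acc],
                                      feed_dict=feed)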