def __init__(self,
             patch_size=512,
             skip_connection="add",
             input_img=None,
             truth_img=None,
             input_mask=None,
             ref_mask=None):
    """
    :param skip_connection: concat | add
    """
    # Hyperparameters.
    self.size = patch_size
    self.skip_connection = skip_connection
    self.down_channels = [64, 64, 128, 128, 256, 256, 512]
    # self.fc_size = 1024  # NOTE: static fc_size is deprecated; a 0.5x bottleneck is used instead.
    self.up_channels = [512, 256, 256, 128, 128, 64, 64]
    self.batch_size = None
    self.lr = 1e-3

    # I/O tensors. The input image should be patches of size 512x512.
    # Explicit None checks are required here: `tensor or placeholder` would
    # call bool() on a tf.Tensor, which raises a TypeError in graph mode.
    self.input_img = input_img if input_img is not None else tf.placeholder(
        shape=[None, self.size, self.size, 3], dtype=tf.float32)
    self.truth_img = truth_img if truth_img is not None else tf.placeholder(
        shape=[None, self.size, self.size, 3], dtype=tf.float32)
    # `input_mask` is applied to `input_img` to locate the foreground.
    self.input_mask = input_mask if input_mask is not None else tf.placeholder(
        shape=[None, self.size, self.size, 3], dtype=tf.float32)
    # `ref_mask + input_mask` is the area in which to apply inpainting.
    self.ref_mask = ref_mask
    self.output_img = None
    self.metric = elpips.Metric(
        elpips.elpips_vgg(batch_size=1, n=1), back_prop=False)

    # Internal tensors, set after building.
    self.down_layers = None
    self.loss = None
    self.optimizer = None
    self.train_op = None
    self.merged_summary = None
    self.global_step = None
    self.saver = None
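# A minimal usage sketch for the E-LPIPS metric member above (an illustration,
# not original code; assumes TF1 graph mode and the `elpips` package).
# `metric.forward` returns one perceptual distance per batch element, so the
# model can score an inpainted patch against its ground truth directly:
import tensorflow as tf
import elpips

output_patch = tf.placeholder(tf.float32, [1, 512, 512, 3])
truth_patch = tf.placeholder(tf.float32, [1, 512, 512, 3])
patch_metric = elpips.Metric(elpips.elpips_vgg(batch_size=1, n=1),
                             back_prop=False)
patch_distance = patch_metric.forward(output_patch, truth_patch)  # shape [1]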
def run_metrics():
    # TODO HOX.
    import collections
    import concurrent.futures
    import csv
    import os
    import sys
    import time

    import numpy as np
    import tensorflow as tf

    import darc
    import elpips

    # Build graph.
    tf_X_input = tf.placeholder(tf.float32)
    tf_Y_input = tf.placeholder(tf.float32)
    tf_X = tf.expand_dims(tf_X_input, axis=0)
    tf_Y = tf.expand_dims(tf_Y_input, axis=0)
    tf_Y_grayscale = tf.reduce_mean(tf_Y, axis=3, keepdims=True)

    tf_l2 = tf.reduce_mean(tf.square(tf_X - tf_Y))
    tf_l1 = tf.reduce_mean(tf.abs(tf_X - tf_Y))
    tf_relmse = tf.reduce_mean(
        tf.square(tf_X - tf_Y) / (0.001 + tf.square(tf_Y_grayscale)))

    # Note: It would be somewhat faster to just use n=args.elpips_sample_count,
    # but TF has problems with n > 1 on some GPUs.
    elpips_vgg_model = elpips.Metric(elpips.elpips_vgg(n=1), back_prop=False)
    tf_elpips_vgg = elpips_vgg_model.forward(tf_X, tf_Y)[0]

    print("Creating Tensorflow session.")

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.8

    with tf.Session(config=tf_config) as sess:
        # Initialize model.
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])

        # Iterate over the archives.
        tasks = collections.deque()
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            for archive_path in DATASETS:
                # Reconstruct scene.
                scene_name = get_scene_name(archive_path)
                current_darc = darc.DataArchive(archive_path)

                crop_count = current_darc[0].shape[0] - 1
                image_count = len(current_darc)

                # Read minibatches.
                for image_index in range(image_count):
                    # Execute previous tasks.
                    while tasks:
                        task = tasks[0].result()
                        tasks.popleft()

                    print("Loading reference... ", end="")
                    sys.stdout.flush()

                    stime = time.time()
                    reference = current_darc[image_index][-1, :, :, 0:3]
                    etime = time.time()
                    print("Done in {:.01f}s.".format(etime - stime))

                    directory = os.path.join(OUT_DIRECTORY, config.model.name,
                                             scene_name)
                    if not os.path.exists(directory):
                        print("Skipping directory '{}': Directory does not exist."
                              .format(directory))
                        continue

                    for crop_index in range(crop_count):
                        if args.crops and crop_index not in args.crops:
                            print("(Skipping scene {}, image {}, crop {}).".format(
                                scene_name, image_index, crop_index))
                            continue

                        crop_path = os.path.join(
                            directory,
                            "img{:04d}_crop{:02d}".format(image_index, crop_index))
                        if not os.path.exists(crop_path + ".npz"):
                            print("Skipping: Not found.")
                            continue

                        with open(
                                os.path.join(
                                    directory,
                                    "img{:04d}_results.{}.csv".format(
                                        image_index, crop_index)),
                                'w') as csvfile:
                            fields = [
                                'crop_index', 'l1', 'l2', 'relmse',
                                'elpips-vgg', 'elpips-vgg-stdev'
                            ]
                            csv_writer = csv.DictWriter(csvfile,
                                                        fieldnames=fields)
                            csv_writer.writeheader()

                            print("Handling scene {}, image {}, crop {}.".format(
                                scene_name, image_index, crop_index))

                            # Load image.
                            print("Loading image... ", end="")
                            sys.stdout.flush()

                            stime = time.time()
                            current_image = image.load_npz(crop_path + ".npz")
                            etime = time.time()
                            print("Done in {:.01f}s.".format(etime - stime))

                            # Run metrics.
                            print("Running metrics... ", end="")
                            sys.stdout.flush()

                            stime = time.time()
                            err_l1, err_l2, err_relmse = sess.run(
                                [tf_l1, tf_l2, tf_relmse],
                                feed_dict={
                                    tf_X_input: current_image,
                                    tf_Y_input: reference
                                })
                            print_dot()

                            err_elpips_vgg = []
                            for i in range(args.elpips_sample_count):
                                if i > 0 and i % 10 == 0:
                                    print_dot()
                                err_elpips_vgg_single = sess.run(
                                    tf_elpips_vgg,
                                    feed_dict={
                                        tf_X_input: current_image,
                                        tf_Y_input: reference
                                    })
                                err_elpips_vgg.append(err_elpips_vgg_single)

                            err_elpips_vgg_mean = np.mean(err_elpips_vgg)
                            err_elpips_vgg_std = np.std(
                                err_elpips_vgg, ddof=1) / np.sqrt(
                                    args.elpips_sample_count)

                            etime = time.time()
                            print("Done in {:.01f}s.".format(etime - stime))

                            # Save results.
                            csv_writer.writerow({
                                'crop_index': crop_index,
                                'l1': err_l1,
                                'l2': err_l2,
                                'relmse': err_relmse,
                                'elpips-vgg': err_elpips_vgg_mean,
                                'elpips-vgg-stdev': err_elpips_vgg_std
                            })
if args.metric not in ('elpips_vgg', 'elpips_squeeze_maxpool'):
    raise Exception('Unsupported metric')

BATCH_SIZE = args.batch_size

# Load images.
image1 = imageio.imread(args.image[0])[:, :, 0:3].astype(np.float32) / 255.0
image2 = imageio.imread(args.image[1])[:, :, 0:3].astype(np.float32) / 255.0

assert image1.shape == image2.shape

# Create the distance metric.
if args.metric == 'elpips_vgg':
    config = elpips.elpips_vgg(batch_size=BATCH_SIZE, n=1)
elif args.metric == 'elpips_squeeze_maxpool':
    config = elpips.elpips_squeeze_maxpool(batch_size=BATCH_SIZE, n=1)
else:
    raise Exception('Unsupported metric')

config.set_scale_levels_by_image_size(image1.shape[0], image1.shape[1])
metric = elpips.Metric(config, back_prop=False)

# Create the computation graph.
print("Creating computation graph.")
tf_image1 = tf.placeholder(tf.float32)
tf_image2 = tf.placeholder(tf.float32)

# Extend single images into small minibatches to take advantage of the
# implementation's Latin Hypercube Sampling.
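# The snippet cuts off above. A plausible continuation (an assumption, not the
# original code): tile each single HWC image into a BATCH_SIZE minibatch so
# one forward pass evaluates several E-LPIPS samples at once.
tf_image1_batch = tf.tile(tf.expand_dims(tf_image1, axis=0),
                          [BATCH_SIZE, 1, 1, 1])
tf_image2_batch = tf.tile(tf.expand_dims(tf_image2, axis=0),
                          [BATCH_SIZE, 1, 1, 1])
tf_distances = metric.forward(tf_image1_batch, tf_image2_batch)  # [BATCH_SIZE]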
if args.metric not in ('elpips_vgg', 'lpips_vgg', 'lpips_squeeze'):
    raise Exception('Unsupported metric')

# Load images.
reference_image = imageio.imread(args.reference_image[0])[:, :, 0:3].astype(
    np.float32) / 255.0
image1 = imageio.imread(args.image[0])[:, :, 0:3].astype(np.float32) / 255.0
image2 = imageio.imread(args.image[1])[:, :, 0:3].astype(np.float32) / 255.0

assert image1.shape == reference_image.shape
assert image2.shape == reference_image.shape

# Create the distance metric.
if args.metric == 'elpips_vgg':
    # Use E-LPIPS-VGG, averaging over n samples.
    config = elpips.elpips_vgg(batch_size=1, n=args.n)
    config.set_scale_levels_by_image_size(reference_image.shape[0],
                                          reference_image.shape[1])
    metric = elpips.Metric(config, back_prop=False)
elif args.metric == 'lpips_vgg':
    # Use LPIPS-VGG.
    metric = elpips.Metric(elpips.lpips_vgg(1), back_prop=False)
elif args.metric == 'lpips_squeeze':
    # Use LPIPS-SQUEEZENET.
    metric = elpips.Metric(elpips.lpips_squeeze(1), back_prop=False)
else:
    raise Exception('Unknown metric')

# Create the computation graph.
print("Creating computation graph.")
tf_reference_image = tf.placeholder(tf.float32)
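# An assumed continuation (the original is truncated here): compare each
# candidate image against the same reference through one shared graph, feeding
# image1 and image2 in turn and comparing the two fetched distances.
tf_candidate_image = tf.placeholder(tf.float32)
tf_distance = metric.forward(tf.expand_dims(tf_candidate_image, axis=0),
                             tf.expand_dims(tf_reference_image, axis=0))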
                    help='number of samples to use for E-LPIPS. Default: 200')
args = parser.parse_args()

if args.metric not in ('elpips_vgg', 'lpips_vgg', 'lpips_squeeze'):
    raise Exception('Unsupported metric')

# Load images.
image1 = imageio.imread(args.image[0])[:, :, 0:3].astype(np.float32) / 255.0
image2 = imageio.imread(args.image[1])[:, :, 0:3].astype(np.float32) / 255.0

assert image1.shape == image2.shape

# Create the distance metric.
if args.metric == 'elpips_vgg':
    # Use E-LPIPS, averaging over n samples.
    metric = elpips.Metric(elpips.elpips_vgg(batch_size=1, n=args.n),
                           back_prop=False)
elif args.metric == 'lpips_vgg':
    # Use LPIPS-VGG.
    metric = elpips.Metric(elpips.lpips_vgg(1), back_prop=False)
elif args.metric == 'lpips_squeeze':
    # Use LPIPS-SQUEEZENET.
    metric = elpips.Metric(elpips.lpips_squeeze(1), back_prop=False)
else:
    raise Exception('Unsupported metric')

# Create the computation graph.
print("Creating computation graph.")
tf_image1 = tf.placeholder(tf.float32)
tf_image2 = tf.placeholder(tf.float32)
tf_evaluate_distance = metric.forward(tf_image1, tf_image2)
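# A sketch of evaluating the graph above (an assumption, not original code:
# `forward` is taken to expect batched NHWC input, so a leading batch axis is
# added when feeding the HWC images):
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    distance = sess.run(tf_evaluate_distance, feed_dict={
        tf_image1: image1[np.newaxis, ...],
        tf_image2: image2[np.newaxis, ...]
    })
    print("{}: {}".format(args.metric, distance[0]))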
def run_metrics(prediction, target_x, target_dx=None, target_dy=None,
                source=None):
    with tf.name_scope('metrics'):
        def to_ldr_nhwc(x):
            '''Prepares an image for the perceptual losses.'''
            x = tf.maximum(0.0, x)
            x = layers.srgb_to_nonlinear(x)
            x = image.tf_to_nhwc(x)
            return x

        elpips_vgg_config = elpips.elpips_vgg(config.BATCH_SIZE)
        elpips_vgg_config.fast_and_approximate = True
        elpips_vgg_config.set_scale_levels(2)

        elpips_squeezenet_config = elpips.elpips_squeeze_maxpool(
            config.BATCH_SIZE)
        elpips_squeezenet_config.fast_and_approximate = True
        elpips_squeezenet_config.set_scale_levels(2)

        if config.model.elpips_eval_count is not None:
            elpips_vgg_config.average_over = config.model.elpips_eval_count
            elpips_squeezenet_config.average_over = config.model.elpips_eval_count

        elpips_vgg = elpips.Metric(elpips_vgg_config)
        elpips_squeeze_maxpool = elpips.Metric(elpips_squeezenet_config)
        lpips_vgg = elpips.Metric(elpips.lpips_vgg(config.BATCH_SIZE))
        lpips_squeeze = elpips.Metric(elpips.lpips_squeeze(config.BATCH_SIZE))

        assert config.PAD_WIDTH > 0
        if config.PAD_WIDTH > 0:
            shape = tf.shape(prediction)
            N, C, H, W = shape[0], shape[1], shape[2], shape[3]
            X0, Y0 = (config.PAD_WIDTH + config.model.vary_padding,
                      config.PAD_WIDTH + config.model.vary_padding)
            X1, Y1 = (W - config.PAD_WIDTH - config.model.vary_padding,
                      H - config.PAD_WIDTH - config.model.vary_padding)

            prediction = prediction[:, :, Y0:Y1, X0:X1]
            target_x = target_x[:, :, Y0:Y1, X0:X1]

            if target_dx is not None:
                target_dx = target_dx[:, :, Y0:Y1, X0:X1]
            if target_dy is not None:
                target_dy = target_dy[:, :, Y0:Y1, X0:X1]
            if source is not None:
                source = source[:, :, Y0:Y1, X0:X1]

        l1_error = tf.losses.absolute_difference(target_x, prediction)

        prediction_reinhard = prediction / (
            1.0 + tf.reduce_mean(tf.abs(prediction), axis=1, keepdims=True))
        target_reinhard = target_x / (
            1.0 + tf.reduce_mean(tf.abs(target_x), axis=1, keepdims=True))
        l1_tonemap_error = tf.losses.absolute_difference(
            target_reinhard, prediction_reinhard)

        mean_color_prediction = tf.reduce_mean(prediction, axis=[2, 3])
        mean_color_target_x = tf.reduce_mean(target_x, axis=[2, 3])
        mean_color_error_l1 = tf.reduce_mean(
            tf.abs(mean_color_prediction - mean_color_target_x))

        negative_loss = tf.reduce_mean(tf.maximum(-prediction, 0.0))

        # RelMSE.
        def RelMSE(prediction, reference):
            EPSILON = 0.001
            grayscale_reference = tf.reduce_mean(reference, axis=1,
                                                 keepdims=True)
            image_error = prediction - reference
            relmse_image = tf.reduce_mean(
                tf.square(image_error), axis=1,
                keepdims=True) / (EPSILON + tf.square(grayscale_reference))
            return tf.reduce_mean(relmse_image, axis=[0, 1, 2, 3])

        relmse = RelMSE(prediction, target_x)

        # Perceptual-tonemap-sRGB.
        perceptual_prediction = to_ldr_nhwc(prediction_reinhard)
        perceptual_target = to_ldr_nhwc(target_reinhard)

        elpips_squeeze_maxpool_loss = tf.reduce_mean(
            elpips_squeeze_maxpool.forward(perceptual_prediction,
                                           perceptual_target))
        elpips_vgg_loss = tf.reduce_mean(
            elpips_vgg.forward(perceptual_prediction, perceptual_target))
        lpips_squeeze_loss = tf.reduce_mean(
            lpips_squeeze.forward(perceptual_prediction, perceptual_target))
        lpips_vgg_loss = tf.reduce_mean(
            lpips_vgg.forward(perceptual_prediction, perceptual_target))

        metrics = {
            'L1': l1_error,
            'L1_tonemap': l1_tonemap_error,
            'RelMSE': relmse,
            'elpips_squeeze_maxpool': elpips_squeeze_maxpool_loss,
            'elpips_vgg': elpips_vgg_loss,
            'lpips_squeeze': lpips_squeeze_loss,
            'lpips_vgg': lpips_vgg_loss,
            'mean_color_L1': mean_color_error_l1,
            'negative_loss': negative_loss
        }

        if target_dx is not None and target_dy is not None:
            prediction_dx = layers.dx(prediction)
            prediction_dy = layers.dy(prediction)

            prediction_dx_reinhard = prediction_dx / (1.0 + tf.reduce_mean(
                tf.abs(prediction_dx), axis=1, keepdims=True))
            prediction_dy_reinhard = prediction_dy / (1.0 + tf.reduce_mean(
                tf.abs(prediction_dy), axis=1, keepdims=True))

            target_dx_reinhard = target_dx / (
                1.0 + tf.reduce_mean(tf.abs(target_dx), axis=1, keepdims=True))
            target_dy_reinhard = target_dy / (
                1.0 + tf.reduce_mean(tf.abs(target_dy), axis=1, keepdims=True))

            gradient_l1_error = (
                tf.losses.absolute_difference(target_dx, prediction_dx) +
                tf.losses.absolute_difference(target_dy, prediction_dy))
            metrics['grad_L1'] = gradient_l1_error

            gradient_l1t_error = (
                tf.losses.absolute_difference(target_dx_reinhard,
                                              prediction_dx_reinhard) +
                tf.losses.absolute_difference(target_dy_reinhard,
                                              prediction_dy_reinhard))
            metrics['grad_L1_tonemap'] = gradient_l1t_error

    return metrics
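# Aside (a sketch, not part of the original module): a NumPy mirror of the
# RelMSE definition above, handy for sanity-checking fetched values outside
# the graph. Assumes NCHW float arrays, matching the layout run_metrics uses.
import numpy as np

def relmse_np(prediction, reference, epsilon=0.001):
    grayscale = reference.mean(axis=1, keepdims=True)  # grayscale reference
    error = np.square(prediction - reference).mean(axis=1, keepdims=True)
    return float((error / (epsilon + np.square(grayscale))).mean())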