image1 = imageio.imread(args.image[0])[:, :, 0:3].astype(np.float32) / 255.0
image2 = imageio.imread(args.image[1])[:, :, 0:3].astype(np.float32) / 255.0

assert image1.shape == reference_image.shape
assert image2.shape == reference_image.shape

# Create the distance metric.
if args.metric == 'elpips_vgg':
    # Use E-LPIPS-VGG, averaging over n samples.
    config = elpips.elpips_vgg(batch_size=1, n=args.n)
    config.set_scale_levels_by_image_size(reference_image.shape[0], reference_image.shape[1])
    metric = elpips.Metric(config, back_prop=False)
elif args.metric == 'lpips_vgg':
    # Use LPIPS-VGG.
    metric = elpips.Metric(elpips.lpips_vgg(1), back_prop=False)
elif args.metric == 'lpips_squeeze':
    # Use LPIPS-SqueezeNet.
    metric = elpips.Metric(elpips.lpips_squeeze(1), back_prop=False)
else:
    raise Exception('Unknown metric')

# Create the computation graph.
print("Creating computation graph.")
tf_reference_image = tf.placeholder(tf.float32)
tf_image1 = tf.placeholder(tf.float32)
tf_image2 = tf.placeholder(tf.float32)

tf_evaluate_distances_with_correlated_noise = metric.forward((tf_image1, tf_image2), tf_reference_image)

# Run.
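# A minimal sketch of the "Run" step that the excerpt above truncates. It continues the
# script above (numpy as np, tensorflow as tf already imported) and assumes the
# placeholders are fed NHWC batches of size 1 and that metric.forward() on an image pair
# returns one distance tensor per input image; check the E-LPIPS examples for the exact
# feeding and averaging conventions.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Add a batch dimension of 1 to each HWC image.
    feed_dict = {
        tf_reference_image: np.expand_dims(reference_image, axis=0),
        tf_image1: np.expand_dims(image1, axis=0),
        tf_image2: np.expand_dims(image2, axis=0)
    }

    distances1, distances2 = sess.run(tf_evaluate_distances_with_correlated_noise, feed_dict=feed_dict)
    print('d(image1, reference) ~ {}'.format(np.mean(distances1)))
    print('d(image2, reference) ~ {}'.format(np.mean(distances2)))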
    help='base learning rate')
parser.add_argument(
    '--datasets',
    type=str,
    nargs='+',
    default=['train/traditional', 'train/cnn', 'train/mix'],
    help='datasets to train on: [train/traditional],[train/cnn],[train/mix],[val/traditional],[val/cnn],[val/color],[val/deblur],[val/frameinterp],[val/superres]')

opt = parser.parse_args()

# Load model.
# No input augmentation or dropout for training.
model_config = elpips.lpips_vgg(batch_size=opt.batch_size)
model_config.metric = opt.net


# Implement the small network which tries to predict human 2AFC results
# based on the estimated distances d(a, ref) and d(b, ref).
def conv2d_1x1(name, input, input_feature_count, output_feature_count, W=None, b=None):
    W = tf.get_variable(
        name=name + "_W",
        shape=[1, 1, input_feature_count, output_feature_count] if W is None else None,
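# The conv2d_1x1 helper above is cut off. Below is a hypothetical sketch of how such a
# helper and the small 2AFC head could look, modeled on the LPIPS-style ranking network
# that maps the two distances d(a, ref) and d(b, ref) to a preference probability. The
# function names, layer widths, and initializers are assumptions, not the repository's
# exact code (assumes TensorFlow 1.x imported as tf, as in the script above).
def conv2d_1x1_sketch(name, input, input_feature_count, output_feature_count):
    # 1x1 convolution acting as a per-pixel fully connected layer.
    W = tf.get_variable(name=name + "_W",
                        shape=[1, 1, input_feature_count, output_feature_count],
                        initializer=tf.glorot_uniform_initializer())
    b = tf.get_variable(name=name + "_b",
                        shape=[output_feature_count],
                        initializer=tf.zeros_initializer())
    return tf.nn.conv2d(input, W, strides=[1, 1, 1, 1], padding='VALID') + b

def predict_2afc_sketch(distance_a, distance_b):
    # Stack the two distances into a [N, 1, 1, 2] tensor so 1x1 convs act as FC layers.
    x = tf.stack([distance_a, distance_b], axis=1)
    x = tf.reshape(x, [-1, 1, 1, 2])
    x = tf.nn.leaky_relu(conv2d_1x1_sketch('2afc_fc1', x, 2, 32), alpha=0.2)
    x = tf.nn.leaky_relu(conv2d_1x1_sketch('2afc_fc2', x, 32, 32), alpha=0.2)
    logit = conv2d_1x1_sketch('2afc_fc3', x, 32, 1)
    # Probability that a human judge prefers image b over image a.
    return tf.nn.sigmoid(tf.reshape(logit, [-1]))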
def run_metrics(prediction, target_x, target_dx=None, target_dy=None, source=None):
    with tf.name_scope('metrics'):
        def to_ldr_nhwc(x):
            '''Prepares an image for the perceptual losses.'''
            x = tf.maximum(0.0, x)
            x = layers.srgb_to_nonlinear(x)
            x = image.tf_to_nhwc(x)
            return x

        # Configure the perceptual metrics (E-LPIPS and LPIPS variants).
        elpips_vgg_config = elpips.elpips_vgg(config.BATCH_SIZE)
        elpips_vgg_config.fast_and_approximate = True
        elpips_vgg_config.set_scale_levels(2)

        elpips_squeezenet_config = elpips.elpips_squeeze_maxpool(config.BATCH_SIZE)
        elpips_squeezenet_config.fast_and_approximate = True
        elpips_squeezenet_config.set_scale_levels(2)

        if config.model.elpips_eval_count is not None:
            elpips_vgg_config.average_over = config.model.elpips_eval_count
            elpips_squeezenet_config.average_over = config.model.elpips_eval_count

        elpips_vgg = elpips.Metric(elpips_vgg_config)
        elpips_squeeze_maxpool = elpips.Metric(elpips_squeezenet_config)
        lpips_vgg = elpips.Metric(elpips.lpips_vgg(config.BATCH_SIZE))
        lpips_squeeze = elpips.Metric(elpips.lpips_squeeze(config.BATCH_SIZE))

        # Crop away the padding before computing the metrics.
        assert config.PAD_WIDTH > 0
        if config.PAD_WIDTH > 0:
            shape = tf.shape(prediction)
            N, C, H, W = shape[0], shape[1], shape[2], shape[3]

            X0, Y0 = config.PAD_WIDTH + config.model.vary_padding, config.PAD_WIDTH + config.model.vary_padding
            X1, Y1 = W - config.PAD_WIDTH - config.model.vary_padding, H - config.PAD_WIDTH - config.model.vary_padding

            prediction = prediction[:, :, Y0:Y1, X0:X1]
            target_x = target_x[:, :, Y0:Y1, X0:X1]

            if target_dx is not None:
                target_dx = target_dx[:, :, Y0:Y1, X0:X1]
            if target_dy is not None:
                target_dy = target_dy[:, :, Y0:Y1, X0:X1]
            if source is not None:
                source = source[:, :, Y0:Y1, X0:X1]

        # L1 and Reinhard-tonemapped L1 errors.
        l1_error = tf.losses.absolute_difference(target_x, prediction)

        prediction_reinhard = prediction / (1.0 + tf.reduce_mean(tf.abs(prediction), axis=1, keepdims=True))
        target_reinhard = target_x / (1.0 + tf.reduce_mean(tf.abs(target_x), axis=1, keepdims=True))

        l1_tonemap_error = tf.losses.absolute_difference(target_reinhard, prediction_reinhard)

        # Mean color error and penalty for negative pixel values.
        mean_color_prediction = tf.reduce_mean(prediction, axis=[2, 3])
        mean_color_target_x = tf.reduce_mean(target_x, axis=[2, 3])
        mean_color_error_l1 = tf.reduce_mean(tf.abs(mean_color_prediction - mean_color_target_x))

        negative_loss = tf.reduce_mean(tf.maximum(-prediction, 0.0))

        # RelMSE.
        def RelMSE(prediction, reference):
            EPSILON = 0.001
            grayscale_reference = tf.reduce_mean(reference, axis=1, keepdims=True)
            image_error = prediction - reference
            relmse_image = tf.reduce_mean(tf.square(image_error), axis=1, keepdims=True) / (EPSILON + tf.square(grayscale_reference))
            return tf.reduce_mean(relmse_image, axis=[0, 1, 2, 3])

        relmse = RelMSE(prediction, target_x)

        # Perceptual-tonemap-sRGB metrics.
        perceptual_prediction = to_ldr_nhwc(prediction_reinhard)
        perceptual_target = to_ldr_nhwc(target_reinhard)

        elpips_squeeze_maxpool_loss = tf.reduce_mean(elpips_squeeze_maxpool.forward(perceptual_prediction, perceptual_target))
        elpips_vgg_loss = tf.reduce_mean(elpips_vgg.forward(perceptual_prediction, perceptual_target))
        lpips_squeeze_loss = tf.reduce_mean(lpips_squeeze.forward(perceptual_prediction, perceptual_target))
        lpips_vgg_loss = tf.reduce_mean(lpips_vgg.forward(perceptual_prediction, perceptual_target))

        metrics = {
            'L1': l1_error,
            'L1_tonemap': l1_tonemap_error,
            'RelMSE': relmse,
            'elpips_squeeze_maxpool': elpips_squeeze_maxpool_loss,
            'elpips_vgg': elpips_vgg_loss,
            'lpips_squeeze': lpips_squeeze_loss,
            'lpips_vgg': lpips_vgg_loss,
            'mean_color_L1': mean_color_error_l1,
            'negative_loss': negative_loss
        }

        # Gradient-domain L1 metrics.
        if target_dx is not None and target_dy is not None:
            prediction_dx = layers.dx(prediction)
            prediction_dy = layers.dy(prediction)

            prediction_dx_reinhard = prediction_dx / (1.0 + tf.reduce_mean(tf.abs(prediction_dx), axis=1, keepdims=True))
            prediction_dy_reinhard = prediction_dy / (1.0 + tf.reduce_mean(tf.abs(prediction_dy), axis=1, keepdims=True))

            target_dx_reinhard = target_dx / (1.0 + tf.reduce_mean(tf.abs(target_dx), axis=1, keepdims=True))
            target_dy_reinhard = target_dy / (1.0 + tf.reduce_mean(tf.abs(target_dy), axis=1, keepdims=True))

            gradient_l1_error = (tf.losses.absolute_difference(target_dx, prediction_dx) +
                                 tf.losses.absolute_difference(target_dy, prediction_dy))
            metrics['grad_L1'] = gradient_l1_error

            gradient_l1t_error = (tf.losses.absolute_difference(target_dx_reinhard, prediction_dx_reinhard) +
                                  tf.losses.absolute_difference(target_dy_reinhard, prediction_dy_reinhard))
            metrics['grad_L1_tonemap'] = gradient_l1t_error

        return metrics
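# A hypothetical usage sketch: evaluating the metrics dictionary for one NCHW batch.
# It assumes linear-HDR float32 tensors of shape [N, 3, H, W] named `prediction`,
# `target_x`, `target_dx`, `target_dy`, that the project's `config`, `layers` and
# `image` modules used above are importable, and that any network weights have already
# been restored into the session; the names below are illustrative only.
metrics = run_metrics(prediction, target_x, target_dx=target_dx, target_dy=target_dy)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Fetching a dict of tensors returns a dict of numpy scalars, one per metric.
    metric_values = sess.run(metrics)
    for name in sorted(metric_values):
        print('{}: {:.6f}'.format(name, metric_values[name]))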