def truncate_fancy(dlat, dlat_avg, model_scale=18, truncation_psi=0.7, minlayer=0, maxlayer=8, do_clip=False):
    """Interpolate between `dlat_avg` and `dlat` with per-layer coefficients.

    Layers with index in [minlayer, maxlayer) get the coefficient
    `truncation_psi`; every other layer gets 1.0.

    Args:
        dlat: Latents to truncate. The coefficients have shape
            (1, model_scale, 1), so presumably this is
            (batch, model_scale, dim) - confirm with caller.
        dlat_avg: Value used as the first endpoint of the interpolation.
        model_scale: Number of layers the coefficient vector covers.
        truncation_psi: Coefficient applied inside the selected layer band.
        minlayer: Layers below this index are reset to coefficient 1.0.
        maxlayer: Layers at or above this index keep coefficient 1.0.
        do_clip: When true, use `tflib.lerp_clip` and call `.eval()` on its
            result; otherwise return the result of `tflib.lerp` directly.

    Returns:
        The interpolated latents as produced by the selected tflib helper.
    """
    layers = np.arange(model_scale)[np.newaxis, :, np.newaxis]
    unit = np.ones(layers.shape, dtype=np.float32)

    # Start with psi on every layer below `maxlayer` ...
    weights = np.where(layers < maxlayer, truncation_psi * unit, unit)
    # ... then restore 1.0 on the leading `minlayer` layers.
    if minlayer > 0:
        weights[:, :minlayer, :] = unit[:, :minlayer, :]

    if not do_clip:
        return tflib.lerp(dlat_avg, dlat, weights)
    return tflib.lerp_clip(dlat_avg, dlat, weights).eval()
def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net):
    """Prepare a minibatch of real images and labels for the networks.

    Steps, in order: cast to float32 and remap the dynamic range; optionally
    mirror a random subset of samples along axis 3 and flip the sign of the
    rotation sine stored in their labels; crossfade towards a 2x2-averaged
    copy by the fractional part of `lod`; repeat pixels 2**floor(lod) times.

    Args:
        x: Image batch. Axes 2 and 3 are pooled/upscaled and axis 3 is
            mirrored (presumably NCHW - confirm with caller).
        labels: 2-D label tensor. Columns 108 and 109 are read as the cosine
            and sine of a rotation.
        lod: Level-of-detail value.
        mirror_augment: Enables the random mirroring step.
        drange_data: Source dynamic range passed to `misc.adjust_dynamic_range`.
        drange_net: Target dynamic range passed to `misc.adjust_dynamic_range`.

    Returns:
        Tuple `(x, labels)` after processing.
    """
    rotation_offset = 108  # Column index of the rotation cosine; the sine follows it.

    with tf.name_scope('DynamicRange'):
        x = misc.adjust_dynamic_range(tf.cast(x, tf.float32), drange_data, drange_net)

    if mirror_augment:
        with tf.name_scope('MirrorAugment'):
            batch_size = tf.shape(x)[0]
            keep_original = tf.random_uniform([batch_size]) < 0.5
            flipped_images = tf.reverse(x, [3])
            x = tf.where(keep_original, x, flipped_images)

            # Rebuild (cos, sin) through the angle and negate the sine so the
            # label matches the mirrored image.
            cos_column = tf.expand_dims(labels[:, rotation_offset], axis=-1)
            sin_column = tf.expand_dims(labels[:, rotation_offset + 1], axis=-1)
            theta = tf.atan2(sin_column, cos_column)
            flipped_cos = tf.cos(theta)
            flipped_sin = tf.sin(theta) * -1

            head = labels[:, :rotation_offset]
            tail = labels[:, rotation_offset + 2:]
            flipped_labels = tf.concat([head, flipped_cos, flipped_sin, tail], axis=1)
            # The same coin flip is used for images and labels.
            labels = tf.where(keep_original, labels, flipped_labels)

    with tf.name_scope('FadeLOD'):  # Smooth crossfade between consecutive levels-of-detail.
        dims = tf.shape(x)
        coarse = tf.reshape(x, [-1, dims[1], dims[2] // 2, 2, dims[3] // 2, 2])
        coarse = tf.reduce_mean(coarse, axis=[3, 5], keepdims=True)
        coarse = tf.tile(coarse, [1, 1, 1, 2, 1, 2])
        coarse = tf.reshape(coarse, [-1, dims[1], dims[2], dims[3]])
        fade = lod - tf.floor(lod)
        x = tflib.lerp(x, coarse, fade)

    with tf.name_scope('UpscaleLOD'):  # Upscale to match the expected input/output size of the networks.
        dims = tf.shape(x)
        repeat = tf.cast(2**tf.floor(lod), tf.int32)
        x = tf.reshape(x, [-1, dims[1], dims[2], 1, dims[3], 1])
        x = tf.tile(x, [1, 1, 1, repeat, 1, repeat])
        x = tf.reshape(x, [-1, dims[1], dims[2] * repeat, dims[3] * repeat])

    # NOTE: an earlier experiment scaled the two rotation columns by 2.0 at
    # this point; it is disabled.
    return x, labels
def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net):
    """Prepare real images and labels, including label mirroring and masking.

    Steps, in order: cast to float32 and remap the dynamic range; optionally
    mirror a random subset of samples along axis 3 and permute eight label
    columns to match; crossfade towards a 2x2-averaged copy by the fractional
    part of `lod`; repeat pixels 2**floor(lod) times; finally multiply two
    label columns by one shared random 0/1 mask per sample.

    Args:
        x: Image batch. Axes 2 and 3 are pooled/upscaled and axis 3 is
            mirrored (presumably NCHW - confirm with caller).
        labels: 2-D label tensor with at least 116 columns.
        lod: Level-of-detail value.
        mirror_augment: Enables the random mirroring step.
        drange_data: Source dynamic range passed to `misc.adjust_dynamic_range`.
        drange_net: Target dynamic range passed to `misc.adjust_dynamic_range`.

    Returns:
        Tuple `(x, labels)` after processing.
    """
    rotation_offset = 108  # First of the eight label columns permuted when mirroring.

    with tf.name_scope('DynamicRange'):
        x = misc.adjust_dynamic_range(tf.cast(x, tf.float32), drange_data, drange_net)

    if mirror_augment:
        with tf.name_scope('MirrorAugment'):
            batch_size = tf.shape(x)[0]
            keep_original = tf.random_uniform([batch_size]) < 0.5
            flipped_images = tf.reverse(x, [3])
            x = tf.where(keep_original, x, flipped_images)

            # Column permutation for mirrored samples: slot 0 stays put and
            # slots 1..7 are reversed; everything else is left in place.
            leading = tf.range(rotation_offset)
            permuted = tf.constant([0, 7, 6, 5, 4, 3, 2, 1]) + rotation_offset
            trailing = tf.range(rotation_offset + 8, tf.shape(labels)[1])
            column_order = tf.concat([leading, permuted, trailing], axis=0)
            flipped_labels = tf.gather(labels, column_order, axis=1)
            labels = tf.where(keep_original, labels, flipped_labels)

    with tf.name_scope('FadeLOD'):  # Smooth crossfade between consecutive levels-of-detail.
        dims = tf.shape(x)
        coarse = tf.reshape(x, [-1, dims[1], dims[2] // 2, 2, dims[3] // 2, 2])
        coarse = tf.reduce_mean(coarse, axis=[3, 5], keepdims=True)
        coarse = tf.tile(coarse, [1, 1, 1, 2, 1, 2])
        coarse = tf.reshape(coarse, [-1, dims[1], dims[2], dims[3]])
        fade = lod - tf.floor(lod)
        x = tflib.lerp(x, coarse, fade)

    with tf.name_scope('UpscaleLOD'):  # Upscale to match the expected input/output size of the networks.
        dims = tf.shape(x)
        repeat = tf.cast(2**tf.floor(lod), tf.int32)
        x = tf.reshape(x, [-1, dims[1], dims[2], 1, dims[3], 1])
        x = tf.tile(x, [1, 1, 1, repeat, 1, repeat])
        x = tf.reshape(x, [-1, dims[1], dims[2] * repeat, dims[3] * repeat])

    with tf.name_scope('BalanceLabels'):
        # One 0/1 draw per sample, shared by both masked columns.
        coin = tf.random_uniform([tf.shape(x)[0], 1]) < 0.5
        sample_mask = tf.cast(coin, dtype=tf.float32)
        left = rotation_offset + 1   # first masked column
        right = rotation_offset + 7  # second masked column
        labels = tf.concat([
            labels[:, :left],
            labels[:, left:left + 1] * sample_mask,
            labels[:, left + 1:right],
            labels[:, right:right + 1] * sample_mask,
            labels[:, right + 1:],
        ], axis=-1)

    return x, labels
def D_wgan_gp(
        G, D, opt, training_set, minibatch_size, reals, labels,  # pylint: disable=unused-argument
        wgan_lambda=10.0,    # Weight for the gradient penalty term.
        wgan_epsilon=0.001,  # Weight for the epsilon term, \epsilon_{drift}.
        wgan_target=1.0):    # Target value for gradient magnitudes.
    """Discriminator loss: WGAN term + gradient penalty + epsilon penalty.

    Args:
        G: Generator network; sampled with fresh normal latents and `labels`.
        D: Discriminator network; scored on reals, fakes and mixed images.
        opt: Optimizer providing `apply_loss_scaling` / `undo_loss_scaling`.
        training_set: Unused.
        minibatch_size: Number of latents (and mixing factors) to draw.
        reals: Real image batch.
        labels: Labels fed to both networks.
        wgan_lambda: Gradient penalty weight.
        wgan_epsilon: Weight of the squared real-score penalty.
        wgan_target: Target gradient norm.

    Returns:
        Loss tensor (fake score - real score + penalties).
    """
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)

    score_real = fp32(D.get_output_for(reals, labels, is_training=True))
    score_fake = fp32(D.get_output_for(fakes, labels, is_training=True))
    score_real = autosummary("Loss/scores/real", score_real)
    score_fake = autosummary("Loss/scores/fake", score_fake)
    loss = score_fake - score_real

    with tf.name_scope("GradientPenalty"):
        # Random per-sample blend of real and generated images.
        alpha = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fakes.dtype)
        blended = tflib.lerp(tf.cast(reals, fakes.dtype), fakes, alpha)
        score_blended = fp32(D.get_output_for(blended, labels, is_training=True))
        score_blended = autosummary("Loss/scores/mixed", score_blended)

        # Differentiate the scaled score sum, then undo the scaling on the gradients.
        scaled_total = opt.apply_loss_scaling(tf.reduce_sum(score_blended))
        raw_grads = tf.gradients(scaled_total, [blended])[0]
        grads = opt.undo_loss_scaling(fp32(raw_grads))

        grad_norms = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
        grad_norms = autosummary("Loss/mixed_norms", grad_norms)
        penalty = tf.square(grad_norms - wgan_target)
    loss += penalty * (wgan_lambda / (wgan_target ** 2))

    with tf.name_scope("EpsilonPenalty"):
        drift = autosummary("Loss/epsilon_penalty", tf.square(score_real))
    loss += drift * wgan_epsilon
    return loss
def D_wgan_gp(
        G, D, opt, training_set, minibatch_size, reals, labels,
        wgan_lambda=10.0,
        wgan_epsilon=0.001,
        wgan_target=1.0):
    """Discriminator WGAN loss with the gradient penalty returned separately.

    Args:
        G: Generator network; sampled with fresh normal latents and `labels`.
        D: Discriminator network; scored on reals, fakes and mixed images.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Number of latents (and mixing factors) to draw.
        reals: Real image batch.
        labels: Labels fed to both networks.
        wgan_lambda: Gradient penalty weight.
        wgan_epsilon: Weight of the squared real-score penalty.
        wgan_target: Target gradient norm.

    Returns:
        Tuple `(loss, reg)`: `loss` is fake score - real score plus the
        epsilon penalty; `reg` is the weighted gradient penalty.
    """
    del opt, training_set  # Accepted for interface compatibility only.

    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)

    score_real = D.get_output_for(reals, labels, is_training=True)
    score_fake = D.get_output_for(fakes, labels, is_training=True)
    score_real = autosummary("Loss/scores/real", score_real)
    score_fake = autosummary("Loss/scores/fake", score_fake)
    loss = score_fake - score_real

    with tf.name_scope("EpsilonPenalty"):
        drift = autosummary("Loss/epsilon_penalty", tf.square(score_real))
        loss += drift * wgan_epsilon

    with tf.name_scope("GradientPenalty"):
        # Random per-sample blend of real and generated images.
        alpha = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fakes.dtype)
        blended = tflib.lerp(tf.cast(reals, fakes.dtype), fakes, alpha)
        score_blended = D.get_output_for(blended, labels, is_training=True)
        score_blended = autosummary("Loss/scores/mixed", score_blended)

        grads = tf.gradients(tf.reduce_sum(score_blended), [blended])[0]
        grad_norms = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
        grad_norms = autosummary("Loss/mixed_norms", grad_norms)
        penalty = tf.square(grad_norms - wgan_target)
        reg = penalty * (wgan_lambda / (wgan_target**2))

    return loss, reg
def grow(res, lod):
    """Recursively assemble the features for resolution index `res`.

    Relies on names from the enclosing scope (`images_in`, `lod_in`,
    `fromrgb`, `downscale2d`, `block`, `cset`). `cset` presumably wraps the
    two callables in a conditional on the given predicate - confirm.

    Args:
        res: Resolution index handled by this call.
        lod: Level-of-detail paired with `res`; decreases as `res` increases.

    Returns:
        The output of the selected branch for this level.
    """
    def read_image():
        # Feed the block straight from the input image, downscaled for this level.
        return fromrgb(downscale2d(images_in, 2 ** lod), res)

    def recurse_finer():
        return grow(res + 1, lod - 1)

    make_input = read_image
    if lod > 0:
        make_input = cset(make_input, (lod_in < lod), recurse_finer)

    features = block(make_input(), res)

    def passthrough():
        return features

    def fade_with_coarser():
        # Blend with the image fed in at the next-coarser level.
        coarser = fromrgb(downscale2d(images_in, 2 ** (lod + 1)), res - 1)
        return tflib.lerp(features, coarser, lod_in - lod)

    make_output = passthrough
    if res > 2:
        make_output = cset(make_output, (lod_in > lod), fade_with_coarser)
    return make_output()
def grow(res, lod):
    """Recursively assemble the features for resolution index `res`.

    Relies on names from the enclosing scope (`images_in`, `lod_in`,
    `fromrgb`, `downscale2d`, `block`, `cset`). `cset` presumably wraps the
    two callables in a conditional on the given predicate - confirm.

    Args:
        res: Resolution index handled by this call.
        lod: Level-of-detail paired with `res`; decreases as `res` increases.

    Returns:
        The output of the selected branch for this level.
    """
    # Default input branch: read the (downscaled) input image at this level.
    source = lambda: fromrgb(downscale2d(images_in, 2**lod), res)
    if lod > 0:
        # Not the finest level: when lod_in < lod, take the output of the
        # next-finer level instead; otherwise keep reading the image.
        source = cset(source, (lod_in < lod), lambda: grow(res + 1, lod - 1))

    # Evaluate the chosen input branch once and run this level's block on it.
    feats = block(source(), res)

    # Default output branch: the block output as-is.
    result = lambda: feats
    if res > 2:
        # Not the coarsest level: when lod_in > lod, interpolate between the
        # block output and the image fed in one level coarser.
        result = cset(
            result,
            (lod_in > lod),
            lambda: tflib.lerp(
                feats,
                fromrgb(downscale2d(images_in, 2**(lod + 1)), res - 1),
                lod_in - lod))
    return result()
def wgangp(G, D, aug, fake_labels, real_images, real_labels, wgan_epsilon=0.001, wgan_lambda=10, wgan_target=1, **_kwargs):
    """WGAN-GP losses for generator and discriminator.

    Args:
        G: Generator, evaluated through `eval_G`.
        D: Discriminator, evaluated through `eval_D`.
        aug: Augmentation object forwarded to `eval_D` and the report helpers.
        fake_labels: Labels for generated samples; the batch size is taken
            from their first dimension.
        real_images: Real image batch.
        real_labels: Labels for the real batch; also used for the mixed batch.
        wgan_epsilon: Weight of the squared real-score penalty.
        wgan_lambda: Gradient penalty weight.
        wgan_target: Target gradient norm.
        **_kwargs: Ignored.

    Returns:
        Whatever `report_loss(aug, G_loss, D_loss, None, D_reg)` returns.
    """
    batch = tf.shape(fake_labels)[0]
    z = tf.random_normal([batch] + G.input_shapes[0][1:])
    generated = eval_G(G, z, fake_labels)
    scored_fake = eval_D(D, aug, generated.images, fake_labels, report='fake')
    scored_real = eval_D(D, aug, real_images, real_labels, report='real')

    # WGAN loss from "Wasserstein Generative Adversarial Networks".
    with tf.name_scope('Loss_main'):
        G_loss = -scored_fake.scores  # pylint: disable=invalid-unary-operand-type
        D_loss = scored_fake.scores - scored_real.scores

    # Epsilon penalty from "Progressive Growing of GANs for Improved Quality, Stability, and Variation"
    with tf.name_scope('Loss_epsilon'):
        drift = report_stat(aug, 'Loss/epsilon_penalty', tf.square(scored_real.scores))
        D_loss += drift * wgan_epsilon

    # Gradient penalty from "Improved Training of Wasserstein GANs".
    with tf.name_scope('Loss_GP'):
        image_dtype = generated.images.dtype
        alpha = tf.random_uniform([batch, 1, 1, 1], 0, 1, dtype=image_dtype)
        blended = tflib.lerp(tf.cast(real_images, generated.images.dtype), generated.images, alpha)
        blended_labels = real_labels  # NOTE: Mixing is performed without respect to fake_labels.
        scored_mix = eval_D(D, aug, blended, blended_labels, report='mix')
        grads = tf.gradients(tf.reduce_sum(scored_mix.scores), [blended])[0]
        norms = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
        norms = report_stat(aug, 'Loss/mix_norms', norms)
        penalty = tf.square(norms - wgan_target)
        D_reg = penalty * (wgan_lambda / (wgan_target**2))

    return report_loss(aug, G_loss, D_loss, None, D_reg)
def grow(res, lod):
    """Recursively assemble the features for resolution index `res`.

    Relies on names from the enclosing scope (`images_in`, `lod_in`,
    `fromrgb`, `naive_downsample_2d`, `block`, `cset`). `cset` presumably
    wraps the two callables in a conditional on the given predicate - confirm.
    Unlike sibling variants, the fade branch is installed for every `res`.

    Args:
        res: Resolution index handled by this call.
        lod: Level-of-detail paired with `res`; decreases as `res` increases.

    Returns:
        The output of the selected branch for this level.
    """
    def read_image():
        # Feed the block straight from the input image, downsampled for this level.
        return fromrgb(naive_downsample_2d(images_in, factor=2 ** lod), res)

    make_input = read_image
    if lod > 0:
        make_input = cset(make_input, (lod_in < lod), lambda: grow(res + 1, lod - 1))

    features = block(make_input(), res)

    def fade_with_coarser():
        # Blend with the image fed in at the next-coarser level.
        coarser = fromrgb(naive_downsample_2d(images_in, factor=2 ** (lod + 1)), res - 1)
        return tflib.lerp(features, coarser, lod_in - lod)

    make_output = cset(lambda: features, (lod_in > lod), fade_with_coarser)
    return make_output()
def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net):
    """Prepare real images and labels, including random label dropping.

    Steps, in order: cast to float32 and remap the dynamic range; optionally
    mirror a random subset of samples along axis 3 and permute eight label
    columns to match; crossfade towards a 2x2-averaged copy by the fractional
    part of `lod`; repeat pixels 2**floor(lod) times; finally replace every
    label column except the first with a randomly thinned 0/1 indicator.

    Args:
        x: Image batch. Axes 2 and 3 are pooled/upscaled and axis 3 is
            mirrored (presumably NCHW - confirm with caller).
        labels: 2-D label tensor.
        lod: Level-of-detail value.
        mirror_augment: Enables the random mirroring step.
        drange_data: Source dynamic range passed to `misc.adjust_dynamic_range`.
        drange_net: Target dynamic range passed to `misc.adjust_dynamic_range`.

    Returns:
        Tuple `(x, labels)` after processing.
    """
    with tf.name_scope('DynamicRange'):
        x = misc.adjust_dynamic_range(tf.cast(x, tf.float32), drange_data, drange_net)

    if mirror_augment:
        with tf.name_scope('MirrorAugment'):
            batch_size = tf.shape(x)[0]
            keep_original = tf.random_uniform([batch_size]) < 0.5
            flipped_images = tf.reverse(x, [3])
            x = tf.where(keep_original, x, flipped_images)

            # Column permutation for mirrored samples: slot 0 stays put and
            # slots 1..7 are reversed; everything else is left in place.
            rotation_offset = 108
            leading = tf.range(rotation_offset)
            permuted = tf.constant([0, 7, 6, 5, 4, 3, 2, 1]) + rotation_offset
            trailing = tf.range(rotation_offset + 8, tf.shape(labels)[1])
            column_order = tf.concat([leading, permuted, trailing], axis=0)
            flipped_labels = tf.gather(labels, column_order, axis=1)
            labels = tf.where(keep_original, labels, flipped_labels)

    with tf.name_scope('FadeLOD'):  # Smooth crossfade between consecutive levels-of-detail.
        dims = tf.shape(x)
        coarse = tf.reshape(x, [-1, dims[1], dims[2] // 2, 2, dims[3] // 2, 2])
        coarse = tf.reduce_mean(coarse, axis=[3, 5], keepdims=True)
        coarse = tf.tile(coarse, [1, 1, 1, 2, 1, 2])
        coarse = tf.reshape(coarse, [-1, dims[1], dims[2], dims[3]])
        fade = lod - tf.floor(lod)
        x = tflib.lerp(x, coarse, fade)

    with tf.name_scope('UpscaleLOD'):  # Upscale to match the expected input/output size of the networks.
        dims = tf.shape(x)
        repeat = tf.cast(2**tf.floor(lod), tf.int32)
        x = tf.reshape(x, [-1, dims[1], dims[2], 1, dims[3], 1])
        x = tf.tile(x, [1, 1, 1, repeat, 1, repeat])
        x = tf.reshape(x, [-1, dims[1], dims[2] * repeat, dims[3] * repeat])

    with tf.name_scope('RandomizeLabels'):
        keep_probability = 0.80
        is_set = tf.cast(labels, tf.bool)
        draws = tf.random.uniform(tf.shape(labels), 0.0, 1.0)
        survives = draws > (1 - keep_probability)
        # 1.0 where a non-zero label survived its draw, 0.0 elsewhere.
        thinned = tf.cast(tf.math.logical_and(is_set, survives), dtype=tf.float32)
        # Column 0 is passed through untouched.
        labels = tf.concat([labels[:, :1], thinned[:, 1:]], axis=-1)

    return x, labels
def D_hinge_gp(
        G, D, opt, training_set, minibatch_size, reals, labels,  # pylint: disable=unused-argument
        wgan_lambda=10.0,  # Weight for the gradient penalty term.
        wgan_target=1.0):  # Target value for gradient magnitudes.
    """Discriminator hinge loss with a WGAN-GP style gradient penalty.

    Reads the free name `colocate_gradients`, which must be defined in the
    surrounding module.

    Args:
        G: Generator network; sampled with fresh normal latents and `labels`.
        D: Discriminator network; scored on reals, fakes and mixed images.
        opt: Optimizer providing `apply_loss_scaling` / `undo_loss_scaling`.
        training_set: Unused.
        minibatch_size: Number of latents (and mixing factors) to draw.
        reals: Real image batch.
        labels: Labels fed to both networks.
        wgan_lambda: Gradient penalty weight.
        wgan_target: Target gradient norm.

    Returns:
        Loss tensor (hinge terms + weighted gradient penalty).
    """
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)

    score_real = fp32(D.get_output_for(reals, labels, is_training=True))
    score_fake = fp32(D.get_output_for(fakes, labels, is_training=True))
    score_real = autosummary('Loss/scores/real', score_real)
    score_fake = autosummary('Loss/scores/fake', score_fake)

    fake_term = tf.maximum(0., 1. + score_fake)
    real_term = tf.maximum(0., 1. - score_real)
    loss = fake_term + real_term

    with tf.name_scope('GradientPenalty'):
        # Random per-sample blend of real and generated images.
        alpha = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fakes.dtype)
        blended = tflib.lerp(tf.cast(reals, fakes.dtype), fakes, alpha)
        score_blended = fp32(D.get_output_for(blended, labels, is_training=True))
        score_blended = autosummary('Loss/scores/mixed', score_blended)

        # Differentiate the scaled score sum, then undo the scaling on the gradients.
        scaled_total = opt.apply_loss_scaling(tf.reduce_sum(score_blended))
        raw_grads = tf.gradients(
            scaled_total, [blended],
            colocate_gradients_with_ops=colocate_gradients)[0]
        grads = opt.undo_loss_scaling(fp32(raw_grads))

        grad_norms = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
        grad_norms = autosummary('Loss/mixed_norms', grad_norms)
        penalty = tf.square(grad_norms - wgan_target)
    loss += penalty * (wgan_lambda / (wgan_target**2))
    return loss
def process_reals(x, labels, lod, mirror_augment, mirror_augment_v, spatial_augmentations, drange_data, drange_net):
    """Prepare a minibatch of real images with optional augmentations.

    Steps, in order: cast to float32 and remap the dynamic range; optional
    random mirroring along axis 3 and along axis 2; optional per-image
    `misc.apply_random_aug`; crossfade towards a 2x2-averaged copy by the
    fractional part of `lod`; repeat pixels 2**floor(lod) times.

    Reads the free name `save_image_summaries`, which must be defined in the
    surrounding module.

    Args:
        x: Image batch. Transposed with [0, 2, 3, 1] before augmentation and
            back with [0, 3, 1, 2] afterwards (presumably NCHW - confirm).
        labels: Returned unchanged.
        lod: Level-of-detail value.
        mirror_augment: Enables random mirroring along axis 3.
        mirror_augment_v: Enables random mirroring along axis 2.
        spatial_augmentations: Enables `misc.apply_random_aug` on each image.
        drange_data: Source dynamic range passed to `misc.adjust_dynamic_range`.
        drange_net: Target dynamic range passed to `misc.adjust_dynamic_range`.

    Returns:
        Tuple `(x, labels)`.
    """
    with tf.name_scope('DynamicRange'):
        x = misc.adjust_dynamic_range(tf.cast(x, tf.float32), drange_data, drange_net)

    if mirror_augment:
        with tf.name_scope('MirrorAugment'):
            keep_original = tf.random_uniform([tf.shape(x)[0]]) < 0.5
            x = tf.where(keep_original, x, tf.reverse(x, [3]))

    if mirror_augment_v:
        with tf.name_scope('MirrorAugment_V'):
            keep_original = tf.random_uniform([tf.shape(x)[0]]) < 0.5
            x = tf.where(keep_original, x, tf.reverse(x, [2]))

    if spatial_augmentations:
        with tf.name_scope('SpatialAugmentations'):
            # Move axis 1 last for the augmentation, then restore the layout.
            before = tf.transpose(x, [0, 2, 3, 1])
            after = tf.map_fn(misc.apply_random_aug, before)
            x = tf.transpose(after, [0, 3, 1, 2])
            if save_image_summaries:
                with tf.name_scope('ImageSummaries'), tf.device('/cpu:0'):
                    tf.summary.image("reals_pre-augment", before)
                    tf.summary.image("reals_post-augment", after)

    with tf.name_scope('FadeLOD'):  # Smooth crossfade between consecutive levels-of-detail.
        dims = tf.shape(x)
        coarse = tf.reshape(x, [-1, dims[1], dims[2] // 2, 2, dims[3] // 2, 2])
        coarse = tf.reduce_mean(coarse, axis=[3, 5], keepdims=True)
        coarse = tf.tile(coarse, [1, 1, 1, 2, 1, 2])
        coarse = tf.reshape(coarse, [-1, dims[1], dims[2], dims[3]])
        fade = lod - tf.floor(lod)
        x = tflib.lerp(x, coarse, fade)

    with tf.name_scope('UpscaleLOD'):  # Upscale to match the expected input/output size of the networks.
        dims = tf.shape(x)
        repeat = tf.cast(2**tf.floor(lod), tf.int32)
        x = tf.reshape(x, [-1, dims[1], dims[2], 1, dims[3], 1])
        x = tf.tile(x, [1, 1, 1, repeat, 1, repeat])
        x = tf.reshape(x, [-1, dims[1], dims[2] * repeat, dims[3] * repeat])

    return x, labels
def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net):
    """Prepare a minibatch of real images for the networks.

    Steps, in order: cast to float32 and remap the dynamic range; optionally
    mirror a random subset of samples along axis 3; crossfade towards a
    2x2-averaged copy by the fractional part of `lod`; repeat pixels
    2**floor(lod) times.

    Args:
        x: Image batch. Axes 2 and 3 are pooled/upscaled and axis 3 is
            mirrored (presumably NCHW - confirm with caller).
        labels: Returned unchanged.
        lod: Level-of-detail value.
        mirror_augment: Enables the random mirroring step.
        drange_data: Source dynamic range passed to `misc.adjust_dynamic_range`.
        drange_net: Target dynamic range passed to `misc.adjust_dynamic_range`.

    Returns:
        Tuple `(x, labels)`.
    """
    with tf.name_scope('DynamicRange'):
        x = misc.adjust_dynamic_range(tf.cast(x, tf.float32), drange_data, drange_net)

    if mirror_augment:
        with tf.name_scope('MirrorAugment'):
            keep_original = tf.random_uniform([tf.shape(x)[0]]) < 0.5
            x = tf.where(keep_original, x, tf.reverse(x, [3]))

    with tf.name_scope('FadeLOD'):  # Smooth crossfade between consecutive levels-of-detail.
        dims = tf.shape(x)
        # Average each 2x2 cell, then tile the average back to full size.
        coarse = tf.reshape(x, [-1, dims[1], dims[2] // 2, 2, dims[3] // 2, 2])
        coarse = tf.reduce_mean(coarse, axis=[3, 5], keepdims=True)
        coarse = tf.tile(coarse, [1, 1, 1, 2, 1, 2])
        coarse = tf.reshape(coarse, [-1, dims[1], dims[2], dims[3]])
        fade = lod - tf.floor(lod)
        x = tflib.lerp(x, coarse, fade)

    with tf.name_scope('UpscaleLOD'):  # Upscale to match the expected input/output size of the networks.
        dims = tf.shape(x)
        repeat = tf.cast(2 ** tf.floor(lod), tf.int32)
        x = tf.reshape(x, [-1, dims[1], dims[2], 1, dims[3], 1])
        x = tf.tile(x, [1, 1, 1, repeat, 1, repeat])
        x = tf.reshape(x, [-1, dims[1], dims[2] * repeat, dims[3] * repeat])

    return x, labels
def grow(x, res, lod):
    """Recursively assemble the output image starting from features `x`.

    Relies on names from the enclosing scope (`lod_in`, `block`, `torgb`,
    `upscale2d`, `cset`). `cset` presumably wraps the two callables in a
    conditional on the given predicate - confirm.

    Args:
        x: Features coming from the previous (coarser) level.
        res: Resolution index handled by this call.
        lod: Level-of-detail paired with `res`; decreases as `res` increases.

    Returns:
        The image produced by the selected branch.
    """
    features = block(res, x)

    def plain_image():
        # This level's image, upscaled by 2**lod.
        return upscale2d(torgb(res, features), 2**lod)

    def faded_image():
        # Blend this level's image with the upscaled coarser-level image.
        blended = tflib.lerp(torgb(res, features), upscale2d(torgb(res - 1, x)), lod_in - lod)
        return upscale2d(blended, 2**lod)

    make_image = cset(plain_image, (lod_in > lod), faded_image)
    if lod > 0:
        make_image = cset(make_image, (lod_in < lod), lambda: grow(features, res + 1, lod - 1))
    return make_image()
def _evaluate(self, Gs, num_gpus):
    """Estimate a perceptual path length for `Gs` and report its mean.

    For every GPU a graph is built that draws latent pairs, interpolates
    between them at `t` and `t + self.epsilon` (in W or Z, per `self.space`),
    synthesizes both images and measures their perceptual distance scaled by
    1 / epsilon**2. Distances are gathered over `self.num_samples`, values
    outside the 1st..99th percentile are dropped, and the mean is reported.

    Args:
        Gs: Generator network exposing `clone()`, `input_shape` and
            `components.mapping` / `components.synthesis`.
        num_gpus: Number of GPUs to build the graph on.
    """
    minibatch_size = num_gpus * self.minibatch_per_gpu

    # Construct TensorFlow graph.
    per_gpu_distances = []
    for gpu_idx in range(num_gpus):
        with tf.device('/gpu:%d' % gpu_idx):
            G = Gs.clone()
            noise_vars = [
                variable
                for var_name, variable in G.components.synthesis.vars.items()
                if var_name.startswith('noise')
            ]

            # Generate random latents and interpolation t-values.
            z_pairs = tf.random_normal([self.minibatch_per_gpu * 2] + G.input_shape[1:])
            t_upper = 1.0 if self.sampling == 'full' else 0.0
            t = tf.random_uniform([self.minibatch_per_gpu], 0.0, t_upper)

            # Interpolate in W or Z.
            if self.space == 'w':
                w_pairs = G.components.mapping.get_output_for(z_pairs, None, is_validation=True)
                w_start, w_end = w_pairs[0::2], w_pairs[1::2]
                w_at_t = tflib.lerp(w_start, w_end, t[:, np.newaxis, np.newaxis])
                w_at_t_eps = tflib.lerp(w_start, w_end, t[:, np.newaxis, np.newaxis] + self.epsilon)
                # Interleave so that even rows are at t and odd rows at t + epsilon.
                w_eval = tf.reshape(tf.stack([w_at_t, w_at_t_eps], axis=1), w_pairs.shape)
            else:  # space == 'z'
                z_start, z_end = z_pairs[0::2], z_pairs[1::2]
                z_at_t = slerp(z_start, z_end, t[:, np.newaxis])
                z_at_t_eps = slerp(z_start, z_end, t[:, np.newaxis] + self.epsilon)
                z_eval = tf.reshape(tf.stack([z_at_t, z_at_t_eps], axis=1), z_pairs.shape)
                w_eval = G.components.mapping.get_output_for(z_eval, None, is_validation=True)

            # Synthesize images.
            noise_init_ops = [variable.initializer for variable in noise_vars]
            with tf.control_dependencies(noise_init_ops):  # use same noise inputs for the entire minibatch
                images = G.components.synthesis.get_output_for(
                    w_eval, is_validation=True, randomize_noise=False)

            # Crop only the face region.
            c = int(images.shape[2] // 8)
            images = images[:, :, c * 3:c * 7, c * 2:c * 6]

            # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
            if images.shape[2] > 256:
                factor = images.shape[2] // 256
                images = tf.reshape(images, [
                    -1, images.shape[1],
                    images.shape[2] // factor, factor,
                    images.shape[3] // factor, factor,
                ])
                images = tf.reduce_mean(images, axis=[3, 5])

            # Scale dynamic range from [-1,1] to [0,255] for VGG.
            images = (images + 1) * (255 / 2)

            # Evaluate perceptual distance.
            img_at_t, img_at_t_eps = images[0::2], images[1::2]
            distance_measure = misc.load_pkl(
                'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl'
            )
            scaled_distance = distance_measure.get_output_for(img_at_t, img_at_t_eps) * (1 / self.epsilon**2)
            per_gpu_distances.append(scaled_distance)

    # Sampling loop.
    collected = []
    for _ in range(0, self.num_samples, minibatch_size):
        collected.extend(tflib.run(per_gpu_distances))
    collected = np.concatenate(collected, axis=0)

    # Reject outliers.
    lo = np.percentile(collected, 1, interpolation='lower')
    hi = np.percentile(collected, 99, interpolation='higher')
    in_range = np.logical_and(lo <= collected, collected <= hi)
    kept = np.extract(in_range, collected)
    self._report_result(np.mean(kept))
def _evaluate(self, Gs, Gs_kwargs, num_gpus):
    """Estimate a perceptual path length for `Gs` and report its mean.

    For every GPU a graph is built that draws latent pairs and random labels,
    interpolates between the pairs at `t` and `t + self.epsilon` (in W or Z,
    per `self.space`), synthesizes both images and measures their perceptual
    distance scaled by 1 / epsilon**2. Distances are gathered over
    `self.num_samples`, values outside the 1st..99th percentile are dropped,
    and the mean is reported.

    Args:
        Gs: Generator network exposing `clone()`, `input_shape`,
            `components.synthesis` and `get_output_for`.
        Gs_kwargs: Keyword arguments for the generator; copied and then
            updated with `self.Gs_overrides`.
        num_gpus: Number of GPUs to build the graph on.
    """
    Gs_kwargs = dict(Gs_kwargs)  # Copy so the caller's dict is not mutated.
    Gs_kwargs.update(self.Gs_overrides)
    minibatch_size = num_gpus * self.minibatch_per_gpu

    # Construct TensorFlow graph
    distance_expr = []
    for gpu_idx in range(num_gpus):
        with tf.device("/gpu:%d" % gpu_idx):
            Gs_clone = Gs.clone()
            noise_vars = [var for name, var in Gs_clone.components.synthesis.vars.items() if name.startswith("noise")]

            # Generate random latents and interpolation t-values
            lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] + Gs_clone.input_shape[1:])
            lerp_t = tf.random_uniform([self.minibatch_per_gpu], 0.0, 1.0 if self.sampling == "full" else 0.0)
            # Duplicate each label row so both members of a pair share one label.
            labels = tf.reshape(tf.tile(self._get_random_labels_tf(self.minibatch_per_gpu), [1, 2]), [self.minibatch_per_gpu * 2, -1])

            # Interpolate in W or Z
            if self.space == "w":
                # Map the sampled latent pairs (previously an undefined name
                # `latents` was passed here).
                dlat_t01 = Gs_clone.get_output_for(lat_t01, labels, **Gs_kwargs, return_dlatents = True)[-1]
                dlat_t01 = tf.cast(dlat_t01, tf.float32)
                dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2]
                dlat_e0 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis])
                dlat_e1 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)
                # Interleave so that even rows are at t and odd rows at t + epsilon.
                dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis = 1), dlat_t01.shape)
            else:
                lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2]
                lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis])
                lat_e1 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis] + self.epsilon)
                lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis = 1), lat_t01.shape)
                # Map the interpolated Z latents; they were previously computed
                # and then discarded in favour of an undefined name `latents`.
                dlat_e01 = Gs_clone.get_output_for(lat_e01, labels, **Gs_kwargs, return_dlatents = True)[-1]

            # Synthesize images
            with tf.control_dependencies([var.initializer for var in noise_vars]): # use same noise inputs for the entire minibatch
                imgs = Gs_clone.get_output_for(dlat_e01, labels, randomize_noise = False, **Gs_kwargs, take_dlatents = True)[0]
                imgs = tf.cast(imgs, tf.float32)

            # Crop only the face region
            if self.crop:
                c = int(imgs.shape[2] // 8)
                imgs = imgs[:, :, c*3 : c*7, c*2 : c*6]

            # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images
            factor = imgs.shape[2] // 256
            if factor > 1:
                imgs = tf.reshape(imgs, [-1, imgs.shape[1], imgs.shape[2] // factor, factor, imgs.shape[3] // factor, factor])
                imgs = tf.reduce_mean(imgs, axis=[3,5])

            # Scale dynamic range from [-1,1] to [0,255] for VGG
            imgs = (imgs + 1) * (255 / 2)

            # Evaluate perceptual distance
            img_e0, img_e1 = imgs[0::2], imgs[1::2]
            distance_measure = misc.load_pkl("http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl")
            distance_expr.append(distance_measure.get_output_for(img_e0, img_e1) * (1 / self.epsilon**2))

    # Sampling loop
    all_distances = []
    for begin in range(0, self.num_samples, minibatch_size):
        self._report_progress(begin, self.num_samples)
        all_distances += tflib.run(distance_expr)
    all_distances = np.concatenate(all_distances, axis = 0)

    # Reject outliers
    lo = np.percentile(all_distances, 1, interpolation = "lower")
    hi = np.percentile(all_distances, 99, interpolation = "higher")
    filtered_distances = np.extract(np.logical_and(lo <= all_distances, all_distances <= hi), all_distances)
    self._report_result(np.mean(filtered_distances))
def G_style(
    latents_in,                             # First input: latent vectors (Z) [minibatch, latent_size].
    labels_in,                              # Second input: conditioning labels [minibatch, label_size].
    truncation_psi = 0.7,                   # Style strength multiplier for the truncation trick. None = disable.
    truncation_cutoff = 8,                  # Number of layers for which to apply the truncation trick. None = disable.
    truncation_psi_val = None,              # Value for truncation_psi to use during validation.
    truncation_cutoff_val = None,           # Value for truncation_cutoff to use during validation.
    dlatent_avg_beta = 0.995,               # Decay for tracking the moving average of W during training. None = disable.
    style_mixing_prob = 0.9,                # Probability of mixing styles during training. None = disable.
    is_training = False,                    # Network is under training? Enables and disables specific features.
    is_validation = False,                  # Network is under validation? Chooses which value to use for truncation_psi.
    is_template_graph = False,              # True = template graph constructed by the Network class, False = actual evaluation.
    components = dnnlib.EasyDict(),         # Container for sub-networks. Retained between calls.
    **kwargs):                              # Arguments for sub-networks (G_mapping and G_synthesis).
    """Build the generator: mapping network, optional W tricks, synthesis network.

    The mapping network turns `latents_in` / `labels_in` into dlatents. During
    training the running average of the dlatents is updated and styles may be
    mixed; outside training the truncation trick may be applied. The result
    is fed to the synthesis network.

    Returns:
        The synthesis output wrapped in `tf.identity(..., name='images_out')`.
    """

    # Validate arguments.
    assert not is_training or not is_validation  # Training and validation are mutually exclusive.
    assert isinstance(components, dnnlib.EasyDict)  # `components` holds the synthesis and mapping sub-networks.
    if is_validation:  # Switch to the validation-time truncation settings (both default to None, i.e. no truncation).
        truncation_psi = truncation_psi_val
        truncation_cutoff = truncation_cutoff_val
    if is_training or (truncation_psi is not None and not tflib.is_tf_expression(truncation_psi) and truncation_psi == 1):
        truncation_psi = None  # No truncation while training or when psi is exactly 1.
    if is_training or (truncation_cutoff is not None and not tflib.is_tf_expression(truncation_cutoff) and truncation_cutoff <= 0):
        truncation_cutoff = None  # No truncation while training or when the cutoff is <= 0.
    if not is_training or (dlatent_avg_beta is not None and not tflib.is_tf_expression(dlatent_avg_beta) and dlatent_avg_beta == 1):
        dlatent_avg_beta = None  # No moving-average update outside training or when the decay is exactly 1.
    if not is_training or (style_mixing_prob is not None and not tflib.is_tf_expression(style_mixing_prob) and style_mixing_prob <= 0):
        style_mixing_prob = None  # No style mixing outside training or when the probability is <= 0.

    # Setup components.
    if 'synthesis' not in components:  # Create the synthesis network on first use.
        components.synthesis = tflib.Network('G_synthesis', func_name=G_synthesis, **kwargs)
    num_layers = components.synthesis.input_shape[1]  # Number of per-layer dlatent slots the synthesis network takes.
    dlatent_size = components.synthesis.input_shape[2]  # Width of a single dlatent vector.
    if 'mapping' not in components:  # Create the mapping network on first use.
        components.mapping = tflib.Network('G_mapping', func_name=G_mapping, dlatent_broadcast=num_layers, **kwargs)

    # Setup variables.
    lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False)  # Level-of-detail, initialised to 0.
    dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False)  # Running average of W.

    # Evaluate mapping network.
    dlatents = components.mapping.get_output_for(latents_in, labels_in, **kwargs)

    # Update moving average of W.
    if dlatent_avg_beta is not None:
        with tf.variable_scope('DlatentAvg'):
            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)  # Mean over the batch of the first layer's dlatent.
            # Blend the batch mean with the stored average using weight
            # dlatent_avg_beta (assumes tflib.lerp(a, b, t) = a + (b - a) * t - confirm).
            update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
            # Re-emitting dlatents through tf.identity under the control
            # dependency forces update_op to run before dlatents is consumed.
            with tf.control_dependencies([update_op]):
                dlatents = tf.identity(dlatents)

    # Perform style mixing regularization.
    if style_mixing_prob is not None:
        with tf.name_scope('StyleMix'):
            latents2 = tf.random_normal(tf.shape(latents_in))
            dlatents2 = components.mapping.get_output_for(latents2, labels_in, **kwargs)  # Second set of dlatents to mix in.
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]  # Layer indices, shape [1, num_layers, 1].
            cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2  # Two layers are subtracted per unit of lod.
            mixing_cutoff = tf.cond(
                tf.random_uniform([], 0.0, 1.0) < style_mixing_prob,  # With probability style_mixing_prob pick a random cutoff in [1, cur_layers); otherwise use cur_layers.
                lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32),
                lambda: cur_layers)
            dlatents = tf.where(tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)), dlatents, dlatents2)  # Layers below the cutoff keep dlatents; the rest take dlatents2.

    # Apply truncation trick.
    if truncation_psi is not None and truncation_cutoff is not None:
        with tf.variable_scope('Truncation'):
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]  # Layer indices, shape [1, num_layers, 1].
            ones = np.ones(layer_idx.shape, dtype=np.float32)  # All-ones coefficients of the same shape.
            coefs = tf.where(layer_idx < truncation_cutoff, truncation_psi * ones, ones)  # psi below the cutoff, 1 elsewhere.
            dlatents = tflib.lerp(dlatent_avg, dlatents, coefs)  # Interpolate from the average towards dlatents by coefs.

    # Evaluate synthesis network.
    # The synthesis network's own 'lod' variable is assigned from lod_in before it is evaluated.
    with tf.control_dependencies([tf.assign(components.synthesis.find_var('lod'), lod_in)]):
        images_out = components.synthesis.get_output_for(dlatents, force_clean_graph=is_template_graph, **kwargs)
    return tf.identity(images_out, name='images_out')  # Generated images.
def D_loss(
        G, D,
        reals,               # A batch of real images
        labels,              # A batch of labels (default 0s if no labels)
        minibatch_size,      # Size of each minibatch
        loss_type,           # Loss type: logistic, hinge, wgan
        reg_type,            # Regularization type: r1, r2, gp (mixed)
        gamma=10.0,          # Regularization strength
        wgan_epsilon=0.001,  # Wasserstein epsilon (for wgan only)
        wgan_target=1.0):    # Wasserstein target (for wgan only)
    """Discriminator loss with a selectable objective and regularizer.

    Args:
        G: Generator network; element [0] of its output is used as the fakes.
        D: Discriminator network.
        reals: Real image batch.
        labels: Labels fed to both networks.
        minibatch_size: Number of latents (and mixing factors) to draw.
        loss_type: One of "logistic", "hinge" or "wgan".
        reg_type: "r1" (gradient of real scores w.r.t. reals), "r2" (gradient
            of fake scores w.r.t. fakes) or "gp" (gradient penalty on mixed
            images). Any other value disables regularization.
        gamma: Regularization weight.
        wgan_epsilon: Weight of the squared real-score penalty (wgan only).
        wgan_target: Target gradient norm (gp only).

    Returns:
        Tuple `(loss, reg)`; `reg` is None when no regularizer is selected.

    Raises:
        ValueError: If `loss_type` is not one of the supported values.
    """
    # Fail fast: previously an unknown loss type left `loss` unbound and only
    # surfaced as an UnboundLocalError at the return, after building the graph.
    if loss_type not in ("logistic", "hinge", "wgan"):
        raise ValueError("Unknown loss_type: %r (expected 'logistic', 'hinge' or 'wgan')" % (loss_type,))

    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    # NOTE(review): `isegs` is not defined in this function; it must come from
    # the enclosing module - confirm, otherwise this line raises NameError.
    fake_imgs_out = G.get_output_for(latents, labels, isegs, is_training=True)[0]

    real_scores_out = D.get_output_for(reals, labels, is_training=True)
    fake_scores_out = D.get_output_for(fake_imgs_out, labels, is_training=True)
    real_scores_out = autosummary("Loss/scores/real", real_scores_out)
    fake_scores_out = autosummary("Loss/scores/fake", fake_scores_out)

    if loss_type == "logistic":
        loss = tf.nn.softplus(fake_scores_out)
        loss += tf.nn.softplus(-real_scores_out)
    elif loss_type == "hinge":
        loss = tf.maximum(0.0, 1.0 + fake_scores_out)
        loss += tf.maximum(0.0, 1.0 - real_scores_out)
    else:  # "wgan"
        loss = fake_scores_out - real_scores_out
        with tf.name_scope("EpsilonPenalty"):
            epsilon_penalty = autosummary("Loss/epsilon_penalty", tf.square(real_scores_out))
            loss += epsilon_penalty * wgan_epsilon

    reg = None
    with tf.name_scope("GradientPenalty"):
        if reg_type in ["r1", "r2"]:
            if reg_type == "r1":
                grads = tf.gradients(tf.reduce_sum(real_scores_out), [reals])[0]
            else:
                grads = tf.gradients(tf.reduce_sum(fake_scores_out), [fake_imgs_out])[0]
            gradient_penalty = tf.reduce_sum(tf.square(grads), axis=[1, 2, 3])
            gradient_penalty = autosummary("Loss/gradient_penalty", gradient_penalty)
            reg = gradient_penalty * (gamma * 0.5)
        elif reg_type == "gp":
            # Random per-sample blend of real and generated images.
            mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fake_imgs_out.dtype)
            mixed_imgs_out = tflib.lerp(tf.cast(reals, fake_imgs_out.dtype), fake_imgs_out, mixing_factors)
            mixed_scores_out = D.get_output_for(mixed_imgs_out, labels, is_training=True)
            mixed_scores_out = autosummary("Loss/scores/mixed", mixed_scores_out)
            mixed_grads = tf.gradients(tf.reduce_sum(mixed_scores_out), [mixed_imgs_out])[0]
            mixed_norms = tf.sqrt(tf.reduce_sum(tf.square(mixed_grads), axis=[1, 2, 3]))
            mixed_norms = autosummary("Loss/mixed_norms", mixed_norms)
            gradient_penalty = tf.square(mixed_norms - wgan_target)
            reg = gradient_penalty * (gamma / (wgan_target**2))

    return loss, reg
def _evaluate(self, Gs, Gs_kwargs, num_gpus, **kwargs):
    """Measure the perceptual distance travelled along each latent dimension.

    For every GPU a graph is built that starts from a fed latent sample,
    sweeps one dimension (selected through a placeholder) between a lower
    and an upper bound, synthesizes the images and measures the distance
    between consecutive images. Distances are summed per dimension,
    collected over ``self.n_traversals`` random samples, and the
    sum / mean / std statistics over the "active" dimensions are reported.

    Side effect: ``self.n_samples_per_dim`` is overwritten with the number
    of samples that is actually evaluated.

    Returns:
        dict with key ``'tpl_per_dim'`` holding the average distance per
        latent dimension.
    """
    Gs_kwargs = dict(Gs_kwargs)
    Gs_kwargs.update(self.Gs_overrides)
    batch = (self.n_samples_per_dim - 1) // num_gpus + 1
    if not (self.no_mapping or self.no_convert):
        Gs = Gs.convert(
            new_func_name='training.ps_sc_networks2.G_main_ps_sc')

    # ---- Graph construction: one tower per GPU. ----
    n_continuous = Gs.input_shape[1]
    distance_expr, lerps_expr = [], []
    eval_dim_phs, lat_sample_phs = [], []
    lat_start_alpha_phs, lat_end_alpha_phs = [], []
    for gpu_idx in range(num_gpus):
        with tf.device('/gpu:%d' % gpu_idx):
            Gs_clone = Gs.clone()
            # Without a mapping network the noise variables are read from
            # the clone itself, otherwise from its synthesis component.
            var_source = (Gs_clone.vars if self.no_mapping else
                          Gs_clone.components.synthesis.vars)
            noise_vars = [
                v for key, v in var_source.items()
                if key.startswith('noise')
            ]

            # Placeholders: swept dimension and the interpolation interval
            # (both alphas should be in [0, 1]).
            eval_dim = tf.placeholder(tf.int32)
            alpha_from = tf.placeholder(tf.float32)
            alpha_to = tf.placeholder(tf.float32)
            eval_dim_phs.append(eval_dim)
            lat_start_alpha_phs.append(alpha_from)
            lat_end_alpha_phs.append(alpha_to)

            one_hot_row = tf.one_hot(eval_dim, n_continuous)[tf.newaxis, :]
            dim_mask = tf.tile(one_hot_row > 0, [batch, 1])
            lerp_t = tf.linspace(alpha_from, alpha_to, batch)  # [b]
            lerps_expr.append(lerp_t)

            lat_sample = tf.placeholder(tf.float32,
                                        shape=Gs_clone.input_shape[1:])
            lat_sample_phs.append(lat_sample)

            # Lower end of the sweep: only the masked dimension is moved.
            lat_lo = tf.tile(lat_sample[tf.newaxis, :], [batch, 1])
            lo_target = (tf.zeros_like(lat_lo) - 4
                         if self.use_bound_4 else lat_lo - 2)
            lat_lo = tf.where(dim_mask, lo_target,
                              lat_lo)  # [b, n_continuous]

            # Upper end of the sweep.
            lat_hi = tf.tile(lat_sample[tf.newaxis, :], [batch, 1])
            hi_target = (tf.zeros_like(lat_hi) + 4
                         if self.use_bound_4 else lat_hi + 2)
            lat_hi = tf.where(dim_mask, hi_target,
                              lat_hi)  # [b, n_continuous]

            lat_e = tflib.lerp(lat_lo, lat_hi,
                               lerp_t[:, tf.newaxis])  # [b, n_continuous]

            # Empty label tensor: zero label columns per sample.
            labels = tf.zeros([batch, 0], dtype=tf.float32)
            if self.no_mapping:
                dlat_e = lat_e
            else:
                mapped = Gs_clone.components.mapping.get_output_for(
                    lat_e, labels, **Gs_kwargs)
                dlat_e = get_return_v(mapped, 1)

            # Synthesize with the same noise inputs for the whole minibatch.
            noise_inits = [v.initializer for v in noise_vars]
            with tf.control_dependencies(noise_inits):
                if self.no_mapping:
                    raw = Gs_clone.get_output_for(dlat_e,
                                                  labels,
                                                  randomize_noise=False,
                                                  **Gs_kwargs)
                else:
                    raw = Gs_clone.components.synthesis.get_output_for(
                        dlat_e, randomize_noise=False, **Gs_kwargs)
                images = tf.cast(get_return_v(raw, 1), tf.float32)

            # Optional crop of the face region.
            if self.crop:
                c = int(images.shape[2] // 8)
                images = images[:, :, c * 3:c * 7, c * 2:c * 6]

            # Box-downsample to 256x256 when the image is larger.
            factor = images.shape[2] // 256
            if factor > 1:
                boxed_shape = [
                    -1, images.shape[1], images.shape[2] // factor, factor,
                    images.shape[3] // factor, factor
                ]
                images = tf.reduce_mean(tf.reshape(images, boxed_shape),
                                        axis=[3, 5])

            # Rescale dynamic range from [-1,1] to [0,255] for VGG.
            images = (images + 1) * (255 / 2)

            # Single-channel images are repeated to three channels.
            if images.get_shape().as_list()[1] == 1:
                images = tf.tile(images, [1, 3, 1, 1])

            # Distance between each image and its successor in the sweep.
            distance_measure = misc.load_pkl(
                'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl'
            )
            distance_tmp = distance_measure.get_output_for(
                images[:-1], images[1:])
            print('distance_tmp.shape:', distance_tmp.get_shape().as_list())
            distance_expr.append(distance_tmp)

    # ---- Sampling loop. ----
    steps_per_run = (batch - 1) * num_gpus
    n_segs_per_dim = (self.n_samples_per_dim - 1) // steps_per_run
    self.n_samples_per_dim = n_segs_per_dim * steps_per_run + 1
    alphas = np.linspace(0., 1., num=(n_segs_per_dim * num_gpus) + 1)

    traversals_dim = []
    for _ in range(self.n_traversals):
        lat_sample_np = np.random.normal(size=Gs_clone.input_shape[1:])
        sum_distances = []
        for dim in range(n_continuous):
            self._report_progress(dim, n_continuous)
            dim_distances = []
            for seg in range(n_segs_per_dim):
                # Each GPU handles one consecutive alpha interval.
                feed = {}
                for gpu in range(num_gpus):
                    slot = seg * num_gpus + gpu
                    feed[eval_dim_phs[gpu]] = dim
                    feed[lat_start_alpha_phs[gpu]] = alphas[slot]
                    feed[lat_end_alpha_phs[gpu]] = alphas[slot + 1]
                    feed[lat_sample_phs[gpu]] = lat_sample_np
                distance_out, _lerps_out = tflib.run(
                    [distance_expr, lerps_expr], feed_dict=feed)
                dim_distances += distance_out
            dim_distances = np.concatenate(dim_distances, axis=0)
            sum_distances.append(np.sum(dim_distances))
        traversals_dim.append(sum_distances)

    # ---- Statistics; rows are traversals, columns are dimensions. ----
    traversals_dim = np.array(traversals_dim)
    avg_distance_per_dim = np.mean(traversals_dim, axis=0)
    std_distance_per_dim = np.std(traversals_dim, axis=0)

    # Only dimensions whose average distance exceeds the threshold count
    # towards the sums; the divisor is still the total number of dimensions.
    active_mask = np.array(avg_distance_per_dim) > self.active_thresh
    active_distances = np.extract(active_mask, avg_distance_per_dim)
    active_stds = np.extract(active_mask, std_distance_per_dim)
    n_dims = len(avg_distance_per_dim)
    sum_distance = np.sum(active_distances)
    mean_distance = np.sum(active_distances) / n_dims
    mean_std = np.sum(active_stds) / n_dims
    norm_dis_std = np.sqrt(mean_distance * mean_distance +
                           mean_std * mean_std)

    print('avg distance per dim:', avg_distance_per_dim)
    print('std distance per dim:', std_distance_per_dim)
    print('sum_distance:', sum_distance)
    print('mean_distance:', mean_distance)
    print('mean_std:', mean_std)
    print('norm_dis_std:', norm_dis_std)

    self._report_result(sum_distance, suffix='_sum_dist')
    self._report_result(mean_distance, suffix='_mean_dist')
    self._report_result(mean_std, suffix='_mean_std')
    self._report_result(norm_dis_std, suffix='_norm_dist_std')
    return {'tpl_per_dim': avg_distance_per_dim}
def _evaluate(self, Gs, Gs_kwargs, num_gpus):
    """Report the mean scaled perceptual distance between nearby latents.

    Per GPU, pairs of random latents are interpolated at ``t`` and
    ``t + self.epsilon`` (in W when ``self.space == 'w'``, otherwise in Z
    via ``slerp``), both points are synthesized, and the perceptual
    distance between the two images is scaled by ``1 / epsilon**2``.
    Values outside the 1st..99th percentile are discarded before the mean
    is passed to ``self._report_result``.
    """
    Gs_kwargs = dict(Gs_kwargs)
    Gs_kwargs.update(self.Gs_overrides)
    per_gpu = self.minibatch_per_gpu
    minibatch_size = num_gpus * per_gpu

    # ---- Graph construction: one tower per GPU. ----
    distance_expr = []
    for gpu_idx in range(num_gpus):
        with tf.device('/gpu:%d' % gpu_idx):
            Gs_clone = Gs.clone()
            synthesis = Gs_clone.components.synthesis
            mapping = Gs_clone.components.mapping
            noise_vars = [
                v for key, v in synthesis.vars.items()
                if key.startswith('noise')
            ]

            # Two latents per sample (the endpoints), one t per sample.
            # Unless sampling is 'full', t is always 0.
            lat_t01 = tf.random_normal([per_gpu * 2] +
                                       Gs_clone.input_shape[1:])
            t_max = 1.0 if self.sampling == 'full' else 0.0
            lerp_t = tf.random_uniform([per_gpu], 0.0, t_max)
            # Each random label is duplicated so both endpoints share it.
            labels = tf.reshape(
                tf.tile(self._get_random_labels_tf(per_gpu), [1, 2]),
                [per_gpu * 2, -1])

            if self.space == 'w':
                # Map first, then interpolate linearly in W.
                dlat_t01 = mapping.get_output_for(lat_t01, labels,
                                                  **Gs_kwargs)
                dlat_t01 = tf.cast(dlat_t01, tf.float32)
                w_a, w_b = dlat_t01[0::2], dlat_t01[1::2]
                w_at_t = tflib.lerp(w_a, w_b,
                                    lerp_t[:, np.newaxis, np.newaxis])
                w_at_t_eps = tflib.lerp(
                    w_a, w_b,
                    lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)
                # Interleave (t, t + eps) pairs back into batch order.
                dlat_e01 = tf.reshape(
                    tf.stack([w_at_t, w_at_t_eps], axis=1), dlat_t01.shape)
            else:  # space == 'z'
                # Interpolate in Z with slerp, then map.
                z_a, z_b = lat_t01[0::2], lat_t01[1::2]
                z_at_t = slerp(z_a, z_b, lerp_t[:, np.newaxis])
                z_at_t_eps = slerp(z_a, z_b,
                                   lerp_t[:, np.newaxis] + self.epsilon)
                lat_e01 = tf.reshape(
                    tf.stack([z_at_t, z_at_t_eps], axis=1), lat_t01.shape)
                dlat_e01 = mapping.get_output_for(lat_e01, labels,
                                                  **Gs_kwargs)

            # Synthesize with the same noise inputs for the whole minibatch.
            noise_inits = [v.initializer for v in noise_vars]
            with tf.control_dependencies(noise_inits):
                images = synthesis.get_output_for(dlat_e01,
                                                  randomize_noise=False,
                                                  **Gs_kwargs)
                images = tf.cast(images, tf.float32)

            # Optional crop of the face region.
            if self.crop:
                c = int(images.shape[2] // 8)
                images = images[:, :, c * 3:c * 7, c * 2:c * 6]

            # Box-downsample to 256x256 when the image is larger.
            factor = images.shape[2] // 256
            if factor > 1:
                boxed_shape = [
                    -1, images.shape[1], images.shape[2] // factor, factor,
                    images.shape[3] // factor, factor
                ]
                images = tf.reduce_mean(tf.reshape(images, boxed_shape),
                                        axis=[3, 5])

            # Rescale dynamic range from [-1,1] to [0,255] for VGG.
            images = (images + 1) * (255 / 2)

            # Even rows are the images at t, odd rows the images at t + eps.
            distance_measure = misc.load_pkl(
                'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2'
            )  # vgg16_zhang_perceptual.pkl
            pair_distance = distance_measure.get_output_for(
                images[0::2], images[1::2])
            distance_expr.append(pair_distance * (1 / self.epsilon**2))

    # ---- Sampling loop. ----
    collected = []
    for begin in range(0, self.num_samples, minibatch_size):
        self._report_progress(begin, self.num_samples)
        collected += tflib.run(distance_expr)
    all_distances = np.concatenate(collected, axis=0)

    # ---- Reject outliers outside the 1st..99th percentile. ----
    lo = np.percentile(all_distances, 1, interpolation='lower')
    hi = np.percentile(all_distances, 99, interpolation='higher')
    in_range = np.logical_and(lo <= all_distances, all_distances <= hi)
    filtered_distances = np.extract(in_range, all_distances)
    self._report_result(np.mean(filtered_distances))
def D_wgan_gp(
        G,
        D,
        opt,
        training_set,
        minibatch_size,
        reals,
        labels,
        infogan_nz,
        wgan_lambda=10.0,  # Weight for the gradient penalty term.
        wgan_epsilon=0.001,  # Weight for the epsilon term, \epsilon_{drift}.
        wgan_target=1.0,  # Target value for gradient magnitudes.
        gpu_ix=None):
    """Build the WGAN-GP discriminator loss plus an optional InfoGAN term.

    When ``infogan_nz > 0`` the single graph op whose name contains
    'QEncoding', 'GPU<gpu_ix>' and 'D_loss' is looked up, and the mean
    squared error between its first output and the first ``infogan_nz``
    latent components becomes the mutual-information loss.

    Returns:
        ``(loss, mutual_information_loss)``; the second element is ``0.0``
        when ``infogan_nz <= 0``.
    """
    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fake_images_out = G.get_output_for(latents, labels, is_training=True)
    fake_scores_out = fp32(
        D.get_output_for(fake_images_out, labels, is_training=True))

    mutual_information_loss = 0.0
    if infogan_nz > 0:
        with tf.name_scope('InfoGANLoss'):
            graph = fake_scores_out.graph
            # Very similar to the corresponding filter in
            # hessian_penalties.py: match on layer, GPU and model name.
            gpu_tag = 'GPU%d' % gpu_ix
            r_op = [
                op for op in graph.get_operations()
                if 'QEncoding' in op.name and gpu_tag in op.name
                and 'D_loss' in op.name
            ]
            for found in r_op:
                print('Using %s' % found.name)
            assert len(
                r_op) == 1, 'Found %s ops with name QEncoding' % len(r_op)
            encoding = graph.get_tensor_by_name('%s:0' % r_op[0].name)
            print('Regularizing first %s Z components with InfoGAN Loss' %
                  infogan_nz)
            mutual_information_loss = autosummary(
                'Loss/InfoGAN',
                tf.losses.mean_squared_error(latents[:, :infogan_nz],
                                             encoding))

    real_scores_out = fp32(D.get_output_for(reals, labels, is_training=True))
    real_scores_out = autosummary('Loss/scores/real', real_scores_out)
    fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out)
    loss = fake_scores_out - real_scores_out

    with tf.name_scope('GradientPenalty'):
        # Score a random per-sample blend of real and fake images.
        mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1],
                                           0.0,
                                           1.0,
                                           dtype=fake_images_out.dtype)
        reals_cast = tf.cast(reals, fake_images_out.dtype)
        mixed_images_out = tflib.lerp(reals_cast, fake_images_out,
                                      mixing_factors)
        mixed_scores_out = fp32(
            D.get_output_for(mixed_images_out, labels, is_training=True))
        mixed_scores_out = autosummary('Loss/scores/mixed', mixed_scores_out)
        # Gradients are taken on the loss-scaled sum and un-scaled afterwards.
        mixed_loss = opt.apply_loss_scaling(tf.reduce_sum(mixed_scores_out))
        scaled_grads = tf.gradients(mixed_loss, [mixed_images_out])[0]
        mixed_grads = opt.undo_loss_scaling(fp32(scaled_grads))
        mixed_norms = tf.sqrt(
            tf.reduce_sum(tf.square(mixed_grads), axis=[1, 2, 3]))
        mixed_norms = autosummary('Loss/mixed_norms', mixed_norms)
        gradient_penalty = tf.square(mixed_norms - wgan_target)
    loss += gradient_penalty * (wgan_lambda / (wgan_target**2))

    with tf.name_scope('EpsilonPenalty'):
        epsilon_penalty = autosummary('Loss/epsilon_penalty',
                                      tf.square(real_scores_out))
    loss += epsilon_penalty * wgan_epsilon
    return loss, mutual_information_loss
def G_main(
        latents_in,  # First input: Latent vectors (Z) [minibatch, latent_size].
        labels_in,  # Second input: Conditioning labels [minibatch, label_size].
        latmask,  # mask for split-frame latents blending
        dconst,  # initial (const) layer displacement
        truncation_psi=0.5,  # Style strength multiplier for the truncation trick. None = disable.
        truncation_cutoff=None,  # Number of layers for which to apply the truncation trick. None = disable.
        truncation_psi_val=None,  # Value for truncation_psi to use during validation.
        truncation_cutoff_val=None,  # Value for truncation_cutoff to use during validation.
        dlatent_avg_beta=0.995,  # Decay for tracking the moving average of W during training. None = disable.
        style_mixing_prob=0.9,  # Probability of mixing styles during training. None = disable.
        is_training=False,  # Network is under training? Enables and disables specific features.
        is_validation=False,  # Network is under validation? Chooses which value to use for truncation_psi.
        return_dlatents=False,  # Return dlatents in addition to the images?
        is_template_graph=False,  # True = template graph constructed by the Network class, False = actual evaluation.
        components=dnnlib.EasyDict(),  # Container for sub-networks. Retained between calls.
        mapping_func='G_mapping',  # Build func name for the mapping network.
        synthesis_func='G_synthesis_stylegan2',  # Build func name for the synthesis network.
        **kwargs):  # Arguments for sub-networks (mapping and synthesis).
    """Compose the mapping and synthesis networks into the full generator.

    Latents are mapped to dlatents, optionally used to update the moving
    average of W, optionally style-mixed with a second random latent, and
    optionally truncated towards the average before being synthesized.

    Returns:
        ``images_out``, or ``(images_out, dlatents)`` when
        ``return_dlatents`` is true.
    """
    # Validate arguments.
    assert not is_training or not is_validation
    assert isinstance(components, dnnlib.EasyDict)
    if is_validation:
        truncation_psi = truncation_psi_val
        truncation_cutoff = truncation_cutoff_val
    # Truncation is off during training and when psi is the constant 1.
    if is_training or (truncation_psi is not None
                       and not tflib.is_tf_expression(truncation_psi)
                       and truncation_psi == 1):
        truncation_psi = None
    if is_training:
        truncation_cutoff = None
    # Average tracking and style mixing only run during training.
    if not is_training or (dlatent_avg_beta is not None
                           and not tflib.is_tf_expression(dlatent_avg_beta)
                           and dlatent_avg_beta == 1):
        dlatent_avg_beta = None
    if not is_training or (style_mixing_prob is not None
                           and not tflib.is_tf_expression(style_mixing_prob)
                           and style_mixing_prob <= 0):
        style_mixing_prob = None

    # Setup components; build functions are looked up by name in this module.
    if 'synthesis' not in components:
        components.synthesis = tflib.Network(
            'G_synthesis', func_name=globals()[synthesis_func], **kwargs)
    num_layers = components.synthesis.input_shape[1]
    dlatent_size = components.synthesis.input_shape[2]
    if 'mapping' not in components:
        components.mapping = tflib.Network('G_mapping',
                                           func_name=globals()[mapping_func],
                                           dlatent_broadcast=num_layers,
                                           **kwargs)

    # Setup variables.
    lod_in = tf.get_variable('lod',
                             initializer=np.float32(0),
                             trainable=False)
    dlatent_avg = tf.get_variable('dlatent_avg',
                                  shape=[dlatent_size],
                                  initializer=tf.initializers.zeros(),
                                  trainable=False)

    # Evaluate mapping network.
    dlatents = components.mapping.get_output_for(latents_in,
                                                 labels_in,
                                                 is_training=is_training,
                                                 **kwargs)
    dlatents = tf.cast(dlatents, tf.float32)

    # Update moving average of W.
    if dlatent_avg_beta is not None:
        with tf.variable_scope('DlatentAvg'):
            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
            update_op = tf.assign(
                dlatent_avg,
                tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
            # Tie the update to the dlatents so it runs whenever they do.
            with tf.control_dependencies([update_op]):
                dlatents = tf.identity(dlatents)

    # Perform style mixing regularization.
    if style_mixing_prob is not None:
        with tf.variable_scope('StyleMix'):
            latents2 = tf.random_normal(tf.shape(latents_in))
            dlatents2 = components.mapping.get_output_for(
                latents2, labels_in, is_training=is_training, **kwargs)
            dlatents2 = tf.cast(dlatents2, tf.float32)
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2
            # One cutoff for the whole batch: with probability
            # style_mixing_prob a random layer, otherwise no mixing at all.
            mixing_cutoff = tf.cond(
                tf.random_uniform([], 0.0, 1.0) < style_mixing_prob,
                lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32),
                lambda: cur_layers)
            # Layers below the cutoff keep the first latent's styles, the
            # rest take the second's. This step was previously swallowed by
            # a string literal, leaving style mixing without any effect.
            dlatents = tf.where(
                tf.broadcast_to(layer_idx < mixing_cutoff,
                                tf.shape(dlatents)), dlatents, dlatents2)

    # Apply truncation trick.
    if truncation_psi is not None:
        with tf.variable_scope('Truncation'):
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            layer_psi = np.ones(layer_idx.shape, dtype=np.float32)
            if truncation_cutoff is None:
                # No cutoff: every layer is truncated.
                layer_psi *= truncation_psi
            else:
                layer_psi = tf.where(layer_idx < truncation_cutoff,
                                     layer_psi * truncation_psi, layer_psi)
            dlatents = tflib.lerp(dlatent_avg, dlatents, layer_psi)

    # Evaluate synthesis network; copy `lod` into it first when it has one.
    deps = []
    if 'lod' in components.synthesis.vars:
        deps.append(tf.assign(components.synthesis.vars['lod'], lod_in))
    with tf.control_dependencies(deps):
        images_out = components.synthesis.get_output_for(
            dlatents,
            latmask,
            dconst,
            is_training=is_training,
            force_clean_graph=is_template_graph,
            **kwargs)

    # Return requested outputs.
    images_out = tf.identity(images_out, name='images_out')
    if return_dlatents:
        return images_out, dlatents
    return images_out
def _evaluate(self, Gs, num_gpus):
    """Report scaled perceptual distances for the b, c and joint latents.

    The generator has two latent inputs, handled by ``mapping_b`` and
    ``mapping_c``. Per GPU three image batches are synthesized in which
    only b, only c, or both are moved by ``self.epsilon`` between the two
    images of a pair. Each distance is scaled by ``1 / epsilon**2``,
    trimmed to its 1st..99th percentile and its mean reported under the
    suffixes ``'_b'``, ``'_c'`` and ``'_bc'``.
    """
    per_gpu = self.minibatch_per_gpu
    minibatch_size = num_gpus * per_gpu

    def pair_up(first, second, shape):
        # Interleave two batches row by row into one batch of `shape`.
        return tf.reshape(tf.stack([first, second], axis=1), shape)

    def trimmed_mean(values):
        # Mean of the values lying within the 1st..99th percentile.
        lo = np.percentile(values, 1, interpolation='lower')
        hi = np.percentile(values, 99, interpolation='higher')
        kept = np.extract(np.logical_and(lo <= values, values <= hi), values)
        return np.mean(kept)

    # ---- Graph construction: one tower per GPU. ----
    distance_expr = []
    distance_b_expr = []
    distance_c_expr = []
    for gpu_idx in range(num_gpus):
        with tf.device('/gpu:%d' % gpu_idx):
            Gs_clone = Gs.clone()
            synthesis = Gs_clone.components.synthesis
            mapping_b = Gs_clone.components.mapping_b
            mapping_c = Gs_clone.components.mapping_c
            noise_vars = [
                v for key, v in synthesis.vars.items()
                if key.startswith('noise')
            ]

            # Two latents per sample for each input, one t per sample.
            # Unless sampling is 'full', t is always 0.
            lat_b_t01 = tf.random_normal([per_gpu * 2] +
                                         Gs_clone.input_shapes[0][1:])
            lat_c_t01 = tf.random_normal([per_gpu * 2] +
                                         Gs_clone.input_shapes[1][1:])
            t_max = 1.0 if self.sampling == 'full' else 0.0
            lerp_t = tf.random_uniform([per_gpu], 0.0, t_max)

            if self.space == 'w':
                dlat_b_t01 = mapping_b.get_output_for(lat_b_t01,
                                                      None,
                                                      is_validation=True)
                dlat_c_t01 = mapping_c.get_output_for(lat_c_t01,
                                                      None,
                                                      is_validation=True)
                b_from, b_to = dlat_b_t01[0::2], dlat_b_t01[1::2]
                c_from, c_to = dlat_c_t01[0::2], dlat_c_t01[1::2]
                b_e0 = tflib.lerp(b_from, b_to,
                                  lerp_t[:, np.newaxis, np.newaxis])
                c_e0 = tflib.lerp(c_from, c_to,
                                  lerp_t[:, np.newaxis, np.newaxis])
                c_e1 = tflib.lerp(
                    c_from, c_to,
                    lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)

                # For b, the step is taken on the first layer's vector: the
                # interpolated point at t + eps is made orthogonal to the
                # point at t, normalized, and scaled by epsilon.
                base = b_e0[:, 0, :]
                step = tflib.lerp(
                    b_from, b_to,
                    lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)[:, 0, :]
                dot_bs = tf.reduce_sum(base * step, axis=1, keepdims=True)
                dot_bb = tf.reduce_sum(base * base, axis=1, keepdims=True)
                step = step - dot_bs / dot_bb * base
                step = step / tf.sqrt(
                    tf.reduce_sum(step * step, axis=1, keepdims=True))
                b_e1 = base + self.epsilon * step
                # Repeat the moved vector across all layers.
                b_e1 = tf.reshape(b_e1, [per_gpu, 1, -1])
                n_layers = b_e0.shape.as_list()[1]
                b_e1 = tf.tile(b_e1, [1, n_layers, 1])

                # Compute the separate and the joint variants.
                # Change b only.
                dlat_b_e02 = pair_up(b_e0, b_e1, dlat_b_t01.shape)
                dlat_c_e02 = pair_up(c_e0, c_e0, dlat_c_t01.shape)
                # Change c only.
                dlat_b_e03 = pair_up(b_e0, b_e0, dlat_b_t01.shape)
                dlat_c_e03 = pair_up(c_e0, c_e1, dlat_c_t01.shape)
                # Change both.
                dlat_b_e04 = pair_up(b_e0, b_e1, dlat_b_t01.shape)
                dlat_c_e04 = pair_up(c_e0, c_e1, dlat_c_t01.shape)
            else:  # space == 'z'
                b_from, b_to = lat_b_t01[0::2], lat_b_t01[1::2]
                c_from, c_to = lat_c_t01[0::2], lat_c_t01[1::2]
                b_e0 = slerp(b_from, b_to, lerp_t[:, np.newaxis])
                c_e0 = slerp(c_from, c_to, lerp_t[:, np.newaxis])
                b_e1 = slerp(b_from, b_to,
                             lerp_t[:, np.newaxis] + self.epsilon)
                c_e1 = slerp(c_from, c_to,
                             lerp_t[:, np.newaxis] + self.epsilon)

                # Change b only.
                lat_b_e02 = pair_up(b_e0, b_e1, lat_b_t01.shape)
                lat_c_e02 = pair_up(c_e0, c_e0, lat_c_t01.shape)
                dlat_b_e02 = mapping_b.get_output_for(lat_b_e02,
                                                      None,
                                                      is_validation=True)
                dlat_c_e02 = mapping_c.get_output_for(lat_c_e02,
                                                      None,
                                                      is_validation=True)
                # Change c only.
                lat_b_e03 = pair_up(b_e0, b_e0, lat_b_t01.shape)
                lat_c_e03 = pair_up(c_e0, c_e1, lat_c_t01.shape)
                dlat_b_e03 = mapping_b.get_output_for(lat_b_e03,
                                                      None,
                                                      is_validation=True)
                dlat_c_e03 = mapping_c.get_output_for(lat_c_e03,
                                                      None,
                                                      is_validation=True)
                # Change both.
                lat_b_e04 = pair_up(b_e0, b_e1, lat_b_t01.shape)
                lat_c_e04 = pair_up(c_e0, c_e1, lat_c_t01.shape)
                dlat_b_e04 = mapping_b.get_output_for(lat_b_e04,
                                                      None,
                                                      is_validation=True)
                dlat_c_e04 = mapping_c.get_output_for(lat_c_e04,
                                                      None,
                                                      is_validation=True)

            # Synthesize with the same noise inputs for the whole minibatch;
            # the last element of the synthesis output is the image.
            noise_inits = [v.initializer for v in noise_vars]
            with tf.control_dependencies(noise_inits):
                variants = [
                    synthesis.get_output_for(dlat_b,
                                             dlat_c,
                                             is_validation=True,
                                             randomize_noise=False)[-1]
                    for dlat_b, dlat_c in (
                        (dlat_b_e02, dlat_c_e02),
                        (dlat_b_e03, dlat_c_e03),
                        (dlat_b_e04, dlat_c_e04),
                    )
                ]

            # Optional crop of the face region; the offsets are derived
            # from the first variant and applied to all three.
            if self.crop:
                c = int(variants[0].shape[2] // 8)
                variants = [
                    img[:, :, c * 3:c * 7, c * 2:c * 6] for img in variants
                ]

            # Box-downsample to 256x256 when the image is larger.
            if variants[0].shape[2] > 256:
                factor = variants[0].shape[2] // 256
                variants = [
                    tf.reduce_mean(tf.reshape(img, [
                        -1, img.shape[1], img.shape[2] // factor, factor,
                        img.shape[3] // factor, factor
                    ]),
                                   axis=[3, 5]) for img in variants
                ]

            # Rescale dynamic range from [-1,1] to [0,255] for VGG.
            images_2, images_3, images_4 = [
                (img + 1) * (255 / 2) for img in variants
            ]

            # Even rows are the reference images, odd rows the moved ones.
            distance_measure = misc.load_pkl(
                'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl'
            )  # vgg16_zhang_perceptual.pkl
            scale = 1 / self.epsilon**2
            distance_b_expr.append(
                distance_measure.get_output_for(images_2[0::2],
                                                images_2[1::2]) * scale)
            distance_c_expr.append(
                distance_measure.get_output_for(images_3[0::2],
                                                images_3[1::2]) * scale)
            distance_expr.append(
                distance_measure.get_output_for(images_4[0::2],
                                                images_4[1::2]) * scale)

    # ---- Sampling loop: each variant is evaluated in its own run. ----
    joint_chunks, b_chunks, c_chunks = [], [], []
    for _ in range(0, self.num_samples, minibatch_size):
        joint_chunks += tflib.run(distance_expr)
        b_chunks += tflib.run(distance_b_expr)
        c_chunks += tflib.run(distance_c_expr)
    all_distances = np.concatenate(joint_chunks, axis=0)
    b_distances = np.concatenate(b_chunks, axis=0)
    c_distances = np.concatenate(c_chunks, axis=0)

    # ---- Reject outliers and report. ----
    self._report_result(trimmed_mean(all_distances), suffix='_bc')
    self._report_result(trimmed_mean(b_distances), suffix='_b')
    self._report_result(trimmed_mean(c_distances), suffix='_c')
def truncate(dlatents, truncation_psi, maxlayer=8, num_layers=16):
    """Pull the first ``maxlayer`` layers of ``dlatents`` towards the average.

    Layers with index below ``maxlayer`` are interpolated between the
    generator's ``dlatent_avg`` and ``dlatents`` with weight
    ``truncation_psi``; the remaining layers are returned unchanged.

    Args:
        dlatents: dlatent batch whose second axis has ``num_layers`` entries
            (or broadcasts against it).
        truncation_psi: interpolation weight for the truncated layers.
        maxlayer: number of leading layers to truncate.
        num_layers: length of the layer axis. Defaults to 16, the value that
            was previously hard-coded; pass the model's own layer count for
            generators of a different size.

    Returns:
        The truncated dlatents, evaluated in the default session.
    """
    # NOTE(review): relies on a module-level `Gs` and an active default
    # session -- confirm both are set up by the caller.
    session = tf.get_default_session()
    dlatent_avg = session.run(Gs.own_vars["dlatent_avg"])
    layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
    ones = np.ones(layer_idx.shape, dtype=np.float32)
    # tf.where keeps `coefs` a tensor, so the lerp below is a graph
    # expression that session.run can evaluate.
    coefs = tf.where(layer_idx < maxlayer, truncation_psi * ones, ones)
    return session.run(tflib.lerp(dlatent_avg, dlatents, coefs))
def G_style(
        latents_in,  # First input: Latent vectors (Z) [minibatch, latent_size].
        labels_in,  # Second input: Conditioning labels [minibatch, label_size].
        truncation_psi=0.7,  # Style strength multiplier for the truncation trick. None = disable.
        truncation_cutoff=8,  # Number of layers for which to apply the truncation trick. None = disable.
        truncation_psi_val=None,  # Value for truncation_psi to use during validation.
        truncation_cutoff_val=None,  # Value for truncation_cutoff to use during validation.
        dlatent_avg_beta=0.995,  # Decay for tracking the moving average of W during training. None = disable.
        is_training=False,  # Network is under training? Enables and disables specific features.
        is_validation=False,  # Network is under validation? Chooses which value to use for truncation_psi.
        components=dnnlib.EasyDict(),  # Container for sub-networks. Retained between calls.
        **kwargs):  # Arguments for sub-networks (G_mapping and G_synthesis).
    """Compose the mapping and synthesis networks into a style generator.

    Latents are mapped to dlatents, optionally used to update the moving
    average of W (training only), optionally truncated towards that
    average (never during training), and finally synthesized.

    Returns:
        The output of the synthesis network.
    """

    def is_constant(value):
        # True for a plain Python value, False for None or a TF expression.
        return value is not None and not tflib.is_tf_expression(value)

    # Validate arguments.
    assert not is_training or not is_validation
    assert isinstance(components, dnnlib.EasyDict)

    # Validation uses its own truncation settings.
    if is_validation:
        truncation_psi, truncation_cutoff = (truncation_psi_val,
                                             truncation_cutoff_val)

    # Drop features that are disabled or reduce to a no-op.
    if is_training or (is_constant(truncation_psi) and truncation_psi == 1):
        truncation_psi = None
    if is_training or (is_constant(truncation_cutoff)
                       and truncation_cutoff <= 0):
        truncation_cutoff = None
    if not is_training or (is_constant(dlatent_avg_beta)
                           and dlatent_avg_beta == 1):
        dlatent_avg_beta = None

    # Setup components; they are created once and then reused.
    if "synthesis" not in components:
        components.synthesis = tflib.Network(num_inputs=1,
                                             name="G_synthesis",
                                             func_name=G_synthesis,
                                             **kwargs)
    synthesis_shape = components.synthesis.input_shape
    num_layers = synthesis_shape[1]
    dlatent_size = synthesis_shape[2]
    if "mapping" not in components:
        components.mapping = tflib.Network(num_inputs=2,
                                           name="G_mapping",
                                           func_name=G_mapping,
                                           dlatent_broadcast=num_layers,
                                           **kwargs)

    # Setup variables.
    dlatent_avg = tf.get_variable(
        "dlatent_avg",
        shape=[dlatent_size],
        initializer=tf.initializers.zeros(),
        trainable=False,
    )

    # Evaluate mapping network.
    dlatents = components.mapping.get_output_for(latents_in, labels_in,
                                                 **kwargs)

    # Update moving average of W.
    if dlatent_avg_beta is not None:
        with tf.variable_scope("DlatentAvg"):
            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
            new_avg = tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta)
            update_op = tf.assign(dlatent_avg, new_avg)
            # Tie the update to the dlatents so it runs whenever they do.
            with tf.control_dependencies([update_op]):
                dlatents = tf.identity(dlatents)

    # Apply truncation trick; both psi and cutoff must be enabled.
    if not (truncation_psi is None or truncation_cutoff is None):
        with tf.variable_scope("Truncation"):
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            ones = np.ones(layer_idx.shape, dtype=np.float32)
            coefs = tf.where(layer_idx < truncation_cutoff,
                             truncation_psi * ones, ones)
            dlatents = tflib.lerp(dlatent_avg, dlatents, coefs)

    # Evaluate synthesis network.
    return components.synthesis.get_output_for(dlatents, **kwargs)
def grow(x, res, lod):
    """Recursively build the image output for resolution level ``res``.

    ``block`` is applied to ``x``; the resulting RGB image is upsampled by
    ``2**lod``. Three cases are chained with ``cset``, all compared
    against the enclosing ``lod_in``: the plain RGB output, a blend with
    the upsampled RGB of the previous resolution when ``lod_in > lod``,
    and (only for ``lod > 0``) recursion into the next resolution when
    ``lod_in < lod``.
    """
    y = block(res, x)

    def plain_rgb():
        # RGB of this resolution, upsampled by 2**lod.
        return naive_upsample_2d(torgb(res, y), factor=2**lod)

    def faded_rgb():
        # Blend this resolution's RGB with the previous resolution's
        # upsampled RGB, weighted by lod_in - lod.
        blended = tflib.lerp(torgb(res, y),
                             upsample_2d(torgb(res - 1, x)), lod_in - lod)
        return naive_upsample_2d(blended, factor=2**lod)

    build_img = cset(plain_rgb, (lod_in > lod), faded_rgb)
    if lod > 0:
        build_img = cset(build_img, (lod_in < lod),
                         lambda: grow(y, res + 1, lod - 1))
    return build_img()
def _evaluate(self, Gs, num_gpus):
    """Report the mean scaled perceptual distance between nearby latents.

    Works for generators with ``components.mapping`` / ``.synthesis``
    (StyleGAN) and falls back, on ``AttributeError``, to calling the
    generator directly (ProGAN). Pairs of random latents are interpolated
    at ``t`` and ``t + self.epsilon``, both points are rendered, resized
    to 256x256, and the perceptual distance between the two images is
    scaled by ``1 / epsilon**2``. Values outside the 1st..99th percentile
    are discarded before the mean is passed to ``self._report_result``.
    """
    per_gpu = self.minibatch_per_gpu
    minibatch_size = num_gpus * per_gpu

    # ---- Graph construction: one tower per GPU. ----
    distance_expr = []
    for gpu_idx in range(num_gpus):
        with tf.device('/gpu:%d' % gpu_idx):
            Gs_clone = Gs.clone()
            try:  # StyleGAN
                noise_vars = [
                    v for key, v in
                    Gs_clone.components.synthesis.vars.items()
                    if key.startswith('noise')
                ]
            except AttributeError:  # ProGAN
                noise_vars = []

            # Two latents per sample (the endpoints), one t per sample.
            # Unless sampling is 'full', t is always 0.
            lat_t01 = tf.random_normal([per_gpu * 2] +
                                       Gs_clone.input_shape[1:])
            t_max = 1.0 if self.sampling == 'full' else 0.0
            lerp_t = tf.random_uniform([per_gpu], 0.0, t_max)

            if self.space == 'w':
                # Map first, then interpolate linearly in W.
                dlat_t01 = Gs_clone.components.mapping.get_output_for(
                    lat_t01, None, is_validation=True)
                w_a, w_b = dlat_t01[0::2], dlat_t01[1::2]
                w_at_t = tflib.lerp(w_a, w_b,
                                    lerp_t[:, np.newaxis, np.newaxis])
                w_at_t_eps = tflib.lerp(
                    w_a, w_b,
                    lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)
                # Interleave (t, t + eps) pairs back into batch order.
                dlat_e01 = tf.reshape(
                    tf.stack([w_at_t, w_at_t_eps], axis=1), dlat_t01.shape)
            else:  # space == 'z'
                # Interpolate in Z with slerp, then map if possible.
                z_a, z_b = lat_t01[0::2], lat_t01[1::2]
                z_at_t = slerp(z_a, z_b, lerp_t[:, np.newaxis])
                z_at_t_eps = slerp(z_a, z_b,
                                   lerp_t[:, np.newaxis] + self.epsilon)
                lat_e01 = tf.reshape(
                    tf.stack([z_at_t, z_at_t_eps], axis=1), lat_t01.shape)
                try:  # StyleGAN:
                    dlat_e01 = Gs_clone.components.mapping.get_output_for(
                        lat_e01, None, is_validation=True)
                except AttributeError:  # ProGAN
                    dlat_e01 = lat_e01

            # Synthesize with the same noise inputs for the whole minibatch.
            noise_inits = [v.initializer for v in noise_vars]
            with tf.control_dependencies(noise_inits):
                try:  # StyleGAN
                    images = Gs_clone.components.synthesis.get_output_for(
                        dlat_e01, is_validation=True, randomize_noise=False)
                except AttributeError:  # ProGAN
                    images = Gs_clone.get_output_for(dlat_e01,
                                                     None,
                                                     is_validation=True,
                                                     randomize_noise=False)

            side = images.shape[2]
            if side < 256:
                # Upscale to 256x256 for VGG; resize runs in channels-last
                # layout, so transpose there and back.
                images = tf.transpose(
                    images, [0, 2, 3, 1])  # (B, C, H, W) --> (B, H, W, C)
                images = tf.compat.v1.image.resize(images, (256, 256))
                images = tf.transpose(
                    images,
                    [0, 3, 1, 2])  # (B, 256, 256, C) --> (B, C, 256, 256)
            elif side > 256:
                # Box-downsample to 256x256 when the image is larger.
                factor = images.shape[2] // 256
                boxed_shape = [
                    -1, images.shape[1], images.shape[2] // factor, factor,
                    images.shape[3] // factor, factor
                ]
                images = tf.reduce_mean(tf.reshape(images, boxed_shape),
                                        axis=[3, 5])

            # Rescale dynamic range from [-1,1] to [0,255] for VGG.
            images = (images + 1) * (255 / 2)

            # Even rows are the images at t, odd rows the images at t + eps.
            distance_measure = misc.load_pkl(
                'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2'
            )  # vgg16_zhang_perceptual.pkl
            pair_distance = distance_measure.get_output_for(
                images[0::2], images[1::2])
            distance_expr.append(pair_distance * (1 / self.epsilon**2))

    # ---- Sampling loop. ----
    collected = []
    for _ in range(0, self.num_samples, minibatch_size):
        collected += tflib.run(distance_expr)
    all_distances = np.concatenate(collected, axis=0)

    # ---- Reject outliers outside the 1st..99th percentile. ----
    lo = np.percentile(all_distances, 1, interpolation='lower')
    hi = np.percentile(all_distances, 99, interpolation='higher')
    in_range = np.logical_and(lo <= all_distances, all_distances <= hi)
    filtered_distances = np.extract(in_range, all_distances)
    self._report_result(np.mean(filtered_distances))
def G_style(
        latents_in,  # First input: Latent vectors (Z) [minibatch, latent_size].
        labels_in,  # Second input: Conditioning labels [minibatch, label_size].
        truncation_psi=0.7,  # Style strength multiplier for the truncation trick. None = disable.
        truncation_cutoff=8,  # Number of layers for which to apply the truncation trick. None = disable.
        truncation_psi_val=None,  # Value for truncation_psi to use during validation.
        truncation_cutoff_val=None,  # Value for truncation_cutoff to use during validation.
        dlatent_avg_beta=0.995,  # Decay for tracking the moving average of W during training. None = disable.
        style_mixing_prob=0.9,  # Probability of mixing styles during training. None = disable.
        is_training=False,  # Network is under training? Enables and disables specific features.
        is_validation=False,  # Network is under validation? Chooses which value to use for truncation_psi.
        is_template_graph=False,  # True = template graph constructed by the Network class, False = actual evaluation.
        components=dnnlib.EasyDict(),  # Container for sub-networks. Retained between calls.
        **kwargs):  # Arguments for sub-networks (G_mapping and G_synthesis).
    """Compose the mapping and synthesis sub-networks into one generator.

    The latents are mapped to per-layer dlatents, which are then optionally
    post-processed before synthesis:

    * training only: the moving average of W is updated and style mixing is
      applied;
    * non-training only: the truncation trick pulls the first
      ``truncation_cutoff`` layers towards the tracked W average.

    Returns the synthesis output wrapped in an identity op named
    ``images_out``.
    """

    def _is_static(value):
        # A concrete Python-side setting: neither disabled nor a TF expression.
        return value is not None and not tflib.is_tf_expression(value)

    # Validate arguments.
    assert not (is_training and is_validation)
    assert isinstance(components, dnnlib.EasyDict)

    # Resolve which optional features are active for this mode.
    if is_validation:
        truncation_psi, truncation_cutoff = truncation_psi_val, truncation_cutoff_val
    if is_training or (_is_static(truncation_psi) and truncation_psi == 1):
        truncation_psi = None
    if is_training or (_is_static(truncation_cutoff) and truncation_cutoff <= 0):
        truncation_cutoff = None
    if not is_training or (_is_static(dlatent_avg_beta) and dlatent_avg_beta == 1):
        dlatent_avg_beta = None
    if not is_training or (_is_static(style_mixing_prob) and style_mixing_prob <= 0):
        style_mixing_prob = None

    # Setup components.
    if 'synthesis' not in components:
        components.synthesis = tflib.Network(
            'G_synthesis', func_name=G_synthesis, **kwargs)
    synthesis_in_shape = components.synthesis.input_shape
    num_layers = synthesis_in_shape[1]
    dlatent_size = synthesis_in_shape[2]
    if 'mapping' not in components:
        components.mapping = tflib.Network(
            'G_mapping', func_name=G_mapping,
            dlatent_broadcast=num_layers, **kwargs)

    # Layer indices shaped to broadcast over [minibatch, layer, component].
    layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]

    # Setup variables.
    lod_in = tf.get_variable(
        'lod', initializer=np.float32(0), trainable=False)
    dlatent_avg = tf.get_variable(
        'dlatent_avg', shape=[dlatent_size],
        initializer=tf.initializers.zeros(), trainable=False)

    # Evaluate mapping network.
    dlatents = components.mapping.get_output_for(latents_in, labels_in, **kwargs)

    # Update moving average of W.
    if dlatent_avg_beta is not None:
        with tf.variable_scope('DlatentAvg'):
            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
            new_avg = tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta)
            update_op = tf.assign(dlatent_avg, new_avg)
            # Tie the update to the dlatents so it runs whenever they are used.
            with tf.control_dependencies([update_op]):
                dlatents = tf.identity(dlatents)

    # Perform style mixing regularization.
    if style_mixing_prob is not None:
        with tf.name_scope('StyleMix'):
            latents2 = tf.random_normal(tf.shape(latents_in))
            dlatents2 = components.mapping.get_output_for(
                latents2, labels_in, **kwargs)
            cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2

            def _random_cutoff():
                return tf.random_uniform([], 1, cur_layers, dtype=tf.int32)

            def _no_mixing():
                return cur_layers

            do_mix = tf.random_uniform([], 0.0, 1.0) < style_mixing_prob
            mixing_cutoff = tf.cond(do_mix, _random_cutoff, _no_mixing)
            # Layers below the cutoff keep the first style, the rest take the second.
            keep_first = tf.broadcast_to(
                layer_idx < mixing_cutoff, tf.shape(dlatents))
            dlatents = tf.where(keep_first, dlatents, dlatents2)

    # Apply truncation trick.
    if truncation_psi is not None and truncation_cutoff is not None:
        with tf.variable_scope('Truncation'):
            ones = np.ones(layer_idx.shape, dtype=np.float32)
            coefs = tf.where(
                layer_idx < truncation_cutoff, truncation_psi * ones, ones)
            dlatents = tflib.lerp(dlatent_avg, dlatents, coefs)

    # Evaluate synthesis network.
    # The synthesis network's own 'lod' variable is synced from lod_in first.
    sync_lod = tf.assign(components.synthesis.find_var('lod'), lod_in)
    with tf.control_dependencies([sync_lod]):
        images_out = components.synthesis.get_output_for(
            dlatents, force_clean_graph=is_template_graph, **kwargs)
    return tf.identity(images_out, name='images_out')
def G_logistic_ns_pathreg_interpolate(G, D, opt, training_set, minibatch_size,
                                      pl_minibatch_shrink=2, pl_decay=0.01,
                                      pl_weight=2.0):
    """Non-saturating logistic generator loss with path length regularization,
    where the first sample of the minibatch is an interpolated sample.

    Sample 0 of the minibatch is replaced by a blend between itself and one
    extra random (latent, label) pair. The same random blend factor is applied
    to the mapped latent and to the label, so both describe the same point
    between the two samples.

    Args:
        G: Generator network exposing ``components.mapping_latent``,
            ``components.mapping_label`` and ``components.synthesis``.
        D: Discriminator network.
        opt: Unused.
        training_set: Dataset object providing ``get_random_labels_tf``.
        minibatch_size: Number of samples in the minibatch.
        pl_minibatch_shrink: When > 1, the regularizer is evaluated on a fresh
            minibatch of ``minibatch_size // pl_minibatch_shrink`` samples.
        pl_decay: Step size of the moving average of the path lengths.
        pl_weight: Multiplier applied to the path length penalty.

    Returns:
        Tuple ``(loss, reg)``.
    """
    _ = opt
    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    labels = training_set.get_random_labels_tf(minibatch_size)
    latent_interpolate = tf.random_normal([1] + G.input_shapes[0][1:])
    label_interpolate = training_set.get_random_labels_tf(1)

    # 1. Get dlatents for the batch and for the extra sample by running
    #    G.mapping_latent.
    #    NOTE(review): assumed to be [minibatch, num_layers, dlatent_size] and
    #    [1, num_layers, dlatent_size] respectively -- confirm against
    #    mapping_latent.
    dlatents = G.components.mapping_latent.get_output_for(
        latents, is_training=True)
    dlatent_interpolate = G.components.mapping_latent.get_output_for(
        latent_interpolate, is_training=True)

    # 2. Define an interpolation magnitude: a single random value in [0, 1].
    interpolation_mag = tf.clip_by_value(
        tf.random.normal([1], 0.5, 0.15), 0, 1)

    # 3. Interpolate between dlatents[0] and the extra dlatent.
    #    Both operands are indexed down to a single sample so that the
    #    expand_dims below restores exactly one batch axis; lerping against
    #    the un-indexed extra dlatent would already carry that axis and the
    #    result could not be concatenated with dlatents[1:].
    dlatent_mixed = tf.expand_dims(
        tflib.lerp(dlatents[0], dlatent_interpolate[0], interpolation_mag),
        axis=0)

    # 4. Replace dlatents[0] with the interpolated dlatent.
    dlatents_replaced = tf.concat([dlatent_mixed, dlatents[1:]], axis=0)

    # 5. Mix the labels with the same blend direction as the dlatents, and
    #    restore the batch axis so the result can be concatenated with
    #    labels[1:].
    mixed_label = tf.expand_dims(
        tflib.lerp(labels[0], label_interpolate[0], interpolation_mag),
        axis=0)

    # 6. Replace labels[0] with the new mixed label.
    labels = tf.concat([mixed_label, labels[1:]], axis=0)

    # 7. Run G.mapping_label.
    dlabel = G.components.mapping_label.get_output_for(
        labels, is_training=True)

    # 8. Add the mapped vectors.
    fake_dlatents_out = dlatents_replaced + dlabel

    # 9. Run G.synthesis with the new dlatents and generate the fake images.
    fake_images_out = G.components.synthesis.get_output_for(
        fake_dlatents_out, is_training=True)
    fake_scores_out = D.get_output_for(
        fake_images_out, labels, is_training=True)
    loss = tf.nn.softplus(-fake_scores_out)  # -log(sigmoid(fake_scores_out))

    # Path length regularization.
    with tf.name_scope('PathReg'):

        # Evaluate the regularization term using a smaller minibatch to
        # conserve memory. Otherwise the images/dlatents built above are reused.
        if pl_minibatch_shrink > 1:
            pl_minibatch = minibatch_size // pl_minibatch_shrink
            pl_latents = tf.random_normal(
                [pl_minibatch] + G.input_shapes[0][1:])
            pl_labels = training_set.get_random_labels_tf(pl_minibatch)
            fake_images_out, fake_dlatents_out = G.get_output_for(
                pl_latents, pl_labels, is_training=True, return_dlatents=True)

        # Compute |J*y|.
        pl_noise = (tf.random_normal(tf.shape(fake_images_out))
                    / np.sqrt(np.prod(G.output_shape[2:])))
        pl_grads = tf.gradients(
            tf.reduce_sum(fake_images_out * pl_noise), [fake_dlatents_out])[0]
        pl_lengths = tf.sqrt(
            tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1))
        pl_lengths = autosummary('Loss/pl_lengths', pl_lengths)

        # Track exponential moving average of |J*y|.
        with tf.control_dependencies(None):
            pl_mean_var = tf.Variable(
                name='pl_mean', trainable=False, initial_value=0.0,
                dtype=tf.float32)
        pl_mean = pl_mean_var + pl_decay * (
            tf.reduce_mean(pl_lengths) - pl_mean_var)
        pl_update = tf.assign(pl_mean_var, pl_mean)

        # Calculate (|J*y|-a)^2.
        with tf.control_dependencies([pl_update]):
            pl_penalty = tf.square(pl_lengths - pl_mean)
            pl_penalty = autosummary('Loss/pl_penalty', pl_penalty)

        # Apply weight.
        #
        # Note: The division in pl_noise decreases the weight by num_pixels, and the reduce_mean
        # in pl_lengths decreases it by num_affine_layers. The effective weight then becomes:
        #
        # gamma_pl = pl_weight / num_pixels / num_affine_layers
        # = 2 / (r^2) / (log2(r) * 2 - 2)
        # = 1 / (r^2 * (log2(r) - 1))
        # = ln(2) / (r^2 * (ln(r) - ln(2))
        #
        reg = pl_penalty * pl_weight

    return loss, reg