Ejemplo n.º 1
0
def process_reals(x, lod, mirror_augment, drange_data, drange_net):
    """Build the graph ops that prepare a minibatch of real images.

    Steps, in order:
      1. Cast to float32 and remap the value range from ``drange_data`` to
         ``drange_net`` via ``misc.adjust_dynamic_range``.
      2. If ``mirror_augment`` is truthy, draw one uniform number per image
         and replace the image by its reversal along axis 3 when the draw
         is >= 0.5.
      3. Blend every image with a copy averaged over 2x2 blocks of axes 2/3,
         using the fractional part of ``lod`` as the ``tflib.lerp`` weight.
      4. Repeat every element ``2**floor(lod)`` times along axes 2 and 3.

    Args:
        x: Rank-4 image tensor; axes 2 and 3 are treated as the spatial axes
            (presumably NCHW -- confirm against the caller).
        lod: Level-of-detail value; may be fractional.
        mirror_augment: Flag checked once, at graph-construction time.
        drange_data: Source range handed to ``misc.adjust_dynamic_range``.
        drange_net: Target range handed to ``misc.adjust_dynamic_range``.

    Returns:
        The processed image tensor.
    """
    with tf.name_scope('ProcessReals'):
        with tf.name_scope('DynamicRange'):
            as_float = tf.cast(x, tf.float32)
            x = misc.adjust_dynamic_range(as_float, drange_data, drange_net)

        if mirror_augment:
            with tf.name_scope('MirrorAugment'):
                dims = tf.shape(x)
                # One draw per image, then tiled so the whole image shares it.
                draw = tf.random_uniform([dims[0], 1, 1, 1], 0.0, 1.0)
                draw = tf.tile(draw, [1, dims[1], dims[2], dims[3]])
                keep_original = draw < 0.5
                mirrored = tf.reverse(x, axis=[3])
                x = tf.where(keep_original, x, mirrored)

        # Smooth crossfade between consecutive levels-of-detail.
        with tf.name_scope('FadeLOD'):
            dims = tf.shape(x)
            # Expose 2x2 blocks on axes 3 and 5, average them, then tile the
            # average back so the result has the same shape as x.
            blocks = tf.reshape(
                x, [-1, dims[1], dims[2] // 2, 2, dims[3] // 2, 2])
            coarse = tf.reduce_mean(blocks, axis=[3, 5], keepdims=True)
            coarse = tf.tile(coarse, [1, 1, 1, 2, 1, 2])
            coarse = tf.reshape(coarse, [-1, dims[1], dims[2], dims[3]])
            lod_fraction = lod - tf.floor(lod)
            x = tflib.lerp(x, coarse, lod_fraction)

        # Upscale to match the expected input/output size of the networks.
        with tf.name_scope('UpscaleLOD'):
            dims = tf.shape(x)
            repeat = tf.cast(2**tf.floor(lod), tf.int32)
            # Insert singleton axes after each spatial axis and tile along
            # them: this replicates every pixel `repeat` times per axis.
            x = tf.reshape(x, [-1, dims[1], dims[2], 1, dims[3], 1])
            x = tf.tile(x, [1, 1, 1, repeat, 1, repeat])
            x = tf.reshape(
                x, [-1, dims[1], dims[2] * repeat, dims[3] * repeat])
        return x
Ejemplo n.º 2
0
 def grow(x, res, lod):
     """Recursively build the output image for one level of detail.

     Applies ``block(res, x)`` and registers up to three lazily built
     candidates through ``cset``; the composed callable is invoked once at
     the end and its result returned.

     Candidates:
       * default: ``torgb(res, y)`` upscaled by ``2**lod``;
       * with condition ``lod_in > lod``: ``tflib.lerp`` between
         ``torgb(res, y)`` and the upscaled ``torgb(res - 1, x)`` using weight
         ``lod_in - lod``, then upscaled by ``2**lod``;
       * with condition ``lod_in < lod`` (only registered when ``lod > 0``):
         the recursive call ``grow(y, res + 1, lod - 1)``.

     NOTE(review): ``cset``, ``block``, ``torgb``, ``upscale2d`` and
     ``lod_in`` come from the enclosing scope, which is not visible here.
     ``cset(cur, cond, new)`` presumably returns a callable that picks
     ``new`` when ``cond`` holds and ``cur`` otherwise -- confirm.
     """
     y = block(res, x)

     def plain_image():
         return upscale2d(torgb(res, y), 2**lod)

     def faded_image():
         blended = tflib.lerp(torgb(res, y), upscale2d(torgb(res - 1, x)),
                              lod_in - lod)
         return upscale2d(blended, 2**lod)

     def deeper_image():
         return grow(y, res + 1, lod - 1)

     img = cset(plain_image, (lod_in > lod), faded_image)
     if lod > 0:
         img = cset(img, (lod_in < lod), deeper_image)
     return img()
Ejemplo n.º 3
0
 def grow(res, lod):
     """Recursively build the feature tensor for one level of detail.

     The input to ``block(..., res)`` is by default
     ``fromrgb(downscale2d(images_in, 2**lod), res)``; when ``lod > 0`` an
     alternative, the recursive ``grow(res + 1, lod - 1)``, is registered
     through ``cset`` under the condition ``lod_in < lod``.

     The result is by default the block output itself; when ``res > 2`` an
     alternative is registered under ``lod_in > lod``: ``tflib.lerp`` between
     the block output and
     ``fromrgb(downscale2d(images_in, 2**(lod + 1)), res - 1)`` with weight
     ``lod_in - lod``.

     NOTE(review): ``cset``, ``block``, ``fromrgb``, ``downscale2d``,
     ``images_in`` and ``lod_in`` come from the enclosing scope, which is not
     visible here. ``cset(cur, cond, new)`` presumably returns a callable
     that picks ``new`` when ``cond`` holds and ``cur`` otherwise -- confirm.
     """

     def direct_input():
         return fromrgb(downscale2d(images_in, 2**lod), res)

     def finer_input():
         return grow(res + 1, lod - 1)

     pick_input = direct_input
     if lod > 0:
         pick_input = cset(pick_input, (lod_in < lod), finer_input)
     features = block(pick_input(), res)

     def plain_features():
         return features

     def faded_features():
         return tflib.lerp(
             features,
             fromrgb(downscale2d(images_in, 2**(lod + 1)), res - 1),
             lod_in - lod)

     pick_output = plain_features
     if res > 2:
         pick_output = cset(pick_output, (lod_in > lod), faded_features)
     return pick_output()
Ejemplo n.º 4
0
def D_wgan_gp(
    G,
    D,
    opt,
    training_set,  # pylint: disable=unused-argument
    minibatch_size,
    reals,
    labels,
    wgan_lambda=10.0,  # Weight for the gradient penalty term.
    wgan_epsilon=0.001,  # Weight for the epsilon term, \epsilon_{drift}.
    wgan_target=1.0):  # Target value for gradient magnitudes.
    """Build the discriminator loss with gradient and epsilon penalties.

    The loss is the sum of three terms:
      * ``fake_scores - real_scores``;
      * ``wgan_lambda / wgan_target**2`` times the squared difference between
        ``wgan_target`` and the gradient norm of ``D`` at random mixtures of
        real and generated images;
      * ``wgan_epsilon`` times the squared real scores.

    Args:
        G: Generator; must provide ``input_shapes`` and ``get_output_for``.
        D: Discriminator; must provide ``get_output_for``.
        opt: Object providing ``apply_loss_scaling`` / ``undo_loss_scaling``.
        training_set: Not used by this function.
        minibatch_size: Number of latents / mixing factors to draw.
        reals: Real image tensor; the mixing factors have shape
            ``[minibatch_size, 1, 1, 1]`` and gradients are reduced over
            axes 1-3, so rank 4 is expected.
        labels: Forwarded to every ``G`` / ``D`` evaluation.
        wgan_lambda: Weight for the gradient penalty term.
        wgan_epsilon: Weight for the squared-real-score term.
        wgan_target: Target value for gradient magnitudes.

    Returns:
        The loss tensor; no reduction over the minibatch is applied here.
    """
    # Scores for real images and for freshly generated ones.
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    generated = G.get_output_for(z, labels, is_training=True)
    score_real = fp32(D.get_output_for(reals, labels, is_training=True))
    score_fake = fp32(D.get_output_for(generated, labels, is_training=True))
    score_real = autosummary('Loss/scores/real', score_real)
    score_fake = autosummary('Loss/scores/fake', score_fake)
    loss = score_fake - score_real

    with tf.name_scope('GradientPenalty'):
        # One mixing factor per sample, broadcast over the remaining axes.
        alpha = tf.random_uniform([minibatch_size, 1, 1, 1],
                                  0.0,
                                  1.0,
                                  dtype=generated.dtype)
        reals_cast = tf.cast(reals, generated.dtype)
        blended = tflib.lerp(reals_cast, generated, alpha)
        score_blend = fp32(
            D.get_output_for(blended, labels, is_training=True))
        score_blend = autosummary('Loss/scores/mixed', score_blend)
        # Differentiate the (loss-scaled) score sum w.r.t. the blended
        # images, then undo the scaling on the resulting gradients.
        scaled_total = opt.apply_loss_scaling(tf.reduce_sum(score_blend))
        raw_grads = tf.gradients(scaled_total, [blended])[0]
        grads = opt.undo_loss_scaling(fp32(raw_grads))
        sum_sq = tf.reduce_sum(tf.square(grads), axis=[1, 2, 3])
        grad_norms = tf.sqrt(sum_sq)
        grad_norms = autosummary('Loss/mixed_norms', grad_norms)
        penalty = tf.square(grad_norms - wgan_target)
    loss = loss + penalty * (wgan_lambda / (wgan_target**2))

    with tf.name_scope('EpsilonPenalty'):
        drift = autosummary('Loss/epsilon_penalty', tf.square(score_real))
    loss = loss + drift * wgan_epsilon
    return loss
Ejemplo n.º 5
0
def G_style(
    latents_in,  # First input: Latent vectors (Z) [minibatch, latent_size].
    labels_in,  # Second input: Conditioning labels [minibatch, label_size].
    truncation_psi=0.7,  # Style strength multiplier for the truncation trick. None = disable.
    truncation_cutoff=8,  # Number of layers for which to apply the truncation trick. None = disable.
    truncation_psi_val=None,  # Value for truncation_psi to use during validation.
    truncation_cutoff_val=None,  # Value for truncation_cutoff to use during validation.
    dlatent_avg_beta=0.995,  # Decay for tracking the moving average of W during training. None = disable.
    style_mixing_prob=0.9,  # Probability of mixing styles during training. None = disable.
    is_training=False,  # Network is under training? Enables and disables specific features.
    is_validation=False,  # Network is under validation? Chooses which value to use for truncation_psi.
    is_template_graph=False,  # True = template graph constructed by the Network class, False = actual evaluation.
    # NOTE: this default is created once, when the function is defined, so
    # every call that does not pass `components` shares the same container.
    components=dnnlib.EasyDict(
    ),  # Container for sub-networks. Retained between calls.
    **kwargs):  # Arguments for sub-networks (G_mapping and G_synthesis).
    """Build the generator graph: mapping network followed by synthesis network.

    The function creates (or reuses from ``components``) the ``G_synthesis``
    and ``G_mapping`` sub-networks, maps ``latents_in`` / ``labels_in`` to
    dlatents, and then optionally applies, in this order:

      1. an update of the ``dlatent_avg`` variable (only when
         ``dlatent_avg_beta`` is still enabled after argument validation);
      2. style mixing with a second, randomly drawn set of latents (only when
         ``style_mixing_prob`` is still enabled);
      3. the truncation trick (only when both ``truncation_psi`` and
         ``truncation_cutoff`` are still enabled).

    Finally the synthesis network is evaluated on the resulting dlatents.

    Features 1 and 2 are disabled unless ``is_training`` is true; feature 3 is
    disabled whenever ``is_training`` is true. ``is_training`` and
    ``is_validation`` must not both be true.

    Returns:
        The synthesis network output, passed through ``tf.identity`` with the
        name ``'images_out'``.
    """

    # Validate arguments.
    assert not is_training or not is_validation
    assert isinstance(components, dnnlib.EasyDict)
    if is_validation:
        truncation_psi = truncation_psi_val
        truncation_cutoff = truncation_cutoff_val
    # A feature is switched off by setting its parameter to None. Besides the
    # training-mode rule, a plain Python value (not a TF expression) equal to
    # psi == 1, cutoff <= 0, beta == 1 or prob <= 0 also switches it off.
    if is_training or (truncation_psi is not None
                       and not tflib.is_tf_expression(truncation_psi)
                       and truncation_psi == 1):
        truncation_psi = None
    if is_training or (truncation_cutoff is not None
                       and not tflib.is_tf_expression(truncation_cutoff)
                       and truncation_cutoff <= 0):
        truncation_cutoff = None
    if not is_training or (dlatent_avg_beta is not None
                           and not tflib.is_tf_expression(dlatent_avg_beta)
                           and dlatent_avg_beta == 1):
        dlatent_avg_beta = None
    if not is_training or (style_mixing_prob is not None
                           and not tflib.is_tf_expression(style_mixing_prob)
                           and style_mixing_prob <= 0):
        style_mixing_prob = None

    # Setup components.
    # Sub-networks are only constructed when missing from `components`, so a
    # container passed in (or the shared default) keeps them across calls.
    if 'synthesis' not in components:
        components.synthesis = tflib.Network('G_synthesis',
                                             func_name=G_synthesis,
                                             **kwargs)
    # The synthesis input shape is read as [?, num_layers, dlatent_size].
    num_layers = components.synthesis.input_shape[1]
    dlatent_size = components.synthesis.input_shape[2]
    if 'mapping' not in components:
        components.mapping = tflib.Network('G_mapping',
                                           func_name=G_mapping,
                                           dlatent_broadcast=num_layers,
                                           **kwargs)

    # Setup variables.
    # Both variables are non-trainable: 'lod' starts at float32 0 and
    # 'dlatent_avg' is a zero vector of length dlatent_size.
    lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False)
    dlatent_avg = tf.get_variable('dlatent_avg',
                                  shape=[dlatent_size],
                                  initializer=tf.initializers.zeros(),
                                  trainable=False)

    # Evaluate mapping network.
    dlatents = components.mapping.get_output_for(latents_in, labels_in,
                                                 **kwargs)

    # Update moving average of W.
    if dlatent_avg_beta is not None:
        with tf.variable_scope('DlatentAvg'):
            # Mean over the minibatch of the dlatent at layer index 0.
            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
            # Assumes tflib.lerp(a, b, t) == a + (b - a) * t, i.e. the new
            # average keeps a `beta` share of the old one -- TODO confirm.
            update_op = tf.assign(
                dlatent_avg,
                tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
            # Re-emit dlatents behind a control dependency so that the
            # assignment runs whenever the dlatents are consumed downstream.
            with tf.control_dependencies([update_op]):
                dlatents = tf.identity(dlatents)

    # Perform style mixing regularization.
    if style_mixing_prob is not None:
        with tf.name_scope('StyleMix'):
            # Second, independent set of latents run through the same mapping.
            latents2 = tf.random_normal(tf.shape(latents_in))
            dlatents2 = components.mapping.get_output_for(
                latents2, labels_in, **kwargs)
            # Shape [1, num_layers, 1] so it broadcasts along the layer axis.
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2
            # With probability style_mixing_prob pick a random cutoff in
            # [1, cur_layers); otherwise use cur_layers as the cutoff.
            mixing_cutoff = tf.cond(
                tf.random_uniform([], 0.0, 1.0) < style_mixing_prob,
                lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32),
                lambda: cur_layers)
            # Layers below the cutoff keep the first dlatents; the remaining
            # layers take the second set.
            dlatents = tf.where(
                tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)),
                dlatents, dlatents2)

    # Apply truncation trick.
    if truncation_psi is not None and truncation_cutoff is not None:
        with tf.variable_scope('Truncation'):
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            ones = np.ones(layer_idx.shape, dtype=np.float32)
            # Per-layer coefficient: truncation_psi below the cutoff, 1 at
            # and above it.
            coefs = tf.where(layer_idx < truncation_cutoff,
                             truncation_psi * ones, ones)
            # Interpolate between dlatent_avg and the dlatents using the
            # per-layer coefficients.
            dlatents = tflib.lerp(dlatent_avg, dlatents, coefs)

    # Evaluate synthesis network.
    # The synthesis network's own 'lod' variable is assigned from lod_in
    # before the network is evaluated.
    with tf.control_dependencies(
        [tf.assign(components.synthesis.find_var('lod'), lod_in)]):
        images_out = components.synthesis.get_output_for(
            dlatents, force_clean_graph=is_template_graph, **kwargs)
    return tf.identity(images_out, name='images_out')
Ejemplo n.º 6
0
    def _evaluate(self, Gs, num_gpus):
        """Build the distance graph per GPU, sample it and report the mean.

        On every GPU a clone of ``Gs`` produces image pairs whose latents are
        two interpolation points ``t`` and ``t + self.epsilon`` between the
        same two random latents. Interpolation is done with ``tflib.lerp``
        on the mapping output when ``self.space == 'w'``, otherwise with
        ``slerp`` on the latents themselves. ``t`` is drawn uniformly from
        [0, 1) when ``self.sampling == 'full'`` and is fixed at 0 otherwise.

        Each pair is cropped, downsampled when its cropped height exceeds
        256, rescaled from [-1, 1] to [0, 255] and passed to a pickled
        distance network; the result is multiplied by ``1 / epsilon**2``.

        The graph is run once for every step of
        ``range(0, self.num_samples, minibatch_size)``. Values outside the
        1st..99th percentile range are dropped and the mean of the rest is
        passed to ``self._report_result``.

        Args:
            Gs: Generator network; must support ``clone()``.
            num_gpus: Number of GPUs to build the graph on.
        """
        total_batch = num_gpus * self.minibatch_per_gpu

        # Construct TensorFlow graph.
        per_gpu_distances = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                generator = Gs.clone()
                synthesis_vars = generator.components.synthesis.vars
                noise_vars = [
                    variable for var_name, variable in synthesis_vars.items()
                    if var_name.startswith('noise')
                ]

                # Generate random latents and interpolation t-values.
                # Latents come in consecutive (start, end) pairs, hence * 2.
                latent_shape = ([self.minibatch_per_gpu * 2] +
                                generator.input_shape[1:])
                latent_pairs = tf.random_normal(latent_shape)
                t_upper = 1.0 if self.sampling == 'full' else 0.0
                t = tf.random_uniform([self.minibatch_per_gpu], 0.0, t_upper)

                # Interpolate in W or Z.
                mapping = generator.components.mapping
                if self.space == 'w':
                    w_pairs = mapping.get_output_for(latent_pairs,
                                                     None,
                                                     is_validation=True)
                    w_start, w_end = w_pairs[0::2], w_pairs[1::2]
                    w_at_t = tflib.lerp(w_start, w_end,
                                        t[:, np.newaxis, np.newaxis])
                    w_at_t_eps = tflib.lerp(
                        w_start, w_end,
                        t[:, np.newaxis, np.newaxis] + self.epsilon)
                    # Interleave (t, t + eps) so pairs are adjacent again.
                    interleaved = tf.stack([w_at_t, w_at_t_eps], axis=1)
                    dlatents = tf.reshape(interleaved, w_pairs.shape)
                else:  # space == 'z'
                    z_start, z_end = latent_pairs[0::2], latent_pairs[1::2]
                    z_at_t = slerp(z_start, z_end, t[:, np.newaxis])
                    z_at_t_eps = slerp(z_start, z_end,
                                       t[:, np.newaxis] + self.epsilon)
                    interleaved = tf.stack([z_at_t, z_at_t_eps], axis=1)
                    z_pairs = tf.reshape(interleaved, latent_pairs.shape)
                    dlatents = mapping.get_output_for(z_pairs,
                                                      None,
                                                      is_validation=True)

                # Synthesize images.
                # Use same noise inputs for the entire minibatch.
                noise_inits = [variable.initializer for variable in noise_vars]
                with tf.control_dependencies(noise_inits):
                    images = generator.components.synthesis.get_output_for(
                        dlatents, is_validation=True, randomize_noise=False)

                # Crop only the face region.
                unit = int(images.shape[2] // 8)
                images = images[:, :, unit * 3:unit * 7, unit * 2:unit * 6]

                # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
                if images.shape[2] > 256:
                    factor = images.shape[2] // 256
                    channels = images.shape[1]
                    height = images.shape[2]
                    width = images.shape[3]
                    images = tf.reshape(images, [
                        -1, channels, height // factor, factor,
                        width // factor, factor
                    ])
                    images = tf.reduce_mean(images, axis=[3, 5])

                # Scale dynamic range from [-1,1] to [0,255] for VGG.
                images = (images + 1) * (255 / 2)

                # Evaluate perceptual distance.
                img_at_t, img_at_t_eps = images[0::2], images[1::2]
                # vgg16_zhang_perceptual.pkl
                perceptual_net = misc.load_pkl(
                    'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2'
                )
                raw_distance = perceptual_net.get_output_for(
                    img_at_t, img_at_t_eps)
                per_gpu_distances.append(raw_distance *
                                         (1 / self.epsilon**2))

        # Sampling loop.
        collected = []
        for _ in range(0, self.num_samples, total_batch):
            collected.extend(tflib.run(per_gpu_distances))
        distances = np.concatenate(collected, axis=0)

        # Reject outliers.
        low_cut = np.percentile(distances, 1, interpolation='lower')
        high_cut = np.percentile(distances, 99, interpolation='higher')
        within_range = np.logical_and(low_cut <= distances,
                                      distances <= high_cut)
        kept = np.extract(within_range, distances)
        self._report_result(np.mean(kept))